21. Transformers examples
This chapter demonstrates the Hugging Face pipeline API on five tasks: summarization, text generation, named-entity recognition, question answering, and masked-word filling.
import os

import torch
from transformers import pipeline

# Instantiate the default summarization pipeline (downloads a pretrained
# checkpoint on first use).
summarizer = pipeline('summarization')

# Read one 10-K filing and slice out the business-description passage;
# the slice indices are specific to this particular file.
path = 'D:/Data/Reasonable_10K'
files = os.listdir(path)
text = open(os.path.join(path, files[0])).read()[8824:9567]
text
'Synovis Life Technologies, Inc. is a diversified medical device\n company engaged in developing, manufacturing, marketing and\n selling products for the surgical and interventional treatment\n of disease. Our business is conducted in two operating segments,\n the surgical business and the interventional business, with\n segmentation based upon the similarities of the underlying\n business operations, products and markets of each.\n\nOur surgical business develops, manufactures, markets and sells\n implantable biomaterial products, devices for microsurgery and\n surgical tools, all designed to reduce risk\n and/or \n facilitate critical surgeries, leading to better patient\n outcomes\n and/or lower\n costs.\n'
summarizer(text, max_length=75)
[{'summary_text': ' Synovis Life Technologies, Inc. is a diversified medical device company engaged in developing, manufacturing, marketing and selling products for the surgical and interventional treatment of disease . Our surgical business develops, manufactures, markets and sells implantable biomaterial products, devices for microsurgery and surgical tools .'}]
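The pipeline forwards generation keywords to the underlying model, so the summary length can be bounded from both sides. A minimal sketch, not part of the original run: `min_length` and `max_length` are token counts, and the explicit checkpoint name is an assumption about what is available on the model hub.
# Bound the summary from both sides; both arguments are token counts.
summarizer(text, min_length=30, max_length=75)

# A specific checkpoint can be named instead of relying on the default,
# e.g. a distilled BART summarizer (assumed to exist on the hub):
bart_summarizer = pipeline('summarization', model='sshleifer/distilbart-cnn-12-6')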
text_gen = pipeline('text-generation')
Some weights of GPT2Model were not initialized from the model checkpoint at gpt2 and are newly initialized: ['h.0.attn.masked_bias', 'h.1.attn.masked_bias', 'h.2.attn.masked_bias', 'h.3.attn.masked_bias', 'h.4.attn.masked_bias', 'h.5.attn.masked_bias', 'h.6.attn.masked_bias', 'h.7.attn.masked_bias', 'h.8.attn.masked_bias', 'h.9.attn.masked_bias', 'h.10.attn.masked_bias', 'h.11.attn.masked_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
text_gen('The yearly profit was')
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
[{'generated_text': 'The yearly profit was paid out in part to the local community, and in part to an army of loyalists who, in the year 1942, won nearly four million gold rings."\n\nThe war had been a "tongue in cheek"'}]
text_gen('Mickey Mouse is a friend of')
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
[{'generated_text': 'Mickey Mouse is a friend of mine and I always take this to the house. There is a big room there for my son and his friends. When we do an event with friends we do a lot of work for them. The room is so'}]
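Generation samples tokens, so repeated calls produce different continuations. A minimal sketch of making a run reproducible and requesting several samples at once (`set_seed` is exported by recent transformers releases; the parameter values are illustrative):
from transformers import set_seed

set_seed(42)  # fix the RNG so the sampled continuations are reproducible
text_gen('The yearly profit was',
         max_length=50,           # total length in tokens, prompt included
         num_return_sequences=3,  # sample three alternative continuations
         do_sample=True)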
ner = pipeline('ner', model='dslim/bert-base-NER')
ner('Mickey Mouse is a friend of mine since he was eight. He lives in California.')
[{'word': 'Mickey',
'score': 0.9994354248046875,
'entity': 'B-PER',
'index': 1,
'start': 0,
'end': 6},
{'word': 'Mouse',
'score': 0.9812304377555847,
'entity': 'I-PER',
'index': 2,
'start': 7,
'end': 12},
{'word': 'California',
'score': 0.9996417164802551,
'entity': 'B-LOC',
'index': 16,
'start': 65,
'end': 75}]
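`Mickey` and `Mouse` come back as separate `B-PER`/`I-PER` pieces because the tagger labels tokens, not spans. A minimal sketch of merging them into whole entities (the keyword is `grouped_entities` in transformers 3.x; newer releases use `aggregation_strategy='simple'` instead):
# Merge B-/I- pieces of the same entity into a single span.
ner_grouped = pipeline('ner', model='dslim/bert-base-NER', grouped_entities=True)
ner_grouped('Mickey Mouse is a friend of mine since he was eight. He lives in California.')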
questions = pipeline('question-answering')
context = """Synovis Life Technologies, Inc. is a diversified medical device company engaged in developing, manufacturing, marketing and
selling products for the surgical and interventional treatment of disease. Our business is conducted in two operating segments,
the surgical business and the interventional business, with segmentation based upon the similarities of the underlying
business operations, products and markets of each. Our surgical business develops, manufactures, markets and sells
implantable biomaterial products, devices for microsurgery and surgical tools, all designed to reduce risk and/or
facilitate critical surgeries, leading to better patient outcomes and/or lower costs."""
questions(question='What Synovis develops?', context=context)
{'score': 0.08829150348901749,
'start': 487,
'end': 519,
'answer': 'implantable biomaterial products'}
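The `start` and `end` fields index into the original context string, so the answer span can be recovered verbatim; a quick check:
result = questions(question='What Synovis develops?', context=context)
context[result['start']:result['end']]  # 'implantable biomaterial products'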
mask = pipeline("fill-mask")
Some weights of RobertaForMaskedLM were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['lm_head.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
mask(f'Tesla produces {mask.tokenizer.mask_token} for the US market.')
[{'sequence': '<s>Tesla produces batteries for the US market.</s>',
'score': 0.2810230851173401,
'token': 10535,
'token_str': 'Ġbatteries'},
{'sequence': '<s>Tesla produces cars for the US market.</s>',
'score': 0.12935157120227814,
'token': 1677,
'token_str': 'Ġcars'},
{'sequence': '<s>Tesla produces EVs for the US market.</s>',
'score': 0.10034559667110443,
'token': 34857,
'token_str': 'ĠEVs'},
{'sequence': '<s>Tesla produces vehicles for the US market.</s>',
'score': 0.049529068171978,
'token': 1734,
'token_str': 'Ġvehicles'},
{'sequence': '<s>Tesla produces rockets for the US market.</s>',
'score': 0.021711379289627075,
'token': 19367,
'token_str': 'Ġrockets'}]
mask(f'Mickey Mouse likes to {mask.tokenizer.mask_token} while walking in a park.')
[{'sequence': '<s>Mickey Mouse likes to fart while walking in a park.</s>',
'score': 0.08874308317899704,
'token': 36762,
'token_str': 'Ġfart'},
{'sequence': '<s>Mickey Mouse likes to smile while walking in a park.</s>',
'score': 0.061683233827352524,
'token': 6675,
'token_str': 'Ġsmile'},
{'sequence': '<s>Mickey Mouse likes to dance while walking in a park.</s>',
'score': 0.05867791920900345,
'token': 3836,
'token_str': 'Ġdance'},
{'sequence': '<s>Mickey Mouse likes to poop while walking in a park.</s>',
'score': 0.051350075751543045,
'token': 36733,
'token_str': 'Ġpoop'},
{'sequence': '<s>Mickey Mouse likes to duck while walking in a park.</s>',
'score': 0.04298650845885277,
'token': 15223,
'token_str': 'Ġduck'}]
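By default the pipeline returns its five best candidates. A minimal sketch of narrowing that down; depending on the installed release, the count is controlled by `top_k` at call time (transformers 4.x) or `topk` when constructing the pipeline (3.x):
# Ask for the three highest-scoring fillers only (transformers 4.x keyword).
mask(f'Tesla produces {mask.tokenizer.mask_token} for the US market.', top_k=3)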