Running BERT
Preliminaries
- Install miniconda: https://docs.conda.io/en/latest/miniconda.html
- Create a virtual environment with conda (replace my-env-name with your favorite name!)
conda create -n my-env-name python=3.8
- Activate the environment
conda activate my-env-name
- Install the Hugging Face transformers library
pip install transformers
- Install PyTorch
conda install -c pytorch pytorch
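To check that everything is installed, you can run a quick sanity check (a minimal sketch; the last line prints False on machines without a usable GPU):
import torch
import transformers

print(transformers.__version__)   # the installed transformers version
print(torch.cuda.is_available())  # whether pytorch can see a GPU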
BERT
- Open a .py file and add the following:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

use_gpu = True
if use_gpu:
    device = 'cuda'
else:
    device = 'cpu'

model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
# num_labels=2 attaches a fresh, randomly initialized two-way classification head
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2
).to(device)
sentences = ['Boxing is one of my favorite activities', 'In Costco you can get free cardboard boxes']
query = 'sport'
# prepend the query to each sentence, separated by BERT's special [SEP] token
inputs = list(map(lambda x: query + ' [SEP] ' + x, sentences))
tokens = tokenizer(
    inputs,               # tokenize the query [SEP] sentence pairs, not the bare sentences
    return_tensors="pt",  # return a pytorch tensor; use "tf" if you want to use tensorflow instead
    truncation=True,      # truncate to the input limit of the model
    padding=True          # pad short inputs with the special [PAD] token
).to(device)
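# Optional sanity check (not in the original script): decode the ids back into
# tokens to see the [CLS], [SEP], and [PAD] special tokens the tokenizer added.
# print(tokenizer.convert_ids_to_tokens(tokens['input_ids'][0].tolist()))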
output = model(**tokens)
logits = output.logits
print(f"logits: {logits}")
predictions = logits.argmax(dim=1)
print(f"predictions: {predictions}")
- Train your model
from transformers import Trainer, TrainingArguments
from datasets import Dataset  # pip install datasets

labels = [1, 0]  # one label per sentence: 1 = about the query, 0 = not
training_args = TrainingArguments(
    do_train=True,
    seed=41,
    num_train_epochs=5,
    per_device_train_batch_size=4,
    output_dir='/tmp'  # where checkpoints are written
)
def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding=True)

dict_dataset = dict(labels=labels, text=inputs)  # train on the same query [SEP] sentence format used above
raw_datasets = Dataset.from_dict(dict_dataset)
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True).shuffle(seed=42)
training_dataset = tokenized_datasets
trainer = Trainer(model=model, args=training_args, train_dataset=training_dataset)
trainer.train()
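After training you will probably want to keep the fine-tuned weights. A minimal sketch, assuming a local directory named my-finetuned-bert (the name is just an example):
trainer.save_model('my-finetuned-bert')         # saves the model weights and config
tokenizer.save_pretrained('my-finetuned-bert')  # keep the tokenizer alongside the model
# later you can reload it with:
# model = AutoModelForSequenceClassification.from_pretrained('my-finetuned-bert')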
- Test again:
model.eval()  # trainer.train() leaves the model in training mode; switch back for inference
model.to(device)
tokens.to(device)
output = model(**tokens)
logits = output.logits
print(f"logits: {logits}")
predictions = logits.argmax(dim=1)
print(f"predictions: {predictions}")
T5
Here’s a script you can use to play around with T5 (the T5 tokenizer also needs the sentencepiece package: pip install sentencepiece)
from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

while True:
    inp = input("enter sentence: ")
    tokens = tokenizer(inp, return_tensors='pt').input_ids
    output = model.generate(tokens)
    print(tokenizer.decode(output[0]))
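T5 was trained with task prefixes, so the prompt matters. Instead of the interactive loop you can, for example, ask for a translation (a minimal sketch; max_new_tokens and skip_special_tokens are standard generate/decode options):
tokens = tokenizer("translate English to German: The house is wonderful.", return_tensors='pt').input_ids
output = model.generate(tokens, max_new_tokens=40)
print(tokenizer.decode(output[0], skip_special_tokens=True))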