CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
huggingface

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

GitHub Repository: huggingface/notebooks
Path: blob/main/sagemaker/18_inferentia_inference/code/inference.py
Views: 2554
1
import os
2
from transformers import AutoConfig, AutoTokenizer
3
import torch
4
import torch.neuron
5
6
# Restrict each worker process to a single neuron core.
os.environ["NEURON_RT_NUM_CORES"] = "1"

# File name of the traced model weights; model_fn looks for it in model_dir.
AWS_NEURON_TRACED_WEIGHTS_NAME = "neuron_model.pt"
11
12
13
def model_fn(model_dir):
    """Load the tokenizer, traced model and model config from ``model_dir``.

    Args:
        model_dir: Directory containing the tokenizer files, the model
            configuration and the traced weights file whose name is given
            by ``AWS_NEURON_TRACED_WEIGHTS_NAME``.

    Returns:
        A ``(model, tokenizer, model_config)`` tuple, in the order that
        ``predict_fn`` unpacks it.
    """
    traced_weights_path = os.path.join(model_dir, AWS_NEURON_TRACED_WEIGHTS_NAME)

    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    traced_model = torch.jit.load(traced_weights_path)
    config = AutoConfig.from_pretrained(model_dir)

    return traced_model, tokenizer, config
20
21
22
def predict_fn(data, model_tokenizer_model_config):
    """Classify the request inputs and return one label/score pair per input.

    Args:
        data: Request payload. Either a dict whose ``"inputs"`` entry holds
            the text(s) to classify, or the text(s) themselves. A dict
            without an ``"inputs"`` key is passed to the tokenizer as-is.
            The payload is not modified.
        model_tokenizer_model_config: The ``(model, tokenizer, model_config)``
            tuple produced by ``model_fn``.

    Returns:
        A JSON-serializable list with one ``{"label": str, "score": float}``
        dict per row of the model output, where ``score`` is the highest
        softmax probability of that row and ``label`` is the matching
        ``model_config.id2label`` entry.
    """
    # Unpack model, tokenizer and model config.
    model, tokenizer, model_config = model_tokenizer_model_config

    # Read the inputs without mutating the caller's payload; a bare string or
    # list payload is used directly.
    inputs = data.get("inputs", data) if isinstance(data, dict) else data

    # Pad/truncate every input to the sequence length stored in the config.
    encoded = tokenizer(
        inputs,
        return_tensors="pt",
        max_length=model_config.traced_sequence_length,
        padding="max_length",
        truncation=True,
    )

    # The model is called with positional tensors, so the tokenizer's output
    # order is what determines the argument order.
    # NOTE(review): presumably this matches the order used when the model was
    # traced -- confirm against the tracing script.
    neuron_inputs = tuple(encoded.values())

    # Run prediction.
    with torch.no_grad():
        logits = model(*neuron_inputs)[0]
        probabilities = torch.softmax(logits, dim=1)
        # One reduction yields both the best score and its class index per row.
        top_scores, top_ids = probabilities.max(dim=1)

    # Plain Python ints/floats keep the result JSON-serializable.
    id2label = model_config.id2label
    return [
        {"label": id2label[class_id], "score": score}
        for class_id, score in zip(top_ids.tolist(), top_scores.tolist())
    ]
45
46