
GitHub Repository: huggingface/notebooks
Path: blob/main/course/fr/chapter2/section5_tf.ipynb
Kernel: Python 3

Handling multiple sequences (TensorFlow)

Install the 🤗 Transformers library to run this notebook.

!pip install transformers[sentencepiece]
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

checkpoint = "tblard/tf-allocine"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)

sequence = "J'ai attendu un cours d’HuggingFace toute ma vie."

tokens = tokenizer.tokenize(sequence)
ids = tokenizer.convert_tokens_to_ids(tokens)
input_ids = tf.constant(ids)

# This line will fail
model(input_ids)
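The call above fails because 🤗 Transformers models expect a batch of inputs, i.e. a tensor with a leading batch dimension, while tf.constant(ids) is only one-dimensional. As a minimal sketch (continuing from the cell above; tf.expand_dims is just one way to add that dimension, not what the cells below use):

# Sketch: add a batch dimension to the 1-D tensor of token ids.
# The cells below achieve the same thing by wrapping ids in a list.
batched_input_ids = tf.expand_dims(tf.constant(ids), axis=0)
print(batched_input_ids.shape)  # (1, number_of_tokens)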
tokenized_inputs = tokenizer(sequence, return_tensors="tf")
print(tokenized_inputs["input_ids"])
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

checkpoint = "tblard/tf-allocine"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)

sequence = "J'ai attendu un cours d’HuggingFace toute ma vie."

tokens = tokenizer.tokenize(sequence)
ids = tokenizer.convert_tokens_to_ids(tokens)

input_ids = tf.constant([ids])
print("Input IDs:", input_ids)

output = model(input_ids)
print("Logits:", output.logits)
batched_ids = [
    [200, 200, 200],
    [200, 200],
]
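These two lists cannot be turned into a rectangular tensor as-is, which is why padding is needed. A quick sketch (my own illustration, not a cell from the original notebook) of the error you would hit:

# Sketch: converting ragged lists of ids into a tensor fails.
import tensorflow as tf

try:
    tf.constant(batched_ids)
except ValueError as err:
    print(err)  # non-rectangular Python sequences cannot become a Tensor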
padding_id = 100

batched_ids = [
    [200, 200, 200],
    [200, 200, padding_id],
]
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)

sequence1_ids = [[200, 200, 200]]
sequence2_ids = [[200, 200]]
batched_ids = [
    [200, 200, 200],
    [200, 200, tokenizer.pad_token_id],
]

# Without an attention mask, the padding token changes the logits
# of the second sequence compared to running it on its own.
print(model(tf.constant(sequence1_ids)).logits)
print(model(tf.constant(sequence2_ids)).logits)
print(model(tf.constant(batched_ids)).logits)
batched_ids = [
    [200, 200, 200],
    [200, 200, tokenizer.pad_token_id],
]

# Mask out the padding token so the attention layers ignore it.
attention_mask = [
    [1, 1, 1],
    [1, 1, 0],
]

outputs = model(tf.constant(batched_ids), attention_mask=tf.constant(attention_mask))
print(outputs.logits)
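In practice you rarely build the padded batch and the attention mask by hand: the tokenizer can do both in one call with padding=True. A short sketch under that assumption (the second sentence is just an example of mine, not from the notebook):

# Sketch: let the tokenizer pad the batch and build the attention mask.
sequences = [
    "J'ai attendu un cours d’HuggingFace toute ma vie.",
    "Moi aussi !",
]
batch = tokenizer(sequences, padding=True, return_tensors="tf")
print(batch["attention_mask"])  # 1 for real tokens, 0 for padding

outputs = model(batch["input_ids"], attention_mask=batch["attention_mask"])
print(outputs.logits)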
max_sequence_length = 512

sequence = sequence[:max_sequence_length]
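The tokenizer can also handle truncation itself, at the token level, via truncation=True and max_length. A hedged sketch (the 512 limit is an assumption about this model's maximum context size):

# Sketch: token-level truncation handled by the tokenizer itself.
inputs = tokenizer(sequence, truncation=True, max_length=512, return_tensors="tf")
print(inputs["input_ids"].shape)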