Path: blob/main/course/fr/chapter2/section5_tf.ipynb
Kernel: Python 3
Handling multiple sequences (TensorFlow)
Install the 🤗 Transformers library to run this notebook.
In [ ]:
!pip install transformers[sentencepiece]
In [ ]:
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

checkpoint = "tblard/tf-allocine"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)

sequence = "J'ai attendu un cours d’HuggingFace toute ma vie."

tokens = tokenizer.tokenize(sequence)
ids = tokenizer.convert_tokens_to_ids(tokens)
input_ids = tf.constant(ids)

# This line will fail
model(input_ids)
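The call above fails because Transformers models expect a batch of inputs: a tensor of shape (batch_size, sequence_length). A quick shape check makes this visible (a minimal sketch reusing the `ids` built above):
In [ ]:
# `ids` is a flat list, so tf.constant(ids) is rank 1: a single sequence
# with no batch dimension. Wrapping it in a list adds that dimension.
print(tf.constant(ids).shape)    # (sequence_length,)
print(tf.constant([ids]).shape)  # (1, sequence_length)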
In [ ]:
tokenized_inputs = tokenizer(sequence, return_tensors="tf")
print(tokenized_inputs["input_ids"])
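Notice that `tokenized_inputs["input_ids"]` contains more ids than the list we built by hand: besides adding the batch dimension, the tokenizer also inserted the model's special tokens. Decoding both makes the difference visible (a small sketch using the variables defined above):
In [ ]:
# The tokenizer-built ids include the special tokens the model was
# trained with; the hand-built ids do not.
print(tokenizer.decode(tokenized_inputs["input_ids"][0]))
print(tokenizer.decode(ids))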
In [ ]:
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

checkpoint = "tblard/tf-allocine"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)

sequence = "J'ai attendu un cours d’HuggingFace toute ma vie."

tokens = tokenizer.tokenize(sequence)
ids = tokenizer.convert_tokens_to_ids(tokens)

input_ids = tf.constant([ids])
print("Input IDs:", input_ids)

output = model(input_ids)
print("Logits:", output.logits)
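Batching simply stacks sequences along that first dimension. As a quick check (a sketch reusing `ids` from above), sending the same sequence twice should produce two identical rows of logits:
In [ ]:
# Two copies of the same sequence: same length, so they stack cleanly
batched_ids = [ids, ids]
output = model(tf.constant(batched_ids))
print(output.logits)  # two identical rows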
In [ ]:
batched_ids = [
    [200, 200, 200],
    [200, 200],
]
In [ ]:
padding_id = 100

batched_ids = [
    [200, 200, 200],
    [200, 200, padding_id],
]
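The `padding_id = 100` above is only a placeholder to show the idea; a real model expects the padding token its tokenizer was trained with, exposed as `tokenizer.pad_token_id`:
In [ ]:
# The actual padding token and its id for this checkpoint
print(tokenizer.pad_token, tokenizer.pad_token_id)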
In [ ]:
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)

sequence1_ids = [[200, 200, 200]]
sequence2_ids = [[200, 200]]
batched_ids = [
    [200, 200, 200],
    [200, 200, tokenizer.pad_token_id],
]

print(model(tf.constant(sequence1_ids)).logits)
print(model(tf.constant(sequence2_ids)).logits)
print(model(tf.constant(batched_ids)).logits)
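Something is off: the logits for the padded second row differ from the logits of `sequence2_ids` on its own. That is because the attention layers attend to every token in a sequence, padding included. To get the same result with and without padding, we have to tell those layers to ignore the padding tokens, which is exactly what an attention mask does.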
In [ ]:
batched_ids = [
    [200, 200, 200],
    [200, 200, tokenizer.pad_token_id],
]

attention_mask = [
    [1, 1, 1],
    [1, 1, 0],
]

outputs = model(tf.constant(batched_ids), attention_mask=tf.constant(attention_mask))
print(outputs.logits)
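In practice you rarely build these tensors by hand: with `padding=True`, the tokenizer pads to the longest sequence in the batch and returns the matching attention mask. A minimal sketch (the second sentence is an arbitrary example of ours):
In [ ]:
# The tokenizer builds input_ids and attention_mask together
inputs = tokenizer(
    ["J'ai attendu un cours d’HuggingFace toute ma vie.", "Moi aussi !"],
    padding=True,
    return_tensors="tf",
)
print(inputs["input_ids"])
print(inputs["attention_mask"])
print(model(inputs).logits)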
In [ ]:
max_sequence_length = 512

sequence = sequence[:max_sequence_length]
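Note that the slice above truncates characters in the raw string, not tokens. The tokenizer can truncate at the token level instead, via `truncation=True` and `max_length` (a sketch; 512 matches the limit of most BERT-like models):
In [ ]:
# Token-level truncation handled by the tokenizer
inputs = tokenizer(sequence, truncation=True, max_length=512, return_tensors="tf")
print(inputs["input_ids"].shape)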