Load your dataset to Argilla

In [ ]:

!pip install argilla datasets

In [ ]:

import argilla as rg

# Hugging Face access token; only needed when the Space is private.
HF_TOKEN = "..."

# Extra request headers carrying the token (only for private spaces).
auth_headers = {"Authorization": f"Bearer {HF_TOKEN}"}

# Client used to talk to Argilla; replace the placeholders with the real
# API URL and API key.
client = rg.Argilla(api_url="...", api_key="...", headers=auth_headers)

In [ ]:

from datasets import load_dataset

# Load only the "train" split of the AG News dataset.
data = load_dataset(path="SetFit/ag_news", split="train")

# Left as the last expression so the notebook displays the column schema.
data.features

{'text': Value(dtype='string', id=None),
 'label': Value(dtype='int64', id=None),
 'label_text': Value(dtype='string', id=None)}

In [ ]:

# The only field shown to annotators: the "text" column.
text_field = rg.TextField(name="text")

# Single-label question whose options are the distinct values found in the
# "label_text" column of the loaded data.
label_question = rg.LabelQuestion(
    name="label",
    title="Classify the text:",
    labels=data.unique("label_text"),
)

# Span question attached to the "text" field, with a fixed set of entity labels.
entity_question = rg.SpanQuestion(
    name="entities",
    title="Highlight all the entities in the text:",
    labels=["PERSON", "ORG", "LOC", "EVENT"],
    field="text",
)

# Bundle the field and both questions into the dataset settings.
settings = rg.Settings(
    fields=[text_field],
    questions=[label_question, entity_question],
)

In [ ]:

# Describe the dataset: its name plus the settings defined earlier.
dataset = rg.Dataset(
    settings=settings,
    name="ag_news",
)

# Create the dataset; kept as the last expression so the notebook shows
# whatever `create()` returns.
dataset.create()

In [ ]:

# Route the "label_text" column of the source data to the "label" question.
column_mapping = {"label_text": "label"}

# Log the loaded rows into the dataset using that mapping.
dataset.records.log(data, mapping=column_mapping)

Load your dataset to Argilla

Product

Resources

Company