CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
huggingface

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

GitHub Repository: huggingface/notebooks
Path: blob/main/course/en/chapter13/grpo_length.py
Views: 2935
1
import marimo
2
3
# Version tag stored in the notebook file; presumably the marimo release that
# generated it (TODO confirm against marimo's file-format docs).
__generated_with = "0.10.6"
4
# Notebook application object; every cell below registers itself on it through
# the @app.cell decorator, and app.run() is called from the __main__ guard.
app = marimo.App(width="medium")
5
6
7
@app.cell(hide_code=True)
def _():
    # Intro cell: imports marimo as `mo` for the other cells and renders the
    # notebook heading as markdown.
    import marimo as mo

    # Underscore-prefixed so the name stays local to this cell.
    _intro = (
        "## Length based reward\n"
        "Adjust the slider to see how the reward changes for different completion lengths."
    )
    mo.md(_intro)
    return (mo,)
15
16
17
@app.cell(hide_code=True)
def _(mo):
    # Slider cell: lets the reader pick the ideal completion length
    # (5 to 50 characters, in steps of 5) used by the reward cell.
    slider = mo.ui.slider(
        start=5,
        stop=50,
        step=5,
        label="Ideal Length (characters)",
    )
    # Bare expression so the widget is shown as the cell's output.
    slider
    return (slider,)
22
23
24
@app.cell(hide_code=True)
def _(mo, slider):
    # Reward cell: scores a toy set of completions against the ideal length
    # chosen on the slider and shows the scores as a bar chart.
    import plotly.express as px

    # Toy dataset with 5 samples of different lengths
    completions = [
        "Short",  # 5 chars
        "Medium length text",  # 18 chars
        "This is about twenty chars",  # 26 chars
        "This is a slightly longer completion",  # 36 chars
        "This is a much longer completion with more words",  # 48 chars
    ]

    maximum_length = max(len(completion) for completion in completions)
    minimum_length = min(len(completion) for completion in completions)

    def length_reward(completions, ideal_length):
        """
        Calculate rewards based on the length of completions.

        Each reward is ``1 - |len(completion) - ideal_length| / length_range``,
        clamped to ``[0, 1]``, where ``length_range`` is the spread between the
        longest and shortest completion of the toy dataset defined in this
        cell. A completion whose length equals ``ideal_length`` scores 1.0 and
        the score falls off linearly with the distance from the ideal.

        Args:
            completions: List of text completions
            ideal_length: Target length in characters

        Returns:
            List of reward scores (floats in [0, 1]), one per completion
        """
        # Normalise by the dataset's length spread. Fall back to 1 when every
        # completion has the same length so the division below cannot fail.
        length_range = max(maximum_length - minimum_length, 1)

        rewards = []
        for completion in completions:
            distance = abs(len(completion) - ideal_length)
            # Negative absolute difference, shifted and scaled so that only an
            # exact length match reaches the maximum reward of 1.
            reward = 1 - distance / length_range
            # The ideal length may lie further away than the dataset spread,
            # so clamp to keep the score inside [0, 1].
            rewards.append(min(1.0, max(0.0, reward)))

        return rewards

    # Calculate rewards for the examples
    rewards = length_reward(completions=completions, ideal_length=slider.value)

    # Pair each example with its length and reward for plotting
    results = [
        {"Completion": text, "Length": len(text), "Reward": score}
        for text, score in zip(completions, rewards)
    ]

    fig = px.bar(results, x="Completion", y="Reward", color="Length")
    # Bare expression so the chart is shown as the cell's output.
    mo.ui.plotly(fig)
75
76
77
# Run the notebook app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
79
80