Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Path: blob/main/course/en/chapter13/grpo_length.py
Views: 2935
import marimo

__generated_with = "0.10.6"
app = marimo.App(width="medium")


@app.cell(hide_code=True)
def _():
    # Intro cell: imports marimo, renders the heading, and hands `mo` to later cells.
    import marimo as mo

    mo.md(
        "## Length based reward\nAdjust the slider to see how the reward changes for different completion lengths."
    )
    return (mo,)


@app.cell(hide_code=True)
def _(mo):
    # Slider cell: lets the reader pick the target completion length (5 to 50, step 5).
    slider = mo.ui.slider(start=5, stop=50, step=5, label="Ideal Length (characters)")
    slider
    return (slider,)


@app.cell(hide_code=True)
def _(mo, slider):
    # Plot cell: scores a toy set of completions against the slider value and
    # draws one bar per completion.
    import plotly.express as px

    # Five sample completions of increasing length.
    completions = [
        "Short",  # 5 chars
        "Medium length text",  # 18 chars
        "This is about twenty chars",  # 26 chars
        "This is a slightly longer completion",  # 36 chars
        "This is a much longer completion with more words",  # 48 chars
    ]

    # Extremes of the toy dataset; both feed the normalisation in length_reward.
    maximum_length = max(map(len, completions))
    minimum_length = min(map(len, completions))

    def length_reward(completions, ideal_length):
        """
        Score each completion by how close its length is to a target length.

        The raw score is ``maximum_length - |len(completion) - ideal_length|``,
        floored at 0, divided by ``maximum_length - minimum_length`` and capped
        at 1. Because of that scaling, every completion whose length is within
        ``minimum_length`` characters of the target receives the full reward.

        Args:
            completions: List of text completions
            ideal_length: Target length in characters

        Returns:
            List with one reward in the range [0, 1] per completion, in input order
        """
        span = maximum_length - minimum_length

        def score(text):
            # Distance from the target, turned into a "closer is better" value.
            distance = abs(len(text) - ideal_length)
            return min(1, max(0, maximum_length - distance) / span)

        return [score(text) for text in completions]

    # Score the samples against the length currently selected on the slider.
    rewards = length_reward(completions=completions, ideal_length=slider.value)

    # One record per completion, in the shape plotly express expects.
    results = [
        {"Completion": text, "Length": len(text), "Reward": value}
        for text, value in zip(completions, rewards)
    ]

    fig = px.bar(results, x="Completion", y="Reward", color="Length")
    mo.ui.plotly(fig)


if __name__ == "__main__":
    app.run()