import random
from evalit import Experiment
def mock_llm(prompt: str) -> str:
    """Simulate an LLM whose answer quality depends on prompt politeness.

    The reply is chosen with a single draw from ``random.random()``; prompts
    containing the word "please" (matched case-insensitively) succeed more
    often than prompts without it.

    Args:
        prompt: The full prompt text sent to the fake model.

    Returns:
        Either ``"The correct answer is 42."`` or ``"I don't know."``.
    """
    # A polite prompt lowers the bar the random draw has to clear: roughly a
    # 70% success rate with "please" versus roughly 40% without it.
    threshold = 0.3 if "please" in prompt.lower() else 0.6
    if random.random() > threshold:
        return "The correct answer is 42."
    return "I don't know."
# Twenty evaluation records that differ only in their "id" (q_0 .. q_19);
# each one asks the same question and expects the answer "42".
dataset = [
    dict(
        id=f"q_{index}",
        inputs=dict(question="What is the meaning of life?"),
        expected_output="42",
    )
    for index in range(20)
]
# NOTE(review): `pm` is not defined or imported in the visible part of this
# file; presumably a prompt manager created elsewhere -- confirm before running.
# Map each variant label to a version of the same named prompt:
# version 1 is the control, version 2 is the challenger.
variants = {
    label: pm.get("meaning-of-life-prompt", version=number)
    for label, number in (("control", 1), ("challenger", 2))
}

# Hand the dataset, the mock model and a budget of 100 to the experiment,
# then print the "winner" entry of the analysis report.
exp = Experiment(variants=variants, name="Politeness Test")
exp.run(budget=100, llm_function=mock_llm, dataset=dataset)
report = exp.analyze()
print(report["winner"])  # e.g., "challenger"