Basic Usage¶
This guide walks through common usage patterns with complete, working examples.
Minimal Example¶
The simplest usage generates queries from a dimension model:
import asyncio
from pydantic import BaseModel, Field
from evaluateur import Evaluator

class Query(BaseModel):
    topic: str = Field(..., description="the subject area")
    difficulty: str = Field(..., description="complexity level")

async def main() -> None:
    evaluator = Evaluator(Query)
    async for q in evaluator.run(
        instructions="Generate diverse educational topics",
        tuple_count=5,
    ):
        print(q.query)

asyncio.run(main())
Step-by-Step Control¶
For more control, call each method separately:
import asyncio
from pydantic import BaseModel, Field
from evaluateur import Evaluator, TupleStrategy

class CustomerQuery(BaseModel):
    product: str = Field(..., description="product category")
    issue_type: str = Field(..., description="type of customer issue")
    sentiment: str = Field(..., description="customer emotional state")

async def main() -> None:
    evaluator = Evaluator(CustomerQuery)

    # Step 1: Generate options
    options = await evaluator.options(
        instructions="Focus on e-commerce scenarios",
        count_per_field=4,
    )
    print("Options generated:")
    for field, values in options.model_dump().items():
        print(f"  {field}: {values}")

    # Step 2: Generate tuples
    tuples = []
    async for t in evaluator.tuples(
        options,
        strategy=TupleStrategy.CROSS_PRODUCT,
        count=10,
        seed=123,
    ):
        tuples.append(t)
        print(f"Tuple: {t.model_dump()}")

    # Step 3: Generate queries
    print("\nQueries:")
    async for q in evaluator.queries(
        tuples=tuples,
        instructions="Write as if you're a frustrated customer",
    ):
        print(f"  {q.query}")

asyncio.run(main())
Using Fixed Options¶
Pre-define specific values for certain dimensions:
import asyncio
from pydantic import BaseModel, Field
from evaluateur import Evaluator

class SupportTicket(BaseModel):
    # Fixed options - these won't change
    priority: list[str] = ["low", "medium", "high", "critical"]
    channel: list[str] = ["email", "chat", "phone"]

    # Dynamic options - generated by the LLM
    product_area: str = Field(..., description="part of the product")
    issue_category: str = Field(..., description="type of technical issue")

async def main() -> None:
    evaluator = Evaluator(SupportTicket)

    # Only generates options for product_area and issue_category
    options = await evaluator.options(count_per_field=5)
    print("Priority options (fixed):", options.priority)
    print("Product areas (generated):", options.product_area)

asyncio.run(main())
Collecting Results¶
Store generated queries for later use:
import asyncio
import json
from pydantic import BaseModel, Field
from evaluateur import Evaluator

class Query(BaseModel):
    domain: str = Field(..., description="knowledge domain")
    complexity: str = Field(..., description="question difficulty")

async def main() -> None:
    evaluator = Evaluator(Query)

    results = []
    async for q in evaluator.run(tuple_count=20, seed=42):
        results.append({
            "query": q.query,
            "tuple": q.source_tuple.model_dump(),
            "metadata": q.metadata.model_dump(),
        })

    # Save to JSON
    with open("queries.json", "w") as f:
        json.dump(results, f, indent=2)

    print(f"Saved {len(results)} queries")

asyncio.run(main())
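To reuse the saved queries later, the file can be read back with the standard json module; the keys match what was written above:

import json

# Load the queries saved by the previous example
with open("queries.json") as f:
    saved = json.load(f)

for item in saved:
    print(item["query"], "<-", item["tuple"])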
Reproducible Results¶
Use seeds for reproducible sampling:
import asyncio
from pydantic import BaseModel, Field
from evaluateur import Evaluator, TupleStrategy

class Query(BaseModel):
    category: str = Field(..., description="content category")
    tone: str = Field(..., description="writing tone")

async def main() -> None:
    evaluator = Evaluator(Query)

    # Same seed = same tuples
    async for q in evaluator.run(
        tuple_strategy=TupleStrategy.CROSS_PRODUCT,
        tuple_count=5,
        seed=42,  # Fixed seed
    ):
        print(q.source_tuple.model_dump())

    print("---")

    # Different seed = different tuples
    async for q in evaluator.run(
        tuple_strategy=TupleStrategy.CROSS_PRODUCT,
        tuple_count=5,
        seed=43,  # Different seed
    ):
        print(q.source_tuple.model_dump())

asyncio.run(main())
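To check reproducibility in your own setup, one option is to collect the source tuples from two runs with the same seed and compare them. This is a sketch built on the same run arguments as above; note that the seed controls tuple sampling, so the generated query text may still vary between runs if the LLM itself is non-deterministic.

import asyncio
from pydantic import BaseModel, Field
from evaluateur import Evaluator, TupleStrategy

class Query(BaseModel):
    category: str = Field(..., description="content category")
    tone: str = Field(..., description="writing tone")

async def collect_tuples(seed: int) -> list[dict]:
    # Gather only the sampled tuples, ignoring the query text
    evaluator = Evaluator(Query)
    return [
        q.source_tuple.model_dump()
        async for q in evaluator.run(
            tuple_strategy=TupleStrategy.CROSS_PRODUCT,
            tuple_count=5,
            seed=seed,
        )
    ]

async def main() -> None:
    first = await collect_tuples(seed=42)
    second = await collect_tuples(seed=42)
    print("Same tuples:", first == second)

asyncio.run(main())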
Processing Queries as They Stream¶
Handle queries one at a time for memory efficiency:
import asyncio
from pydantic import BaseModel, Field
from evaluateur import Evaluator

class Query(BaseModel):
    topic: str = Field(..., description="subject matter")

async def process_query(query_text: str) -> None:
    """Your custom processing logic."""
    print(f"Processing: {query_text[:50]}...")
    # Run your LLM, save to database, etc.

async def main() -> None:
    evaluator = Evaluator(Query)

    count = 0
    async for q in evaluator.run(tuple_count=100):
        await process_query(q.query)
        count += 1
        if count % 10 == 0:
            print(f"Processed {count} queries")

asyncio.run(main())
Error Handling¶
Handle potential errors gracefully:
import asyncio
from pydantic import BaseModel, Field
from evaluateur import Evaluator

class Query(BaseModel):
    field: str = Field(..., description="example field")

async def main() -> None:
    try:
        evaluator = Evaluator(Query, llm="openai/gpt-4.1-mini")
        async for q in evaluator.run(tuple_count=10):
            print(q.query)
    except Exception as e:
        print(f"Error during generation: {e}")
        # Handle rate limits, API errors, etc.

asyncio.run(main())
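For transient failures such as rate limits, a simple retry wrapper around the whole run is one option. The sketch below uses only the standard library plus the calls shown above; the broad except and the backoff values are placeholders, and restarting the entire run on failure is a deliberate simplification.

import asyncio
from pydantic import BaseModel, Field
from evaluateur import Evaluator

class Query(BaseModel):
    field: str = Field(..., description="example field")

async def run_with_retries(max_attempts: int = 3) -> list[str]:
    evaluator = Evaluator(Query, llm="openai/gpt-4.1-mini")
    for attempt in range(1, max_attempts + 1):
        try:
            # Restart the full run on each attempt (simplification)
            return [q.query async for q in evaluator.run(tuple_count=10)]
        except Exception as e:  # placeholder: narrow to your client's error types
            if attempt == max_attempts:
                raise
            delay = 2 ** attempt  # simple exponential backoff
            print(f"Attempt {attempt} failed ({e}); retrying in {delay}s")
            await asyncio.sleep(delay)
    return []

queries = asyncio.run(run_with_retries())
print(f"Generated {len(queries)} queries")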
Logging¶
Enable debug logging to see what's happening:
import asyncio
import logging
from pydantic import BaseModel, Field
from evaluateur import Evaluator

# Enable debug logging
logging.basicConfig(level=logging.DEBUG)

class Query(BaseModel):
    topic: str = Field(..., description="subject")

async def main() -> None:
    evaluator = Evaluator(Query)
    async for q in evaluator.run(tuple_count=3):
        print(q.query)

asyncio.run(main())
This reveals the internal flow: option generation, tuple creation, and query generation steps.
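Global DEBUG logging also surfaces output from HTTP and LLM client libraries. If that is too noisy, one option is to keep the root level higher and enable DEBUG only for the library's own logger; the logger name "evaluateur" below is an assumption based on the package name.

import logging

logging.basicConfig(level=logging.WARNING)  # keep third-party noise down
logging.getLogger("evaluateur").setLevel(logging.DEBUG)  # assumed logger name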