Quickstart: Pydantic to Guaranteed JSON

This guide shows how to use the Proxy Structuring Engine (PSE) with a Hugging Face transformers model running on PyTorch to generate JSON output that is guaranteed to match a Pydantic schema.
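If you don't already have the dependencies installed, something like the following should work (package names are assumed from the imports below; check the PSE project's own instructions if the pse package name differs):

pip install pse torch transformers pydantic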

import torch
from transformers import AutoTokenizer, LlamaForCausalLM
from pydantic import BaseModel

# Assuming PSE is installed:
from pse import StructuringEngine
from pse.util.torch_mixin import PSETorchMixin # Optional: Mixin for easy HF integration

# 1. Define your desired output structure using Pydantic
class UserProfile(BaseModel):
    user_id: int
    username: str
    is_active: bool
    roles: list[str]
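
# For reference, this schema describes JSON objects shaped like:
# {"user_id": 999, "username": "tester", "is_active": true, "roles": ["qa", "dev"]}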

# 2. (Optional) Apply the PSE mixin to your Hugging Face model class
#    This simplifies integration by adding the `engine` attribute and overriding `generate`.
class PSE_Llama(PSETorchMixin, LlamaForCausalLM):
    pass

# 3. Load your model and tokenizer
#    Replace with your desired model path.
model_path = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Use the mixed-in class or your base model class
model = PSE_Llama.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16, # Use appropriate dtype for your model/hardware
    device_map="auto"           # Load model efficiently across devices
)

# Ensure padding token is set for generation (important!)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Ensure the model config also reflects the pad token id
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id
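# Optional sanity check: generation below assumes a defined pad token id
assert model.config.pad_token_id is not None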

# 4. Create the StructuringEngine and attach it as `model.engine`,
#    where the mixin's overridden `generate` expects to find it.
#    If not using the mixin, keep a standalone reference instead:
#    engine = StructuringEngine(tokenizer)
model.engine = StructuringEngine(tokenizer)

# 5. Configure the engine with your Pydantic model.
#    PSE compiles this into an efficient hierarchical state machine (HSM) representation.
model.engine.configure(UserProfile)

# 6. Create your prompt, instructing the LLM to generate the desired structure.
prompt = f"Generate a user profile for user ID 999, username 'tester', active status true, roles ['qa', 'dev']. Output ONLY the JSON object."
messages = [{"role": "user", "content": prompt}]
input_ids = tokenizer.apply_chat_template(
    messages,
    return_tensors="pt",
    add_generation_prompt=True # Crucial for instruction-following models
).to(model.device)

# 7. Generate using the engine's processor and sampler.
#    If using the mixin, `model.generate` is already overridden.
#    If not using the mixin, pass the engine hooks manually:
#    output_ids = model.generate(
#        input_ids,
#        max_new_tokens=150,
#        do_sample=True, # Or False for greedy decoding
#        logits_processor=[engine.process_logits],
#        sampler=engine.sample
#    )
output_ids = model.generate(
    input_ids,
    max_new_tokens=150,
    do_sample=True # Example: using sampling
    # No need to pass hooks explicitly if using the mixin
)

# 8. Decode and parse the guaranteed structured output
#    Extract only the newly generated tokens, excluding the prompt.
output_text = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
print("Raw Output (Guided by PSE):\n", output_text)

# PSE guarantees this output can be parsed directly into your Pydantic model
# Use the engine instance (model.engine if using mixin, or your separate instance)
structured_output: UserProfile | None = model.engine.get_structured_output(UserProfile)

# Verify the output
if structured_output:
    print("\nParsed Pydantic Object:\n", structured_output)
    # Example Parsed Output:
    # UserProfile(user_id=999, username='tester', is_active=True, roles=['qa', 'dev'])
else:
    print("\nFailed to generate structured output.")

This example shows the basic workflow: Define -> Configure -> Generate -> Parse. Because every sampled token is constrained by the compiled schema, the resulting structured_output reliably conforms to the UserProfile Pydantic model.
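Since the raw text itself is schema-conformant, you can also validate it directly with Pydantic as an independent cross-check on the engine's helper. A minimal sketch, assuming Pydantic v2 (on v1, use UserProfile.parse_raw instead):

from pydantic import ValidationError

try:
    profile = UserProfile.model_validate_json(output_text)
    print(profile.username, profile.roles)
except ValidationError as exc:
    # With PSE guiding decoding, this branch should be unreachable
    print("Schema violation:", exc)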