Structured Output¶

Generate output that conforms to a JSON Schema, ensuring the model produces valid, parseable structured data. This is essential for applications that need to extract information, generate API responses, or produce machine-readable output.

Overview¶

Structured output works by converting a JSON Schema into a GBNF grammar that constrains the model's output. The model can only produce tokens that form valid JSON matching your schema, so any generation that runs to completion is guaranteed to be parseable.

JSON Schema-Based Generation¶

Define your desired output structure as a JSON Schema:

Node.js | Python | Rust | CLI

import { Model, Context, StructuredOutput } from 'mullama';

const model = await Model.load('./model.gguf');
const context = new Context(model);

const schema = {
  type: 'object',
  properties: {
    name: { type: 'string' },
    age: { type: 'integer' },
    email: { type: 'string' },
  },
  required: ['name', 'age'],
};

const response = await context.generate(
  "Extract user info: John Doe is 30 years old, john@example.com",
  200,
  { schema }
);

const data = JSON.parse(response);
console.log(data.name);  // "John Doe"
console.log(data.age);   // 30

import json
from mullama import Model, Context, StructuredOutput

model = Model.load("./model.gguf")
context = Context(model)

schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "email": {"type": "string"},
    },
    "required": ["name", "age"],
}

response = context.generate(
    "Extract user info: John Doe is 30 years old, john@example.com",
    max_tokens=200,
    schema=schema,
)

data = json.loads(response)
print(data["name"])   # "John Doe"
print(data["age"])    # 30

use mullama::{Context, ContextParams, StructuredOutput};
use serde_json::Value;

let schema = serde_json::json!({
    "type": "object",
    "properties": {
        "name": { "type": "string" },
        "age": { "type": "integer" },
        "email": { "type": "string" }
    },
    "required": ["name", "age"]
});

let structured = StructuredOutput::from_schema(&schema)?;
let response = context.generate_with_grammar(
    "Extract user info: John Doe is 30 years old, john@example.com",
    200,
    &structured.grammar()
)?;

let data: Value = serde_json::from_str(&response)?;
println!("Name: {}", data["name"]);
println!("Age: {}", data["age"]);

mullama run llama3.2:1b \
  "Extract user info: John Doe is 30 years old, john@example.com" \
  --schema '{"type":"object","properties":{"name":{"type":"string"},"age":{"type":"integer"}},"required":["name","age"]}'

Converting JSON Schema to Grammar¶

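Under the hood, Mullama converts JSON Schemas to GBNF grammars using `JsonSchemaConverter`. You can also call the converter directly to inspect the generated grammar or reuse it across requests: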

Node.js | Python | Rust | CLI

import { JsonSchemaConverter } from 'mullama';

const schema = {
  type: 'object',
  properties: {
    title: { type: 'string' },
    score: { type: 'number', minimum: 0, maximum: 100 },
  },
  required: ['title', 'score'],
};

// Convert schema to grammar string
const grammar = JsonSchemaConverter.convert(schema);
console.log(grammar);  // GBNF grammar string

// Use grammar directly
const response = await context.generate("Rate this movie:", 200, { grammar });

from mullama import JsonSchemaConverter

schema = {
    "type": "object",
    "properties": {
        "title": {"type": "string"},
        "score": {"type": "number", "minimum": 0, "maximum": 100},
    },
    "required": ["title", "score"],
}

# Convert schema to grammar string
grammar = JsonSchemaConverter.convert(schema)
print(grammar)  # GBNF grammar string

# Use grammar directly
response = context.generate("Rate this movie:", max_tokens=200, grammar=grammar)

use mullama::structured::JsonSchemaConverter;

let schema = serde_json::json!({
    "type": "object",
    "properties": {
        "title": { "type": "string" },
        "score": { "type": "number", "minimum": 0, "maximum": 100 }
    },
    "required": ["title", "score"]
});

let grammar = JsonSchemaConverter::convert(&schema)?;
println!("{}", grammar);

let response = context.generate_with_grammar("Rate this movie:", 200, &grammar)?;

# Convert schema to grammar file, then use it
mullama grammar from-schema schema.json > output.gbnf
mullama run llama3.2:1b "Rate this movie:" --grammar output.gbnf

Supported Types¶

The JSON Schema converter supports all standard JSON types:

Primitive Types¶

Type	JSON Schema	Example Output
String	`{"type": "string"}`	`"hello world"`
Number	`{"type": "number"}`	`3.14`
Integer	`{"type": "integer"}`	`42`
Boolean	`{"type": "boolean"}`	`true`
Null	`{"type": "null"}`	`null`

Objects¶

Node.js | Python | Rust | CLI

const schema = {
  type: 'object',
  properties: {
    name: { type: 'string' },
    address: {
      type: 'object',
      properties: {
        street: { type: 'string' },
        city: { type: 'string' },
        zip: { type: 'string' },
      },
      required: ['street', 'city'],
    },
  },
  required: ['name', 'address'],
};

schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "address": {
            "type": "object",
            "properties": {
                "street": {"type": "string"},
                "city": {"type": "string"},
                "zip": {"type": "string"},
            },
            "required": ["street", "city"],
        },
    },
    "required": ["name", "address"],
}

let schema = serde_json::json!({
    "type": "object",
    "properties": {
        "name": { "type": "string" },
        "address": {
            "type": "object",
            "properties": {
                "street": { "type": "string" },
                "city": { "type": "string" },
                "zip": { "type": "string" }
            },
            "required": ["street", "city"]
        }
    },
    "required": ["name", "address"]
});

# Schema can be passed as a file
mullama run llama3.2:1b "Extract address:" --schema-file address_schema.json

Arrays¶

Node.js | Python | Rust | CLI

const schema = {
  type: 'object',
  properties: {
    tags: {
      type: 'array',
      items: { type: 'string' },
    },
    scores: {
      type: 'array',
      items: { type: 'number' },
      minItems: 1,
      maxItems: 5,
    },
  },
};

schema = {
    "type": "object",
    "properties": {
        "tags": {
            "type": "array",
            "items": {"type": "string"},
        },
        "scores": {
            "type": "array",
            "items": {"type": "number"},
            "minItems": 1,
            "maxItems": 5,
        },
    },
}

let schema = serde_json::json!({
    "type": "object",
    "properties": {
        "tags": {
            "type": "array",
            "items": { "type": "string" }
        },
        "scores": {
            "type": "array",
            "items": { "type": "number" },
            "minItems": 1,
            "maxItems": 5
        }
    }
});

mullama run llama3.2:1b "List programming languages:" \
  --schema '{"type":"object","properties":{"languages":{"type":"array","items":{"type":"string"}}}}'

Enum Constraints¶

Use the `enum` keyword to restrict a string value to a fixed set of allowed values:

Node.js | Python | Rust | CLI

const schema = {
  type: 'object',
  properties: {
    sentiment: {
      type: 'string',
      enum: ['positive', 'negative', 'neutral'],
    },
    confidence: { type: 'number' },
  },
  required: ['sentiment', 'confidence'],
};

const response = await context.generate(
  "Analyze sentiment: I love this product!",
  100,
  { schema }
);
// Output: {"sentiment": "positive", "confidence": 0.95}

schema = {
    "type": "object",
    "properties": {
        "sentiment": {
            "type": "string",
            "enum": ["positive", "negative", "neutral"],
        },
        "confidence": {"type": "number"},
    },
    "required": ["sentiment", "confidence"],
}

response = context.generate(
    "Analyze sentiment: I love this product!",
    max_tokens=100,
    schema=schema,
)
# Output: {"sentiment": "positive", "confidence": 0.95}

let schema = serde_json::json!({
    "type": "object",
    "properties": {
        "sentiment": {
            "type": "string",
            "enum": ["positive", "negative", "neutral"]
        },
        "confidence": { "type": "number" }
    },
    "required": ["sentiment", "confidence"]
});

mullama run llama3.2:1b "Analyze sentiment: I love this product!" \
  --schema '{"type":"object","properties":{"sentiment":{"type":"string","enum":["positive","negative","neutral"]},"confidence":{"type":"number"}},"required":["sentiment","confidence"]}'

Required vs Optional Properties¶

Properties listed in the schema's `required` array must appear in the output. All other properties are optional and may or may not be included:

Node.js | Python | Rust | CLI

const schema = {
  type: 'object',
  properties: {
    title: { type: 'string' },         // Required
    author: { type: 'string' },        // Required
    year: { type: 'integer' },         // Optional
    isbn: { type: 'string' },          // Optional
  },
  required: ['title', 'author'],       // Only these are mandatory
};

schema = {
    "type": "object",
    "properties": {
        "title": {"type": "string"},       # Required
        "author": {"type": "string"},      # Required
        "year": {"type": "integer"},       # Optional
        "isbn": {"type": "string"},        # Optional
    },
    "required": ["title", "author"],       # Only these are mandatory
}

let schema = serde_json::json!({
    "type": "object",
    "properties": {
        "title": { "type": "string" },
        "author": { "type": "string" },
        "year": { "type": "integer" },
        "isbn": { "type": "string" }
    },
    "required": ["title", "author"]
});

mullama run llama3.2:1b "Extract book info:" --schema-file book.json

Integration with Application Code¶

Parse the structured output and use it in your application:

Node.js | Python | Rust | CLI

import { Model, Context } from 'mullama';

const schema = {
  type: 'object',
  properties: {
    entities: {
      type: 'array',
      items: {
        type: 'object',
        properties: {
          name: { type: 'string' },
          type: { type: 'string', enum: ['person', 'org', 'location'] },
        },
        required: ['name', 'type'],
      },
    },
  },
  required: ['entities'],
};

const response = await context.generate(
  "Extract entities: Apple CEO Tim Cook visited Paris last week.",
  300,
  { schema }
);

const result = JSON.parse(response);
for (const entity of result.entities) {
  console.log(`${entity.name} (${entity.type})`);
}
// Tim Cook (person)
// Apple (org)
// Paris (location)

import json
from dataclasses import dataclass
from typing import List

@dataclass
class Entity:
    name: str
    type: str

schema = {
    "type": "object",
    "properties": {
        "entities": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "type": {"type": "string", "enum": ["person", "org", "location"]},
                },
                "required": ["name", "type"],
            },
        },
    },
    "required": ["entities"],
}

response = context.generate(
    "Extract entities: Apple CEO Tim Cook visited Paris last week.",
    max_tokens=300,
    schema=schema,
)

result = json.loads(response)
entities = [Entity(**e) for e in result["entities"]]
for entity in entities:
    print(f"{entity.name} ({entity.type})")

use serde::Deserialize;

#[derive(Deserialize)]
struct ExtractionResult {
    entities: Vec<Entity>,
}

#[derive(Deserialize)]
struct Entity {
    name: String,
    #[serde(rename = "type")]
    entity_type: String,
}

let response = context.generate_with_grammar(
    "Extract entities: Apple CEO Tim Cook visited Paris last week.",
    300,
    &grammar
)?;

let result: ExtractionResult = serde_json::from_str(&response)?;
for entity in &result.entities {
    println!("{} ({})", entity.name, entity.entity_type);
}

# Pipe output to jq for processing
mullama run llama3.2:1b \
  "Extract entities: Apple CEO Tim Cook visited Paris last week." \
  --schema-file entities.json | jq '.entities[].name'

Error Handling¶

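Grammar-constrained generation produces valid JSON matching the schema whenever generation runs to completion. You should still handle generation errors, output truncated by the token limit (see the Max Tokens note below), and values that are valid under the schema but implausible for your application: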

Node.js | Python | Rust | CLI

try {
  const response = await context.generate(prompt, 200, { schema });
  const data = JSON.parse(response);

  // Validate business logic
  if (data.age < 0 || data.age > 150) {
    console.warn('Implausible age value');
  }
} catch (error) {
  if (error instanceof SyntaxError) {
    // Should not happen with grammar constraints, but be defensive
    console.error('Invalid JSON output');
  } else {
    console.error(`Generation error: ${error.message}`);
  }
}

import json

try:
    response = context.generate(prompt, max_tokens=200, schema=schema)
    data = json.loads(response)

    # Validate business logic
    if data.get("age", 0) < 0 or data.get("age", 0) > 150:
        print("Warning: implausible age value")
except json.JSONDecodeError:
    # Should not happen with grammar constraints
    print("Invalid JSON output")
except Exception as e:
    print(f"Generation error: {e}")

match context.generate_with_grammar(prompt, 200, &grammar) {
    Ok(response) => {
        match serde_json::from_str::<Value>(&response) {
            Ok(data) => println!("Parsed: {:?}", data),
            Err(e) => eprintln!("Parse error: {}", e),
        }
    }
    Err(e) => eprintln!("Generation error: {}", e),
}

# Validate output with jq
mullama run llama3.2:1b "Extract info:" --schema-file schema.json | jq '.' || echo "Invalid output"

Max Tokens

If `max_tokens` is too low, the output may be truncated before the JSON is complete, and parsing the response will fail. Set `max_tokens` high enough to accommodate your schema's maximum possible output size.

Examples¶

Generating API Responses¶

Node.js | Python | Rust | CLI

const apiResponseSchema = {
  type: 'object',
  properties: {
    status: { type: 'string', enum: ['success', 'error'] },
    data: {
      type: 'object',
      properties: {
        summary: { type: 'string' },
        keywords: { type: 'array', items: { type: 'string' } },
        wordCount: { type: 'integer' },
      },
      required: ['summary', 'keywords'],
    },
  },
  required: ['status', 'data'],
};

const response = await context.generate(
  "Summarize: Rust is a systems programming language focused on safety...",
  500,
  { schema: apiResponseSchema }
);

api_response_schema = {
    "type": "object",
    "properties": {
        "status": {"type": "string", "enum": ["success", "error"]},
        "data": {
            "type": "object",
            "properties": {
                "summary": {"type": "string"},
                "keywords": {"type": "array", "items": {"type": "string"}},
                "word_count": {"type": "integer"},
            },
            "required": ["summary", "keywords"],
        },
    },
    "required": ["status", "data"],
}

response = context.generate(
    "Summarize: Rust is a systems programming language focused on safety...",
    max_tokens=500,
    schema=api_response_schema,
)

let schema = serde_json::json!({
    "type": "object",
    "properties": {
        "status": { "type": "string", "enum": ["success", "error"] },
        "data": {
            "type": "object",
            "properties": {
                "summary": { "type": "string" },
                "keywords": { "type": "array", "items": { "type": "string" } },
                "word_count": { "type": "integer" }
            },
            "required": ["summary", "keywords"]
        }
    },
    "required": ["status", "data"]
});

mullama run llama3.2:1b "Summarize: Rust is a systems language..." \
  --schema-file api_response.json

Structured Output¶

Overview¶

JSON Schema-Based Generation¶

Converting JSON Schema to Grammar¶

Supported Types¶

Primitive Types¶

Objects¶

Arrays¶

Enum Constraints¶

Required vs Optional Properties¶

Integration with Application Code¶

Error Handling¶

Examples¶

Generating API Responses¶

See Also¶