Custom Schema Generators¶
Custom schema generators give you complete control over JSON schema generation. This guide covers creating custom generators, advanced patterns, and integration with schema generation tools.
Understanding Schema Generators¶
Pydantic's GenerateJsonSchema class controls how models are converted to JSON schemas. By subclassing it, you can customize every aspect of schema generation:
from pydantic import BaseModel
from pydantic.json_schema import GenerateJsonSchema, JsonSchemaMode
from typing import Any
from pyrmute import ModelManager, SchemaConfig
# Manager with default settings; it is rebound further down once a
# SchemaConfig carrying the custom generator has been built.
manager = ModelManager()
class CustomGenerator(GenerateJsonSchema):
    """Schema generator that stamps extra metadata onto every schema."""

    def generate(
        self,
        schema: dict[str, Any],
        mode: JsonSchemaMode = "validation",
    ) -> dict[str, Any]:
        """Build the base JSON schema, then attach the custom keys.

        Args:
            schema: Core schema dict handed over by Pydantic.
            mode: Either 'validation' or 'serialization'.

        Returns:
            The parent's JSON schema with ``x-custom`` and ``$schema`` set.
        """
        # The parent implementation does the real conversion work.
        result = super().generate(schema, mode=mode)

        # Layer the extra keys on top of whatever the parent produced.
        result.update(
            {
                "x-custom": "metadata",
                "$schema": self.schema_dialect,
            }
        )
        return result
# Use custom generator
# Wire the generator class into the manager-wide default configuration.
config = SchemaConfig(
    schema_generator=CustomGenerator,
)
manager = ModelManager(
    default_schema_config=config,
)


@manager.model("User", "1.0.0")
class UserV1(BaseModel):
    """First version of the User model used by the examples."""

    name: str
    email: str


# Schemas fetched through the manager now pass through CustomGenerator.
schema = manager.get_schema("User", "1.0.0")
print(schema["x-custom"])  # "metadata"
Basic Customizations¶
Adding Global Metadata¶
Add metadata to all generated schemas:
class MetadataGenerator(GenerateJsonSchema):
    """Attach organization-wide metadata to every generated schema."""

    def generate(
        self,
        schema: dict[str, Any],
        mode: JsonSchemaMode = "validation",
    ) -> dict[str, Any]:
        """Return the parent's JSON schema extended with company metadata.

        Args:
            schema: Core schema dict from Pydantic.
            mode: 'validation' or 'serialization'.

        Returns:
            JSON schema carrying the ``x-*`` company keys and ``$schema``.
        """
        enriched = super().generate(schema, mode=mode)

        # Company information first, JSON Schema version last.
        extras = {
            "x-organization": "Acme Corp",
            "x-contact": "api@acme.com",
            "x-generated-by": "pyrmute",
            "$schema": self.schema_dialect,
        }
        for key, value in extras.items():
            enriched[key] = value
        return enriched
Custom Schema Dialect¶
Specify a different JSON Schema version:
class Draft2020Generator(GenerateJsonSchema):
    """Emit schemas that declare JSON Schema Draft 2020-12."""

    # Class-level override of the dialect URI written into ``$schema``.
    schema_dialect = "https://json-schema.org/draft/2020-12/schema"

    def generate(
        self,
        schema: dict[str, Any],
        mode: JsonSchemaMode = "validation",
    ) -> dict[str, Any]:
        """Generate the base schema and record the dialect in ``$schema``.

        Args:
            schema: Core schema dict from Pydantic.
            mode: 'validation' or 'serialization'.

        Returns:
            JSON schema whose ``$schema`` key holds ``schema_dialect``.
        """
        base = super().generate(schema, mode=mode)
        base.update({"$schema": self.schema_dialect})
        return base
Adding Examples¶
Automatically add examples to all schemas:
class ExampleGenerator(GenerateJsonSchema):
    """Attach generated example data to schemas that have none."""

    def generate(
        self,
        schema: dict[str, Any],
        mode: JsonSchemaMode = "validation",
    ) -> dict[str, Any]:
        """Generate the base schema and fill in ``examples`` when absent.

        Args:
            schema: Core schema dict from Pydantic.
            mode: 'validation' or 'serialization'.

        Returns:
            JSON schema; an existing ``examples`` key is left untouched.
        """
        result = super().generate(schema, mode=mode)
        if "examples" in result:
            return result
        result["examples"] = self._generate_examples(result)
        return result

    def _generate_examples(self, schema: dict[str, Any]) -> list[dict[str, Any]]:
        """Build at most one example covering the required properties.

        Returns an empty list when the schema has no properties or none of
        them is listed under ``required``.
        """
        required = schema.get("required", [])
        sample = {
            key: self._example_value(sub_schema)
            for key, sub_schema in schema.get("properties", {}).items()
            if key in required
        }
        return [sample] if sample else []

    def _example_value(self, prop_schema: dict[str, Any]) -> Any:
        """Pick an example value from the property's ``type``.

        Scalar types prefer the property's own ``default``; containers are
        returned empty; any other type yields ``None``.
        """
        kind = prop_schema.get("type")

        # Scalars: (type name, fallback used when no default is declared).
        scalar_fallbacks = (
            ("string", "example"),
            ("integer", 42),
            ("number", 3.14),
            ("boolean", True),
        )
        for type_name, fallback in scalar_fallbacks:
            if kind == type_name:
                return prop_schema.get("default", fallback)

        if kind == "array":
            return []
        if kind == "object":
            return {}
        return None
Advanced Patterns¶
OpenAPI Extensions¶
Add OpenAPI-specific extensions:
class OpenAPIGenerator(GenerateJsonSchema):
    """Generate schemas with OpenAPI extensions."""

    def generate(
        self,
        schema: dict[str, Any],
        mode: JsonSchemaMode = "validation",
    ) -> dict[str, Any]:
        """Generate the base schema and add ``x-*`` OpenAPI extension keys.

        Args:
            schema: Core schema dict from Pydantic.
            mode: 'validation' or 'serialization'.

        Returns:
            JSON schema with ``x-tags`` and ``x-security``, plus
            ``x-response-code`` when ``mode`` is 'serialization'.
        """
        json_schema = super().generate(schema, mode=mode)

        # The helpers look up "title" and "properties", which are keys of
        # the generated JSON schema. Passing the core schema here would make
        # them always fall back to their defaults.
        json_schema["x-tags"] = self._extract_tags(json_schema)
        json_schema["x-security"] = self._determine_security(json_schema)

        # Add response codes for serialization mode
        if mode == "serialization":
            json_schema["x-response-code"] = 200
        return json_schema

    def _extract_tags(self, schema: dict[str, Any]) -> list[str]:
        """Return the lower-cased ``title`` as a tag, or ``["general"]``."""
        title = schema.get("title", "")
        return [title.lower()] if title else ["general"]

    def _determine_security(self, schema: dict[str, Any]) -> list[dict[str, Any]]:
        """Require bearer auth when a property name looks sensitive.

        A property counts as sensitive when its name contains "password",
        "token" or "secret".
        """
        sensitive_markers = ("password", "token", "secret")
        has_auth = any(
            marker in name
            for name in schema.get("properties", {})
            for marker in sensitive_markers
        )
        return [{"bearerAuth": []}] if has_auth else []
Field-Level Customization¶
Modify specific field schemas:
class FieldCustomizer(GenerateJsonSchema):
    """Customize specific field types."""

    def generate(
        self,
        schema: dict[str, Any],
        mode: JsonSchemaMode = "validation",
    ) -> dict[str, Any]:
        """Generate the base schema and post-process each top-level property.

        Args:
            schema: Core schema dict from Pydantic.
            mode: 'validation' or 'serialization'.

        Returns:
            JSON schema whose properties carry name-based hints.
        """
        json_schema = super().generate(schema, mode=mode)
        for field_name, field_schema in json_schema.get("properties", {}).items():
            self._customize_field(field_name, field_schema)
        return json_schema

    def _customize_field(self, name: str, field_schema: dict[str, Any]) -> None:
        """Add format and UI hints to one property schema, in place.

        Hints are chosen from substrings of the field name. Standard
        keywords (``format``, ``pattern``) are only filled in when absent,
        so a value already emitted for the field's real type (for example
        ``date-time``) is never overwritten by a name-based guess.
        """
        lowered = name.lower()

        # Add format hints for common field names
        if "email" in lowered:
            field_schema.setdefault("format", "email")
            field_schema["x-validation"] = "email"
        elif "url" in lowered or "link" in lowered:
            field_schema.setdefault("format", "uri")
            field_schema["x-validation"] = "url"
        elif "phone" in lowered:
            field_schema.setdefault("format", "phone")
            field_schema.setdefault("pattern", r"^\+?[1-9]\d{1,14}$")
        elif "date" in lowered and field_schema.get("type") == "string":
            field_schema.setdefault("format", "date")

        # Add UI hints
        if "password" in lowered:
            field_schema["x-ui"] = {"widget": "password"}
        elif "description" in lowered or "bio" in lowered:
            field_schema["x-ui"] = {"widget": "textarea"}
Conditional Schema Generation¶
Generate different schemas based on conditions:
import os
class ConditionalGenerator(GenerateJsonSchema):
    """Generate schemas with environment-specific features."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Read the target environment once, at construction time.

        ``ENVIRONMENT`` defaults to "production" when the variable is unset;
        debug info is enabled only for "development".
        """
        super().__init__(*args, **kwargs)
        self.environment = os.getenv("ENVIRONMENT", "production")
        self.include_debug_info = self.environment == "development"

    def generate(
        self,
        schema: dict[str, Any],
        mode: JsonSchemaMode = "validation",
    ) -> dict[str, Any]:
        """Generate the base schema and apply environment-specific changes.

        Args:
            schema: Core schema dict from Pydantic.
            mode: 'validation' or 'serialization'.

        Returns:
            JSON schema with ``x-debug`` in development, or tightened
            validation keywords in production.
        """
        json_schema = super().generate(schema, mode=mode)

        # Add debug information in development
        if self.include_debug_info:
            json_schema["x-debug"] = {
                "environment": self.environment,
                "mode": mode,
                "generated_at": self._get_timestamp(),
            }

        # Add stricter validation in production
        if self.environment == "production":
            self._add_strict_validation(json_schema)
        return json_schema

    def _get_timestamp(self) -> str:
        """Return the current UTC time as an ISO 8601 string with offset."""
        from datetime import datetime, timezone

        # datetime.utcnow() is deprecated and returns a naive value; an
        # aware timestamp keeps the "+00:00" offset in the output.
        return datetime.now(timezone.utc).isoformat()

    def _add_strict_validation(self, schema: dict[str, Any]) -> None:
        """Tighten the schema in place for production use.

        Forbids additional properties unless the schema already states a
        policy, and ensures every string property requires at least one
        character.
        """
        schema.setdefault("additionalProperties", False)

        for field_schema in schema.get("properties", {}).values():
            if field_schema.get("type") == "string":
                # Only ever raise the bound: a field that already demands a
                # longer minimum must not be relaxed down to 1.
                field_schema["minLength"] = max(
                    field_schema.get("minLength", 0), 1
                )
Version-Aware Schema Generation¶
Include version information in schemas:
class VersionedGenerator(GenerateJsonSchema):
    """Add version information to schemas."""

    def __init__(
        self,
        model_name: str | None = None,
        version: str | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Store the model identity and forward the rest to the parent.

        Args:
            model_name: Name written to ``x-model-name`` and into ``$id``.
            version: Version written to ``x-version`` and into ``$id``.
            *args: Positional arguments for ``GenerateJsonSchema``.
            **kwargs: Keyword arguments for ``GenerateJsonSchema``.
        """
        super().__init__(*args, **kwargs)
        self.model_name = model_name
        self.version = version

    def generate(
        self,
        schema: dict[str, Any],
        mode: JsonSchemaMode = "validation",
    ) -> dict[str, Any]:
        """Generate the base schema and add version metadata.

        Args:
            schema: Core schema dict from Pydantic.
            mode: 'validation' or 'serialization'.

        Returns:
            JSON schema with ``x-version`` when a version is set,
            ``x-model-name`` when a name is also set, and ``$id`` when both
            are set.
        """
        json_schema = super().generate(schema, mode=mode)

        # Add version metadata
        if self.version:
            json_schema["x-version"] = self.version
            # Skip the name rather than emitting a null value for it.
            if self.model_name:
                json_schema["x-model-name"] = self.model_name

        # Add $id with version
        if self.model_name and self.version:
            json_schema["$id"] = (
                f"https://api.example.com/schemas/{self.model_name}/v{self.version}"
            )
        return json_schema
# Usage with pyrmute
class VersionedSchemaConfig(SchemaConfig):
    """SchemaConfig that passes version info to generator."""

    def __init__(self, model_name: str, version: str, **kwargs: Any) -> None:
        """Remember the model identity and forward the rest to SchemaConfig.

        Args:
            model_name: Model name handed to ``VersionedGenerator``.
            version: Model version handed to ``VersionedGenerator``.
            **kwargs: Regular ``SchemaConfig`` options.
        """
        self.model_name = model_name
        self.version = version
        super().__init__(**kwargs)

    def to_kwargs(self) -> dict[str, Any]:
        """Return the parent's kwargs with a version-bound generator factory.

        Pydantic constructs the generator itself by calling
        ``schema_generator(by_alias=..., ref_template=...)``, so the value
        must be callable. A generator *instance* is not; a ``partial`` binds
        the version info and still accepts Pydantic's keyword arguments.
        NOTE(review): assumes pyrmute forwards this value unchanged to
        Pydantic - confirm against ``SchemaConfig.to_kwargs`` consumers.
        """
        from functools import partial

        kwargs = super().to_kwargs()
        if self.schema_generator is VersionedGenerator:
            kwargs["schema_generator"] = partial(
                VersionedGenerator,
                model_name=self.model_name,
                version=self.version,
            )
        return kwargs
Handling Definitions¶
Custom Definition Processing¶
Control how $defs are generated:
class DefinitionCustomizer(GenerateJsonSchema):
    """Post-process the entries stored under ``$defs``."""

    def generate(
        self,
        schema: dict[str, Any],
        mode: JsonSchemaMode = "validation",
    ) -> dict[str, Any]:
        """Generate the base schema and customize its definitions, if any.

        Args:
            schema: Core schema dict from Pydantic.
            mode: 'validation' or 'serialization'.

        Returns:
            JSON schema whose ``$defs`` entries were annotated in place.
        """
        result = super().generate(schema, mode=mode)
        if "$defs" in result:
            self._customize_definitions(result["$defs"])
        return result

    def _customize_definitions(self, defs: dict[str, Any]) -> None:
        """Tag each definition with its name and add a missing example."""
        for name, body in defs.items():
            body["x-definition-name"] = name

            # Only definitions with properties and no examples get one.
            needs_example = "properties" in body and "examples" not in body
            if needs_example:
                body["examples"] = [self._generate_example(body)]

    def _generate_example(self, schema: dict[str, Any]) -> dict[str, Any]:
        """Build an example with placeholders for string/integer/boolean.

        Properties of any other type are left out of the example.
        """
        placeholders = (
            ("string", "example"),
            ("integer", 0),
            ("boolean", True),
        )
        sample: dict[str, Any] = {}
        for key, sub_schema in schema.get("properties", {}).items():
            kind = sub_schema.get("type")
            for type_name, value in placeholders:
                if kind == type_name:
                    sample[key] = value
                    break
        return sample
Flattening Definitions¶
Inline all definitions instead of using $ref:
class FlatteningGenerator(GenerateJsonSchema):
    """Flatten ``$ref`` pointers into inline definitions.

    Self-referential definitions cannot be expanded finitely. Such
    references are left as ``$ref`` and the definitions they point to are
    kept under ``$defs`` so the result never contains a dangling pointer.
    """

    _DEFS_PREFIX = "#/$defs/"

    def generate(
        self,
        schema: dict[str, Any],
        mode: JsonSchemaMode = "validation",
    ) -> dict[str, Any]:
        """Generate the base schema and inline its local definitions.

        Args:
            schema: Core schema dict from Pydantic.
            mode: 'validation' or 'serialization'.

        Returns:
            JSON schema with ``$defs`` references replaced by copies of the
            definitions; ``$defs`` survives only for recursive references.
        """
        json_schema = super().generate(schema, mode=mode)

        defs = json_schema.pop("$defs", {})
        if defs:
            self._inline_refs(json_schema, defs)
            kept = self._definitions_still_referenced(json_schema, defs)
            if kept:
                json_schema["$defs"] = kept
        return json_schema

    def _inline_refs(
        self,
        schema: Any,
        defs: dict[str, Any],
        _expanding: tuple[str, ...] = (),
    ) -> None:
        """Recursively replace ``$ref`` with inline definitions, in place.

        Args:
            schema: Dict or list node to process; other values are ignored.
            defs: Definitions keyed by name (the former ``$defs``).
            _expanding: Names of the definitions currently being expanded
                on this path; a reference back to one of them is left alone
                to stop infinite recursion.
        """
        from copy import deepcopy

        if isinstance(schema, list):
            for item in schema:
                self._inline_refs(item, defs, _expanding)
            return
        if not isinstance(schema, dict):
            return

        ref = schema.get("$ref")
        # The isinstance check skips a *property* that happens to be named
        # "$ref", whose value is a schema dict rather than a pointer.
        if isinstance(ref, str) and ref.startswith(self._DEFS_PREFIX):
            def_name = ref[len(self._DEFS_PREFIX):]
            if def_name in defs and def_name not in _expanding:
                # Keywords written next to $ref (e.g. "description") are
                # kept and take precedence over the definition's own.
                siblings = {k: v for k, v in schema.items() if k != "$ref"}
                schema.clear()
                # Deep copy: every use site gets an independent definition.
                schema.update(deepcopy(defs[def_name]))
                schema.update(siblings)
                _expanding = (*_expanding, def_name)

        # Recursively process nested schemas
        for value in schema.values():
            self._inline_refs(value, defs, _expanding)

    def _definitions_still_referenced(
        self,
        schema: dict[str, Any],
        defs: dict[str, Any],
    ) -> dict[str, Any]:
        """Collect definitions that remaining (recursive) refs point to."""
        from copy import deepcopy

        kept: dict[str, Any] = {}
        queue = sorted(self._local_ref_names(schema))
        while queue:
            name = queue.pop(0)
            if name in kept or name not in defs:
                continue
            body = deepcopy(defs[name])
            # Inline everything inside the kept definition except itself.
            self._inline_refs(body, defs, (name,))
            kept[name] = body
            queue.extend(sorted(self._local_ref_names(body)))
        return kept

    def _local_ref_names(self, node: Any) -> set[str]:
        """Return the names of all ``#/$defs/...`` references under node."""
        names: set[str] = set()
        if isinstance(node, dict):
            ref = node.get("$ref")
            if isinstance(ref, str) and ref.startswith(self._DEFS_PREFIX):
                names.add(ref[len(self._DEFS_PREFIX):])
            for value in node.values():
                names |= self._local_ref_names(value)
        elif isinstance(node, list):
            for item in node:
                names |= self._local_ref_names(item)
        return names
Integration Patterns¶
Combining with Schema Transformers¶
Use generators for broad, global changes and transformers for model-specific customizations:
class BaseGenerator(GenerateJsonSchema):
    """Shared generator applying the changes every schema should get."""

    def generate(
        self,
        schema: dict[str, Any],
        mode: JsonSchemaMode = "validation",
    ) -> dict[str, Any]:
        """Generate the base schema and apply the global additions.

        Args:
            schema: Core schema dict from Pydantic.
            mode: 'validation' or 'serialization'.

        Returns:
            JSON schema with ``$schema`` and ``x-generated-by`` set.
        """
        result = super().generate(schema, mode=mode)

        # Global changes, applied in a fixed order.
        global_keys = (
            ("$schema", self.schema_dialect),
            ("x-generated-by", "pyrmute"),
        )
        for key, value in global_keys:
            result[key] = value
        return result
# Use with transformers for specific models
# BaseGenerator becomes the default for every schema the manager produces.
config = SchemaConfig(
    schema_generator=BaseGenerator,
)
manager = ModelManager(
    default_schema_config=config,
)


@manager.model("User", "1.0.0")
class UserV1(BaseModel):
    """User model that receives an extra, model-specific transformer."""

    name: str
@manager.schema_transformer("User", "1.0.0")
def add_user_specific_metadata(schema):
    """Add the User-only extension keys and hand the schema back."""
    schema.update(
        {
            "x-requires-auth": True,
            "x-rate-limit": 100,
        }
    )
    return schema
Per-Model Generators¶
Different generators for different models:
class PublicAPIGenerator(GenerateJsonSchema):
    """Generator for schemas exposed through the public API."""

    def generate(self, schema, mode="validation"):
        """Generate the base schema and mark it public and rate limited."""
        result = super().generate(schema, mode=mode)
        result.update(
            {
                "x-visibility": "public",
                "x-rate-limited": True,
            }
        )
        return result
class InternalAPIGenerator(GenerateJsonSchema):
    """Generator for schemas used only by internal services."""

    def generate(self, schema, mode="validation"):
        """Generate the base schema and mark it internal, not rate limited."""
        result = super().generate(schema, mode=mode)
        for key, value in (("x-visibility", "internal"), ("x-rate-limited", False)):
            result[key] = value
        return result
# Use different generators per call
# Same model and version, rendered once per audience by swapping the
# generator in a per-call config.
public_schema = manager.get_schema(
    "User", "1.0.0", config=SchemaConfig(schema_generator=PublicAPIGenerator)
)
internal_schema = manager.get_schema(
    "User", "1.0.0", config=SchemaConfig(schema_generator=InternalAPIGenerator)
)
Real-World Examples¶
Swagger/OpenAPI Generator¶
Complete OpenAPI-compatible schema generator:
class SwaggerGenerator(GenerateJsonSchema):
    """Produce schemas reshaped for OpenAPI 3.0 tooling."""

    schema_dialect = "http://json-schema.org/draft-07/schema#"

    def generate(
        self,
        schema: dict[str, Any],
        mode: JsonSchemaMode = "validation",
    ) -> dict[str, Any]:
        """Generate the base schema, then rewrite it into OpenAPI layout.

        Args:
            schema: Core schema dict from Pydantic.
            mode: 'validation' or 'serialization'.

        Returns:
            The converted JSON schema.
        """
        result = super().generate(schema, mode=mode)
        self._convert_to_openapi(result)
        return result

    def _convert_to_openapi(self, schema: dict[str, Any]) -> None:
        """Rewrite the schema in place into the OpenAPI 3.0 layout."""
        # "$schema" is dropped from the output.
        schema.pop("$schema", None)

        # Definitions move from $defs to components/schemas.
        if "$defs" in schema:
            schema["components"] = {"schemas": schema.pop("$defs")}

        # Pointers must follow the definitions to their new location.
        self._update_refs(schema)

        # OpenAPI-specific extension derived from the title.
        if "title" in schema:
            schema["x-swagger-router-model"] = schema["title"]

    def _update_refs(self, schema: dict[str, Any]) -> None:
        """Retarget ``#/$defs/`` pointers at ``#/components/schemas/``.

        Walks nested dicts and lists; any other value is ignored.
        """
        if isinstance(schema, list):
            for entry in schema:
                self._update_refs(entry)
            return
        if not isinstance(schema, dict):
            return

        if "$ref" in schema:
            pointer = schema["$ref"]
            # Convert #/$defs/Model to #/components/schemas/Model
            if pointer.startswith("#/$defs/"):
                schema["$ref"] = pointer.replace(
                    "#/$defs/", "#/components/schemas/"
                )

        for child in schema.values():
            self._update_refs(child)
Documentation Generator¶
Generate schemas optimized for documentation:
class DocumentationGenerator(GenerateJsonSchema):
    """Generate schemas optimized for documentation.

    After the parent builds the JSON schema, each top-level property gets a
    fallback description, the schema gets one example object, and every
    property's validation constraints are appended to its description.
    """

    # (JSON Schema keyword, human-readable label), in output order.
    _CONSTRAINT_LABELS = (
        ("minLength", "minimum length"),
        ("maxLength", "maximum length"),
        ("minimum", "minimum value"),
        ("maximum", "maximum value"),
        ("pattern", "pattern"),
    )

    def generate(
        self,
        schema: dict[str, Any],
        mode: JsonSchemaMode = "validation",
    ) -> dict[str, Any]:
        """Generate the base schema and enrich it for documentation.

        Args:
            schema: Core schema dict from Pydantic.
            mode: 'validation' or 'serialization'.

        Returns:
            JSON schema with descriptions, an example and constraint notes.
        """
        json_schema = super().generate(schema, mode=mode)

        # Order matters: constraint notes are appended to the descriptions
        # created by the first step.
        self._add_human_readable_descriptions(json_schema)
        self._add_examples(json_schema)
        self._add_constraints_descriptions(json_schema)
        return json_schema

    def _add_human_readable_descriptions(self, schema: dict[str, Any]) -> None:
        """Give undescribed properties a description derived from the name.

        ``first_name`` becomes "First Name"; existing descriptions are kept.
        """
        for field_name, field_schema in schema.get("properties", {}).items():
            field_schema.setdefault(
                "description", field_name.replace("_", " ").title()
            )

    def _add_examples(self, schema: dict[str, Any]) -> None:
        """Add one generated example unless the schema already has some."""
        if "properties" in schema and "examples" not in schema:
            schema["examples"] = [self._generate_realistic_example(schema)]

    def _generate_realistic_example(self, schema: dict[str, Any]) -> dict[str, Any]:
        """Build an example object from field names and declared types.

        A name-based hint is used only when the property's declared ``type``
        does not contradict it, so the example stays valid against the
        schema. The "age" hint matches whole underscore-separated words,
        which keeps names such as ``message`` or ``page`` from matching.
        Properties with no applicable hint and an unsupported type are
        omitted.
        """
        example: dict[str, Any] = {}
        for field_name, field_schema in schema.get("properties", {}).items():
            field_type = field_schema.get("type")
            lowered = field_name.lower()

            # A missing "type" (e.g. an anyOf for an optional field) does
            # not contradict a hint, so it is accepted for both families.
            text_ok = field_type in (None, "string")
            number_ok = field_type in (None, "integer", "number")

            if "email" in lowered and text_ok:
                example[field_name] = "user@example.com"
            elif "name" in lowered and text_ok:
                example[field_name] = "John Doe"
            elif "age" in lowered.split("_") and number_ok:
                example[field_name] = 30
            elif field_type == "string":
                example[field_name] = f"Example {field_name}"
            elif field_type == "integer":
                example[field_name] = 42
            elif field_type == "boolean":
                example[field_name] = True
        return example

    def _add_constraints_descriptions(self, schema: dict[str, Any]) -> None:
        """Append each property's validation constraints to its description.

        With an existing description the constraints follow in parentheses;
        otherwise the description becomes "Constraints: ...".
        """
        for field_schema in schema.get("properties", {}).values():
            constraints = [
                f"{label}: {field_schema[keyword]}"
                for keyword, label in self._CONSTRAINT_LABELS
                if keyword in field_schema
            ]
            if not constraints:
                continue

            constraint_text = ", ".join(constraints)
            current_desc = field_schema.get("description", "")
            field_schema["description"] = (
                f"{current_desc} ({constraint_text})"
                if current_desc
                else f"Constraints: {constraint_text}"
            )
Testing Custom Generators¶
Always test your custom generators:
def test_custom_generator():
    """Test custom generator produces expected output."""
    from pydantic import BaseModel, Field

    class TestModel(BaseModel):
        name: str = Field(description="User's name")
        age: int = Field(ge=0, le=150)

    # Create generator
    generator = CustomGenerator()

    # Generate schema. ``generate`` is the entry point that CustomGenerator
    # overrides; it takes the model's core schema and the mode.
    schema = generator.generate(
        TestModel.__pydantic_core_schema__,
        mode="validation",
    )

    # Verify custom fields
    assert "x-custom" in schema
    assert schema["x-custom"] == "metadata"

    # Verify standard fields still present
    assert "properties" in schema
    assert "name" in schema["properties"]
    assert "age" in schema["properties"]
def test_generator_with_manager():
    """Check that a manager-level config routes through CustomGenerator."""
    config = SchemaConfig(schema_generator=CustomGenerator)
    manager = ModelManager(default_schema_config=config)

    @manager.model("Test", "1.0.0")
    class TestV1(BaseModel):
        value: str

    generated = manager.get_schema("Test", "1.0.0")

    # The custom key is present and the regular field schema is intact.
    assert "x-custom" in generated
    value_schema = generated["properties"]["value"]
    assert value_schema["type"] == "string"
Best Practices¶
- Call super().generate() - Always call parent implementation first
- Don't mutate input - Work with the returned schema
- Handle all modes - Test both validation and serialization modes
- Document behavior - Explain what customizations you're adding
- Test thoroughly - Verify generated schemas are valid
- Consider performance - Generators run on every schema generation
- Use transformers for simple cases - Reserve generators for complex logic
Common Pitfalls¶
Not Calling Super¶
# ❌ BAD - Doesn't generate base schema
class BadGenerator(GenerateJsonSchema):
    """Anti-pattern: never calls the parent implementation."""

    def generate(self, schema, mode="validation"):
        # Missing all Pydantic fields!
        only_custom = {"x-custom": "only"}
        return only_custom
# ✅ GOOD - Builds on base schema
class GoodGenerator(GenerateJsonSchema):
    """Correct pattern: extend the schema built by the parent."""

    def generate(self, schema, mode="validation"):
        base = super().generate(schema, mode=mode)
        base.update({"x-custom": "added"})
        return base
Mutating Input¶
# ❌ BAD - Mutates input schema
class BadGenerator(GenerateJsonSchema):
    """Anti-pattern: writes into the input schema and returns it."""

    def generate(self, schema, mode="validation"):
        # Mutates input!
        schema.update({"x-custom": "value"})
        return schema
# ✅ GOOD - Works with returned schema
class GoodGenerator(GenerateJsonSchema):
    """Correct pattern: only touch the schema returned by the parent."""

    def generate(self, schema, mode="validation"):
        generated = super().generate(schema, mode=mode)
        generated.update({"x-custom": "value"})
        return generated
Ignoring Mode¶
# ❌ BAD - Ignores mode parameter
class BadGenerator(GenerateJsonSchema):
    """Anti-pattern: records a fixed mode instead of the one requested."""

    def generate(self, schema, mode="validation"):
        generated = super().generate(schema, mode=mode)
        # Wrong!
        generated.update({"x-mode": "always-validation"})
        return generated
# ✅ GOOD - Respects mode
class GoodGenerator(GenerateJsonSchema):
    """Correct pattern: record the mode that was actually requested."""

    def generate(self, schema, mode="validation"):
        generated = super().generate(schema, mode=mode)
        # Correct!
        generated.update({"x-mode": mode})
        return generated
Next Steps¶
Now that you understand custom schema generators, explore the related topics and API reference below.
Related topics:
- Schema Transformers - Simpler alternative for basic customization
- Schema Generation - Using custom generators
API Reference:
- SchemaConfig API - Complete SchemaConfig details