Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
674 changes: 674 additions & 0 deletions src/schemas/PIPELINE_SCHEMAS.md

Large diffs are not rendered by default.

86 changes: 86 additions & 0 deletions src/schemas/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# ACE Pipeline Schemas

This directory contains standardized schemas for all ACE pipeline stages, ensuring consistent data formats across different implementations.

## Structure

- **[`PIPELINE_SCHEMAS.md`](PIPELINE_SCHEMAS.md)** - Complete documentation of input/output formats for each stage
- **Python Dataclasses** - Type-safe data structures for each stage:
  - [`experiment_schemas.py`](experiment_schemas.py) - Experiment and Domain (Stage 0)
  - [`metadata_schemas.py`](metadata_schemas.py) - Common metadata (PipelineMetadata)
  - [`area_schemas.py`](area_schemas.py) - Area generation (Stage 1)
  - [`capability_schemas.py`](capability_schemas.py) - Capability generation (Stage 2)
  - [`task_schemas.py`](task_schemas.py) - Task generation (Stage 3)
  - [`solution_schemas.py`](solution_schemas.py) - Solution generation (Stage 4)
  - [`validation_schemas.py`](validation_schemas.py) - Validation (Stage 5)
- **I/O Utilities** - Save and load functions:
  - [`io_utils.py`](io_utils.py) - Save/load functions for all 7 stage outputs

## Usage

### Using Python Dataclasses

```python
from src.schemas import (
Experiment,
Domain,
PipelineMetadata,
Area,
Capability,
Task,
TaskSolution,
ValidationResult,
)

# Create area
area = Area(
name="Cash Flow & Budget Management",
area_id="area_000",
description="Design and monitor budgets...",
domain="personal finance",
domain_id="domain_000",
# generation_metadata is optional
)

# Convert to dict for JSON serialization
data = area.to_dict()

# Load from dict
area = Area.from_dict(data)
```

### Using Save/Load Functions

```python
from pathlib import Path
from src.schemas import (
save_areas_output,
load_areas_output,
PipelineMetadata,
Area,
)

# Save areas
areas = [Area(...), Area(...)]
metadata = PipelineMetadata(
experiment_id="r0_10x10",
output_base_dir="agentic_outputs",
timestamp="2025-11-06T12:00:00Z",
output_stage_tag="_20251009_122040"
)
save_areas_output(areas, metadata, Path("output/areas.json"))

# Load areas
areas, metadata = load_areas_output(Path("output/areas.json"))
```

## Pipeline Stages

0. **Experiment Setup** → `Experiment`, `Domain`
1. **Area Generation** → `Area`
2. **Capability Generation** → `Capability`
3. **Task Generation** → `Task`
4. **Solution Generation** → `TaskSolution`
5. **Validation** → `ValidationResult`

See [`PIPELINE_SCHEMAS.md`](PIPELINE_SCHEMAS.md) for detailed specifications.
64 changes: 64 additions & 0 deletions src/schemas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Standardized schemas for ACE pipeline stages.

This module provides standardized data structures for all pipeline stages,
ensuring consistent input/output formats regardless of internal implementation.
"""

from src.schemas.area_schemas import Area
from src.schemas.capability_schemas import Capability
from src.schemas.experiment_schemas import Domain, Experiment
from src.schemas.io_utils import (
load_areas_output,
load_capabilities_output,
load_domain_output,
load_experiment_output,
load_solution_output,
load_tasks_output,
load_validation_output,
save_areas_output,
save_capabilities_output,
save_domain_output,
save_experiment_output,
save_solution_output,
save_tasks_output,
save_validation_output,
)
from src.schemas.metadata_schemas import PipelineMetadata
from src.schemas.solution_schemas import TaskSolution
from src.schemas.task_schemas import Task
from src.schemas.validation_schemas import ValidationResult


# Names exported by `from src.schemas import *`, grouped by the pipeline
# stage they belong to (schemas first, then the matching save/load helpers).
__all__ = [
    # Metadata (common to all stage outputs)
    "PipelineMetadata",
    # Experiment schemas (Stage 0)
    "Experiment",
    "Domain",
    # Area schemas (Stage 1)
    "Area",
    # Capability schemas (Stage 2)
    "Capability",
    # Task schemas (Stage 3)
    "Task",
    # Solution schemas (Stage 4)
    "TaskSolution",
    # Validation schemas (Stage 5)
    "ValidationResult",
    # I/O functions - Save (one per stage output)
    "save_experiment_output",
    "save_domain_output",
    "save_areas_output",
    "save_capabilities_output",
    "save_tasks_output",
    "save_solution_output",
    "save_validation_output",
    # I/O functions - Load (mirrors the save functions above)
    "load_experiment_output",
    "load_domain_output",
    "load_areas_output",
    "load_capabilities_output",
    "load_tasks_output",
    "load_solution_output",
    "load_validation_output",
]
42 changes: 42 additions & 0 deletions src/schemas/area_schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Schemas for area generation stage."""

from dataclasses import dataclass, field
from typing import Dict, Optional


@dataclass
class Area:
    """Represents a domain area.

    Attributes:
        name: Human-readable area name.
        area_id: Identifier of the area (e.g. ``"area_000"``).
        description: Optional free-text description; omitted from
            ``to_dict()`` output when ``None``.
        domain: Name of the domain this area belongs to.
        domain_id: Identifier of that domain.
        generation_metadata: Optional extra data recorded at generation
            time; omitted from ``to_dict()`` output when empty or ``None``.
    """

    name: str
    area_id: str
    description: Optional[str] = None
    domain: str = ""
    domain_id: str = ""
    generation_metadata: Optional[Dict] = field(default_factory=dict)

    def to_dict(self) -> Dict:
        """Convert to dictionary.

        Returns:
            A new dict with ``name``, ``area_id``, ``domain`` and
            ``domain_id`` always present, plus ``description`` and
            ``generation_metadata`` only when they carry a value.
        """
        result = {
            "name": self.name,
            "area_id": self.area_id,
            "domain": self.domain,
            "domain_id": self.domain_id,
        }
        if self.description is not None:
            result["description"] = self.description
        if self.generation_metadata:
            metadata = self.generation_metadata
            # Shallow-copy so edits to the returned payload cannot leak back
            # into this instance's metadata.
            result["generation_metadata"] = (
                dict(metadata) if isinstance(metadata, dict) else metadata
            )
        return result

    @classmethod
    def from_dict(cls, data: dict) -> "Area":
        """Create from dictionary.

        Args:
            data: Mapping with required keys ``name`` and ``area_id``; the
                remaining keys are optional and fall back to the field
                defaults.

        Returns:
            A new ``Area`` instance.

        Raises:
            KeyError: If ``name`` or ``area_id`` is missing from ``data``.
        """
        metadata = data.get("generation_metadata", {})
        if isinstance(metadata, dict):
            # Shallow-copy so the instance does not share (and later mutate)
            # the caller's dict.
            metadata = dict(metadata)
        return cls(
            name=data["name"],
            area_id=data["area_id"],
            description=data.get("description"),
            domain=data.get("domain", ""),
            domain_id=data.get("domain_id", ""),
            generation_metadata=metadata,
        )
48 changes: 48 additions & 0 deletions src/schemas/capability_schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Schemas for capability generation stage."""

from dataclasses import dataclass, field
from typing import Dict, Optional


@dataclass
class Capability:
    """Represents a capability within an area.

    Attributes:
        name: Human-readable capability name.
        capability_id: Identifier of the capability.
        description: Optional free-text description; omitted from
            ``to_dict()`` output when ``None``.
        area: Name of the area this capability belongs to.
        area_id: Identifier of that area.
        domain: Name of the domain this capability belongs to.
        domain_id: Identifier of that domain.
        generation_metadata: Optional extra data recorded at generation
            time; omitted from ``to_dict()`` output when empty or ``None``.
    """

    name: str
    capability_id: str
    description: Optional[str] = None
    area: str = ""
    area_id: str = ""
    domain: str = ""
    domain_id: str = ""
    generation_metadata: Optional[Dict] = field(default_factory=dict)

    def to_dict(self) -> Dict:
        """Convert to dictionary.

        Returns:
            A new dict with the name, the identifiers and the area/domain
            fields always present, plus ``description`` and
            ``generation_metadata`` only when they carry a value.
        """
        result = {
            "name": self.name,
            "capability_id": self.capability_id,
            "area": self.area,
            "area_id": self.area_id,
            "domain": self.domain,
            "domain_id": self.domain_id,
        }
        if self.description is not None:
            result["description"] = self.description
        if self.generation_metadata:
            metadata = self.generation_metadata
            # Shallow-copy so edits to the returned payload cannot leak back
            # into this instance's metadata.
            result["generation_metadata"] = (
                dict(metadata) if isinstance(metadata, dict) else metadata
            )
        return result

    @classmethod
    def from_dict(cls, data: dict) -> "Capability":
        """Create from dictionary.

        Args:
            data: Mapping with required keys ``name`` and ``capability_id``;
                the remaining keys are optional and fall back to the field
                defaults.

        Returns:
            A new ``Capability`` instance.

        Raises:
            KeyError: If ``name`` or ``capability_id`` is missing from
                ``data``.
        """
        metadata = data.get("generation_metadata", {})
        if isinstance(metadata, dict):
            # Shallow-copy so the instance does not share (and later mutate)
            # the caller's dict.
            metadata = dict(metadata)
        return cls(
            name=data["name"],
            capability_id=data["capability_id"],
            description=data.get("description"),
            area=data.get("area", ""),
            area_id=data.get("area_id", ""),
            domain=data.get("domain", ""),
            domain_id=data.get("domain_id", ""),
            generation_metadata=metadata,
        )
71 changes: 71 additions & 0 deletions src/schemas/experiment_schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""Schemas for experiment setup stage (Stage 0)."""

from dataclasses import dataclass, field
from typing import Any, Dict, Optional


@dataclass
class Experiment:
    """Represents experiment metadata and configuration.

    Attributes:
        experiment_id: Identifier of the experiment.
        domain: Name of the domain the experiment targets.
        domain_id: Identifier of that domain.
        pipeline_type: Optional pipeline type label; omitted from
            ``to_dict()`` output when ``None``.
        configuration: Free-form configuration mapping. Defaults to a fresh
            empty dict per instance; an explicit ``None`` is also normalised
            to ``{}``.
    """

    experiment_id: str
    domain: str
    domain_id: str
    pipeline_type: Optional[str] = None
    # default_factory keeps the annotation truthful (never None after init)
    # and gives each instance its own dict.
    configuration: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Normalise an explicitly passed ``configuration=None`` to ``{}``."""
        if self.configuration is None:
            self.configuration = {}

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary.

        Returns:
            A new dict with ``experiment_id``, ``domain``, ``domain_id`` and
            ``configuration`` always present, plus ``pipeline_type`` only
            when it is not ``None``.
        """
        configuration = self.configuration
        result = {
            "experiment_id": self.experiment_id,
            "domain": self.domain,
            "domain_id": self.domain_id,
            # Shallow-copy so edits to the returned payload cannot leak back
            # into this instance's configuration.
            "configuration": (
                dict(configuration)
                if isinstance(configuration, dict)
                else configuration
            ),
        }
        if self.pipeline_type is not None:
            result["pipeline_type"] = self.pipeline_type
        return result

    @classmethod
    def from_dict(cls, data: dict) -> "Experiment":
        """Create from dictionary.

        Args:
            data: Mapping with required keys ``experiment_id``, ``domain``
                and ``domain_id``; ``pipeline_type`` and ``configuration``
                are optional.

        Returns:
            A new ``Experiment`` instance.

        Raises:
            KeyError: If a required key is missing from ``data``.
        """
        configuration = data.get("configuration", {})
        if isinstance(configuration, dict):
            # Shallow-copy so the instance does not share (and later mutate)
            # the caller's dict.
            configuration = dict(configuration)
        return cls(
            experiment_id=data["experiment_id"],
            domain=data["domain"],
            domain_id=data["domain_id"],
            pipeline_type=data.get("pipeline_type"),
            configuration=configuration,
        )


@dataclass
class Domain:
    """A domain, identified by a name and an id, with optional description."""

    name: str
    domain_id: str
    description: Optional[str] = None

    def to_dict(self):
        """Serialize to a plain dict, leaving out ``description`` when unset."""
        payload = {"name": self.name, "domain_id": self.domain_id}
        if self.description is None:
            return payload
        payload["description"] = self.description
        return payload

    @classmethod
    def from_dict(cls, data: dict):
        """Build a ``Domain`` from a dict; ``name`` and ``domain_id`` are required."""
        # Required keys are read first, in field order, so a missing key
        # raises KeyError before anything else is looked up.
        name = data["name"]
        domain_id = data["domain_id"]
        description = data.get("description")
        return cls(name=name, domain_id=domain_id, description=description)
Loading
Loading