Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
665 changes: 665 additions & 0 deletions src/schemas/PIPELINE_SCHEMAS.md

Large diffs are not rendered by default.

87 changes: 87 additions & 0 deletions src/schemas/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# ACE Pipeline Schemas

This directory contains standardized schemas for all ACE pipeline stages, ensuring consistent data formats across different implementations.

## Structure

- **[`PIPELINE_SCHEMAS.md`](PIPELINE_SCHEMAS.md)** - Complete documentation of input/output formats for each stage
- **Python Dataclasses** - Type-safe data structures for each stage:
- [`experiment_schemas.py`](experiment_schemas.py) - Experiment (Stage 0)
- [`domain_schemas.py`](domain_schemas.py) - Domain (Stage 0)
- [`metadata_schemas.py`](metadata_schemas.py) - Common metadata (PipelineMetadata)
- [`area_schemas.py`](area_schemas.py) - Area generation (Stage 1)
- [`capability_schemas.py`](capability_schemas.py) - Capability generation (Stage 2)
- [`task_schemas.py`](task_schemas.py) - Task generation (Stage 3)
- [`solution_schemas.py`](solution_schemas.py) - Solution generation (Stage 4)
- [`validation_schemas.py`](validation_schemas.py) - Validation (Stage 5)
- **I/O Utilities** - Save and load functions:
- [`io_utils.py`](io_utils.py) - Save/load functions for all 7 stage outputs (experiment, domain, areas, capabilities, tasks, solution, validation)

## Usage

### Using Python Dataclasses

```python
from src.schemas import (
Domain,
Experiment,
PipelineMetadata,
Area,
Capability,
Task,
TaskSolution,
ValidationResult,
)

# Create a domain, then an area that belongs to it
domain = Domain(name="Personal Finance", domain_id="domain_000")
area = Area(
name="Cash Flow & Budget Management",
area_id="area_000",
description="Design and monitor budgets...",
domain=domain,
# generation_metadata is optional
)

# Convert to dict for JSON serialization
data = area.to_dict()

# Load from dict
area = Area.from_dict(data)
```

### Using Save/Load Functions

```python
from pathlib import Path
from src.schemas import (
save_areas,
load_areas,
PipelineMetadata,
Area,
)

# Save areas
areas = [Area(...), Area(...)]
metadata = PipelineMetadata(
experiment_id="r0_10x10",
output_base_dir="agentic_outputs",
timestamp="2025-11-06T12:00:00Z",
output_stage_tag="_20251009_122040"
)
save_areas(areas, metadata, Path("output/areas.json"))

# Load areas
areas, metadata = load_areas(Path("output/areas.json"))
```

## Pipeline Stages

0. **Experiment Setup** → `Experiment`, `Domain`
1. **Area Generation** → `Area`
2. **Capability Generation** → `Capability`
3. **Task Generation** → `Task`
4. **Solution Generation** → `TaskSolution`
5. **Validation** → `ValidationResult`

See [`PIPELINE_SCHEMAS.md`](PIPELINE_SCHEMAS.md) for detailed specifications.
65 changes: 65 additions & 0 deletions src/schemas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Standardized schemas for ACE pipeline stages.

This module provides standardized data structures for all pipeline stages,
ensuring consistent input/output formats regardless of internal implementation.
"""

from src.schemas.area_schemas import Area
from src.schemas.capability_schemas import Capability
from src.schemas.domain_schemas import Domain
from src.schemas.experiment_schemas import Experiment
from src.schemas.io_utils import (
load_areas,
load_capabilities,
load_domain,
load_experiment,
load_solution,
load_tasks,
load_validation,
save_areas,
save_capabilities,
save_domain,
save_experiment,
save_solution,
save_tasks,
save_validation,
)
from src.schemas.metadata_schemas import PipelineMetadata
from src.schemas.solution_schemas import TaskSolution
from src.schemas.task_schemas import Task
from src.schemas.validation_schemas import ValidationResult


# Public API of the schemas package: every name listed here is imported at
# the top of this module and re-exported for `from src.schemas import ...`.
__all__ = [
# Common pipeline metadata
"PipelineMetadata",
# Experiment and domain schemas (Stage 0)
"Experiment",
"Domain",
# Area schemas (Stage 1)
"Area",
# Capability schemas (Stage 2)
"Capability",
# Task schemas (Stage 3)
"Task",
# Solution schemas (Stage 4)
"TaskSolution",
# Validation schemas (Stage 5)
"ValidationResult",
# I/O functions - Save (one per stage output)
"save_experiment",
"save_domain",
"save_areas",
"save_capabilities",
"save_tasks",
"save_solution",
"save_validation",
# I/O functions - Load (counterparts of the save functions above)
"load_experiment",
"load_domain",
"load_areas",
"load_capabilities",
"load_tasks",
"load_solution",
"load_validation",
]
54 changes: 54 additions & 0 deletions src/schemas/area_schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Schemas for area generation stage (Stage 1).

Defines Area dataclass for domain area. Areas are high-level categories
within a domain (e.g., "Budgeting" within "Personal Finance").
"""

from dataclasses import dataclass, field
from typing import Dict, Optional

from src.schemas.domain_schemas import Domain


@dataclass
class Area:
    """A high-level area inside a domain (output of Stage 1).

    ``name`` and ``area_id`` are required. ``description`` and the parent
    ``domain`` are optional, and ``generation_metadata`` defaults to a
    fresh empty dict per instance.
    """

    name: str
    area_id: str
    description: Optional[str] = None
    domain: Optional[Domain] = None
    generation_metadata: Optional[Dict] = field(default_factory=dict)

    def to_dict(self):
        """Serialize the area to a plain dictionary.

        The parent domain is flattened into ``domain`` (its name) and
        ``domain_id``. Unset or empty optional fields are left out.
        """
        payload = {"name": self.name, "area_id": self.area_id}
        parent = self.domain
        if parent is not None:
            payload.update(domain=parent.name, domain_id=parent.domain_id)
        if self.description is not None:
            payload["description"] = self.description
        # Empty (or None) metadata is omitted from the output entirely.
        if self.generation_metadata:
            payload["generation_metadata"] = self.generation_metadata
        return payload

    @classmethod
    def from_dict(cls, data: dict):
        """Rebuild an ``Area`` from the dictionary form.

        A parent ``Domain`` is reconstructed only when both ``domain`` and
        ``domain_id`` are present. The flattened form carries no domain
        description, so it is restored as ``None``. Missing ``name`` or
        ``area_id`` raises ``KeyError``.
        """
        has_domain = "domain" in data and "domain_id" in data
        parent = (
            Domain(
                name=data["domain"],
                domain_id=data["domain_id"],
                description=None,
            )
            if has_domain
            else None
        )
        return cls(
            name=data["name"],
            area_id=data["area_id"],
            description=data.get("description"),
            domain=parent,
            generation_metadata=data.get("generation_metadata", {}),
        )
66 changes: 66 additions & 0 deletions src/schemas/capability_schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Schemas for capability generation stage (Stage 2).

Defines Capability dataclass for capability within an area. Capabilities
are specific skills or abilities (e.g., "Budget Creation" within "Budgeting" area).
"""

from dataclasses import dataclass, field
from typing import Dict, Optional

from src.schemas.area_schemas import Area
from src.schemas.domain_schemas import Domain


@dataclass
class Capability:
    """A specific capability within an area (output of Stage 2).

    ``name`` and ``capability_id`` are required. ``description`` and the
    parent ``area`` are optional, and ``generation_metadata`` defaults to a
    fresh empty dict per instance.
    """

    name: str
    capability_id: str
    description: Optional[str] = None
    area: Optional[Area] = None
    generation_metadata: Optional[Dict] = field(default_factory=dict)

    def to_dict(self):
        """Serialize the capability to a plain dictionary.

        The parent area is flattened into ``area`` / ``area_id``, and the
        area's own domain (if any) into ``domain`` / ``domain_id``. Unset
        or empty optional fields are left out.
        """
        payload = {"name": self.name, "capability_id": self.capability_id}
        parent_area = self.area
        if parent_area is not None:
            payload["area"] = parent_area.name
            payload["area_id"] = parent_area.area_id
            parent_domain = parent_area.domain
            if parent_domain is not None:
                payload["domain"] = parent_domain.name
                payload["domain_id"] = parent_domain.domain_id
        if self.description is not None:
            payload["description"] = self.description
        # Empty (or None) metadata is omitted from the output entirely.
        if self.generation_metadata:
            payload["generation_metadata"] = self.generation_metadata
        return payload

    @classmethod
    def from_dict(cls, data: dict):
        """Rebuild a ``Capability`` from the dictionary form.

        The parent ``Area`` is reconstructed only when both ``area`` and
        ``area_id`` are present. Domain keys are consulted only in that
        case, since the domain hangs off the area. Descriptions of the
        area and domain are not part of the flattened form and come back
        as ``None``. Missing ``name`` or ``capability_id`` raises
        ``KeyError``.
        """
        parent_area = None
        if "area" in data and "area_id" in data:
            parent_domain = (
                Domain(
                    name=data["domain"],
                    domain_id=data["domain_id"],
                    description=None,
                )
                if "domain" in data and "domain_id" in data
                else None
            )
            parent_area = Area(
                name=data["area"],
                area_id=data["area_id"],
                description=None,
                domain=parent_domain,
            )
        return cls(
            name=data["name"],
            capability_id=data["capability_id"],
            description=data.get("description"),
            area=parent_area,
            generation_metadata=data.get("generation_metadata", {}),
        )
35 changes: 35 additions & 0 deletions src/schemas/domain_schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Schemas for domain (Stage 0).

Defines Domain dataclass for domain.
"""

from dataclasses import dataclass
from typing import Optional


@dataclass
class Domain:
    """A subject domain, identified by a name and a domain ID (Stage 0)."""

    name: str
    domain_id: str
    description: Optional[str] = None

    def to_dict(self):
        """Serialize to a plain dict, leaving out ``description`` when unset."""
        payload = {"name": self.name, "domain_id": self.domain_id}
        if self.description is None:
            return payload
        payload["description"] = self.description
        return payload

    @classmethod
    def from_dict(cls, data: dict):
        """Build a ``Domain`` from its dictionary form.

        ``name`` and ``domain_id`` are required (``KeyError`` if missing).
        ``description`` is optional and defaults to ``None``.
        """
        kwargs = {
            "name": data["name"],
            "domain_id": data["domain_id"],
            "description": data.get("description"),
        }
        return cls(**kwargs)
41 changes: 41 additions & 0 deletions src/schemas/experiment_schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Schemas for experiment setup stage (Stage 0).

Defines Experiment dataclass containing experiment configuration and metadata.
"""

from dataclasses import dataclass, field
from typing import Any, Dict, Optional


@dataclass
class Experiment:
    """Experiment metadata and configuration (Stage 0).

    Attributes are documented inline below. ``configuration`` always holds
    a dict when the instance is built through ``from_dict``.
    """

    # Identifier of the experiment.
    experiment_id: str
    # Domain the experiment targets, stored as plain strings
    # (presumably the name and ID of a ``Domain`` -- confirm with callers).
    domain: str
    domain_id: str
    # Optional pipeline type; omitted from ``to_dict`` output when None.
    pipeline_type: Optional[str] = None
    # Free-form experiment configuration; fresh empty dict per instance.
    configuration: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the experiment to a plain dictionary.

        Returns:
            A dict with ``experiment_id``, ``domain``, ``domain_id`` and
            ``configuration``; ``pipeline_type`` is included only when it
            is not ``None``.
        """
        result = {
            "experiment_id": self.experiment_id,
            "domain": self.domain,
            "domain_id": self.domain_id,
            "configuration": self.configuration,
        }
        if self.pipeline_type is not None:
            result["pipeline_type"] = self.pipeline_type
        return result

    @classmethod
    def from_dict(cls, data: dict) -> "Experiment":
        """Build an ``Experiment`` from its dictionary form.

        Args:
            data: Mapping with required keys ``experiment_id``, ``domain``
                and ``domain_id``, and optional keys ``pipeline_type`` and
                ``configuration``.

        Returns:
            The reconstructed ``Experiment``.

        Raises:
            KeyError: If a required key is missing.
        """
        configuration = data.get("configuration")
        # A payload may carry an explicit null configuration; normalise it
        # to an empty dict so the field honours its Dict[str, Any] type.
        if configuration is None:
            configuration = {}
        return cls(
            experiment_id=data["experiment_id"],
            domain=data["domain"],
            domain_id=data["domain_id"],
            pipeline_type=data.get("pipeline_type"),
            configuration=configuration,
        )
Loading
Loading