Skip to content

Commit a08eb36

Browse files
Ben-geopre-commit-ci[bot]adamamer20
authored
Feature/abstract data collector (#156)
* abstract * list removed * descriptions * fleshed out flush * ent * reset functionality * ent * removed register stats func * doc fixes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * more descriptive * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * resolve * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removed doc * add doc to flush * removed load_data * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * condtitional collect * trigger default is pass * added seed and fixed docs * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * adding pre-commit and ruff to dev dependencies * precommit * fix: uv.lock was outdated * spell check * periods * precommit * uv * suggested changes * sync --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Adam Amer <[email protected]>
1 parent ddd8031 commit a08eb36

File tree

2 files changed

+240
-30
lines changed

2 files changed

+240
-30
lines changed
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
"""
2+
Abstract base classes for data collection components in mesa-frames.
3+
4+
This module defines the core abstractions for data collection in mesa-frames.
5+
It provides a standardized interface for collecting model- and agent-level
6+
data during simulation runs, supporting flexible triggers, custom statistics,
7+
and optional external storage.
8+
9+
Classes:
10+
AbstractDataCollector:
11+
An abstract base class defining the structure and core logic for
12+
all data collector implementations. It supports flexible reporting
13+
of model and agent attributes, conditional data collection using
14+
triggers, and pluggable backends for storage.
15+
16+
These classes are designed to be subclassed by concrete implementations that
17+
handle the specifics of data collection and storage such as in-memory, CSV,
18+
or database-backed collectors, potentially using Polars for high-performance
19+
tabular operations.
20+
21+
Usage:
22+
These classes should not be instantiated directly. Instead, they should be
23+
subclassed to create a concrete DataCollector:
24+
25+
from mesa_frames.abstract.datacollector import AbstractDataCollector
26+
27+
class DataCollector(AbstractDataCollector):
28+
def _collect(self):
29+
# Implementation using Polars DataFrame to collect model and agent data
30+
...
31+
32+
def conditional_collect(self):
33+
# Implementation using Polars DataFrame to collect model and agent data if trigger returns True
34+
...
35+
36+
@property
def data(self):
37+
# Returns the data currently in memory
38+
...
39+
40+
def _flush(self):
41+
# Persists collected data to the configured storage backend
42+
...
43+
44+
For more detailed information on each class, refer to their individual docstrings.
45+
"""
46+
47+
from abc import ABC, abstractmethod
48+
from typing import Dict, Optional, Union, Any, Literal, List
49+
from collections.abc import Callable
50+
from mesa_frames import ModelDF
51+
import polars as pl
52+
53+
54+
class AbstractDataCollector(ABC):
    """
    Abstract base class for mesa-frames data collectors.

    The public entry points (`collect`, `conditional_collect`, `flush`) are
    implemented here and delegate to hooks that subclasses must provide:

    - `_collect()`: perform the actual data collection.
    - `_flush()`: persist the collected data to the storage backend.
    - `data` (property): return the data currently held in memory.
    """

    _model: ModelDF
    _model_reporters: dict[str, Callable]
    _agent_reporters: dict[str, str | Callable]
    _trigger: Callable[..., bool]
    # Annotation only: writing `_reset_memory = bool` would bind the *type*
    # `bool` as a class attribute instead of declaring the attribute's type.
    _reset_memory: bool
    _storage_uri: Literal["memory:", "csv:", "postgresql:"]
    _frames: list[pl.DataFrame]

    def __init__(
        self,
        model: ModelDF,
        model_reporters: dict[str, Callable] | None = None,
        agent_reporters: dict[str, str | Callable] | None = None,
        trigger: Callable[[Any], bool] | None = None,
        reset_memory: bool = True,
        storage: Literal["memory:", "csv:", "postgresql:"] = "memory:",
    ):
        """
        Initialize a data collector.

        Parameters
        ----------
        model : ModelDF
            The model object from which data is collected.
        model_reporters : dict[str, Callable] | None
            Functions to collect data at the model level. ``None`` is stored
            as an empty dict.
        agent_reporters : dict[str, str | Callable] | None
            Attributes or functions to collect data at the agent level.
            ``None`` is stored as an empty dict.
        trigger : Callable[[Any], bool] | None
            A function(model) -> bool that determines whether
            `conditional_collect` collects data. When omitted, the default
            trigger always returns False, so `conditional_collect` never
            collects.
        reset_memory : bool
            Whether to reset in-memory data after flushing. Default is True.
        storage : Literal["memory:", "csv:", "postgresql:"]
            Storage backend URI (e.g. 'memory:', 'csv:', 'postgresql:').
            A falsy value falls back to 'memory:'.
        """
        self._model = model
        self._model_reporters = model_reporters or {}
        self._agent_reporters = agent_reporters or {}
        self._trigger = trigger or (lambda model: False)
        self._reset_memory = reset_memory
        self._storage_uri = storage or "memory:"
        self._frames = []

    def collect(self) -> None:
        """
        Trigger data collection unconditionally.

        This method calls `_collect()` to perform the actual data collection.

        Examples
        --------
        >>> datacollector.collect()
        """
        self._collect()

    def conditional_collect(self) -> None:
        """
        Trigger data collection only if the trigger condition is met.

        This method calls `_collect()` to perform the actual data collection
        when `_should_collect()` returns a truthy value.

        Examples
        --------
        >>> datacollector.conditional_collect()
        """
        if self._should_collect():
            self._collect()

    def _should_collect(self) -> bool:
        """
        Evaluate whether data should be collected at the current step.

        Returns
        -------
        bool
            The result of calling the configured trigger with the model.
        """
        return self._trigger(self._model)

    @abstractmethod
    def _collect(self) -> None:
        """
        Perform the actual data collection logic.

        This method must be implemented by subclasses.
        """
        pass

    @property
    @abstractmethod
    def data(self) -> Any:
        """
        Return the collected data currently held in memory.

        This property must be implemented by subclasses.

        Examples
        --------
        >>> df = datacollector.data
        >>> print(df)
        """
        pass

    def flush(self) -> None:
        """
        Persist all collected data to the configured backend.

        Calls `_flush()` and then, if `reset_memory` is True (the default),
        clears the in-memory data buffer.

        Examples
        --------
        >>> datacollector.flush()
        >>> # Data is saved externally and in-memory buffers are cleared if configured
        """
        self._flush()
        if self._reset_memory:
            self._reset()

    def _reset(self) -> None:
        """
        Clear all collected data currently stored in memory.

        Use this to free memory or start fresh without affecting persisted data.
        """
        # Rebind to a new list rather than clearing in place, so a list
        # object handed out earlier is not emptied behind the caller's back.
        self._frames = []

    @abstractmethod
    def _flush(self) -> None:
        """
        Implement persistence of collected data to external storage.

        This method must be implemented by subclasses to handle
        backend-specific data saving operations.
        """
        pass

    @property
    def seed(self) -> int:
        """
        Return the seed stored on the model (``model._seed``).

        Examples
        --------
        >>> seed = datacollector.seed
        """
        return self._model._seed

0 commit comments

Comments
 (0)