Skip to content

Commit 40a8f51

Browse files
authored
Merge pull request #33 from TogetherCrew/feat/32-rename-summarizer-codes
feat: rename summarizer to be platform agnostic!
2 parents 4d07bb1 + 4017d31 commit 40a8f51

File tree

6 files changed

+53
-53
lines changed

6 files changed

+53
-53
lines changed

README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ This repository contains TogetherCrew's Temporal Python workflows for data proce
1111

1212
### Hivemind Summarizer
1313

14-
- **Telegram Summaries**: Retrieves and processes summaries from Telegram data stored in Qdrant, with options to fetch by date or date range.
14+
- **Platform Summaries**: Retrieves and processes summaries from Platform data stored in Qdrant, with options to fetch by date or date range.
1515

1616
## Architecture
1717

@@ -49,20 +49,20 @@ The project uses Temporal for workflow orchestration with the following componen
4949

5050
## Usage Examples
5151

52-
### Running a Telegram Summary Workflow
52+
### Running a Platform Summary Workflow
5353

5454
To fetch summaries for a specific community and date range:
5555

5656
```python
5757
from temporalio.client import Client
58-
from hivemind_summarizer.workflows import TelegramSummariesWorkflow
59-
from hivemind_summarizer.schema import TelegramFetchSummariesWorkflowInput
58+
from hivemind_summarizer.workflows import PlatformSummariesWorkflow
59+
from hivemind_summarizer.schema import PlatformFetchSummariesWorkflowInput
6060

61-
async def run_telegram_workflow():
61+
async def run_okatfirn_workflow():
6262
client = await Client.connect("localhost:7233")
6363

6464
# Create workflow input
65-
input_data = TelegramFetchSummariesWorkflowInput(
65+
input_data = PlatformFetchSummariesWorkflowInput(
6666
platform_id="your_platform_id",
6767
community_id="your_community_id",
6868
start_date="2023-05-01",
@@ -72,9 +72,9 @@ async def run_telegram_workflow():
7272

7373
# Execute workflow
7474
result = await client.execute_workflow(
75-
TelegramSummariesWorkflow.run,
75+
PlatformSummariesWorkflow.run,
7676
input_data,
77-
id="telegram-summaries-workflow",
77+
id="platform-summaries-workflow",
7878
task_queue="your_task_queue"
7979
)
8080

hivemind_summarizer/activities.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414

1515
with workflow.unsafe.imports_passed_through():
1616
from hivemind_summarizer.schema import (
17-
TelegramSummariesActivityInput,
18-
TelegramSummariesRangeActivityInput,
19-
TelegramGetCollectionNameInput,
17+
PlatformSummariesActivityInput,
18+
PlatformSummariesRangeActivityInput,
19+
PlatformGetCollectionNameInput,
2020
)
2121

2222

@@ -42,13 +42,13 @@ def extract_summary_text(node_content: dict[str, Any]) -> str:
4242

4343

4444
@activity.defn
45-
async def get_platform_name(input: TelegramGetCollectionNameInput) -> str:
45+
async def get_platform_name(input: PlatformGetCollectionNameInput) -> str:
4646
"""
4747
Activity that extracts collection name from MongoDB based on platform_id and community_id.
4848
4949
Parameters
5050
----------
51-
input: TelegramGetCollectionNameInput
51+
input: PlatformGetCollectionNameInput
5252
Input object containing platform_id and community_id
5353
5454
Returns
@@ -93,15 +93,15 @@ async def get_platform_name(input: TelegramGetCollectionNameInput) -> str:
9393

9494

9595
@activity.defn
96-
async def fetch_telegram_summaries_by_date(
97-
input: TelegramSummariesActivityInput,
96+
async def fetch_platform_summaries_by_date(
97+
input: PlatformSummariesActivityInput,
9898
) -> list[dict[str, Any]] | str:
9999
"""
100-
Activity that fetches Telegram summaries for a specific date from Qdrant.
100+
Activity that fetches Platform summaries for a specific date from Qdrant.
101101
102102
Parameters
103103
----------
104-
input : TelegramSummariesActivityInput
104+
input : PlatformSummariesActivityInput
105105
Input object containing date, collection_name and extract_text_only
106106
107107
Returns
@@ -114,7 +114,7 @@ async def fetch_telegram_summaries_by_date(
114114
collection_name = f"{input.community_id}_{input.platform_name}_summary"
115115
community_id = input.community_id
116116

117-
logging.info("Started fetch_telegram_summaries_by_date!")
117+
logging.info("Started fetch_platform_summaries_by_date!")
118118

119119
if not input.platform_name:
120120
raise ValueError("Platform name is required but was not provided")
@@ -207,15 +207,15 @@ async def fetch_telegram_summaries_by_date(
207207

208208

209209
@activity.defn
210-
async def fetch_telegram_summaries_by_date_range(
211-
input: TelegramSummariesRangeActivityInput,
210+
async def fetch_platform_summaries_by_date_range(
211+
input: PlatformSummariesRangeActivityInput,
212212
) -> dict[str, list[dict[str, Any] | str]]:
213213
"""
214-
Activity that fetches Telegram summaries for a range of dates from Qdrant.
214+
Activity that fetches summaries for a range of dates from Qdrant.
215215
216216
Parameters
217217
----------
218-
input : TelegramSummariesRangeActivityInput
218+
input : PlatformSummariesRangeActivityInput
219219
Input object containing start_date, end_date, platform_name and community_id
220220
221221
Returns
@@ -259,13 +259,13 @@ async def fetch_telegram_summaries_by_date_range(
259259
# Fetch summaries for each date
260260
result = {}
261261
for date in date_range:
262-
date_input = TelegramSummariesActivityInput(
262+
date_input = PlatformSummariesActivityInput(
263263
date=date,
264264
extract_text_only=extract_text_only,
265265
platform_name=input.platform_name,
266266
community_id=community_id,
267267
)
268-
summaries = await fetch_telegram_summaries_by_date(date_input)
268+
summaries = await fetch_platform_summaries_by_date(date_input)
269269
result[date] = summaries
270270

271271
return result

hivemind_summarizer/schema.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,27 @@
11
from pydantic import BaseModel
22

33

4-
class TelegramSummariesActivityInput(BaseModel):
4+
class PlatformSummariesActivityInput(BaseModel):
55
date: str | None = None
66
extract_text_only: bool = True
77
platform_name: str | None = None
88
community_id: str | None = None
99

1010

11-
class TelegramSummariesRangeActivityInput(BaseModel):
11+
class PlatformSummariesRangeActivityInput(BaseModel):
1212
start_date: str
1313
end_date: str
1414
extract_text_only: bool = True
1515
platform_name: str | None = None
1616
community_id: str | None = None
1717

1818

19-
class TelegramGetCollectionNameInput(BaseModel):
19+
class PlatformGetCollectionNameInput(BaseModel):
2020
platform_id: str
2121
community_id: str
2222

2323

24-
class TelegramFetchSummariesWorkflowInput(BaseModel):
24+
class PlatformFetchSummariesWorkflowInput(BaseModel):
2525
platform_id: str
2626
community_id: str
2727
start_date: str | None = None

hivemind_summarizer/workflows.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,42 +7,42 @@
77

88
with workflow.unsafe.imports_passed_through():
99
from .activities import (
10-
fetch_telegram_summaries_by_date,
11-
fetch_telegram_summaries_by_date_range,
10+
fetch_platform_summaries_by_date,
11+
fetch_platform_summaries_by_date_range,
1212
get_platform_name,
1313
)
1414
from .schema import (
15-
TelegramSummariesActivityInput,
16-
TelegramSummariesRangeActivityInput,
17-
TelegramGetCollectionNameInput,
18-
TelegramFetchSummariesWorkflowInput,
15+
PlatformSummariesActivityInput,
16+
PlatformSummariesRangeActivityInput,
17+
PlatformGetCollectionNameInput,
18+
PlatformFetchSummariesWorkflowInput,
1919
)
2020

2121

2222
@workflow.defn
23-
class TelegramSummariesWorkflow:
23+
class PlatformSummariesWorkflow:
2424
"""
25-
A Temporal workflow that fetches Telegram summaries for a specified date.
25+
A Temporal workflow that fetches summaries for a specified date.
2626
"""
2727

2828
@workflow.run
2929
async def run(
30-
self, input: TelegramFetchSummariesWorkflowInput
30+
self, input: PlatformFetchSummariesWorkflowInput
3131
) -> list[dict[str, Any]]:
3232
"""
33-
Run the workflow to fetch Telegram summaries for the specified date.
33+
Run the workflow to fetch summaries for the specified date.
3434
3535
Parameters
3636
----------
37-
input : TelegramFetchSummariesWorkflowInput
38-
Input containing platform_id, community_id, start_date, end_date, extract_text_only and collection_name
37+
input : PlatformFetchSummariesWorkflowInput
38+
Input containing platform_id, community_id, start_date, end_date, extract_text_only and platform_name
3939
4040
Returns
4141
-------
4242
list[dict[str, Any]]
4343
A list of summary objects for the specified date
4444
"""
45-
logging.info("Started TelegramSummariesWorkflow!")
45+
logging.info("Started PlatformSummariesWorkflow!")
4646
logging.info(
4747
(
4848
f" Platform ID: {input.platform_id}. "
@@ -56,7 +56,7 @@ async def run(
5656
# First, get the collection name
5757
platform_name = await workflow.execute_activity(
5858
get_platform_name,
59-
TelegramGetCollectionNameInput(
59+
PlatformGetCollectionNameInput(
6060
platform_id=input.platform_id, community_id=input.community_id
6161
),
6262
schedule_to_close_timeout=timedelta(minutes=1),
@@ -67,8 +67,8 @@ async def run(
6767
if input.end_date is None:
6868
logging.info("Getting summaries by date!")
6969
summaries = await workflow.execute_activity(
70-
fetch_telegram_summaries_by_date,
71-
TelegramSummariesActivityInput(
70+
fetch_platform_summaries_by_date,
71+
PlatformSummariesActivityInput(
7272
date=input.start_date,
7373
platform_name=platform_name,
7474
community_id=input.community_id,
@@ -81,8 +81,8 @@ async def run(
8181
else:
8282
logging.info("Getting summaries by date range!")
8383
summaries = await workflow.execute_activity(
84-
fetch_telegram_summaries_by_date_range,
85-
TelegramSummariesRangeActivityInput(
84+
fetch_platform_summaries_by_date_range,
85+
PlatformSummariesRangeActivityInput(
8686
start_date=input.start_date,
8787
end_date=input.end_date,
8888
platform_name=platform_name,

registry.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,24 @@
1010
load_mediawiki_data,
1111
)
1212
from hivemind_summarizer.activities import (
13-
fetch_telegram_summaries_by_date,
14-
fetch_telegram_summaries_by_date_range,
13+
fetch_platform_summaries_by_date,
14+
fetch_platform_summaries_by_date_range,
1515
get_platform_name,
1616
)
1717
from workflows import (
1818
CommunityWebsiteWorkflow,
1919
SayHello,
2020
WebsiteIngestionSchedulerWorkflow,
2121
MediaWikiETLWorkflow,
22-
TelegramSummariesWorkflow,
22+
PlatformSummariesWorkflow,
2323
)
2424

2525
WORKFLOWS = [
2626
CommunityWebsiteWorkflow,
2727
SayHello,
2828
WebsiteIngestionSchedulerWorkflow,
2929
MediaWikiETLWorkflow,
30-
TelegramSummariesWorkflow,
30+
PlatformSummariesWorkflow,
3131
]
3232

3333
ACTIVITIES = [
@@ -40,7 +40,7 @@
4040
transform_mediawiki_data,
4141
load_mediawiki_data,
4242
say_hello,
43-
fetch_telegram_summaries_by_date,
44-
fetch_telegram_summaries_by_date_range,
43+
fetch_platform_summaries_by_date,
44+
fetch_platform_summaries_by_date_range,
4545
get_platform_name,
4646
]

workflows.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from hivemind_etl.mediawiki.workflows import (
1111
MediaWikiETLWorkflow,
1212
)
13-
from hivemind_summarizer.workflows import TelegramSummariesWorkflow
13+
from hivemind_summarizer.workflows import PlatformSummariesWorkflow
1414

1515
from temporalio import workflow
1616

0 commit comments

Comments
 (0)