Skip to content

Commit 2c0b6bf

Browse files
committed
feat: 恢复全量同步功能
1 parent f4bbb17 commit 2c0b6bf

File tree

6 files changed

+64
-33
lines changed

6 files changed

+64
-33
lines changed

mcim_sync/checker/curseforge.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,4 +165,4 @@ def check_newest_search_result(classId: int) -> List[int]:
165165

166166
time.sleep(CURSEFORGE_DELAY)
167167

168-
return new_modids
168+
return new_modids

mcim_sync/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def validate_bool(cls, v):
4444
class JobInterval(BaseModel):
4545
curseforge_refresh: int = 60 * 60 * 2 # 2 hours
4646
modrinth_refresh: int = 60 * 60 * 2 # 2 hours
47+
curseforge_refresh_full: int = 60 * 60 * 48 # 48 hours
4748
sync_curseforge_by_queue: int = 60 * 5 # 5 minutes
4849
sync_curseforge_by_search: int = 60 * 60 * 2 # 2 hours
4950
sync_modrinth_by_queue: int = 60 * 5 # 5 minutes

mcim_sync/fetcher/curseforge.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Union, List, Set
1+
from typing import Union, List, Set, Optional
22
from odmantic import query
33
import datetime
44
import time
@@ -53,4 +53,3 @@ def fetch_expired_curseforge_data() -> List[int]:
5353
time.sleep(CURSEFORGE_DELAY)
5454
log.debug(f"Delay {CURSEFORGE_DELAY} seconds")
5555
return list(expired_modids)
56-

mcim_sync/sync/curseforge.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
1-
from typing import List, Optional, Union
1+
from typing import List, Optional
22
from tenacity import retry, stop_after_attempt, wait_fixed
33
from odmantic import query
44
from enum import Enum
5-
import time
65

76
from mcim_sync.models.database.curseforge import (
87
File,
@@ -20,10 +19,12 @@
2019
get_mutil_mods_info,
2120
get_search_result,
2221
)
22+
2323
# from mcim_sync.models.database.file_cdn import File as FileCDN
2424
from mcim_sync.utils.constans import ProjectDetail
2525
from mcim_sync.utils.loger import log
2626
from mcim_sync.utils.model_submitter import ModelSubmitter
27+
2728
# from mcim_sync.utils import find_hash_in_curseforge_hashes
2829
from mcim_sync.database.mongodb import sync_mongo_engine as mongodb_engine
2930
from mcim_sync.config import Config
@@ -87,6 +88,8 @@ def sync_mod_all_files(
8788
params = {"index": 0, "pageSize": 50}
8889
file_id_list = []
8990

91+
original_files_count = mongodb_engine.count(File, File.modId == modId)
92+
9093
while True:
9194
res = get_mod_files(modId, params["index"], params["pageSize"])
9295
append_model_from_files_res(
@@ -108,7 +111,7 @@ def sync_mod_all_files(
108111
File, File.modId == modId, query.not_in(File.id, file_id_list)
109112
)
110113
log.info(
111-
f"Finished sync mod {modId}, total {page.totalCount} files, removed {removed_count} files"
114+
f"Finished sync mod {modId}, total {page.totalCount} files, removed {removed_count} files, original files {original_files_count}"
112115
)
113116

114117
return page.totalCount
@@ -118,7 +121,7 @@ def sync_mod_all_files_at_once(
118121
modId: int, latestFiles: List[dict], need_to_cache: bool = True
119122
) -> Optional[int]:
120123
max_retries = 3
121-
page_size= 10000
124+
page_size = 10000
122125
for i in range(max_retries):
123126
res = get_mod_files(modId, index=0, pageSize=page_size)
124127

@@ -128,17 +131,21 @@ def sync_mod_all_files_at_once(
128131

129132
if page.resultCount != page.totalCount or len(file_id_list) != page.resultCount:
130133
log.warning(
131-
f"ResultCount {page.resultCount} != TotalCount {page.totalCount} for mod {modId}, or the count of files != resultCount, response maybe incomplete, passing sync, retrying {i+1}/{max_retries}"
134+
f"ResultCount {page.resultCount} != TotalCount {page.totalCount} for mod {modId}, or the count of files != resultCount, response maybe incomplete, passing sync, retrying {i + 1}/{max_retries}"
132135
)
133136
# time.sleep(1)
134137
page_size -= 1
135138
continue
136139
else:
137140
break
138141
else:
139-
log.error(f"Failed to get all files for mod {modId} after {max_retries} retries")
142+
log.error(
143+
f"Failed to get all files for mod {modId} after {max_retries} retries"
144+
)
140145
return None
141146

147+
original_files_count = mongodb_engine.count(File, File.modId == modId)
148+
142149
append_model_from_files_res(
143150
res, latestFiles=latestFiles, need_to_cache=need_to_cache
144151
)
@@ -147,7 +154,7 @@ def sync_mod_all_files_at_once(
147154
File, File.modId == modId, query.not_in(File.id, file_id_list)
148155
)
149156
log.info(
150-
f"Finished sync mod {modId}, total {page.totalCount} files, resultCount {page.resultCount}, removed {removed_count} files"
157+
f"Finished sync mod {modId}, total {page.totalCount} files, resultCount {page.resultCount}, removed {removed_count} files, existing files {original_files_count}"
151158
)
152159

153160
return page.totalCount

mcim_sync/tasks/curseforge.py

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@
1717
check_new_modids,
1818
check_newest_search_result,
1919
)
20-
from mcim_sync.fetcher.curseforge import fetch_expired_curseforge_data
20+
from mcim_sync.fetcher.curseforge import (
21+
fetch_expired_curseforge_data,
22+
fetch_all_curseforge_data,
23+
)
2124
from mcim_sync.queues.curseforge import clear_curseforge_all_queues
2225
from mcim_sync.tasks import create_tasks_pool, curseforge_pause_event
2326

@@ -57,7 +60,9 @@ def refresh_curseforge_with_modify_date() -> bool:
5760

5861
failed_count = len(curseforge_expired_modids) - len(projects_detail_info)
5962
failed_modids = [
60-
modid for modid in curseforge_expired_modids if modid not in projects_detail_info
63+
modid
64+
for modid in curseforge_expired_modids
65+
if modid not in projects_detail_info
6166
]
6267
log.info(
6368
f"CurseForge expired data sync finished, total: {len(curseforge_expired_modids)}, "
@@ -218,30 +223,39 @@ def sync_curseforge_by_search():
218223
return True
219224

220225

221-
# def sync_curseforge_full():
222-
# log.info("Start fetching all data.")
223-
# total_data = {
224-
# "curseforge": 0,
225-
# }
226+
def sync_curseforge_full():
227+
log.info("Start fetching curseforge all data.")
226228

227-
# if SYNC_CURSEFORGE:
228-
# curseforge_data = fetch_all_curseforge_data()
229-
# log.info(f"Curseforge data totally fetched: {len(curseforge_data)}")
230-
# total_data["curseforge"] = len(curseforge_data)
229+
curseforge_data = fetch_all_curseforge_data()
230+
log.info(f"Curseforge data totally fetched: {len(curseforge_data)}")
231231

232-
# # 允许请求
233-
# curseforge_pause_event.set()
232+
curseforge_pause_event.set()
234233

235-
# curseforge_pool, curseforge_futures = create_tasks_pool(
236-
# sync_mod_all_files, curseforge_data, MAX_WORKERS, "curseforge"
237-
# )
234+
curseforge_pool, curseforge_futures = create_tasks_pool(
235+
sync_mod, curseforge_data, MAX_WORKERS, "curseforge"
236+
)
238237

239-
# log.info(
240-
# f"All {len(curseforge_futures)} tasks submitted, waiting for completion..."
241-
# )
238+
log.info(
239+
f"All {len(curseforge_futures)} tasks submitted, waiting for completion..."
240+
)
242241

243-
# for future in as_completed(curseforge_futures):
244-
# # 不需要返回值
245-
# pass
242+
projects_detail_info = []
243+
for future in as_completed(curseforge_futures):
244+
result = future.result()
245+
if result:
246+
projects_detail_info.append(result)
247+
else:
248+
curseforge_pool.shutdown()
249+
250+
failed_count = len(curseforge_data) - len(projects_detail_info)
251+
failed_modids = [
252+
modid for modid in curseforge_data if modid not in projects_detail_info
253+
]
254+
255+
log.info(
256+
f"CurseForge full sync finished, total: {len(curseforge_data)}, "
257+
f"success: {len(projects_detail_info)}, failed: {failed_count}, "
258+
f"failed modids: {failed_modids if failed_modids else 'None'}"
259+
)
246260

247-
# curseforge_pool.shutdown()
261+
curseforge_pool.shutdown()

start.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@
1313
refresh_modrinth_with_modify_date,
1414
refresh_modrinth_tags,
1515
sync_modrinth_by_search,
16+
1617
)
1718
from mcim_sync.tasks.curseforge import (
1819
sync_curseforge_queue,
1920
refresh_curseforge_with_modify_date,
2021
refresh_curseforge_categories,
2122
sync_curseforge_by_search,
23+
sync_curseforge_full
2224
)
2325
from mcim_sync.tasks.misc import send_statistics_to_telegram
2426

@@ -47,6 +49,14 @@ def main():
4749
trigger=IntervalTrigger(seconds=config.interval.modrinth_refresh),
4850
name="modrinth_refresh",
4951
)
52+
53+
if config.job_config.curseforge_refresh_full:
54+
# Add the full refresh job; it runs every 48 hours by default (config.interval.curseforge_refresh_full)
55+
curseforge_full_refresh_job = scheduler.add_job(
56+
sync_curseforge_full,
57+
trigger=IntervalTrigger(seconds=config.interval.curseforge_refresh_full),
58+
name="curseforge_full_refresh",
59+
)
5060

5161
if config.job_config.sync_curseforge_by_queue:
5262
# Add a scheduled sync job that checks requested data the API did not find

0 commit comments

Comments
 (0)