22Reads the contributions.yaml file and updates each entry by fetching its 'source' URL.
33"""
44import argparse
5- from datetime import datetime
5+ from datetime import datetime , UTC
66import pathlib
77from ruamel .yaml import YAML
8+ from multiprocessing import Pool
89
910from parse_and_validate_properties_txt import read_properties_txt , parse_text , validate_existing
1011
1112
1213def update_contribution (contribution , props ):
13- datetime_today = datetime .utcnow ( ).strftime ('%Y-%m-%dT%H:%M:%S%z' )
14+ datetime_today = datetime .now ( UTC ).strftime ('%Y-%m-%dT%H:%M:%S%z' )
1415 contribution ['lastUpdated' ] = datetime_today
1516 if 'previousVersions' not in contribution :
1617 contribution ['previousVersions' ] = []
@@ -29,6 +30,7 @@ def update_contribution(contribution, props):
2930
3031 if 'download' not in contribution :
3132 contribution ['download' ] = contribution ['source' ][:contribution ['source' ].rfind ('.' )] + '.zip'
33+
3234
3335def log_broken (contribution , msg ):
3436 if contribution ['status' ] == 'VALID' :
@@ -37,8 +39,10 @@ def log_broken(contribution, msg):
3739 contribution ['log' ] = []
3840 contribution ['log' ].append (msg )
3941
40- def process_contribution (contribution ):
41- date_today = datetime .utcnow ().strftime ('%Y-%m-%d' )
42+ def process_contribution (item ):
43+ index , contribution = item
44+
45+ date_today = datetime .now (UTC ).strftime ('%Y-%m-%d' )
4246 this_version = '0'
4347
4448 if contribution ['status' ] != 'DEPRECATED' :
@@ -51,16 +55,16 @@ def process_contribution(contribution):
5155 properties_raw = read_properties_txt (contribution ['source' ])
5256 except FileNotFoundError as e :
5357 log_broken (contribution , f'file not found, { e } , { date_today } ' )
54- return
58+ return index , contribution
5559 except Exception :
5660 log_broken (contribution , f'url timeout, { date_today } ' )
57- return
61+ return index , contribution
5862
5963 try :
6064 props = validate_existing (parse_text (properties_raw ))
6165 except Exception :
6266 log_broken (contribution , f'invalid file, { date_today } ' )
63- return
67+ return index , contribution
6468
6569 # Some library files include a lastUpdated field. That field also exists in the database,
6670 # where it is set by our scripts, so remove it here.
@@ -71,6 +75,7 @@ def process_contribution(contribution):
7175 if props ['version' ] != this_version :
7276 # update from online
7377 update_contribution (contribution , props )
78+ return index , contribution
7479
7580
7681if __name__ == "__main__" :
@@ -92,14 +97,22 @@ def process_contribution(contribution):
9297 contributions_list = data ['contributions' ]
9398
9499 if index == 'all' :
95- # update all contributions
96- for contribution in contributions_list :
97- process_contribution (contribution )
100+ total = len (contributions_list )
101+ completed = 0
102+ print (f"Starting processing of { total } contributions..." )
103+
104+ with Pool (processes = 256 ) as pool :
105+ for index , contribution in pool .imap_unordered (process_contribution , enumerate (contributions_list )):
106+ contributions_list [index ] = contribution
107+ completed += 1
108+ print (f"Progress: { completed } /{ total } ({ (completed / total * 100 ):.1f} %)" )
109+
110+ print ("All processing complete" )
98111 else :
99112 # update only contribution with id==index
100113 contribution = next ((x for x in contributions_list if x ['id' ] == int (index )), None )
101114 print (contribution )
102- process_contribution (contribution )
115+ process_contribution (( index , contribution ) )
103116 print (contribution )
104117
105118 # write all contributions to database file
0 commit comments