@@ -102,6 +102,7 @@ def _export_ecosystem_to_bucket(self, ecosystem: str, tmp_dir: str):
102
102
103
103
zip_path = os .path .join (tmp_dir , 'all.zip' )
104
104
with zipfile .ZipFile (zip_path , 'w' , zipfile .ZIP_DEFLATED ) as zip_file :
105
+ files_to_zip = []
105
106
106
107
@ndb .tasklet
107
108
def _export_to_file_and_zipfile (bug ):
@@ -112,15 +113,18 @@ def _export_to_file_and_zipfile(bug):
112
113
file_path = os .path .join (tmp_dir , bug .id () + '.json' )
113
114
vulnerability = yield bug .to_vulnerability_async (include_source = True )
114
115
osv .write_vulnerability (vulnerability , file_path )
115
- # Tasklets are not truly multiple threads;they are actually
116
- # event loops, which makes it safe to write to ZIP files."
117
- # Details: https://cloud.google.com/appengine/docs/legacy/
118
- # standard/python/ndb/async#tasklets
119
- zip_file .write (file_path , os .path .basename (file_path ))
120
116
117
+ files_to_zip .append (file_path )
118
+
119
+ # Query.map() blocks until every tasklet has finished, so all
120
+ # exports have been written to disk before they are added to the zip.
121
121
osv .Bug .query (
122
122
osv .Bug .ecosystem == ecosystem ).map (_export_to_file_and_zipfile )
123
123
124
+ files_to_zip .sort ()
125
+ for file_path in files_to_zip :
126
+ zip_file .write (file_path , os .path .basename (file_path ))
127
+
124
128
with concurrent .futures .ThreadPoolExecutor (
125
129
max_workers = _EXPORT_WORKERS ) as executor :
126
130
# Note: all.zip is included here
0 commit comments