Skip to content

Commit 2c31244

Browse files
authored
Sort zip in exporter (#2311)
Might fix google/osv-scanner#1007 (comment). Sort the entries before adding them to the zip archive.
1 parent d619c8f commit 2c31244

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

docker/exporter/exporter.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ def _export_ecosystem_to_bucket(self, ecosystem: str, tmp_dir: str):
102102

103103
zip_path = os.path.join(tmp_dir, 'all.zip')
104104
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
105+
files_to_zip = []
105106

106107
@ndb.tasklet
107108
def _export_to_file_and_zipfile(bug):
@@ -112,15 +113,18 @@ def _export_to_file_and_zipfile(bug):
112113
file_path = os.path.join(tmp_dir, bug.id() + '.json')
113114
vulnerability = yield bug.to_vulnerability_async(include_source=True)
114115
osv.write_vulnerability(vulnerability, file_path)
115-
# Tasklets are not truly multiple threads;they are actually
116-
# event loops, which makes it safe to write to ZIP files."
117-
# Details: https://cloud.google.com/appengine/docs/legacy/
118-
# standard/python/ndb/async#tasklets
119-
zip_file.write(file_path, os.path.basename(file_path))
120116

117+
files_to_zip.append(file_path)
118+
119+
# This *should* pause here until
120+
# all the exports have been written to disk.
121121
osv.Bug.query(
122122
osv.Bug.ecosystem == ecosystem).map(_export_to_file_and_zipfile)
123123

124+
files_to_zip.sort()
125+
for file_path in files_to_zip:
126+
zip_file.write(file_path, os.path.basename(file_path))
127+
124128
with concurrent.futures.ThreadPoolExecutor(
125129
max_workers=_EXPORT_WORKERS) as executor:
126130
# Note: all.zip is included here

0 commit comments

Comments
 (0)