Skip to content

Commit 139de7b

Browse files
authored
fix(datafix): let request_worker_update_record.py delete records (#3421)
[There's already logic in the worker to delete from GIT source repos](https://github.com/google/osv.dev/blob/4b9f3e9427de95107e35cb37b1a6797228852414/gcp/workers/worker/worker.py#L346-L349). This change adds an `--allow-delete` flag to `request_worker_update_record.py` that sends a delete message when fetching the record from the git repo returns a 404, so we can more easily withdraw vulnerabilities manually.
1 parent 4111a67 commit 139de7b

File tree

1 file changed

+26
-9
lines changed

1 file changed

+26
-9
lines changed

tools/datafix/request_worker_update_record.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,12 @@
1717
PUBSUB_TOPIC_ID = "tasks"
1818

1919

20-
def publish_update_message(project_id, topic_id, source, path, original_sha256):
20+
def publish_update_message(project_id,
21+
topic_id,
22+
source,
23+
path,
24+
original_sha256,
25+
deleted=False):
2126
"""Publish a message to a Pub/Sub topic with the provided data as attributes.
2227
2328
Args:
@@ -26,6 +31,7 @@ def publish_update_message(project_id, topic_id, source, path, original_sha256):
2631
source: The record source ID.
2732
path: The record path.
2833
original_sha256: The original SHA256 checksum of the record.
34+
deleted: Whether the record has been deleted.
2935
"""
3036

3137
publisher = pubsub_v1.PublisherClient()
@@ -39,7 +45,7 @@ def publish_update_message(project_id, topic_id, source, path, original_sha256):
3945
"source": source,
4046
"path": path,
4147
"original_sha256": original_sha256,
42-
"deleted": "false",
48+
"deleted": "true" if deleted else "false",
4349
"req_timestamp": str(int(time.time())),
4450
},
4551
)
@@ -59,19 +65,25 @@ def github_raw_url(repo_url, path):
5965
return f'https://raw.githubusercontent.com/{repo}/refs/heads/main/' + path
6066

6167

62-
def request_url_update(record_url, project_id, source, path, timeout):
68+
def request_url_update(record_url, project_id, source, path, timeout,
69+
allow_delete):
6370
"""Request a update based on a URL record."""
6471
print(f'Trying: {record_url}')
6572
response = requests.get(record_url, timeout=timeout)
73+
original_sha256 = ''
74+
deleted = False
6675
try:
6776
response.raise_for_status()
77+
original_sha256 = osv.sha256_bytes(response.text.encode())
6878
except requests.HTTPError as e:
69-
print(e)
70-
return
79+
if not allow_delete or e.response.status_code != 404:
80+
print(e)
81+
return
82+
print(f'Bug was deleted: {record_url}')
83+
deleted = True
7184

72-
original_sha256 = osv.sha256_bytes(response.text.encode())
7385
publish_update_message(project_id, PUBSUB_TOPIC_ID, source, path,
74-
original_sha256)
86+
original_sha256, deleted)
7587

7688

7789
def main():
@@ -85,6 +97,11 @@ def main():
8597
type=int,
8698
default=DEFAULT_TIMEOUT,
8799
help="Default timeout to use for operations")
100+
parser.add_argument(
101+
"--allow-delete",
102+
action="store_true",
103+
default=False,
104+
help="Delete bugs if not found in source (GIT only)")
88105
parser.add_argument(
89106
"bugs", action="append", nargs="+", help="The bug IDs to operate on")
90107

@@ -100,7 +117,7 @@ def main():
100117
record_url = f'{source.link}{bug}{source.extension}'
101118
path = f'{bug}{source.extension}'
102119
request_url_update(record_url, args.project_id, args.source, path,
103-
args.timeout)
120+
args.timeout, False)
104121

105122
if source.type == osv.SourceRepositoryType.GIT:
106123
for bug in args.bugs[0]:
@@ -112,7 +129,7 @@ def main():
112129

113130
record_url = github_raw_url(source.repo_url, path)
114131
request_url_update(record_url, args.project_id, args.source, path,
115-
args.timeout)
132+
args.timeout, args.allow_delete)
116133

117134
if source.type == osv.SourceRepositoryType.BUCKET:
118135
raise NotImplementedError("Use reimport_gcs_record.py for now")

0 commit comments

Comments
 (0)