2121import osv .logs
2222import json
2323import logging
24+ from collections import defaultdict
2425
2526
26- def compute_upstream (target_bug , bugs : dict [str , osv . Bug ]) -> list [str ]:
27+ def compute_upstream (target_bug , bugs : dict [str , set [ str ] ]) -> list [str ]:
2728 """Computes all upstream vulnerabilities for the given bug ID.
2829 The returned list contains all of the bug IDs that are upstream of the
2930 target bug ID, including transitive upstreams."""
3031 visited = set ()
3132
32- target_bug_upstream = target_bug . upstream_raw
33+ target_bug_upstream = target_bug
3334 if not target_bug_upstream :
3435 return []
3536 to_visit = set (target_bug_upstream )
@@ -39,9 +40,9 @@ def compute_upstream(target_bug, bugs: dict[str, osv.Bug]) -> list[str]:
3940 continue
4041 visited .add (bug_id )
4142 upstreams = set ()
42- if bug_id in bugs :
43+ if bug_id in bugs . keys () :
4344 bug = bugs .get (bug_id )
44- upstreams = set (bug . upstream_raw )
45+ upstreams = set (bug )
4546
4647 to_visit .update (upstreams - visited )
4748
@@ -151,15 +152,20 @@ def main():
151152 UpstreamGroups and creating new UpstreamGroups for un-computed bugs."""
152153
153154 # Query for all bugs that have upstreams.
154- # Use (> '' OR < '') instead of (!= '') / (> '') to de-duplicate results
155- # and avoid datastore emulator problems, see issue #2093
156155 updated_bugs = []
157- bugs = osv .Bug .query (
158- ndb .OR (osv .Bug .upstream_raw > '' , osv .Bug .upstream_raw < '' ))
159- bugs = {bug .db_id : bug for bug in bugs .iter ()}
156+ logging .info ('Retrieving bugs...' )
157+ bugs_query = osv .Bug .query (osv .Bug .upstream_raw > '' )
158+
159+ bugs = defaultdict (set )
160+ for bug in bugs_query .iter (projection = [osv .Bug .db_id , osv .Bug .upstream_raw ]):
161+ bugs [bug .db_id ].add (bug .upstream_raw [0 ])
162+ logging .info ('%s Bugs successfully retrieved' , len (bugs ))
163+
164+ logging .info ('Retrieving upstream groups...' )
160165 upstream_groups = {
161166 group .db_id : group for group in osv .UpstreamGroup .query ().iter ()
162167 }
168+ logging .info ('Upstream Groups successfully retrieved' )
163169
164170 for bug_id , bug in bugs .items ():
165171 # Get the specific upstream_group ID
@@ -175,15 +181,18 @@ def main():
175181 continue
176182 updated_bugs .append (new_upstream_group )
177183 upstream_groups [bug_id ] = new_upstream_group
184+ logging .info ('Upstream group updated for bug: %s' , bug_id )
178185 else :
179186 # Create a new UpstreamGroup
180187 new_upstream_group = _create_group (bug_id , upstream_ids )
188+ logging .info ('New upstream group created for bug: %s' , bug_id )
181189 updated_bugs .append (new_upstream_group )
182190 upstream_groups [bug_id ] = new_upstream_group
183191
184192 for group in updated_bugs :
185193 # Recompute the upstream hierarchies
186194 compute_upstream_hierarchy (group , upstream_groups )
195+ logging .info ('Upstream hierarchy updated for bug: %s' , group .db_id )
187196
188197
189198if __name__ == '__main__' :
0 commit comments