Skip to content

Commit d2cf317

Browse files
committed
perf(local): only load advisories that are about the packages being scanned
1 parent efcc36d commit d2cf317

File tree

3 files changed

+140
-21
lines changed

3 files changed

+140
-21
lines changed

internal/clients/clientimpl/localmatcher/localmatcher.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ func (matcher *LocalMatcher) MatchVulnerabilities(ctx context.Context, invs []*e
6969
continue
7070
}
7171

72-
db, err := matcher.loadDBFromCache(ctx, pkg.Ecosystem())
72+
db, err := matcher.loadDBFromCache(ctx, pkg.Ecosystem(), invs)
7373

7474
if err != nil {
7575
// no logging here as the loader will have already done that
@@ -87,12 +87,12 @@ func (matcher *LocalMatcher) MatchVulnerabilities(ctx context.Context, invs []*e
8787
// LoadEcosystem tries to preload the ecosystem into the cache, and returns an error if the ecosystem
8888
// cannot be loaded.
8989
func (matcher *LocalMatcher) LoadEcosystem(ctx context.Context, eco osvecosystem.Parsed) error {
90-
_, err := matcher.loadDBFromCache(ctx, eco)
90+
_, err := matcher.loadDBFromCache(ctx, eco, nil)
9191

9292
return err
9393
}
9494

95-
func (matcher *LocalMatcher) loadDBFromCache(ctx context.Context, eco osvecosystem.Parsed) (*ZipDB, error) {
95+
func (matcher *LocalMatcher) loadDBFromCache(ctx context.Context, eco osvecosystem.Parsed, invs []*extractor.Package) (*ZipDB, error) {
9696
if db, ok := matcher.dbs[eco.Ecosystem]; ok {
9797
return db, nil
9898
}
@@ -101,7 +101,15 @@ func (matcher *LocalMatcher) loadDBFromCache(ctx context.Context, eco osvecosyst
101101
return nil, matcher.failedDBs[eco.Ecosystem]
102102
}
103103

104-
db, err := NewZippedDB(ctx, matcher.dbBasePath, string(eco.Ecosystem), fmt.Sprintf("%s/%s/all.zip", zippedDBRemoteHost, eco.Ecosystem), matcher.userAgent, !matcher.downloadDB)
104+
db, err := NewZippedDB(
105+
ctx,
106+
matcher.dbBasePath,
107+
string(eco.Ecosystem),
108+
fmt.Sprintf("%s/%s/all.zip", zippedDBRemoteHost, eco.Ecosystem),
109+
matcher.userAgent,
110+
!matcher.downloadDB,
111+
invs,
112+
)
105113

106114
if err != nil {
107115
matcher.failedDBs[eco.Ecosystem] = err

internal/clients/clientimpl/localmatcher/zip.go

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"path"
1717
"strings"
1818

19+
"github.com/google/osv-scalibr/extractor"
1920
"github.com/google/osv-scanner/v2/internal/cmdlogger"
2021
"github.com/google/osv-scanner/v2/internal/imodels"
2122
"github.com/google/osv-scanner/v2/internal/utility/vulns"
@@ -143,9 +144,21 @@ func (db *ZipDB) fetchZip(ctx context.Context) ([]byte, error) {
143144
return body, nil
144145
}
145146

147+
func mightAffectPackages(v osvschema.Vulnerability, names []string) bool {
148+
for _, affected := range v.Affected {
149+
for _, name := range names {
150+
if affected.Package.Name == name {
151+
return true
152+
}
153+
}
154+
}
155+
156+
return false
157+
}
158+
146159
// Loads the given zip file into the database as an OSV.
147160
// It is assumed that the file is JSON and in the working directory of the db
148-
func (db *ZipDB) loadZipFile(zipFile *zip.File) {
161+
func (db *ZipDB) loadZipFile(zipFile *zip.File, names []string) {
149162
file, err := zipFile.Open()
150163
if err != nil {
151164
cmdlogger.Warnf("Could not read %s: %v", zipFile.Name, err)
@@ -169,16 +182,23 @@ func (db *ZipDB) loadZipFile(zipFile *zip.File) {
169182
return
170183
}
171184

172-
db.Vulnerabilities = append(db.Vulnerabilities, vulnerability)
185+
// if we have been provided a list of package names, only load advisories
186+
// that might actually affect those packages, rather than all advisories
187+
if len(names) == 0 || mightAffectPackages(vulnerability, names) {
188+
db.Vulnerabilities = append(db.Vulnerabilities, vulnerability)
189+
}
173190
}
174191

175192
// load fetches a zip archive of the OSV database and loads known vulnerabilities
176193
// from it (which are assumed to be in json files following the OSV spec).
177194
//
195+
// If a list of package names is provided, then only advisories with at least
196+
// one affected entry for a listed package will be loaded.
197+
//
178198
// Internally, the archive is cached along with the date that it was fetched
179199
// so that a new version of the archive is only downloaded if it has been
180200
// modified, per HTTP caching standards.
181-
func (db *ZipDB) load(ctx context.Context) error {
201+
func (db *ZipDB) load(ctx context.Context, names []string) error {
182202
db.Vulnerabilities = []osvschema.Vulnerability{}
183203

184204
body, err := db.fetchZip(ctx)
@@ -198,21 +218,30 @@ func (db *ZipDB) load(ctx context.Context) error {
198218
continue
199219
}
200220

201-
db.loadZipFile(zipFile)
221+
db.loadZipFile(zipFile, names)
202222
}
203223

204224
return nil
205225
}
206226

207-
func NewZippedDB(ctx context.Context, dbBasePath, name, url, userAgent string, offline bool) (*ZipDB, error) {
227+
func NewZippedDB(ctx context.Context, dbBasePath, name, url, userAgent string, offline bool, invs []*extractor.Package) (*ZipDB, error) {
208228
db := &ZipDB{
209229
Name: name,
210230
ArchiveURL: url,
211231
Offline: offline,
212232
StoredAt: path.Join(dbBasePath, name, "all.zip"),
213233
UserAgent: userAgent,
214234
}
215-
if err := db.load(ctx); err != nil {
235+
names := make([]string, 0, len(invs))
236+
237+
// map the packages to their names ahead of loading,
238+
// to make things simpler and reduce double working
239+
for _, inv := range invs {
240+
in := imodels.FromInventory(inv)
241+
names = append(names, in.Name())
242+
}
243+
244+
if err := db.load(ctx, names); err != nil {
216245
return nil, fmt.Errorf("unable to fetch OSV database: %w", err)
217246
}
218247

internal/clients/clientimpl/localmatcher/zip_test.go

Lines changed: 93 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"sort"
1717
"testing"
1818

19+
"github.com/google/osv-scalibr/extractor"
1920
"github.com/google/osv-scanner/v2/internal/clients/clientimpl/localmatcher"
2021
"github.com/google/osv-scanner/v2/internal/testutility"
2122
"github.com/google/osv-scanner/v2/internal/version"
@@ -146,7 +147,7 @@ func TestNewZippedDB_Offline_WithoutCache(t *testing.T) {
146147
t.Errorf("a server request was made when running offline")
147148
})
148149

149-
_, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, true)
150+
_, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, true, nil)
150151

151152
if !errors.Is(err, localmatcher.ErrOfflineDatabaseNotFound) {
152153
t.Errorf("expected \"%v\" error but got \"%v\"", localmatcher.ErrOfflineDatabaseNotFound, err)
@@ -178,7 +179,7 @@ func TestNewZippedDB_Offline_WithCache(t *testing.T) {
178179
"GHSA-5.json": {ID: "GHSA-5"},
179180
}))
180181

181-
db, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, true)
182+
db, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, true, nil)
182183

183184
if err != nil {
184185
t.Fatalf("unexpected error \"%v\"", err)
@@ -196,7 +197,7 @@ func TestNewZippedDB_BadZip(t *testing.T) {
196197
_, _ = w.Write([]byte("this is not a zip"))
197198
})
198199

199-
_, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false)
200+
_, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false, nil)
200201

201202
if err == nil {
202203
t.Errorf("expected an error but did not get one")
@@ -208,7 +209,7 @@ func TestNewZippedDB_UnsupportedProtocol(t *testing.T) {
208209

209210
testDir := testutility.CreateTestDir(t)
210211

211-
_, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", "file://hello-world", userAgent, false)
212+
_, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", "file://hello-world", userAgent, false, nil)
212213

213214
if err == nil {
214215
t.Errorf("expected an error but did not get one")
@@ -238,7 +239,7 @@ func TestNewZippedDB_Online_WithoutCache(t *testing.T) {
238239
})
239240
})
240241

241-
db, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false)
242+
db, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false, nil)
242243

243244
if err != nil {
244245
t.Fatalf("unexpected error \"%v\"", err)
@@ -270,7 +271,7 @@ func TestNewZippedDB_Online_WithoutCacheAndNoHashHeader(t *testing.T) {
270271
}))
271272
})
272273

273-
db, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false)
274+
db, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false, nil)
274275

275276
if err != nil {
276277
t.Fatalf("unexpected error \"%v\"", err)
@@ -308,7 +309,7 @@ func TestNewZippedDB_Online_WithSameCache(t *testing.T) {
308309

309310
cacheWrite(t, determineStoredAtPath(testDir, "my-db"), cache)
310311

311-
db, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false)
312+
db, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false, nil)
312313

313314
if err != nil {
314315
t.Fatalf("unexpected error \"%v\"", err)
@@ -346,7 +347,7 @@ func TestNewZippedDB_Online_WithDifferentCache(t *testing.T) {
346347
"GHSA-3.json": {ID: "GHSA-3"},
347348
}))
348349

349-
db, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false)
350+
db, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false, nil)
350351

351352
if err != nil {
352353
t.Fatalf("unexpected error \"%v\"", err)
@@ -376,7 +377,7 @@ func TestNewZippedDB_Online_WithCacheButNoHashHeader(t *testing.T) {
376377
"GHSA-3.json": {ID: "GHSA-3"},
377378
}))
378379

379-
_, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false)
380+
_, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false, nil)
380381

381382
if err == nil {
382383
t.Errorf("expected an error but did not get one")
@@ -404,7 +405,7 @@ func TestNewZippedDB_Online_WithBadCache(t *testing.T) {
404405

405406
cacheWriteBad(t, determineStoredAtPath(testDir, "my-db"), "this is not json!")
406407

407-
db, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false)
408+
db, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false, nil)
408409

409410
if err != nil {
410411
t.Fatalf("unexpected error \"%v\"", err)
@@ -430,11 +431,92 @@ func TestNewZippedDB_FileChecks(t *testing.T) {
430431
})
431432
})
432433

433-
db, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false)
434+
db, err := localmatcher.NewZippedDB(t.Context(), testDir, "my-db", ts.URL, userAgent, false, nil)
434435

435436
if err != nil {
436437
t.Fatalf("unexpected error \"%v\"", err)
437438
}
438439

439440
expectDBToHaveOSVs(t, db, osvs)
440441
}
442+
443+
func TestNewZippedDB_WithSpecificPackages(t *testing.T) {
444+
t.Parallel()
445+
446+
testDir := testutility.CreateTestDir(t)
447+
448+
ts := createZipServer(t, func(w http.ResponseWriter, _ *http.Request) {
449+
_, _ = writeOSVsZip(t, w, map[string]osvschema.Vulnerability{
450+
"GHSA-1.json": {
451+
ID: "GHSA-1",
452+
Affected: []osvschema.Affected{},
453+
},
454+
"GHSA-2.json": {
455+
ID: "GHSA-2",
456+
Affected: []osvschema.Affected{
457+
{Package: osvschema.Package{Name: "pkg-1"}},
458+
},
459+
},
460+
"GHSA-3.json": {
461+
ID: "GHSA-3",
462+
},
463+
"GHSA-4.json": {
464+
ID: "GHSA-4",
465+
Affected: []osvschema.Affected{
466+
{Package: osvschema.Package{Name: "pkg-2"}},
467+
},
468+
},
469+
"GHSA-5.json": {
470+
ID: "GHSA-5",
471+
Affected: []osvschema.Affected{
472+
{Package: osvschema.Package{Name: "pkg-2"}},
473+
{Package: osvschema.Package{Name: "pkg-1"}},
474+
},
475+
},
476+
"GHSA-6.json": {
477+
ID: "GHSA-6",
478+
Affected: []osvschema.Affected{
479+
{Package: osvschema.Package{Name: "pkg-3"}},
480+
{Package: osvschema.Package{Name: "pkg-2"}},
481+
},
482+
},
483+
})
484+
})
485+
486+
db, err := localmatcher.NewZippedDB(
487+
t.Context(),
488+
testDir,
489+
"my-db",
490+
ts.URL,
491+
userAgent,
492+
false,
493+
[]*extractor.Package{{Name: "pkg-1"}, {Name: "pkg-3"}},
494+
)
495+
496+
if err != nil {
497+
t.Fatalf("unexpected error \"%v\"", err)
498+
}
499+
500+
expectDBToHaveOSVs(t, db, []osvschema.Vulnerability{
501+
{
502+
ID: "GHSA-2",
503+
Affected: []osvschema.Affected{
504+
{Package: osvschema.Package{Name: "pkg-1"}},
505+
},
506+
},
507+
{
508+
ID: "GHSA-5",
509+
Affected: []osvschema.Affected{
510+
{Package: osvschema.Package{Name: "pkg-2"}},
511+
{Package: osvschema.Package{Name: "pkg-1"}},
512+
},
513+
},
514+
{
515+
ID: "GHSA-6",
516+
Affected: []osvschema.Affected{
517+
{Package: osvschema.Package{Name: "pkg-3"}},
518+
{Package: osvschema.Package{Name: "pkg-2"}},
519+
},
520+
},
521+
})
522+
}

0 commit comments

Comments
 (0)