Skip to content

Commit 8a2d490

Browse files
authored
Merge branch 'apache:main' into main
2 parents f1f61e2 + 0d753c4 commit 8a2d490

File tree

21 files changed

+355
-70
lines changed

21 files changed

+355
-70
lines changed

backend/core/utils/time.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
Licensed to the Apache Software Foundation (ASF) under one or more
3+
contributor license agreements. See the NOTICE file distributed with
4+
this work for additional information regarding copyright ownership.
5+
The ASF licenses this file to You under the Apache License, Version 2.0
6+
(the "License"); you may not use this file except in compliance with
7+
the License. You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
*/
17+
18+
package utils
19+
20+
import "time"
21+
22+
// NilIfZeroTime normalizes an optional timestamp.
//
// It returns nil when t is nil or when t points at the zero time
// (0001-01-01 00:00:00 UTC, i.e. t.IsZero() reports true). Otherwise it
// returns t itself: the very same pointer, not a copy of the value.
func NilIfZeroTime(t *time.Time) *time.Time {
	// The nil check must come first: IsZero dereferences the pointer.
	if t == nil || t.IsZero() {
		return nil
	}
	return t
}

backend/core/utils/time_test.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
Licensed to the Apache Software Foundation (ASF) under one or more
3+
contributor license agreements. See the NOTICE file distributed with
4+
this work for additional information regarding copyright ownership.
5+
The ASF licenses this file to You under the Apache License, Version 2.0
6+
(the "License"); you may not use this file except in compliance with
7+
the License. You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
*/
17+
package utils
18+
19+
import (
20+
"testing"
21+
"time"
22+
23+
"github.com/stretchr/testify/assert"
24+
)
25+
26+
func TestNilIfZeroTime(t *testing.T) {
27+
type args struct {
28+
t *time.Time
29+
}
30+
tests := []struct {
31+
name string
32+
args args
33+
want *time.Time
34+
}{
35+
{
36+
name: "Empty date should be nil",
37+
args: args{nil},
38+
want: nil,
39+
},
40+
{
41+
name: "Zero date should be nil",
42+
args: args{&time.Time{}},
43+
want: nil,
44+
},
45+
}
46+
for _, tt := range tests {
47+
t.Run(tt.name, func(t *testing.T) {
48+
assert.Equalf(t, tt.want, NilIfZeroTime(tt.args.t), "NilIfZeroTime(%v)", tt.args.t)
49+
})
50+
}
51+
}

backend/plugins/gitextractor/parser/repo_gogit.go

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"encoding/hex"
2424
"fmt"
2525
"regexp"
26+
"strings"
2627

2728
"github.com/apache/incubator-devlake/core/dal"
2829
"github.com/apache/incubator-devlake/core/errors"
@@ -220,9 +221,6 @@ func (r *GogitRepoCollector) CollectBranches(subtaskCtx plugin.SubTaskContext) e
220221
func(r *plumbing.Reference) bool {
221222
return r.Name().IsBranch() || r.Name().IsRemote()
222223
}, refIter)
223-
if err != nil {
224-
return err
225-
}
226224
headRef, err := r.repo.Head()
227225
if err != nil {
228226
return err
@@ -336,7 +334,26 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext) (e
336334
if err != nil {
337335
return err
338336
} else {
337+
excluded := map[string]struct{}{}
338+
for _, ext := range taskOpts.ExcludeFileExtensions {
339+
e := strings.ToLower(strings.TrimSpace(ext))
340+
if e == "" {
341+
continue
342+
}
343+
excluded[e] = struct{}{}
344+
}
339345
for _, stat := range stats {
346+
nameLower := strings.ToLower(stat.Name)
347+
skip := false
348+
for ext := range excluded {
349+
if strings.HasSuffix(nameLower, ext) {
350+
skip = true
351+
break
352+
}
353+
}
354+
if skip {
355+
continue
356+
}
340357
codeCommit.Additions += stat.Addition
341358
// In some repos, deletion may be zero, which is different from git log --stat.
342359
// It seems go-git doesn't get the correct changes.
@@ -363,7 +380,7 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext) (e
363380
return err
364381
}
365382
if !*taskOpts.SkipCommitFiles {
366-
if err := r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit); err != nil {
383+
if err := r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit, taskOpts.ExcludeFileExtensions); err != nil {
367384
return err
368385
}
369386
}
@@ -423,7 +440,7 @@ func (r *GogitRepoCollector) getCurrentAndParentTree(ctx context.Context, commit
423440
return commitTree, firstParentTree, nil
424441
}
425442

426-
func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commit *object.Commit) (err error) {
443+
func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commit *object.Commit, excludeExts []string) (err error) {
427444
commitTree, firstParentTree, err := r.getCurrentAndParentTree(subtaskCtx.GetContext(), commit)
428445
if err != nil {
429446
return err
@@ -433,12 +450,34 @@ func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx plu
433450
if err != nil {
434451
return err
435452
}
453+
// normalize exclusions
454+
excluded := map[string]struct{}{}
455+
for _, ext := range excludeExts {
456+
e := strings.ToLower(strings.TrimSpace(ext))
457+
if e == "" {
458+
continue
459+
}
460+
excluded[e] = struct{}{}
461+
}
436462
for _, p := range patch.Stats() {
437463
commitFile := &code.CommitFile{
438464
CommitSha: commit.Hash.String(),
439465
}
440466
fileName := p.Name
441467
commitFile.FilePath = fileName
468+
if len(excluded) > 0 {
469+
lower := strings.ToLower(fileName)
470+
skip := false
471+
for ext := range excluded {
472+
if strings.HasSuffix(lower, ext) {
473+
skip = true
474+
break
475+
}
476+
}
477+
if skip {
478+
continue
479+
}
480+
}
442481
commitFile.Id = genCommitFileId(commitFile.CommitSha, fileName)
443482
commitFile.Deletions = p.Deletion
444483
commitFile.Additions = p.Addition

backend/plugins/gitextractor/parser/repo_libgit2.go

Lines changed: 81 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"regexp"
2626
"sort"
2727
"strconv"
28+
"strings"
2829

2930
"github.com/apache/incubator-devlake/core/dal"
3031
"github.com/apache/incubator-devlake/core/errors"
@@ -317,12 +318,13 @@ func (r *Libgit2RepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext)
317318

318319
if !*taskOpts.SkipCommitStat {
319320
var stats *git.DiffStats
320-
if stats, err = r.getDiffComparedToParent(taskOpts, c.Sha, commit, parent, opts, componentMap); err != nil {
321+
var addIncluded, delIncluded int
322+
if stats, addIncluded, delIncluded, err = r.getDiffComparedToParent(taskOpts, c.Sha, commit, parent, opts, componentMap); err != nil {
321323
return err
322324
}
323325
r.logger.Debug("state: %#+v\n", stats.Deletions())
324-
c.Additions += stats.Insertions()
325-
c.Deletions += stats.Deletions()
326+
c.Additions += addIncluded
327+
c.Deletions += delIncluded
326328
}
327329

328330
err = r.store.Commits(c)
@@ -358,39 +360,83 @@ func (r *Libgit2RepoCollector) storeParentCommits(commitSha string, commit *git.
358360
return r.store.CommitParents(commitParents)
359361
}
360362

361-
func (r *Libgit2RepoCollector) getDiffComparedToParent(taskOpts *GitExtractorOptions, commitSha string, commit *git.Commit, parent *git.Commit, opts *git.DiffOptions, componentMap map[string]*regexp.Regexp) (*git.DiffStats, errors.Error) {
363+
func (r *Libgit2RepoCollector) getDiffComparedToParent(taskOpts *GitExtractorOptions, commitSha string, commit *git.Commit, parent *git.Commit, opts *git.DiffOptions, componentMap map[string]*regexp.Regexp) (*git.DiffStats, int, int, errors.Error) {
362364
var err error
363365
var parentTree, tree *git.Tree
364366
if parent != nil {
365367
parentTree, err = parent.Tree()
366368
}
367369
if err != nil {
368-
return nil, errors.Convert(err)
370+
return nil, 0, 0, errors.Convert(err)
369371
}
370372
tree, err = commit.Tree()
371373
if err != nil {
372-
return nil, errors.Convert(err)
374+
return nil, 0, 0, errors.Convert(err)
373375
}
374376
var diff *git.Diff
375377
diff, err = r.repo.DiffTreeToTree(parentTree, tree, opts)
376378
if err != nil {
377-
return nil, errors.Convert(err)
379+
return nil, 0, 0, errors.Convert(err)
380+
}
381+
// build excluded extension set
382+
excluded := map[string]struct{}{}
383+
for _, ext := range taskOpts.ExcludeFileExtensions {
384+
e := strings.ToLower(strings.TrimSpace(ext))
385+
if e == "" {
386+
continue
387+
}
388+
excluded[e] = struct{}{}
378389
}
379390
if !*taskOpts.SkipCommitFiles {
380-
err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap)
391+
err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap, excluded)
381392
if err != nil {
382-
return nil, errors.Convert(err)
393+
return nil, 0, 0, errors.Convert(err)
383394
}
384395
}
385396
var stats *git.DiffStats
386397
stats, err = diff.Stats()
387398
if err != nil {
388-
return nil, errors.Convert(err)
389-
}
390-
return stats, nil
399+
return nil, 0, 0, errors.Convert(err)
400+
}
401+
// calculate included totals with exclusions
402+
addIncluded := 0
403+
delIncluded := 0
404+
if len(excluded) == 0 {
405+
addIncluded = stats.Insertions()
406+
delIncluded = stats.Deletions()
407+
return stats, addIncluded, delIncluded, nil
408+
}
409+
_ = diff.ForEach(func(file git.DiffDelta, progress float64) (git.DiffForEachHunkCallback, error) {
410+
// choose path to check based on delta status; for deletions use old path
411+
pathForCheck := file.NewFile.Path
412+
if file.Status == git.DeltaDeleted || pathForCheck == "" {
413+
pathForCheck = file.OldFile.Path
414+
}
415+
lower := strings.ToLower(pathForCheck)
416+
for ext := range excluded {
417+
if strings.HasSuffix(lower, ext) {
418+
// skip all lines for excluded files
419+
return func(hunk git.DiffHunk) (git.DiffForEachLineCallback, error) {
420+
return func(line git.DiffLine) error { return nil }, nil
421+
}, nil
422+
}
423+
}
424+
return func(hunk git.DiffHunk) (git.DiffForEachLineCallback, error) {
425+
return func(line git.DiffLine) error {
426+
if line.Origin == git.DiffLineAddition {
427+
addIncluded += line.NumLines
428+
}
429+
if line.Origin == git.DiffLineDeletion {
430+
delIncluded += line.NumLines
431+
}
432+
return nil
433+
}, nil
434+
}, nil
435+
}, git.DiffDetailLines)
436+
return stats, addIncluded, delIncluded, nil
391437
}
392438

393-
func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff *git.Diff, componentMap map[string]*regexp.Regexp) errors.Error {
439+
func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff *git.Diff, componentMap map[string]*regexp.Regexp, excluded map[string]struct{}) errors.Error {
394440
var commitFile *code.CommitFile
395441
var commitFileComponent *code.CommitFileComponent
396442
var err error
@@ -404,15 +450,36 @@ func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff *
404450
}
405451
}
406452

453+
// skip files by extension if configured
454+
if len(excluded) > 0 {
455+
pathForCheck := file.NewFile.Path
456+
if file.Status == git.DeltaDeleted || pathForCheck == "" {
457+
pathForCheck = file.OldFile.Path
458+
}
459+
lower := strings.ToLower(pathForCheck)
460+
for ext := range excluded {
461+
if strings.HasSuffix(lower, ext) {
462+
// skip this file entirely
463+
return func(hunk git.DiffHunk) (git.DiffForEachLineCallback, error) {
464+
return func(line git.DiffLine) error { return nil }, nil
465+
}, nil
466+
}
467+
}
468+
}
469+
407470
commitFile = new(code.CommitFile)
408471
commitFile.CommitSha = commitSha
472+
// prefer new path; for deletions fall back to old path
409473
commitFile.FilePath = file.NewFile.Path
474+
if commitFile.FilePath == "" {
475+
commitFile.FilePath = file.OldFile.Path
476+
}
410477

411478
// With some long path,the varchar(255) was not enough both ID and file_path
412479
// So we use the hash to compress the path in ID and add length of file_path.
413480
// Use commitSha and the sha256 of FilePath to create id
414481
shaFilePath := sha256.New()
415-
shaFilePath.Write([]byte(file.NewFile.Path))
482+
shaFilePath.Write([]byte(commitFile.FilePath))
416483
commitFile.Id = commitSha + ":" + hex.EncodeToString(shaFilePath.Sum(nil))
417484

418485
commitFileComponent = new(code.CommitFileComponent)

backend/plugins/gitextractor/parser/taskdata.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,6 @@ type GitExtractorOptions struct {
4747
NoShallowClone bool `json:"noShallowClone" mapstructure:"noShallowClone"`
4848
ConnectionId uint64 `json:"connectionId" mapstructure:"connectionId,omitempty"`
4949
PluginName string `json:"pluginName" mapstructure:"pluginName,omitempty"`
50+
// Configured by upstream plugin (e.g., GitLab) to exclude file extensions from commit stats
51+
ExcludeFileExtensions []string `json:"excludeFileExtensions" mapstructure:"excludeFileExtensions"`
5052
}

backend/plugins/github/models/release.go

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,25 +25,25 @@ import (
2525

2626
type GithubRelease struct {
2727
common.NoPKModel `json:"-" mapstructure:"-"`
28-
ConnectionId uint64 `json:"connection_id" gorm:"primaryKey"`
29-
GithubId int `json:"github_id"`
30-
Id string `json:"id" gorm:"type:varchar(255);primaryKey"`
31-
AuthorName string `json:"authorName"`
32-
AuthorID string `json:"authorId"`
33-
CreatedAt time.Time `json:"createdAt"`
34-
DatabaseID int `json:"databaseId"`
35-
Description string `json:"description"`
36-
DescriptionHTML string `json:"descriptionHTML"`
37-
IsDraft bool `json:"isDraft"`
38-
IsLatest bool `json:"isLatest"`
39-
IsPrerelease bool `json:"isPrerelease"`
40-
Name string `json:"name"`
41-
PublishedAt time.Time `json:"publishedAt"`
42-
ResourcePath string `json:"resourcePath"`
43-
TagName string `json:"tagName"`
44-
UpdatedAt time.Time `json:"updatedAt"`
45-
CommitSha string `json:"commit_sha"`
46-
URL string `json:"url"`
28+
ConnectionId uint64 `json:"connection_id" gorm:"primaryKey"`
29+
GithubId int `json:"github_id"`
30+
Id string `json:"id" gorm:"type:varchar(255);primaryKey"`
31+
AuthorName string `json:"authorName"`
32+
AuthorID string `json:"authorId"`
33+
CreatedAt time.Time `json:"createdAt"`
34+
DatabaseID int `json:"databaseId"`
35+
Description string `json:"description"`
36+
DescriptionHTML string `json:"descriptionHTML"`
37+
IsDraft bool `json:"isDraft"`
38+
IsLatest bool `json:"isLatest"`
39+
IsPrerelease bool `json:"isPrerelease"`
40+
Name string `json:"name"`
41+
PublishedAt *time.Time `json:"publishedAt"`
42+
ResourcePath string `json:"resourcePath"`
43+
TagName string `json:"tagName"`
44+
UpdatedAt time.Time `json:"updatedAt"`
45+
CommitSha string `json:"commit_sha"`
46+
URL string `json:"url"`
4747
}
4848

4949
func (GithubRelease) TableName() string {

0 commit comments

Comments
 (0)