Skip to content

Commit 0192f69

Browse files
authored
feat(gitextractor): add support for excluding file extensions in commit stats (#8586)
* fix(jira): update epic collector to use new API endpoint and include all fields
* fix(jira): enhance epic collector to dynamically select API endpoint based on JIRA version
* fix(jira): update epic collector to use correct API endpoint for JIRA Cloud and Server versions
* fix(jira): refactor epic collector to streamline API endpoint selection and enhance error handling
* fix(jira): fix type for Jira issue descriptions
* refactor(jira): update comment and worklog models to use FlexibleDescription type for comments
* docs(jira): add ADF reference for FlexibleDescription type in issue model
* refactor(migrations): enhance file meta migration to check column existence and nullability before modification
* feat(gitlab): add PR size exclusion for specified file extensions
1 parent 0960736 commit 0192f69

File tree

9 files changed

+226
-29
lines changed

9 files changed

+226
-29
lines changed

backend/plugins/gitextractor/parser/repo_gogit.go

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"encoding/hex"
2424
"fmt"
2525
"regexp"
26+
"strings"
2627

2728
"github.com/apache/incubator-devlake/core/dal"
2829
"github.com/apache/incubator-devlake/core/errors"
@@ -220,9 +221,6 @@ func (r *GogitRepoCollector) CollectBranches(subtaskCtx plugin.SubTaskContext) e
220221
func(r *plumbing.Reference) bool {
221222
return r.Name().IsBranch() || r.Name().IsRemote()
222223
}, refIter)
223-
if err != nil {
224-
return err
225-
}
226224
headRef, err := r.repo.Head()
227225
if err != nil {
228226
return err
@@ -336,7 +334,26 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext) (e
336334
if err != nil {
337335
return err
338336
} else {
337+
excluded := map[string]struct{}{}
338+
for _, ext := range taskOpts.ExcludeFileExtensions {
339+
e := strings.ToLower(strings.TrimSpace(ext))
340+
if e == "" {
341+
continue
342+
}
343+
excluded[e] = struct{}{}
344+
}
339345
for _, stat := range stats {
346+
nameLower := strings.ToLower(stat.Name)
347+
skip := false
348+
for ext := range excluded {
349+
if strings.HasSuffix(nameLower, ext) {
350+
skip = true
351+
break
352+
}
353+
}
354+
if skip {
355+
continue
356+
}
340357
codeCommit.Additions += stat.Addition
341358
// In some repos, deletion may be zero, which is different from git log --stat.
342359
// It seems go-git doesn't get the correct changes.
@@ -363,7 +380,7 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext) (e
363380
return err
364381
}
365382
if !*taskOpts.SkipCommitFiles {
366-
if err := r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit); err != nil {
383+
if err := r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit, taskOpts.ExcludeFileExtensions); err != nil {
367384
return err
368385
}
369386
}
@@ -423,7 +440,7 @@ func (r *GogitRepoCollector) getCurrentAndParentTree(ctx context.Context, commit
423440
return commitTree, firstParentTree, nil
424441
}
425442

426-
func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commit *object.Commit) (err error) {
443+
func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commit *object.Commit, excludeExts []string) (err error) {
427444
commitTree, firstParentTree, err := r.getCurrentAndParentTree(subtaskCtx.GetContext(), commit)
428445
if err != nil {
429446
return err
@@ -433,12 +450,34 @@ func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx plu
433450
if err != nil {
434451
return err
435452
}
453+
// normalize exclusions
454+
excluded := map[string]struct{}{}
455+
for _, ext := range excludeExts {
456+
e := strings.ToLower(strings.TrimSpace(ext))
457+
if e == "" {
458+
continue
459+
}
460+
excluded[e] = struct{}{}
461+
}
436462
for _, p := range patch.Stats() {
437463
commitFile := &code.CommitFile{
438464
CommitSha: commit.Hash.String(),
439465
}
440466
fileName := p.Name
441467
commitFile.FilePath = fileName
468+
if len(excluded) > 0 {
469+
lower := strings.ToLower(fileName)
470+
skip := false
471+
for ext := range excluded {
472+
if strings.HasSuffix(lower, ext) {
473+
skip = true
474+
break
475+
}
476+
}
477+
if skip {
478+
continue
479+
}
480+
}
442481
commitFile.Id = genCommitFileId(commitFile.CommitSha, fileName)
443482
commitFile.Deletions = p.Deletion
444483
commitFile.Additions = p.Addition

backend/plugins/gitextractor/parser/repo_libgit2.go

Lines changed: 81 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"regexp"
2626
"sort"
2727
"strconv"
28+
"strings"
2829

2930
"github.com/apache/incubator-devlake/core/dal"
3031
"github.com/apache/incubator-devlake/core/errors"
@@ -317,12 +318,13 @@ func (r *Libgit2RepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext)
317318

318319
if !*taskOpts.SkipCommitStat {
319320
var stats *git.DiffStats
320-
if stats, err = r.getDiffComparedToParent(taskOpts, c.Sha, commit, parent, opts, componentMap); err != nil {
321+
var addIncluded, delIncluded int
322+
if stats, addIncluded, delIncluded, err = r.getDiffComparedToParent(taskOpts, c.Sha, commit, parent, opts, componentMap); err != nil {
321323
return err
322324
}
323325
r.logger.Debug("state: %#+v\n", stats.Deletions())
324-
c.Additions += stats.Insertions()
325-
c.Deletions += stats.Deletions()
326+
c.Additions += addIncluded
327+
c.Deletions += delIncluded
326328
}
327329

328330
err = r.store.Commits(c)
@@ -358,39 +360,83 @@ func (r *Libgit2RepoCollector) storeParentCommits(commitSha string, commit *git.
358360
return r.store.CommitParents(commitParents)
359361
}
360362

361-
func (r *Libgit2RepoCollector) getDiffComparedToParent(taskOpts *GitExtractorOptions, commitSha string, commit *git.Commit, parent *git.Commit, opts *git.DiffOptions, componentMap map[string]*regexp.Regexp) (*git.DiffStats, errors.Error) {
363+
func (r *Libgit2RepoCollector) getDiffComparedToParent(taskOpts *GitExtractorOptions, commitSha string, commit *git.Commit, parent *git.Commit, opts *git.DiffOptions, componentMap map[string]*regexp.Regexp) (*git.DiffStats, int, int, errors.Error) {
362364
var err error
363365
var parentTree, tree *git.Tree
364366
if parent != nil {
365367
parentTree, err = parent.Tree()
366368
}
367369
if err != nil {
368-
return nil, errors.Convert(err)
370+
return nil, 0, 0, errors.Convert(err)
369371
}
370372
tree, err = commit.Tree()
371373
if err != nil {
372-
return nil, errors.Convert(err)
374+
return nil, 0, 0, errors.Convert(err)
373375
}
374376
var diff *git.Diff
375377
diff, err = r.repo.DiffTreeToTree(parentTree, tree, opts)
376378
if err != nil {
377-
return nil, errors.Convert(err)
379+
return nil, 0, 0, errors.Convert(err)
380+
}
381+
// build excluded extension set
382+
excluded := map[string]struct{}{}
383+
for _, ext := range taskOpts.ExcludeFileExtensions {
384+
e := strings.ToLower(strings.TrimSpace(ext))
385+
if e == "" {
386+
continue
387+
}
388+
excluded[e] = struct{}{}
378389
}
379390
if !*taskOpts.SkipCommitFiles {
380-
err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap)
391+
err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap, excluded)
381392
if err != nil {
382-
return nil, errors.Convert(err)
393+
return nil, 0, 0, errors.Convert(err)
383394
}
384395
}
385396
var stats *git.DiffStats
386397
stats, err = diff.Stats()
387398
if err != nil {
388-
return nil, errors.Convert(err)
389-
}
390-
return stats, nil
399+
return nil, 0, 0, errors.Convert(err)
400+
}
401+
// calculate included totals with exclusions
402+
addIncluded := 0
403+
delIncluded := 0
404+
if len(excluded) == 0 {
405+
addIncluded = stats.Insertions()
406+
delIncluded = stats.Deletions()
407+
return stats, addIncluded, delIncluded, nil
408+
}
409+
_ = diff.ForEach(func(file git.DiffDelta, progress float64) (git.DiffForEachHunkCallback, error) {
410+
// choose path to check based on delta status; for deletions use old path
411+
pathForCheck := file.NewFile.Path
412+
if file.Status == git.DeltaDeleted || pathForCheck == "" {
413+
pathForCheck = file.OldFile.Path
414+
}
415+
lower := strings.ToLower(pathForCheck)
416+
for ext := range excluded {
417+
if strings.HasSuffix(lower, ext) {
418+
// skip all lines for excluded files
419+
return func(hunk git.DiffHunk) (git.DiffForEachLineCallback, error) {
420+
return func(line git.DiffLine) error { return nil }, nil
421+
}, nil
422+
}
423+
}
424+
return func(hunk git.DiffHunk) (git.DiffForEachLineCallback, error) {
425+
return func(line git.DiffLine) error {
426+
if line.Origin == git.DiffLineAddition {
427+
addIncluded += line.NumLines
428+
}
429+
if line.Origin == git.DiffLineDeletion {
430+
delIncluded += line.NumLines
431+
}
432+
return nil
433+
}, nil
434+
}, nil
435+
}, git.DiffDetailLines)
436+
return stats, addIncluded, delIncluded, nil
391437
}
392438

393-
func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff *git.Diff, componentMap map[string]*regexp.Regexp) errors.Error {
439+
func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff *git.Diff, componentMap map[string]*regexp.Regexp, excluded map[string]struct{}) errors.Error {
394440
var commitFile *code.CommitFile
395441
var commitFileComponent *code.CommitFileComponent
396442
var err error
@@ -404,15 +450,36 @@ func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff *
404450
}
405451
}
406452

453+
// skip files by extension if configured
454+
if len(excluded) > 0 {
455+
pathForCheck := file.NewFile.Path
456+
if file.Status == git.DeltaDeleted || pathForCheck == "" {
457+
pathForCheck = file.OldFile.Path
458+
}
459+
lower := strings.ToLower(pathForCheck)
460+
for ext := range excluded {
461+
if strings.HasSuffix(lower, ext) {
462+
// skip this file entirely
463+
return func(hunk git.DiffHunk) (git.DiffForEachLineCallback, error) {
464+
return func(line git.DiffLine) error { return nil }, nil
465+
}, nil
466+
}
467+
}
468+
}
469+
407470
commitFile = new(code.CommitFile)
408471
commitFile.CommitSha = commitSha
472+
// prefer new path; for deletions fall back to old path
409473
commitFile.FilePath = file.NewFile.Path
474+
if commitFile.FilePath == "" {
475+
commitFile.FilePath = file.OldFile.Path
476+
}
410477

411478
// With some long path,the varchar(255) was not enough both ID and file_path
412479
// So we use the hash to compress the path in ID and add length of file_path.
413480
// Use commitSha and the sha256 of FilePath to create id
414481
shaFilePath := sha256.New()
415-
shaFilePath.Write([]byte(file.NewFile.Path))
482+
shaFilePath.Write([]byte(commitFile.FilePath))
416483
commitFile.Id = commitSha + ":" + hex.EncodeToString(shaFilePath.Sum(nil))
417484

418485
commitFileComponent = new(code.CommitFileComponent)

backend/plugins/gitextractor/parser/taskdata.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,6 @@ type GitExtractorOptions struct {
4747
NoShallowClone bool `json:"noShallowClone" mapstructure:"noShallowClone"`
4848
ConnectionId uint64 `json:"connectionId" mapstructure:"connectionId,omitempty"`
4949
PluginName string `json:"pluginName" mapstructure:"pluginName,omitempty"`
50+
// Configured by upstream plugin (e.g., GitLab) to exclude file extensions from commit stats
51+
ExcludeFileExtensions []string `json:"excludeFileExtensions" mapstructure:"excludeFileExtensions"`
5052
}

backend/plugins/gitlab/api/blueprint_v200.go

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -132,17 +132,22 @@ func makePipelinePlanV200(
132132
return nil, err
133133
}
134134
cloneUrl.User = url.UserPassword("git", connection.Token)
135+
gitextOpts := map[string]interface{}{
136+
"url": cloneUrl.String(),
137+
"name": gitlabProject.Name,
138+
"fullName": gitlabProject.PathWithNamespace,
139+
"repoId": didgen.NewDomainIdGenerator(&models.GitlabProject{}).Generate(connection.ID, gitlabProject.GitlabId),
140+
"proxy": connection.Proxy,
141+
"connectionId": gitlabProject.ConnectionId,
142+
"pluginName": "gitlab",
143+
}
144+
if len(scopeConfig.PrSizeExcludedFileExtensions) > 0 {
145+
// pass excluded file extensions to gitextractor to support PR Size exclusion
146+
gitextOpts["excludeFileExtensions"] = scopeConfig.PrSizeExcludedFileExtensions
147+
}
135148
stage = append(stage, &coreModels.PipelineTask{
136-
Plugin: "gitextractor",
137-
Options: map[string]interface{}{
138-
"url": cloneUrl.String(),
139-
"name": gitlabProject.Name,
140-
"fullName": gitlabProject.PathWithNamespace,
141-
"repoId": didgen.NewDomainIdGenerator(&models.GitlabProject{}).Generate(connection.ID, gitlabProject.GitlabId),
142-
"proxy": connection.Proxy,
143-
"connectionId": gitlabProject.ConnectionId,
144-
"pluginName": "gitlab",
145-
},
149+
Plugin: "gitextractor",
150+
Options: gitextOpts,
146151
})
147152
}
148153

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
Licensed to the Apache Software Foundation (ASF) under one or more
3+
contributor license agreements. See the NOTICE file distributed with
4+
this work for additional information regarding copyright ownership.
5+
The ASF licenses this file to You under the Apache License, Version 2.0
6+
(the "License"); you may not use this file except in compliance with
7+
the License. You may obtain a copy of the License at
8+
9+
http://www.apache.org/licenses/LICENSE-2.0
10+
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.
16+
*/
17+
18+
package migrationscripts
19+
20+
import (
21+
"github.com/apache/incubator-devlake/core/context"
22+
"github.com/apache/incubator-devlake/core/errors"
23+
"github.com/apache/incubator-devlake/core/plugin"
24+
"github.com/apache/incubator-devlake/helpers/migrationhelper"
25+
)
26+
27+
// Compile-time assertion that *addPrSizeExcludedFileExtensions satisfies
// the plugin.MigrationScript interface.
var _ plugin.MigrationScript = (*addPrSizeExcludedFileExtensions)(nil)
28+
29+
// gitlabScopeConfig20250921 is the struct handed to the auto-migration in Up.
// It declares only the PrSizeExcludedFileExtensions field and is bound to the
// "_tool_gitlab_scope_configs" table by its TableName method.
type gitlabScopeConfig20250921 struct {
30+
// PrSizeExcludedFileExtensions is tagged as a JSON-typed column (gorm "type:json").
PrSizeExcludedFileExtensions []string `gorm:"type:json" json:"prSizeExcludedFileExtensions" mapstructure:"prSizeExcludedFileExtensions"`
31+
}
32+
33+
// TableName returns the fixed table name "_tool_gitlab_scope_configs" for
// gitlabScopeConfig20250921.
func (gitlabScopeConfig20250921) TableName() string {
34+
return "_tool_gitlab_scope_configs"
35+
}
36+
37+
// addPrSizeExcludedFileExtensions is the stateless migration script type whose
// Up, Version and Name methods are defined in this file.
type addPrSizeExcludedFileExtensions struct{}
38+
39+
// Up applies the migration by passing a gitlabScopeConfig20250921 value to
// migrationhelper.AutoMigrateTables and returning that call's result unchanged.
// NOTE(review): presumably AutoMigrateTables adds the missing
// pr_size_excluded_file_extensions column to the table — confirm against the helper.
func (script *addPrSizeExcludedFileExtensions) Up(basicRes context.BasicRes) errors.Error {
40+
return migrationhelper.AutoMigrateTables(
41+
basicRes,
42+
&gitlabScopeConfig20250921{},
43+
)
44+
}
45+
46+
// Version returns the numeric identifier of this migration script.
// NOTE(review): the value looks like a YYYYMMDDhhmmss timestamp
// (2025-09-21 10:00:00) — confirm how the framework orders scripts.
func (*addPrSizeExcludedFileExtensions) Version() uint64 { return 20250921100000 }
47+
48+
// Name returns the human-readable description of this migration script.
func (*addPrSizeExcludedFileExtensions) Name() string {
49+
return "add pr_size_excluded_file_extensions to _tool_gitlab_scope_configs"
50+
}

backend/plugins/gitlab/models/migrationscripts/register.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,6 @@ func All() []plugin.MigrationScript {
5252
new(addGitlabAssigneeAndReviewerPrimaryKey),
5353
new(changeIssueComponentType),
5454
new(addIsChildToPipelines240906),
55+
new(addPrSizeExcludedFileExtensions),
5556
}
5657
}

backend/plugins/gitlab/models/scope_config.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ type GitlabScopeConfig struct {
3737
ProductionPattern string `mapstructure:"productionPattern,omitempty" json:"productionPattern" gorm:"type:varchar(255)"`
3838
EnvNamePattern string `mapstructure:"envNamePattern,omitempty" json:"envNamePattern" gorm:"type:varchar(255)"`
3939
Refdiff datatypes.JSONMap `mapstructure:"refdiff,omitempty" json:"refdiff" swaggertype:"object" format:"json"`
40+
// A list of file extensions to exclude when calculating PR Size (affects commit additions/deletions used by dashboards)
41+
PrSizeExcludedFileExtensions []string `mapstructure:"prSizeExcludedFileExtensions" json:"prSizeExcludedFileExtensions" gorm:"type:json;serializer:json"`
4042
}
4143

4244
func (t GitlabScopeConfig) TableName() string {

config-ui/src/plugins/register/gitlab/config.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ export const GitLabConfig: IPluginConfig = {
7878
envNamePattern: '(?i)prod(.*)',
7979
deploymentPattern: '',
8080
productionPattern: '',
81+
prSizeExcludedFileExtensions: [],
8182
},
8283
},
8384
};

0 commit comments

Comments
 (0)