@@ -25,6 +25,7 @@ import (
2525 "regexp"
2626 "sort"
2727 "strconv"
28+ "strings"
2829
2930 "github.com/apache/incubator-devlake/core/dal"
3031 "github.com/apache/incubator-devlake/core/errors"
@@ -317,12 +318,13 @@ func (r *Libgit2RepoCollector) CollectCommits(subtaskCtx plugin.SubTaskContext)
317318
318319 if ! * taskOpts .SkipCommitStat {
319320 var stats * git.DiffStats
320- if stats , err = r .getDiffComparedToParent (taskOpts , c .Sha , commit , parent , opts , componentMap ); err != nil {
321+ var addIncluded , delIncluded int
322+ if stats , addIncluded , delIncluded , err = r .getDiffComparedToParent (taskOpts , c .Sha , commit , parent , opts , componentMap ); err != nil {
321323 return err
322324 }
323325 r .logger .Debug ("state: %#+v\n " , stats .Deletions ())
324- c .Additions += stats . Insertions ()
325- c .Deletions += stats . Deletions ()
326+ c .Additions += addIncluded
327+ c .Deletions += delIncluded
326328 }
327329
328330 err = r .store .Commits (c )
@@ -358,39 +360,83 @@ func (r *Libgit2RepoCollector) storeParentCommits(commitSha string, commit *git.
358360 return r .store .CommitParents (commitParents )
359361}
360362
361- func (r * Libgit2RepoCollector ) getDiffComparedToParent (taskOpts * GitExtractorOptions , commitSha string , commit * git.Commit , parent * git.Commit , opts * git.DiffOptions , componentMap map [string ]* regexp.Regexp ) (* git.DiffStats , errors.Error ) {
363+ func (r * Libgit2RepoCollector ) getDiffComparedToParent (taskOpts * GitExtractorOptions , commitSha string , commit * git.Commit , parent * git.Commit , opts * git.DiffOptions , componentMap map [string ]* regexp.Regexp ) (* git.DiffStats , int , int , errors.Error ) {
362364 var err error
363365 var parentTree , tree * git.Tree
364366 if parent != nil {
365367 parentTree , err = parent .Tree ()
366368 }
367369 if err != nil {
368- return nil , errors .Convert (err )
370+ return nil , 0 , 0 , errors .Convert (err )
369371 }
370372 tree , err = commit .Tree ()
371373 if err != nil {
372- return nil , errors .Convert (err )
374+ return nil , 0 , 0 , errors .Convert (err )
373375 }
374376 var diff * git.Diff
375377 diff , err = r .repo .DiffTreeToTree (parentTree , tree , opts )
376378 if err != nil {
377- return nil , errors .Convert (err )
379+ return nil , 0 , 0 , errors .Convert (err )
380+ }
381+ // build excluded extension set
382+ excluded := map [string ]struct {}{}
383+ for _ , ext := range taskOpts .ExcludeFileExtensions {
384+ e := strings .ToLower (strings .TrimSpace (ext ))
385+ if e == "" {
386+ continue
387+ }
388+ excluded [e ] = struct {}{}
378389 }
379390 if ! * taskOpts .SkipCommitFiles {
380- err = r .storeCommitFilesFromDiff (commitSha , diff , componentMap )
391+ err = r .storeCommitFilesFromDiff (commitSha , diff , componentMap , excluded )
381392 if err != nil {
382- return nil , errors .Convert (err )
393+ return nil , 0 , 0 , errors .Convert (err )
383394 }
384395 }
385396 var stats * git.DiffStats
386397 stats , err = diff .Stats ()
387398 if err != nil {
388- return nil , errors .Convert (err )
389- }
390- return stats , nil
399+ return nil , 0 , 0 , errors .Convert (err )
400+ }
401+ // calculate included totals with exclusions
402+ addIncluded := 0
403+ delIncluded := 0
404+ if len (excluded ) == 0 {
405+ addIncluded = stats .Insertions ()
406+ delIncluded = stats .Deletions ()
407+ return stats , addIncluded , delIncluded , nil
408+ }
409+ _ = diff .ForEach (func (file git.DiffDelta , progress float64 ) (git.DiffForEachHunkCallback , error ) {
410+ // choose path to check based on delta status; for deletions use old path
411+ pathForCheck := file .NewFile .Path
412+ if file .Status == git .DeltaDeleted || pathForCheck == "" {
413+ pathForCheck = file .OldFile .Path
414+ }
415+ lower := strings .ToLower (pathForCheck )
416+ for ext := range excluded {
417+ if strings .HasSuffix (lower , ext ) {
418+ // skip all lines for excluded files
419+ return func (hunk git.DiffHunk ) (git.DiffForEachLineCallback , error ) {
420+ return func (line git.DiffLine ) error { return nil }, nil
421+ }, nil
422+ }
423+ }
424+ return func (hunk git.DiffHunk ) (git.DiffForEachLineCallback , error ) {
425+ return func (line git.DiffLine ) error {
426+ if line .Origin == git .DiffLineAddition {
427+ addIncluded += line .NumLines
428+ }
429+ if line .Origin == git .DiffLineDeletion {
430+ delIncluded += line .NumLines
431+ }
432+ return nil
433+ }, nil
434+ }, nil
435+ }, git .DiffDetailLines )
436+ return stats , addIncluded , delIncluded , nil
391437}
392438
393- func (r * Libgit2RepoCollector ) storeCommitFilesFromDiff (commitSha string , diff * git.Diff , componentMap map [string ]* regexp.Regexp ) errors.Error {
439+ func (r * Libgit2RepoCollector ) storeCommitFilesFromDiff (commitSha string , diff * git.Diff , componentMap map [string ]* regexp.Regexp , excluded map [ string ] struct {} ) errors.Error {
394440 var commitFile * code.CommitFile
395441 var commitFileComponent * code.CommitFileComponent
396442 var err error
@@ -404,15 +450,36 @@ func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff *
404450 }
405451 }
406452
453+ // skip files by extension if configured
454+ if len (excluded ) > 0 {
455+ pathForCheck := file .NewFile .Path
456+ if file .Status == git .DeltaDeleted || pathForCheck == "" {
457+ pathForCheck = file .OldFile .Path
458+ }
459+ lower := strings .ToLower (pathForCheck )
460+ for ext := range excluded {
461+ if strings .HasSuffix (lower , ext ) {
462+ // skip this file entirely
463+ return func (hunk git.DiffHunk ) (git.DiffForEachLineCallback , error ) {
464+ return func (line git.DiffLine ) error { return nil }, nil
465+ }, nil
466+ }
467+ }
468+ }
469+
407470 commitFile = new (code.CommitFile )
408471 commitFile .CommitSha = commitSha
472+ // prefer new path; for deletions fall back to old path
409473 commitFile .FilePath = file .NewFile .Path
474+ if commitFile .FilePath == "" {
475+ commitFile .FilePath = file .OldFile .Path
476+ }
410477
411478 // With some long path,the varchar(255) was not enough both ID and file_path
412479 // So we use the hash to compress the path in ID and add length of file_path.
413480 // Use commitSha and the sha256 of FilePath to create id
414481 shaFilePath := sha256 .New ()
415- shaFilePath .Write ([]byte (file . NewFile . Path ))
482+ shaFilePath .Write ([]byte (commitFile . FilePath ))
416483 commitFile .Id = commitSha + ":" + hex .EncodeToString (shaFilePath .Sum (nil ))
417484
418485 commitFileComponent = new (code.CommitFileComponent )
0 commit comments