Skip to content

Commit 96c7def

Browse files
committed
improve analyze references cmd and remove orphaned file usage
1 parent 27e91a5 commit 96c7def

File tree

6 files changed

+159
-32
lines changed

6 files changed

+159
-32
lines changed

audit-cli/README.md

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,6 @@ This command helps writers:
308308
- Understand the impact of changes to a file (what pages will be affected)
309309
- Find all usages of an include file across the documentation
310310
- Track where code examples are referenced
311-
- Identify orphaned files (files with no references from content inclusion directives)
312311
- Plan refactoring by understanding file dependencies
313312

314313
**Basic Usage:**
@@ -336,8 +335,10 @@ This command helps writers:
336335
- `-v, --verbose` - Show detailed information including line numbers and reference paths
337336
- `-c, --count-only` - Only show the count of references (useful for quick checks and scripting)
338337
- `--paths-only` - Only show the file paths, one per line (useful for piping to other commands)
338+
- `--summary` - Only show summary statistics (total files and references by type, without file list)
339339
- `-t, --directive-type <type>` - Filter by directive type: `include`, `literalinclude`, `io-code-block`, or `toctree`
340340
- `--include-toctree` - Include toctree entries (navigation links) in addition to content inclusion directives
341+
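- `--exclude <pattern>` - Exclude paths matching this glob pattern (e.g., `*/archive/*` or `*/deprecated/*`). The pattern is tested against the full path of each scanned file using Go's `filepath.Match`, so it must match the entire path and `*` does not match across `/` separators.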
341342

342343
**Understanding the Counts:**
343344

@@ -388,7 +389,8 @@ With `--include-toctree`, also tracks:
388389
getting-started
389390
```
390391

391-
**Note:** Only file-based references are tracked. Inline content (e.g., `.. input::` with `:language:` but no file path) aly
392+
**Note:** Only file-based references are tracked. Inline content (e.g., `.. input::` with `:language:` but no file path) is not tracked since it doesn't reference external files.
393+
392394
**Output Formats:**
393395

394396
**Text** (default):
@@ -440,22 +442,22 @@ include : 3 files, 4 references
440442
"total_references": 4,
441443
"referencing_files": [
442444
{
443-
"FilePath": "/path/to/duplicate-include-test.rst",
444-
"DirectiveType": "include",
445-
"ReferencePath": "/includes/intro.rst",
446-
"LineNumber": 6
445+
"file_path": "/path/to/duplicate-include-test.rst",
446+
"directive_type": "include",
447+
"reference_path": "/includes/intro.rst",
448+
"line_number": 6
447449
},
448450
{
449-
"FilePath": "/path/to/duplicate-include-test.rst",
450-
"DirectiveType": "include",
451-
"ReferencePath": "/includes/intro.rst",
452-
"LineNumber": 13
451+
"file_path": "/path/to/duplicate-include-test.rst",
452+
"directive_type": "include",
453+
"reference_path": "/includes/intro.rst",
454+
"line_number": 13
453455
},
454456
{
455-
"FilePath": "/path/to/include-test.rst",
456-
"DirectiveType": "include",
457-
"ReferencePath": "/includes/intro.rst",
458-
"LineNumber": 6
457+
"file_path": "/path/to/include-test.rst",
458+
"directive_type": "include",
459+
"reference_path": "/includes/intro.rst",
460+
"line_number": 6
459461
}
460462
]
461463
}
@@ -480,6 +482,15 @@ include : 3 files, 4 references
480482
./audit-cli analyze file-references ~/docs/source/includes/fact.rst --count-only
481483
# Output: 5
482484

485+
# Show summary statistics only
486+
./audit-cli analyze file-references ~/docs/source/includes/fact.rst --summary
487+
# Output:
488+
# Total Files: 3
489+
# Total References: 5
490+
#
491+
# By Type:
492+
# include : 3 files, 5 references
493+
483494
# Get list of files for piping to other commands
484495
./audit-cli analyze file-references ~/docs/source/includes/fact.rst --paths-only
485496
# Output:
@@ -498,6 +509,10 @@ include : 3 files, 4 references
498509

499510
# Combine filters: list files that use this as an io-code-block
500511
./audit-cli analyze file-references ~/docs/source/code-examples/query.js -t io-code-block --paths-only
512+
513+
# Exclude archived or deprecated files from search
514+
./audit-cli analyze file-references ~/docs/source/includes/fact.rst --exclude "*/archive/*"
515+
./audit-cli analyze file-references ~/docs/source/includes/fact.rst --exclude "*/deprecated/*"
501516
```
502517

503518
### Compare Commands

audit-cli/commands/analyze/file-references/analyzer.go

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,18 @@ import (
2525
// Parameters:
2626
// - targetFile: Absolute path to the file to analyze
2727
// - includeToctree: If true, include toctree entries in the search
28+
// - verbose: If true, show progress information
29+
// - excludePattern: Glob pattern for paths to exclude (empty string means no exclusion)
2830
//
2931
// Returns:
3032
// - *ReferenceAnalysis: The analysis results
3133
// - error: Any error encountered during analysis
32-
func AnalyzeReferences(targetFile string, includeToctree bool) (*ReferenceAnalysis, error) {
34+
func AnalyzeReferences(targetFile string, includeToctree bool, verbose bool, excludePattern string) (*ReferenceAnalysis, error) {
35+
// Check if target file exists
36+
if _, err := os.Stat(targetFile); os.IsNotExist(err) {
37+
return nil, fmt.Errorf("target file does not exist: %s\n\nPlease check:\n - The file path is correct\n - The file hasn't been moved or deleted\n - You have permission to access the file", targetFile)
38+
}
39+
3340
// Get absolute path
3441
absTargetFile, err := filepath.Abs(targetFile)
3542
if err != nil {
@@ -39,7 +46,7 @@ func AnalyzeReferences(targetFile string, includeToctree bool) (*ReferenceAnalys
3946
// Find the source directory
4047
sourceDir, err := pathresolver.FindSourceDirectory(absTargetFile)
4148
if err != nil {
42-
return nil, fmt.Errorf("failed to find source directory: %w", err)
49+
return nil, fmt.Errorf("failed to find source directory: %w\n\nThe source directory is detected by looking for a 'source' directory in the file's path.\nMake sure the target file is within a documentation repository with a 'source' directory.", err)
4350
}
4451

4552
// Initialize analysis result
@@ -49,6 +56,15 @@ func AnalyzeReferences(targetFile string, includeToctree bool) (*ReferenceAnalys
4956
ReferencingFiles: []FileReference{},
5057
}
5158

59+
// Track if we found any RST/YAML files
60+
foundAnyFiles := false
61+
filesProcessed := 0
62+
63+
// Show progress message if verbose
64+
if verbose {
65+
fmt.Fprintf(os.Stderr, "Scanning for references in %s...\n", sourceDir)
66+
}
67+
5268
// Walk through all RST and YAML files in the source directory
5369
err = filepath.Walk(sourceDir, func(path string, info os.FileInfo, err error) error {
5470
if err != nil {
@@ -67,6 +83,26 @@ func AnalyzeReferences(targetFile string, includeToctree bool) (*ReferenceAnalys
6783
return nil
6884
}
6985

86+
// Check if path should be excluded
87+
if excludePattern != "" {
88+
matched, err := filepath.Match(excludePattern, path)
89+
if err != nil {
90+
fmt.Fprintf(os.Stderr, "Warning: invalid exclude pattern: %v\n", err)
91+
} else if matched {
92+
// Skip this file
93+
return nil
94+
}
95+
}
96+
97+
// Mark that we found at least one file
98+
foundAnyFiles = true
99+
filesProcessed++
100+
101+
// Show progress every 100 files if verbose
102+
if verbose && filesProcessed%100 == 0 {
103+
fmt.Fprintf(os.Stderr, "Processed %d files...\n", filesProcessed)
104+
}
105+
70106
// Search for references in this file
71107
refs, err := findReferencesInFile(path, absTargetFile, sourceDir, includeToctree)
72108
if err != nil {
@@ -85,6 +121,16 @@ func AnalyzeReferences(targetFile string, includeToctree bool) (*ReferenceAnalys
85121
return nil, fmt.Errorf("failed to walk source directory: %w", err)
86122
}
87123

124+
// Check if we found any RST/YAML files
125+
if !foundAnyFiles {
126+
return nil, fmt.Errorf("no RST or YAML files found in source directory: %s\n\nThis might not be a documentation repository.\nExpected to find files with extensions: .rst, .txt, .yaml, .yml", sourceDir)
127+
}
128+
129+
// Show completion message if verbose
130+
if verbose {
131+
fmt.Fprintf(os.Stderr, "Scan complete. Processed %d files.\n", filesProcessed)
132+
}
133+
88134
// Update total counts
89135
analysis.TotalReferences = len(analysis.ReferencingFiles)
90136
analysis.TotalFiles = countUniqueFiles(analysis.ReferencingFiles)

audit-cli/commands/analyze/file-references/file_references.go

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
// - Understanding the impact of changes to a file
1212
// - Finding all usages of an include file
1313
// - Tracking code example references
14-
// - Identifying orphaned files (files with no references, including toctree entries)
1514
package filereferences
1615

1716
import (
@@ -41,8 +40,10 @@ func NewFileReferencesCommand() *cobra.Command {
4140
verbose bool
4241
countOnly bool
4342
pathsOnly bool
43+
summaryOnly bool
4444
directiveType string
4545
includeToctree bool
46+
excludePattern string
4647
)
4748

4849
cmd := &cobra.Command{
@@ -69,7 +70,6 @@ This is useful for:
6970
- Understanding the impact of changes to a file
7071
- Finding all usages of an include file
7172
- Tracking code example references
72-
- Identifying orphaned files (files with no references from content inclusion directives)
7373
7474
Examples:
7575
# Find what references an include file
@@ -93,20 +93,28 @@ Examples:
9393
# Just show the file paths
9494
analyze file-references /path/to/file.rst --paths-only
9595
96+
# Show summary statistics only
97+
analyze file-references /path/to/file.rst --summary
98+
99+
# Exclude certain paths from search
100+
analyze file-references /path/to/file.rst --exclude "*/archive/*"
101+
96102
# Filter by directive type
97103
analyze file-references /path/to/file.rst --directive-type include`,
98104
Args: cobra.ExactArgs(1),
99105
RunE: func(cmd *cobra.Command, args []string) error {
100-
return runReferences(args[0], format, verbose, countOnly, pathsOnly, directiveType, includeToctree)
106+
return runReferences(args[0], format, verbose, countOnly, pathsOnly, summaryOnly, directiveType, includeToctree, excludePattern)
101107
},
102108
}
103109

104110
cmd.Flags().StringVar(&format, "format", "text", "Output format (text or json)")
105111
cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Show detailed information including line numbers")
106112
cmd.Flags().BoolVarP(&countOnly, "count-only", "c", false, "Only show the count of references")
107113
cmd.Flags().BoolVar(&pathsOnly, "paths-only", false, "Only show the file paths (one per line)")
114+
cmd.Flags().BoolVar(&summaryOnly, "summary", false, "Only show summary statistics (total files and references by type)")
108115
cmd.Flags().StringVarP(&directiveType, "directive-type", "t", "", "Filter by directive type (include, literalinclude, io-code-block, toctree)")
109116
cmd.Flags().BoolVar(&includeToctree, "include-toctree", false, "Include toctree entries (navigation links) in addition to content inclusion directives")
117+
cmd.Flags().StringVar(&excludePattern, "exclude", "", "Exclude paths matching this glob pattern (e.g., '*/archive/*' or '*/deprecated/*')")
110118

111119
return cmd
112120
}
@@ -121,12 +129,14 @@ Examples:
121129
// - verbose: If true, show detailed information
122130
// - countOnly: If true, only show the count
123131
// - pathsOnly: If true, only show the file paths
132+
// - summaryOnly: If true, only show summary statistics
124133
// - directiveType: Filter by directive type (empty string means all types)
125134
// - includeToctree: If true, include toctree entries in the search
135+
// - excludePattern: Glob pattern for paths to exclude (empty string means no exclusion)
126136
//
127137
// Returns:
128138
// - error: Any error encountered during analysis
129-
func runReferences(targetFile, format string, verbose, countOnly, pathsOnly bool, directiveType string, includeToctree bool) error {
139+
func runReferences(targetFile, format string, verbose, countOnly, pathsOnly, summaryOnly bool, directiveType string, includeToctree bool, excludePattern string) error {
130140
// Validate directive type if specified
131141
if directiveType != "" {
132142
validTypes := map[string]bool{
@@ -147,15 +157,25 @@ func runReferences(targetFile, format string, verbose, countOnly, pathsOnly bool
147157
}
148158

149159
// Validate flag combinations
150-
if countOnly && pathsOnly {
151-
return fmt.Errorf("cannot use --count-only and --paths-only together")
160+
exclusiveFlags := 0
161+
if countOnly {
162+
exclusiveFlags++
163+
}
164+
if pathsOnly {
165+
exclusiveFlags++
152166
}
153-
if (countOnly || pathsOnly) && outputFormat == FormatJSON {
154-
return fmt.Errorf("--count-only and --paths-only are not compatible with --format json")
167+
if summaryOnly {
168+
exclusiveFlags++
169+
}
170+
if exclusiveFlags > 1 {
171+
return fmt.Errorf("cannot use --count-only, --paths-only, and --summary together")
172+
}
173+
if (countOnly || pathsOnly || summaryOnly) && outputFormat == FormatJSON {
174+
return fmt.Errorf("--count-only, --paths-only, and --summary are not compatible with --format json")
155175
}
156176

157177
// Perform analysis
158-
analysis, err := AnalyzeReferences(targetFile, includeToctree)
178+
analysis, err := AnalyzeReferences(targetFile, includeToctree, verbose, excludePattern)
159179
if err != nil {
160180
return fmt.Errorf("failed to analyze references: %w", err)
161181
}
@@ -176,6 +196,11 @@ func runReferences(targetFile, format string, verbose, countOnly, pathsOnly bool
176196
return PrintPathsOnly(analysis)
177197
}
178198

199+
// Handle summary-only output
200+
if summaryOnly {
201+
return PrintSummary(analysis)
202+
}
203+
179204
// Print full results
180205
return PrintAnalysis(analysis, outputFormat, verbose)
181206
}

audit-cli/commands/analyze/file-references/file_references_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ func TestAnalyzeReferences(t *testing.T) {
5151
t.Fatalf("failed to get absolute path: %v", err)
5252
}
5353

54-
// Run analysis (without toctree by default)
55-
analysis, err := AnalyzeReferences(absTargetPath, false)
54+
// Run analysis (without toctree by default, not verbose, no exclude pattern)
55+
analysis, err := AnalyzeReferences(absTargetPath, false, false, "")
5656
if err != nil {
5757
t.Fatalf("AnalyzeReferences failed: %v", err)
5858
}

audit-cli/commands/analyze/file-references/output.go

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,23 @@ func printText(analysis *ReferenceAnalysis, verbose bool) {
5151
if analysis.TotalReferences == 0 {
5252
fmt.Println("No files reference this file.")
5353
fmt.Println()
54+
fmt.Println("This could mean:")
55+
fmt.Println(" - The file is not included in any documentation pages")
56+
fmt.Println(" - The file might be orphaned (not used)")
57+
fmt.Println(" - The file is referenced using a different path")
58+
fmt.Println()
59+
fmt.Println("Note: By default, only content inclusion directives are searched.")
60+
fmt.Println("Use --include-toctree to also search for toctree navigation links.")
61+
fmt.Println()
5462
return
5563
}
5664

5765
// Group references by directive type
5866
byDirectiveType := groupByDirectiveType(analysis.ReferencingFiles)
5967

6068
// Print breakdown by directive type with file and reference counts
61-
for _, directiveType := range []string{"include", "literalinclude", "io-code-block"} {
69+
directiveTypes := []string{"include", "literalinclude", "io-code-block", "toctree"}
70+
for _, directiveType := range directiveTypes {
6271
if refs, ok := byDirectiveType[directiveType]; ok {
6372
uniqueFiles := countUniqueFiles(refs)
6473
totalRefs := len(refs)
@@ -213,3 +222,35 @@ func PrintPathsOnly(analysis *ReferenceAnalysis) error {
213222
return nil
214223
}
215224

225+
// PrintSummary prints only summary statistics without the file list.
226+
//
227+
// This is useful for getting a quick overview of reference counts.
228+
//
229+
// Parameters:
230+
// - analysis: The analysis results
231+
//
232+
// Returns:
233+
// - error: Any error encountered during printing
234+
func PrintSummary(analysis *ReferenceAnalysis) error {
235+
fmt.Printf("Total Files: %d\n", analysis.TotalFiles)
236+
fmt.Printf("Total References: %d\n", analysis.TotalReferences)
237+
238+
if analysis.TotalReferences > 0 {
239+
// Group by directive type
240+
byDirectiveType := groupByDirectiveType(analysis.ReferencingFiles)
241+
242+
// Print breakdown by type
243+
fmt.Println("\nBy Type:")
244+
directiveTypes := []string{"include", "literalinclude", "io-code-block", "toctree"}
245+
for _, directiveType := range directiveTypes {
246+
if refs, ok := byDirectiveType[directiveType]; ok {
247+
uniqueFiles := countUniqueFiles(refs)
248+
totalRefs := len(refs)
249+
fmt.Printf(" %-20s: %d files, %d references\n", directiveType, uniqueFiles, totalRefs)
250+
}
251+
}
252+
}
253+
254+
return nil
255+
}
256+

audit-cli/commands/analyze/file-references/types.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,17 +30,17 @@ type ReferenceAnalysis struct {
3030
// This structure captures details about how and where the reference occurs.
3131
type FileReference struct {
3232
// FilePath is the absolute path to the file that references the target
33-
FilePath string
33+
FilePath string `json:"file_path"`
3434

3535
// DirectiveType is the type of directive used to reference the file
3636
// Possible values: "include", "literalinclude", "io-code-block", "toctree"
37-
DirectiveType string
37+
DirectiveType string `json:"directive_type"`
3838

3939
// ReferencePath is the path used in the directive (as written in the file)
40-
ReferencePath string
40+
ReferencePath string `json:"reference_path"`
4141

4242
// LineNumber is the line number where the reference occurs
43-
LineNumber int
43+
LineNumber int `json:"line_number"`
4444
}
4545

4646
// ReferenceNode represents a node in the reference tree.

0 commit comments

Comments
 (0)