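// This tool scans a directory tree of .adoc files for GitHub links and
// reports any link that is unreachable or returns an error status.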
package main

import (
	"flag"
	"fmt"
	"io/fs"
	"net/http"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"sync"
	"sync/atomic"
	"time"
)

// LinkOccurrence records one use of a GitHub URL in a documentation file.
type LinkOccurrence struct {
	URL  string
	File string
}

// CheckResult holds the outcome of checking a single unique URL.
type CheckResult struct {
	URL        string
	IsValid    bool
	StatusCode int
	ErrorMsg   string
}

func main() {
	docsDir := flag.String("docs-dir", "docs", "directory to scan for .adoc files")
	maxParallel := flag.Int("max-parallel", 10, "maximum number of parallel link checks")
	exitOnError := flag.Bool("exit-on-error", true, "exit with non-zero code if broken links are found")
	flag.Parse()

	allLinks, uniqueLinks := extractLinks(*docsDir)

	results := checkLinks(uniqueLinks, *maxParallel)

	brokenLinks := make(map[string]CheckResult)
	for _, result := range results {
		if !result.IsValid {
			brokenLinks[result.URL] = result
		}
	}

	// Report every broken link together with each file that references it.
	for url, result := range brokenLinks {
		var errorCode string
		if result.StatusCode != 0 {
			errorCode = fmt.Sprintf("HTTP %d", result.StatusCode)
		} else {
			errorCode = result.ErrorMsg
		}

		fmt.Printf("❌ %s %s\n", errorCode, url)
		fmt.Println("Found in:")

		for _, occurrence := range allLinks {
			if occurrence.URL == url {
				relativePath := strings.TrimPrefix(occurrence.File, *docsDir+"/")
				fmt.Printf("- %s\n", relativePath)
			}
		}
		fmt.Println()
	}

	if len(brokenLinks) > 0 {
		fmt.Printf("Found %d broken links out of %d total links\n", len(brokenLinks), len(uniqueLinks))
		fmt.Println("Done")

		if *exitOnError {
			os.Exit(1)
		}
	} else {
		fmt.Printf("Success: All %d links are valid\n", len(uniqueLinks))
		fmt.Println("Done")
	}
}

// extractLinks walks docsDir, collects every GitHub link found in .adoc
// files, and returns both the per-file occurrences and the de-duplicated
// list of URLs. Links to individual issues and pull requests are skipped.
func extractLinks(docsDir string) ([]LinkOccurrence, []string) {
	var allLinks []LinkOccurrence
	uniqueLinksMap := make(map[string]bool)

	githubLinkRegex := regexp.MustCompile(`https?://github\.com[-a-zA-Z0-9@:%._\+~#=/]*`)
	issuesPRRegex := regexp.MustCompile(`/(?:issues|pull)/[0-9]+`)

	err := filepath.WalkDir(docsDir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}

		if !d.IsDir() && strings.HasSuffix(path, ".adoc") {
			content, err := os.ReadFile(path)
			if err != nil {
				return err
			}

			links := githubLinkRegex.FindAllString(string(content), -1)

			for _, link := range links {
				cleanedLink := trimTrailingPunctuation(link)
				if !issuesPRRegex.MatchString(cleanedLink) {
					allLinks = append(allLinks, LinkOccurrence{
						URL:  cleanedLink,
						File: path,
					})
					uniqueLinksMap[cleanedLink] = true
				}
			}
		}
		return nil
	})
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error scanning %s: %v\n", docsDir, err)
		os.Exit(1)
	}

	uniqueLinks := make([]string, 0, len(uniqueLinksMap))
	for link := range uniqueLinksMap {
		uniqueLinks = append(uniqueLinks, link)
	}

	return allLinks, uniqueLinks
}

// trimTrailingPunctuation removes punctuation that the link regex may have
// captured from the surrounding prose, such as a sentence-ending period.
func trimTrailingPunctuation(url string) string {
	return strings.TrimRight(url, ".,)]>")
}

// checkLinks verifies each link with an HTTP HEAD request, falling back to
// GET when HEAD fails at the transport level, running at most maxParallel
// checks concurrently.
func checkLinks(links []string, maxParallel int) []CheckResult {
	results := make([]CheckResult, 0, len(links))
	resultsChan := make(chan CheckResult, len(links))
	sem := make(chan struct{}, maxParallel)
	var wg sync.WaitGroup
	var progress int32

	// Requests slower than the timeout count as broken; redirects are
	// followed by default.
	client := &http.Client{Timeout: 10 * time.Second}

	for _, link := range links {
		wg.Add(1)
		sem <- struct{}{} // acquire a slot; blocks while maxParallel checks are in flight

		go func(url string) {
			defer wg.Done()
			defer func() { <-sem }() // release the slot

			result := CheckResult{URL: url, IsValid: false}

			req, err := http.NewRequest("HEAD", url, nil)
			if err != nil {
				result.ErrorMsg = err.Error()
				resultsChan <- result
				return
			}

			resp, err := client.Do(req)
			if err != nil {
				// HEAD failed outright; retry once with GET before
				// declaring the link broken.
				getResp, getErr := client.Get(url)
				if getErr != nil {
					result.ErrorMsg = "Connection failed or timed out"
					resultsChan <- result
					return
				}
				resp = getResp
			}

			defer resp.Body.Close()
			result.StatusCode = resp.StatusCode

			// Treat 2xx and 3xx responses as valid.
			if resp.StatusCode >= 200 && resp.StatusCode < 400 {
				result.IsValid = true
			}

			resultsChan <- result

			current := atomic.AddInt32(&progress, 1)
			fmt.Fprintf(os.Stderr, "\rProgress: %d/%d links checked", current, len(links))
		}(link)
	}

	go func() {
		wg.Wait()
		close(resultsChan)
	}()

	for result := range resultsChan {
		results = append(results, result)
	}

	fmt.Fprintln(os.Stderr)
	return results
}
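
Not part of this commit: a minimal test sketch for the trimTrailingPunctuation helper, useful as a sanity check. The file name (main_test.go) and the sample URLs are illustrative assumptions, not taken from the change above.

package main

import "testing"

func TestTrimTrailingPunctuation(t *testing.T) {
	// Each case has at most one trailing punctuation character, matching
	// what the GitHub-link regex can actually capture.
	cases := map[string]string{
		"https://github.com/org/repo.":     "https://github.com/org/repo",
		"https://github.com/org/repo/wiki": "https://github.com/org/repo/wiki",
	}
	for in, want := range cases {
		if got := trimTrailingPunctuation(in); got != want {
			t.Errorf("trimTrailingPunctuation(%q) = %q, want %q", in, got, want)
		}
	}
}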