Skip to content

Commit 13140fb

Browse files
committed
feat(notion-fetch): add cache validation diagnostic command (Phase 3)
Implement comprehensive cache validation tool to help diagnose incremental sync issues and cache inconsistencies.

Features:

1. Verify all cached output files exist on disk
   - Detects missing files that should be regenerated
   - Reports which pages have missing outputs
2. Find orphaned markdown files
   - Scans docs/ directory for files not in cache
   - Helps identify manually created or old files
   - Warns about files that won't be updated
3. Verify script hash matches current code
   - Checks if scripts changed since last sync
   - Indicates if next run will do full rebuild
   - Shows hash mismatch details
4. Statistics and summary
   - Total pages in cache
   - Count of missing/orphaned files
   - Script hash status
   - Exit code 0 if valid, 1 if issues found

Usage:

    bun run notion:validate-cache            # Quick check
    bun run notion:validate-cache --verbose  # Detailed output

Example Output:

    🔍 Validating Page Metadata Cache...
    ✅ All 156 cached files exist
    ⚠️ Found 3 orphaned markdown files
    ✅ Script hash matches

Benefits:

- Diagnose why pages aren't updating
- Find stale/orphaned content files
- Verify cache integrity
- Quick health check before/after sync

Related: ISSUE_95_PLAN.md Phase 3
1 parent fabdff6 commit 13140fb

File tree

2 files changed

+264
-0
lines changed

2 files changed

+264
-0
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
"notion:export": "bun scripts/notion-fetch/exportDatabase.ts",
2424
"notion:gen-placeholders": "bun scripts/notion-placeholders",
2525
"notion:fetch-all": "bun scripts/notion-fetch-all",
26+
"notion:validate-cache": "bun scripts/notion-fetch/validateCache.ts",
2627
"clean:generated": "bun scripts/cleanup-generated-content.ts",
2728
"scaffold:test": "bun run scripts/test-scaffold/index.ts",
2829
"scaffold:test:all": "bun run scripts/test-scaffold/index.ts --all",
Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
import fs from "node:fs";
2+
import path from "node:path";
3+
import { fileURLToPath } from "node:url";
4+
import chalk from "chalk";
5+
import {
6+
loadPageMetadataCache,
7+
getCacheStats,
8+
type PageMetadataCache,
9+
} from "./pageMetadataCache";
10+
import { computeScriptHash } from "./scriptHasher";
11+
12+
// ES modules have no built-in __filename/__dirname, so derive both from this
// module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Two directory levels above the directory containing this file.
const PROJECT_ROOT = path.resolve(__dirname, "..", "..");

// The docs/ directory directly under PROJECT_ROOT; scanned for orphaned files.
const DOCS_PATH = path.join(PROJECT_ROOT, "docs");
16+
17+
/** Counters gathered by `validateCache` while inspecting the cache. */
interface CacheValidationStats {
  /** Number of pages reported by `getCacheStats`; 0 when no cache was loaded. */
  totalPages: number;
  /** Number of cached output paths that were not found on disk. */
  missingFiles: number;
  /** Number of `.md` files under docs/ that no cache entry refers to. */
  orphanedFiles: number;
  /** Whether the hash stored in the cache equals the freshly computed script hash. */
  scriptHashMatch: boolean;
}

/** Outcome of a `validateCache` run. */
export interface CacheValidationResult {
  /** True when no issues were recorded (warnings do not affect this flag). */
  valid: boolean;
  /** Problems that make the cache invalid, as human-readable messages. */
  issues: string[];
  /** Non-fatal observations, as human-readable messages. */
  warnings: string[];
  /** Numeric summary of the run. */
  stats: CacheValidationStats;
}
28+
29+
/**
 * Validate the page metadata cache and check for inconsistencies.
 * This helps diagnose issues with incremental sync.
 *
 * Three checks are performed and reported on the console:
 *  1. every output path recorded in the cache exists on disk (failures are issues);
 *  2. every `.md` file under docs/ is referenced by the cache (failures are warnings);
 *  3. the script hash stored in the cache equals the current one (mismatch is a warning).
 *
 * Cached output paths may be absolute, project-relative, or project-relative
 * with a leading slash. A path is considered present if it exists either at
 * its literal absolute location or relative to the project root.
 *
 * @param options - `verbose` additionally prints one line per checked/orphaned file.
 * @returns The collected issues, warnings and counters. `valid` is true only
 *   when no issues were recorded; when no cache can be loaded the result is
 *   invalid with all counters zeroed.
 */
export async function validateCache(
  options: { verbose?: boolean } = {}
): Promise<CacheValidationResult> {
  const issues: string[] = [];
  const warnings: string[] = [];
  const { verbose = false } = options;

  console.log(chalk.bold("\n🔍 Validating Page Metadata Cache...\n"));

  // Load cache
  const cache = loadPageMetadataCache();

  if (!cache) {
    // Recorded as an issue, so the result is reported as not valid.
    issues.push("No cache found (this is OK for first run)");
    return {
      valid: false,
      issues,
      warnings,
      stats: {
        totalPages: 0,
        missingFiles: 0,
        orphanedFiles: 0,
        scriptHashMatch: false,
      },
    };
  }

  const stats = getCacheStats(cache);
  console.log(chalk.blue(`📊 Cache contains ${stats.totalPages} pages`));
  if (stats.lastSync) {
    console.log(chalk.gray(`   Last sync: ${stats.lastSync}`));
  }

  let missingFilesCount = 0;
  let orphanedFilesCount = 0;

  /**
   * Absolute locations a cached output path may refer to.
   *
   * `path.isAbsolute` is true for any path starting with a slash, so a
   * project-relative path written as "/docs/x.md" must be tried against the
   * project root as well as against the filesystem root.
   */
  const candidateLocations = (outputPath: string): string[] => {
    const underProjectRoot = path.resolve(
      PROJECT_ROOT,
      outputPath.replace(/^[\\/]+/, "")
    );
    return path.isAbsolute(outputPath)
      ? [path.resolve(outputPath), underProjectRoot]
      : [underProjectRoot];
  };

  // Check 1: Verify output files exist
  console.log(chalk.bold("\n1️⃣  Checking output files exist..."));
  // Raw cache strings; used only for the "All N cached files exist" count.
  const cachedPaths = new Set<string>();
  // Resolved absolute locations; used by the orphan check below.
  const trackedLocations = new Set<string>();

  for (const [pageId, metadata] of Object.entries(cache.pages)) {
    // Tolerate entries without outputPaths rather than crashing the diagnostic.
    for (const outputPath of metadata.outputPaths ?? []) {
      if (!outputPath) {
        warnings.push(
          `Page ${pageId} has empty output path in cache.outputPaths`
        );
        continue;
      }

      cachedPaths.add(outputPath);

      const locations = candidateLocations(outputPath);
      for (const location of locations) {
        trackedLocations.add(location);
      }

      const found = locations.some((location) => fs.existsSync(location));

      if (!found) {
        issues.push(`Missing output file for page ${pageId}: ${outputPath}`);
        missingFilesCount++;
        if (verbose) {
          console.log(chalk.red(`   ❌ Missing: ${outputPath}`));
        }
      } else if (verbose) {
        console.log(chalk.green(`   ✓ Found: ${outputPath}`));
      }
    }
  }

  if (missingFilesCount === 0) {
    console.log(
      chalk.green(`   ✅ All ${cachedPaths.size} cached files exist`)
    );
  } else {
    console.log(
      chalk.red(`   ❌ ${missingFilesCount} cached files are missing`)
    );
  }

  // Check 2: Find orphaned output files (files in docs/ not in cache)
  console.log(chalk.bold("\n2️⃣  Checking for orphaned files..."));

  const skippedDirectories = [".git", "node_modules", ".cache"];

  try {
    const scanDirectory = (dir: string): void => {
      for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
        const fullPath = path.join(dir, entry.name);

        if (entry.isDirectory()) {
          // Skip node_modules, .git, etc
          if (!skippedDirectories.includes(entry.name)) {
            scanDirectory(fullPath);
          }
          continue;
        }

        if (!entry.isFile() || !entry.name.endsWith(".md")) {
          continue;
        }

        // Compare resolved absolute paths so the spelling used in the cache
        // (relative, "./"-prefixed, leading slash, absolute) does not matter.
        if (trackedLocations.has(path.resolve(fullPath))) {
          continue;
        }

        // Report with forward slashes, relative to the project root.
        const normalizedPath = path
          .relative(PROJECT_ROOT, fullPath)
          .replace(/\\/g, "/");
        orphanedFilesCount++;
        if (verbose) {
          console.log(chalk.yellow(`   ⚠️  Orphaned: ${normalizedPath}`));
        }
      }
    };

    if (fs.existsSync(DOCS_PATH)) {
      scanDirectory(DOCS_PATH);
    }

    if (orphanedFilesCount === 0) {
      console.log(chalk.green("   ✅ No orphaned files found"));
    } else {
      console.log(
        chalk.yellow(
          `   ⚠️  Found ${orphanedFilesCount} markdown files not in cache`
        )
      );
      warnings.push(
        `${orphanedFilesCount} markdown files in docs/ are not tracked in cache (might be manually created or from old runs)`
      );
    }
  } catch (error) {
    // A failed scan is not fatal for the other checks; surface it as a warning.
    warnings.push(
      `Could not scan docs directory: ${error instanceof Error ? error.message : String(error)}`
    );
  }

  // Check 3: Verify script hash is current
  console.log(chalk.bold("\n3️⃣  Checking script hash..."));

  const currentHashResult = await computeScriptHash();
  const currentHash = currentHashResult.hash;
  // Tolerate a cache without a stored hash: treat it as a mismatch, not a crash.
  const cachedHash: string = cache.scriptHash ?? "";
  const scriptHashMatch = cachedHash === currentHash;

  if (scriptHashMatch) {
    console.log(chalk.green("   ✅ Script hash matches (scripts unchanged)"));
    console.log(chalk.gray(`   Hash: ${currentHash.substring(0, 12)}...`));
  } else {
    console.log(chalk.yellow("   ⚠️  Script hash mismatch"));
    console.log(
      chalk.gray(`   Cached:  ${cachedHash.substring(0, 12)}...`)
    );
    console.log(
      chalk.gray(`   Current: ${currentHash.substring(0, 12)}...`)
    );
    warnings.push(
      "Script files have changed since last sync - next run will do full rebuild"
    );
  }

  // Summary
  console.log(chalk.bold("\n📋 Validation Summary:"));

  // Only issues invalidate the cache; warnings are informational.
  const valid = issues.length === 0;

  if (valid) {
    console.log(chalk.green("✅ Cache is valid"));
  } else {
    console.log(chalk.red(`❌ Found ${issues.length} issue(s)`));
  }

  if (warnings.length > 0) {
    console.log(chalk.yellow(`⚠️  ${warnings.length} warning(s)`));
  }

  return {
    valid,
    issues,
    warnings,
    stats: {
      totalPages: stats.totalPages,
      missingFiles: missingFilesCount,
      orphanedFiles: orphanedFilesCount,
      scriptHashMatch,
    },
  };
}
222+
223+
/**
 * CLI entry point
 *
 * Runs only when this module is the script being executed. The check compares
 * filesystem paths rather than building a `file://` URL by string
 * concatenation, which fails for Windows paths and for paths containing
 * characters that are percent-encoded in URLs (e.g. spaces).
 *
 * Flags: `--verbose` / `-v` print per-file details.
 * Exit code: 0 when the cache is valid, 1 when issues were found or
 * validation itself threw.
 */
if (
  // path.resolve("") yields the cwd, which never equals this file's path.
  path.resolve(process.argv[1] ?? "") === fileURLToPath(import.meta.url)
) {
  const verbose =
    process.argv.includes("--verbose") || process.argv.includes("-v");

  validateCache({ verbose })
    .then((result) => {
      console.log();

      if (result.issues.length > 0) {
        console.log(chalk.bold.red("\n🔴 Issues Found:"));
        result.issues.forEach((issue, i) => {
          console.log(chalk.red(`${i + 1}. ${issue}`));
        });
      }

      if (result.warnings.length > 0) {
        console.log(chalk.bold.yellow("\n⚠️  Warnings:"));
        result.warnings.forEach((warning, i) => {
          console.log(chalk.yellow(`${i + 1}. ${warning}`));
        });
      }

      console.log(chalk.bold("\n📊 Statistics:"));
      console.log(`  Total pages in cache: ${result.stats.totalPages}`);
      console.log(`  Missing output files: ${result.stats.missingFiles}`);
      console.log(`  Orphaned markdown files: ${result.stats.orphanedFiles}`);
      console.log(
        `  Script hash match: ${result.stats.scriptHashMatch ? "✅ Yes" : "⚠️ No"}`
      );

      process.exit(result.valid ? 0 : 1);
    })
    .catch((error) => {
      console.error(chalk.red("\n❌ Validation failed:"));
      console.error(error);
      process.exit(1);
    });
}

0 commit comments

Comments
 (0)