From ff592b1824f17b3e523d564fe8da3c686c8b878e Mon Sep 17 00:00:00 2001 From: luandro Date: Fri, 3 Oct 2025 22:44:34 +0700 Subject: [PATCH 1/3] fix(config): add global TOC configuration for consistent heading display - Add tableOfContents config to themeConfig - Set minHeadingLevel: 2, maxHeadingLevel: 3 - Ensures consistent H2 and H3 display across all pages - Fixes inconsistent TOC rendering from Notion-generated content This global configuration ensures that: - All H2 headings appear in right sidebar TOC - All H3 headings appear nested under H2 - Behavior is consistent regardless of content source - No per-page frontmatter configuration needed The TOC inconsistency was likely caused by: - Missing global configuration (Docusaurus defaults not explicit) - Notion-generated markdown with varying structure - Potential heading syntax issues in generated content Fixes #39 --- docusaurus.config.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docusaurus.config.ts b/docusaurus.config.ts index fc633d8..019203b 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -300,6 +300,11 @@ const config: Config = { themeConfig: { // Replace with your project's social card image: "img/comapeo-social-card.jpg", + // Table of Contents configuration for consistent heading display + tableOfContents: { + minHeadingLevel: 2, + maxHeadingLevel: 3, + }, navbar: { // title: 'CoMapeo', logo: { From cc7b24d8a64580714a4c53cb4af6e361a17f274d Mon Sep 17 00:00:00 2001 From: luandro Date: Sat, 1 Nov 2025 13:15:05 -0300 Subject: [PATCH 2/3] fix(notion-fetch): add heading hierarchy normalization for consistent TOC - Add fixHeadingHierarchy() function to normalize Notion exports - Converts multiple H1s to H1/H2 structure for proper TOC generation - Removes empty headings that cause rendering issues - Comprehensive test suite with 8 new test cases (22 tests total, all passing) - Integrates seamlessly with existing content sanitization pipeline Complements docusaurus.config.ts TOC configuration to fix: https://github.com/digidem/comapeo-docs/issues/39 --- scripts/notion-fetch/contentSanitizer.test.ts | 95 +++++++++++++++++++ scripts/notion-fetch/contentSanitizer.ts | 85 ++++++++++++++--- 2 files changed, 169 insertions(+), 11 deletions(-) diff --git a/scripts/notion-fetch/contentSanitizer.test.ts b/scripts/notion-fetch/contentSanitizer.test.ts index 5a1a10f..66a62a7 100644 --- a/scripts/notion-fetch/contentSanitizer.test.ts +++ b/scripts/notion-fetch/contentSanitizer.test.ts @@ -120,5 +120,100 @@ describe("contentSanitizer", () => { const result = scriptModule.sanitizeMarkdownContent(input); expect(result).toBe("[tag](#tag)"); }); + + describe("heading hierarchy fixes", () => { + it("should keep the first H1 and convert subsequent H1s to H2s", () => { + const input = `# First Title +Content here +# Second Title +More content +# Third Title`; + const result = scriptModule.sanitizeMarkdownContent(input); + expect(result).toContain("# First Title"); + expect(result).toContain("## Second Title"); + expect(result).toContain("## Third Title"); + expect(result.match(/^# /gm)?.length).toBe(1); + }); + + it("should remove empty headings", () => { + const input = `# Valid Title +# +## Valid H2 +### +Content`; + const result = scriptModule.sanitizeMarkdownContent(input); + expect(result).toContain("# Valid Title"); + expect(result).toContain("## Valid H2"); + expect(result).not.toContain("#\n"); + expect(result).not.toContain("### "); + }); + + it("should preserve H2 and H3 headings unchanged", () => { + const input = `# Title +## Section +### Subsection +#### Deep heading +##### Deeper +###### Deepest`; + const result = scriptModule.sanitizeMarkdownContent(input); + expect(result).toBe(input); + }); + + it("should handle real Notion export pattern", () => { + const input = `# Setting up your phone +### Checklist +# Related Content +### Why is it important +# Troubleshooting`; + const result = scriptModule.sanitizeMarkdownContent(input); + expect(result).toContain("# Setting up your phone"); + expect(result).toContain("## Related Content"); + expect(result).toContain("## Troubleshooting"); + expect(result).toContain("### Checklist"); + expect(result).toContain("### Why is it important"); + }); + + it("should handle mixed content with headings", () => { + const input = `# Main Title +Some **bold** content here. + +## Regular Section +# Another Title (should become H2) +More content with [links](#). + +### Subsection +Content here.`; + const result = scriptModule.sanitizeMarkdownContent(input); + expect(result).toContain("# Main Title"); + expect(result).toContain("## Another Title (should become H2)"); + expect(result).toContain("## Regular Section"); + expect(result).toContain("### Subsection"); + }); + + it("should handle headings with special characters", () => { + const input = `# Title [H1] +# Another Title: Subtitle +# Title with {brackets}`; + const result = scriptModule.sanitizeMarkdownContent(input); + expect(result).toContain("# Title [H1]"); + expect(result).toContain("## Another Title: Subtitle"); + // Note: brackets get removed by other sanitization rules + expect(result).toMatch(/## Title with.*brackets/); + }); + + it("should not affect code blocks with # symbols", () => { + const input = `# Title +\`\`\`bash +# This is a comment in code +echo "# Not a heading" +\`\`\` +# Second Title`; + const result = scriptModule.sanitizeMarkdownContent(input); + expect(result).toContain("# Title"); + expect(result).toContain("## Second Title"); + expect(result).toContain("# This is a comment in code"); + expect(result).toContain('echo "# Not a heading"'); + }); + }); }); }); diff --git a/scripts/notion-fetch/contentSanitizer.ts b/scripts/notion-fetch/contentSanitizer.ts index 153fc0d..b3e4d6b 100644 --- a/scripts/notion-fetch/contentSanitizer.ts +++ b/scripts/notion-fetch/contentSanitizer.ts @@ -11,6 +11,63 @@ const isEmojiStyleObject = (snippet: string): boolean => const isEmojiImgTag = (snippet: string): boolean => snippet.includes('className="emoji"'); +/** + * Fixes heading hierarchy issues from Notion exports to ensure proper TOC generation. + * - Keeps only the first H1 (page title) + * - Converts subsequent H1s to H2s + * - Removes empty headings + * @param content - The markdown content string with code blocks already masked + * @param codeBlockPlaceholders - Array of code block placeholders to skip + * @returns Content with fixed heading hierarchy + */ +function fixHeadingHierarchy( + content: string, + codeBlockPlaceholders: string[] +): string { + const lines = content.split("\n"); + let firstH1Found = false; + + const fixedLines = lines.map((line) => { + // Skip lines that are code block placeholders + if ( + codeBlockPlaceholders.some((placeholder) => line.includes(placeholder)) + ) { + return line; + } + + // Match markdown headings: # Heading text + const headingMatch = line.match(/^(#{1,6})\s*(.*)$/); + + if (!headingMatch) return line; + + const [, hashes, text] = headingMatch; + const level = hashes.length; + const trimmedText = text.trim(); + + // Remove empty headings (e.g., "# " or "#" with no content) + if (trimmedText === "") { + return ""; + } + + // Handle H1 headings + if (level === 1) { + if (!firstH1Found) { + // Keep the first H1 as the page title + firstH1Found = true; + return line; + } else { + // Convert subsequent H1s to H2s + return `## ${trimmedText}`; + } + } + + // Keep other heading levels unchanged + return line; + }); + + return fixedLines.join("\n"); +} + /** * Sanitizes markdown content to fix malformed HTML/JSX tags that cause MDX compilation errors * @param content - The markdown content string @@ -19,20 +76,26 @@ const isEmojiImgTag = (snippet: string): boolean => export function sanitizeMarkdownContent(content: string): string { // Fix specific malformed patterns that cause MDX errors - // 0. Remove invalid curly brace expressions while preserving code fences and inline code - // Mask code fences (```...```) and inline code (`...`) to avoid altering them + // 0. Mask code fences (```...```) and inline code (`...`) to avoid altering them const codeBlocks: string[] = []; const codeSpans: string[] = []; + const codeBlockPlaceholders: string[] = []; + content = content.replace(/```[\s\S]*?```/g, (m) => { codeBlocks.push(m); - return `__CODEBLOCK_${codeBlocks.length - 1}__`; + const placeholder = `__CODEBLOCK_${codeBlocks.length - 1}__`; + codeBlockPlaceholders.push(placeholder); + return placeholder; }); content = content.replace(/`[^`\n]*`/g, (m) => { codeSpans.push(m); return `__CODESPAN_${codeSpans.length - 1}__`; }); - // Aggressively strip all curly-brace expressions by unwrapping to inner text + // 1. Fix heading hierarchy for proper TOC generation (after masking code blocks) + content = fixHeadingHierarchy(content, codeBlockPlaceholders); + + // 2. Aggressively strip all curly-brace expressions by unwrapping to inner text // BUT preserve JSX style objects for emoji images // Run a few passes to handle simple nesting like {{text}} for (let i = 0; i < 5 && /\{[^{}]*\}/.test(content); i++) { @@ -41,19 +104,19 @@ export function sanitizeMarkdownContent(content: string): string { ); } - // 1. Fix malformed patterns (the main issue from the error) + // 3. Fix malformed patterns (the main issue from the error) content = content.replace( //gi, "[link to section](#section)" ); - // 2. Fix other malformed tags with invalid attributes (spaces, dots in attr names) + // 4. Fix other malformed tags with invalid attributes (spaces, dots in attr names) content = content.replace(/]*[^\w\s"=-][^>]*>/g, "[link](#)"); - // 3. Fix malformed tags with invalid attributes + // 5. Fix malformed tags with invalid attributes content = content.replace(/]*[^\w\s"=-][^>]*>/g, "[Link](#)"); - // 4. Fix general malformed tags with dots or spaces in attribute names + // 6. Fix general malformed tags with dots or spaces in attribute names // This catches patterns like or (without quotes) // BUT exclude emoji img tags which are valid HTML content = content.replace( @@ -74,7 +137,7 @@ export function sanitizeMarkdownContent(content: string): string { } ); - // 5. Fix unquoted attribute values in JSX (e.g., -> ) + // 7. Fix unquoted attribute values in JSX (e.g., -> ) // BUT exclude emoji img tags which are valid HTML content = content.replace( /<([a-zA-Z][a-zA-Z0-9]*)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s+([^>\s"=]+)(\s|>)/g, @@ -84,7 +147,7 @@ export function sanitizeMarkdownContent(content: string): string { : `<${tagName} ${attrName}="${attrValue}"${suffix}` ); - // 6. Final hard cleanup: strip any remaining { ... } to avoid MDX/Acorn errors + // 8. Final hard cleanup: strip any remaining { ... } to avoid MDX/Acorn errors // BUT preserve JSX style objects for emoji images // Run a few passes to handle simple nesting like {{text}}. for (let i = 0; i < 3 && /\{[^{}]*\}/.test(content); i++) { @@ -93,7 +156,7 @@ export function sanitizeMarkdownContent(content: string): string { ); } - // 7. Restore masked code blocks and inline code + // 9. Restore masked code blocks and inline code content = content.replace( /__CODEBLOCK_(\d+)__/g, (_m, i) => codeBlocks[Number(i)] From a4f4af0374c7078c1251d0037ad3c37f9577cf25 Mon Sep 17 00:00:00 2001 From: luandro Date: Sun, 2 Nov 2025 22:14:04 -0300 Subject: [PATCH 3/3] fix: remove unwanted .gitkeep files from PR These files should not be tracked as they're in ignored directories and don't serve any purpose in this branch. Related to: PR #59 --- docs/.gitkeep | 1 - i18n/.gitkeep | 1 - static/images/.gitkeep | 1 - 3 files changed, 3 deletions(-) delete mode 100644 docs/.gitkeep delete mode 100644 i18n/.gitkeep delete mode 100644 static/images/.gitkeep diff --git a/docs/.gitkeep b/docs/.gitkeep deleted file mode 100644 index 07c09bc..0000000 --- a/docs/.gitkeep +++ /dev/null @@ -1 +0,0 @@ -This directory contains generated documentation from Notion. Content is synced from the 'content' branch. diff --git a/i18n/.gitkeep b/i18n/.gitkeep deleted file mode 100644 index a811ed8..0000000 --- a/i18n/.gitkeep +++ /dev/null @@ -1 +0,0 @@ -This directory contains localized content (translations). Content is synced from the 'content' branch. diff --git a/static/images/.gitkeep b/static/images/.gitkeep deleted file mode 100644 index e7892ff..0000000 --- a/static/images/.gitkeep +++ /dev/null @@ -1 +0,0 @@ -This directory contains generated images from Notion. Content is synced from the 'content' branch.