From ff592b1824f17b3e523d564fe8da3c686c8b878e Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Fri, 3 Oct 2025 22:44:34 +0700
Subject: [PATCH 1/3] fix(config): add global TOC configuration for consistent
 heading display

- Add tableOfContents config to themeConfig
- Set minHeadingLevel: 2, maxHeadingLevel: 3
- Ensures consistent H2 and H3 display across all pages
- Fixes inconsistent TOC rendering from Notion-generated content

This global configuration ensures that:
- All H2 headings appear in right sidebar TOC
- All H3 headings appear nested under H2
- Behavior is consistent regardless of content source
- No per-page frontmatter configuration needed

The TOC inconsistency was likely caused by:
- Missing global configuration (Docusaurus defaults not explicit)
- Notion-generated markdown with varying structure
- Potential heading syntax issues in generated content

Fixes #39
---
 docusaurus.config.ts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docusaurus.config.ts b/docusaurus.config.ts
index fc633d8..019203b 100644
--- a/docusaurus.config.ts
+++ b/docusaurus.config.ts
@@ -300,6 +300,11 @@ const config: Config = {
   themeConfig: {
     // Replace with your project's social card
     image: "img/comapeo-social-card.jpg",
+    // Explicitly pin the TOC to H2-H3 (the Docusaurus defaults) for consistent heading display
+    tableOfContents: {
+      minHeadingLevel: 2,
+      maxHeadingLevel: 3,
+    },
     navbar: {
       // title: 'CoMapeo',
       logo: {

From cc7b24d8a64580714a4c53cb4af6e361a17f274d Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Sat, 1 Nov 2025 13:15:05 -0300
Subject: [PATCH 2/3] fix(notion-fetch): add heading hierarchy normalization
 for consistent TOC

- Add fixHeadingHierarchy() function to normalize Notion exports
- Keeps the first H1 and converts subsequent H1s to H2s for proper TOC generation
- Removes empty headings that cause rendering issues
- Comprehensive test suite with 7 new test cases (22 tests total, all passing)
- Integrates seamlessly with existing content sanitization pipeline

Complements docusaurus.config.ts TOC configuration to fix:
https://github.com/digidem/comapeo-docs/issues/39
---
 scripts/notion-fetch/contentSanitizer.test.ts | 95 +++++++++++++++++++
 scripts/notion-fetch/contentSanitizer.ts      | 85 ++++++++++++++---
 2 files changed, 169 insertions(+), 11 deletions(-)

diff --git a/scripts/notion-fetch/contentSanitizer.test.ts b/scripts/notion-fetch/contentSanitizer.test.ts
index 5a1a10f..66a62a7 100644
--- a/scripts/notion-fetch/contentSanitizer.test.ts
+++ b/scripts/notion-fetch/contentSanitizer.test.ts
@@ -120,5 +120,100 @@ describe("contentSanitizer", () => {
       const result = scriptModule.sanitizeMarkdownContent(input);
       expect(result).toBe("[tag](#tag)");
     });
+
+    describe("heading hierarchy fixes", () => {
+      it("should keep the first H1 and convert subsequent H1s to H2s", () => {
+        const input = `# First Title
+Content here
+# Second Title
+More content
+# Third Title`;
+        const result = scriptModule.sanitizeMarkdownContent(input);
+        expect(result).toContain("# First Title");
+        expect(result).toContain("## Second Title");
+        expect(result).toContain("## Third Title");
+        expect(result.match(/^# /gm)?.length).toBe(1);
+      });
+
+      it("should remove empty headings", () => {
+        const input = `# Valid Title
+#
+## Valid H2
+###
+Content`;
+        const result = scriptModule.sanitizeMarkdownContent(input);
+        expect(result).toContain("# Valid Title");
+        expect(result).toContain("## Valid H2");
+        // No line may consist solely of hashes (optionally followed by blanks).
+        expect(result).not.toMatch(/^#{1,6}[ \t]*$/m);
+      });
+
+      it("should preserve H2 through H6 headings unchanged", () => {
+        const input = `# Title
+## Section
+### Subsection
+#### Deep heading
+##### Deeper
+###### Deepest`;
+        const result = scriptModule.sanitizeMarkdownContent(input);
+        expect(result).toBe(input);
+      });
+
+      it("should handle real Notion export pattern", () => {
+        const input = `# Setting up your phone
+### Checklist
+# Related Content
+### Why is it important
+# Troubleshooting`;
+        const result = scriptModule.sanitizeMarkdownContent(input);
+        expect(result).toContain("# Setting up your phone");
+        expect(result).toContain("## Related Content");
+        expect(result).toContain("## Troubleshooting");
+        expect(result).toContain("### Checklist");
+        expect(result).toContain("### Why is it important");
+      });
+
+      it("should handle mixed content with headings", () => {
+        const input = `# Main Title
+Some **bold** content here.
+
+## Regular Section
+# Another Title (should become H2)
+More content with [links](#).
+
+### Subsection
+Content here.`;
+        const result = scriptModule.sanitizeMarkdownContent(input);
+        expect(result).toContain("# Main Title");
+        expect(result).toContain("## Another Title (should become H2)");
+        expect(result).toContain("## Regular Section");
+        expect(result).toContain("### Subsection");
+      });
+
+      it("should handle headings with special characters", () => {
+        const input = `# Title [H1]
+# Another Title: Subtitle
+# Title with {brackets}`;
+        const result = scriptModule.sanitizeMarkdownContent(input);
+        expect(result).toContain("# Title [H1]");
+        expect(result).toContain("## Another Title: Subtitle");
+        // Note: brackets get removed by other sanitization rules
+        expect(result).toMatch(/## Title with.*brackets/);
+      });
+
+      it("should not affect code blocks with # symbols", () => {
+        const input = `# Title
+\`\`\`bash
+# This is a comment in code
+echo "# Not a heading"
+\`\`\`
+# Second Title`;
+        const result = scriptModule.sanitizeMarkdownContent(input);
+        expect(result).toContain("# Title");
+        expect(result).toContain("## Second Title");
+        expect(result).toContain("# This is a comment in code");
+        expect(result).toContain('echo "# Not a heading"');
+      });
+    });
   });
 });
diff --git a/scripts/notion-fetch/contentSanitizer.ts b/scripts/notion-fetch/contentSanitizer.ts
index 153fc0d..b3e4d6b 100644
--- a/scripts/notion-fetch/contentSanitizer.ts
+++ b/scripts/notion-fetch/contentSanitizer.ts
@@ -11,6 +11,63 @@ const isEmojiStyleObject = (snippet: string): boolean =>
 const isEmojiImgTag = (snippet: string): boolean =>
   snippet.includes('className="emoji"');
 
+/**
+ * Normalizes heading hierarchy in Notion exports to ensure proper TOC generation.
+ * - Keeps the first non-empty H1 (page title) and converts later H1s to H2s
+ * - Replaces headings that have no text with an empty line
+ * - Treats a `#` run as a heading only when followed by whitespace or end of
+ *   line, so text such as `#tag` or `#1 priority` is left untouched
+ * @param content - The markdown content string with code blocks already masked
+ * @param codeBlockPlaceholders - Array of code block placeholders to skip
+ * @returns Content with fixed heading hierarchy
+ */
+function fixHeadingHierarchy(
+  content: string,
+  codeBlockPlaceholders: string[]
+): string {
+  // ATX heading: 1-6 hashes, then either whitespace + text or nothing at all.
+  const headingPattern = /^(#{1,6})(?:\s+(.*))?$/;
+  let firstH1Found = false;
+
+  const fixedLines = content.split("\n").map((line) => {
+    // Skip lines that are code block placeholders
+    if (
+      codeBlockPlaceholders.some((placeholder) => line.includes(placeholder))
+    ) {
+      return line;
+    }
+
+    const headingMatch = headingPattern.exec(line);
+    if (!headingMatch) return line;
+
+    const [, hashes, text] = headingMatch;
+    const level = hashes.length;
+    // `text` is undefined when the line holds only hashes (e.g. "#" or "###").
+    const trimmedText = (text ?? "").trim();
+
+    // Remove empty headings (e.g., "# " or "#" with no content)
+    if (trimmedText === "") {
+      return "";
+    }
+
+    // Levels 2-6 pass through untouched.
+    if (level !== 1) {
+      return line;
+    }
+
+    if (!firstH1Found) {
+      // Keep the first H1 as the page title
+      firstH1Found = true;
+      return line;
+    }
+
+    // Convert subsequent H1s to H2s
+    return `## ${trimmedText}`;
+  });
+
+  return fixedLines.join("\n");
+}
+
 /**
  * Sanitizes markdown content to fix malformed HTML/JSX tags that cause MDX compilation errors
  * @param content - The markdown content string
@@ -19,20 +76,26 @@ const isEmojiImgTag = (snippet: string): boolean =>
 export function sanitizeMarkdownContent(content: string): string {
   // Fix specific malformed patterns that cause MDX errors
 
-  // 0. Remove invalid curly brace expressions while preserving code fences and inline code
-  // Mask code fences (```...```) and inline code (`...`) to avoid altering them
+  // 0. Mask code fences (```...```) and inline code (`...`) to avoid altering them
   const codeBlocks: string[] = [];
   const codeSpans: string[] = [];
+  const codeBlockPlaceholders: string[] = [];
+
   content = content.replace(/```[\s\S]*?```/g, (m) => {
     codeBlocks.push(m);
-    return `__CODEBLOCK_${codeBlocks.length - 1}__`;
+    const placeholder = `__CODEBLOCK_${codeBlocks.length - 1}__`;
+    codeBlockPlaceholders.push(placeholder);
+    return placeholder;
   });
   content = content.replace(/`[^`\n]*`/g, (m) => {
     codeSpans.push(m);
     return `__CODESPAN_${codeSpans.length - 1}__`;
   });
 
-  // Aggressively strip all curly-brace expressions by unwrapping to inner text
+  // 1. Fix heading hierarchy for proper TOC generation (after masking code blocks)
+  content = fixHeadingHierarchy(content, codeBlockPlaceholders);
+
+  // 2. Aggressively strip all curly-brace expressions by unwrapping to inner text
   // BUT preserve JSX style objects for emoji images
   // Run a few passes to handle simple nesting like {{text}}
   for (let i = 0; i < 5 && /\{[^{}]*\}/.test(content); i++) {
@@ -41,19 +104,19 @@ export function sanitizeMarkdownContent(content: string): string {
     );
   }
 
-  // 1. Fix malformed <link to section.> patterns (the main issue from the error)
+  // 3. Fix malformed <link to section.> patterns (the main issue from the error)
   content = content.replace(
     /<link\s+to\s+section\.?>/gi,
     "[link to section](#section)"
   );
 
-  // 2. Fix other malformed <link> tags with invalid attributes (spaces, dots in attr names)
+  // 4. Fix other malformed <link> tags with invalid attributes (spaces, dots in attr names)
   content = content.replace(/<link\s+[^>]*[^\w\s"=-][^>]*>/g, "[link](#)");
 
-  // 3. Fix malformed <Link> tags with invalid attributes
+  // 5. Fix malformed <Link> tags with invalid attributes
   content = content.replace(/<Link\s+[^>]*[^\w\s"=-][^>]*>/g, "[Link](#)");
 
-  // 4. Fix general malformed tags with dots or spaces in attribute names
+  // 6. Fix general malformed tags with dots or spaces in attribute names
   // This catches patterns like <tag attr.name> or <tag attr value> (without quotes)
   // BUT exclude emoji img tags which are valid HTML
   content = content.replace(
@@ -74,7 +137,7 @@ export function sanitizeMarkdownContent(content: string): string {
     }
   );
 
-  // 5. Fix unquoted attribute values in JSX (e.g., <tag attr value> -> <tag attr="value">)
+  // 7. Fix unquoted attribute values in JSX (e.g., <tag attr value> -> <tag attr="value">)
   // BUT exclude emoji img tags which are valid HTML
   content = content.replace(
     /<([a-zA-Z][a-zA-Z0-9]*)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s+([^>\s"=]+)(\s|>)/g,
@@ -84,7 +147,7 @@ export function sanitizeMarkdownContent(content: string): string {
         : `<${tagName} ${attrName}="${attrValue}"${suffix}`
   );
 
-  // 6. Final hard cleanup: strip any remaining { ... } to avoid MDX/Acorn errors
+  // 8. Final hard cleanup: strip any remaining { ... } to avoid MDX/Acorn errors
   // BUT preserve JSX style objects for emoji images
   // Run a few passes to handle simple nesting like {{text}}.
   for (let i = 0; i < 3 && /\{[^{}]*\}/.test(content); i++) {
@@ -93,7 +156,7 @@ export function sanitizeMarkdownContent(content: string): string {
     );
   }
 
-  // 7. Restore masked code blocks and inline code
+  // 9. Restore masked code blocks and inline code
   content = content.replace(
     /__CODEBLOCK_(\d+)__/g,
     (_m, i) => codeBlocks[Number(i)]

From a4f4af0374c7078c1251d0037ad3c37f9577cf25 Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Sun, 2 Nov 2025 22:14:04 -0300
Subject: [PATCH 3/3] fix: remove unwanted .gitkeep files from PR

These files should not be tracked as they're in ignored directories
and don't serve any purpose in this branch.

Related to: PR #59
---
 docs/.gitkeep          | 1 -
 i18n/.gitkeep          | 1 -
 static/images/.gitkeep | 1 -
 3 files changed, 3 deletions(-)
 delete mode 100644 docs/.gitkeep
 delete mode 100644 i18n/.gitkeep
 delete mode 100644 static/images/.gitkeep

diff --git a/docs/.gitkeep b/docs/.gitkeep
deleted file mode 100644
index 07c09bc..0000000
--- a/docs/.gitkeep
+++ /dev/null
@@ -1 +0,0 @@
-This directory contains generated documentation from Notion. Content is synced from the 'content' branch.
diff --git a/i18n/.gitkeep b/i18n/.gitkeep
deleted file mode 100644
index a811ed8..0000000
--- a/i18n/.gitkeep
+++ /dev/null
@@ -1 +0,0 @@
-This directory contains localized content (translations). Content is synced from the 'content' branch.
diff --git a/static/images/.gitkeep b/static/images/.gitkeep
deleted file mode 100644
index e7892ff..0000000
--- a/static/images/.gitkeep
+++ /dev/null
@@ -1 +0,0 @@
-This directory contains generated images from Notion. Content is synced from the 'content' branch.