ArcadeAI · nearestnabors · Feb 11, 2026 · Feb 13, 2026 · Feb 13, 2026 · evantahler
diff --git a/app/_lib/posthog-server.ts b/app/_lib/posthog-server.ts
@@ -0,0 +1,181 @@
+import { PostHog } from "posthog-node";
+
+// Lazily-created server-side PostHog client, shared by every caller in this module
+let posthogClient: PostHog | null = null;
+
+/**
+ * Return the shared PostHog client, constructing it on first use.
+ *
+ * @throws Error when NEXT_PUBLIC_POSTHOG_KEY is not set.
+ */
+export function getPostHogServer(): PostHog {
+  if (posthogClient) {
+    return posthogClient;
+  }
+
+  const apiKey = process.env.NEXT_PUBLIC_POSTHOG_KEY;
+  if (!apiKey) {
+    throw new Error("NEXT_PUBLIC_POSTHOG_KEY is not set");
+  }
+
+  const host =
+    process.env.NEXT_PUBLIC_POSTHOG_HOST || "https://us.i.posthog.com";
+  posthogClient = new PostHog(apiKey, {
+    host,
+    // Flush events immediately in serverless environments
+    flushAt: 1,
+    flushInterval: 0,
+  });
+  return posthogClient;
+}
+
+// AI agent detection patterns with classification.
+//
+// Every pattern is an unanchored, case-insensitive substring match against the
+// User-Agent string. Order matters for overlapping entries: a more specific
+// pattern must appear before a more generic one it overlaps with
+// (e.g. the GitHub Copilot entry before the bare "copilot" entry).
+const AI_AGENT_CLASSIFIERS: Array<{
+  pattern: RegExp;
+  type: string;
+  provider: string;
+}> = [
+  // OpenAI
+  { pattern: /GPTBot/i, type: "GPTBot", provider: "OpenAI" },
+  { pattern: /ChatGPT-User/i, type: "ChatGPT-User", provider: "OpenAI" },
+  { pattern: /OAI-SearchBot/i, type: "OAI-SearchBot", provider: "OpenAI" },
+
+  // Anthropic
+  { pattern: /ClaudeBot/i, type: "ClaudeBot", provider: "Anthropic" },
+  { pattern: /Claude-User/i, type: "Claude-User", provider: "Anthropic" },
+  {
+    pattern: /Claude-SearchBot/i,
+    type: "Claude-SearchBot",
+    provider: "Anthropic",
+  },
+  { pattern: /anthropic/i, type: "Anthropic-Agent", provider: "Anthropic" },
+
+  // Perplexity
+  { pattern: /PerplexityBot/i, type: "PerplexityBot", provider: "Perplexity" },
+  {
+    pattern: /Perplexity-User/i,
+    type: "Perplexity-User",
+    provider: "Perplexity",
+  },
+
+  // Google
+  { pattern: /Google-Extended/i, type: "Google-Extended", provider: "Google" },
+  { pattern: /Googlebot/i, type: "Googlebot", provider: "Google" },
+
+  // Amazon
+  { pattern: /Amazonbot/i, type: "Amazonbot", provider: "Amazon" },
+  { pattern: /amazonq/i, type: "Amazon-Q", provider: "Amazon" },
+  { pattern: /amazon-q/i, type: "Amazon-Q", provider: "Amazon" },
+
+  // Apple
+  {
+    pattern: /Applebot-Extended/i,
+    type: "Applebot-Extended",
+    provider: "Apple",
+  },
+
+  // Meta
+  { pattern: /meta-externalagent/i, type: "Meta-Agent", provider: "Meta" },
+
+  // ByteDance
+  { pattern: /Bytespider/i, type: "Bytespider", provider: "ByteDance" },
+
+  // Cohere
+  { pattern: /cohere-ai/i, type: "Cohere-AI", provider: "Cohere" },
+
+  // Common Crawl
+  { pattern: /CCBot/i, type: "CCBot", provider: "CommonCrawl" },
+
+  // Developer tools
+  { pattern: /cursor/i, type: "Cursor", provider: "Cursor" },
+  // `.?` makes the separator optional so a token written without one
+  // (e.g. "GithubCopilot/1.0") is still classified as GitHub-Copilot rather
+  // than falling through to the generic "copilot" entry below
+  { pattern: /github.?copilot/i, type: "GitHub-Copilot", provider: "GitHub" },
+  { pattern: /copilot/i, type: "Copilot", provider: "GitHub" },
+  { pattern: /codeium/i, type: "Codeium", provider: "Codeium" },
+  { pattern: /tabnine/i, type: "Tabnine", provider: "Tabnine" },
+
+  // Other AI services
+  { pattern: /gemini/i, type: "Gemini", provider: "Google" },
+  { pattern: /bard/i, type: "Bard", provider: "Google" },
+  { pattern: /phind/i, type: "Phind", provider: "Phind" },
+  { pattern: /you\.com/i, type: "You.com", provider: "You.com" },
+  { pattern: /ai21/i, type: "AI21", provider: "AI21" },
+  { pattern: /huggingface/i, type: "HuggingFace", provider: "HuggingFace" },
+
+  // Agent frameworks
+  { pattern: /langchain/i, type: "LangChain", provider: "LangChain" },
+  { pattern: /llamaindex/i, type: "LlamaIndex", provider: "LlamaIndex" },
+  { pattern: /autogpt/i, type: "AutoGPT", provider: "AutoGPT" },
+  { pattern: /agentgpt/i, type: "AgentGPT", provider: "AgentGPT" },
+  { pattern: /babyagi/i, type: "BabyAGI", provider: "BabyAGI" },
+
+  // Doc AI tools
+  { pattern: /kapa\.ai/i, type: "Kapa.ai", provider: "Kapa" },
+  { pattern: /mendable/i, type: "Mendable", provider: "Mendable" },
+  { pattern: /inkeep/i, type: "Inkeep", provider: "Inkeep" },
+  { pattern: /glean/i, type: "Glean", provider: "Glean" },
+];
+
+/** Result of matching a user-agent string against AI_AGENT_CLASSIFIERS. */
+export type AIAgentClassification = {
+  // True when some classifier pattern matched the user agent
+  isAIAgent: boolean;
+  // `type` of the matching classifier, or null when nothing matched
+  agentType: string | null;
+  // `provider` of the matching classifier, or null when nothing matched
+  agentProvider: string | null;
+};
+
+/**
+ * Classify a user-agent string against AI_AGENT_CLASSIFIERS.
+ *
+ * The first classifier (in array order) whose pattern matches wins. When no
+ * pattern matches, the result has isAIAgent false and null type/provider.
+ */
+export function classifyAIAgent(userAgent: string): AIAgentClassification {
+  const match = AI_AGENT_CLASSIFIERS.find(({ pattern }) =>
+    pattern.test(userAgent)
+  );
+
+  if (!match) {
+    return { isAIAgent: false, agentType: null, agentProvider: null };
+  }
+
+  return {
+    isAIAgent: true,
+    agentType: match.type,
+    agentProvider: match.provider,
+  };
+}
+
+/** Input to captureServerPageView describing one server-side request. */
+export type ServerPageViewEvent = {
+  // PostHog distinct ID the event is attributed to
+  distinctId: string;
+  // Sent as both $current_url and $pathname
+  pathname: string;
+  // Sent as $useragent and used for AI agent classification
+  userAgent: string;
+  // Sent as $referrer
+  referer?: string;
+  // NOTE(review): accepted here but not read by captureServerPageView — confirm intent
+  ip?: string;
+  // Sent as request_accept_header
+  acceptHeader?: string;
+  // Sent as request_accept_language
+  acceptLanguage?: string;
+};
+
+/**
+ * Capture a "server_markdown_request" event in PostHog for one request and
+ * wait for the client to flush it.
+ *
+ * Propagates any error thrown by getPostHogServer() (e.g. a missing API key).
+ */
+export async function captureServerPageView(event: ServerPageViewEvent) {
+  const {
+    distinctId,
+    pathname,
+    userAgent,
+    referer,
+    acceptHeader,
+    acceptLanguage,
+  } = event;
+
+  const client = getPostHogServer();
+  const { isAIAgent, agentType, agentProvider } = classifyAIAgent(userAgent);
+
+  const properties = {
+    $current_url: pathname,
+    $pathname: pathname,
+    $referrer: referer,
+    $useragent: userAgent,
+
+    // AI agent classification
+    is_ai_agent: isAIAgent,
+    ai_agent_type: agentType,
+    ai_agent_provider: agentProvider,
+
+    // Request metadata
+    request_accept_header: acceptHeader,
+    request_accept_language: acceptLanguage,
+    request_source: "server",
+
+    // Mark as server-side capture
+    $lib: "posthog-node",
+  };
+
+  // A dedicated event name (rather than $pageview) avoids double-counting with
+  // client-side page views; this tracks server-side markdown requests
+  client.capture({
+    distinctId,
+    event: "server_markdown_request",
+    properties,
+  });
+
+  // Flush immediately for serverless
+  await client.flush();
+}
+
+/**
+ * Shut down the shared PostHog client, if one was created.
+ *
+ * The singleton is cleared before shutdown() is awaited, so a rejected
+ * shutdown cannot leave that client cached for later getPostHogServer()
+ * calls. Any error from shutdown() still propagates to the caller.
+ */
+export async function shutdownPostHog() {
+  const client = posthogClient;
+  if (!client) {
+    return;
+  }
+
+  posthogClient = null;
+  await client.shutdown();
+}
diff --git a/app/api/markdown/[[...slug]]/route.ts b/app/api/markdown/[[...slug]]/route.ts
@@ -1,6 +1,7 @@
+import { createHash } from "node:crypto";
 import { access, readdir, readFile } from "node:fs/promises";
 import { join, normalize, resolve } from "node:path";
 import { type NextRequest, NextResponse } from "next/server";
+import { captureServerPageView } from "@/app/_lib/posthog-server";
 
 export const dynamic = "force-dynamic";
 
@@ -63,11 +64,83 @@ const TOOLKIT_CATEGORIES = [
   "social",
 ];
 
+// Number of base64 characters of the SHA-256 digest kept in a distinct ID
+const DISTINCT_ID_LENGTH = 32;
+
+/**
+ * Track markdown request for AI agent analytics.
+ *
+ * Never throws: any failure is logged and swallowed so tracking cannot affect
+ * the response. The returned promise settles only after
+ * captureServerPageView has finished, so a caller that awaits it is delayed
+ * by that call.
+ *
+ * @param request - Incoming request; only its headers are read.
+ * @param pathname - Path reported with the event.
+ */
+async function trackMarkdownRequest(request: NextRequest, pathname: string) {
+  try {
+    const userAgent = request.headers.get("user-agent") || "";
+    const referer = request.headers.get("referer") || undefined;
+    const acceptHeader = request.headers.get("accept") || undefined;
+    const acceptLanguage = request.headers.get("accept-language") || undefined;
+
+    // First entry of x-forwarded-for, then x-real-ip, else a fixed placeholder
+    const ip =
+      request.headers.get("x-forwarded-for")?.split(",")[0]?.trim() ||
+      request.headers.get("x-real-ip") ||
+      "unknown";
+
+    // Semi-stable anonymous ID derived from IP + user agent.
+    // A one-way digest is used instead of base64 of the raw string: base64 is
+    // reversible (exposing the IP), and its first 32 characters cover only the
+    // first 24 bytes of "ip:userAgent", so clients sharing an address collide.
+    const digest = createHash("sha256")
+      .update(`${ip}:${userAgent}`)
+      .digest("base64");
+    const distinctId = `server_${digest.slice(0, DISTINCT_ID_LENGTH)}`;
+
+    await captureServerPageView({
+      distinctId,
+      pathname,
+      userAgent,
+      referer,
+      ip,
+      acceptHeader,
+      acceptLanguage,
+    });
+  } catch (error) {
+    // Log but don't throw - tracking errors should not affect the response
+    // biome-ignore lint/suspicious/noConsole: intentional error logging for debugging
+    console.error("[PostHog] Failed to track markdown request:", error);
+  }
+}
+
+// NOTE(review): presumably identifies a toolkit docs page by its category and
+// toolkit id; the code using this type is outside this hunk — confirm.
 type ToolkitMarkdownTarget = {
   category: string;
   toolkitId: string;
 };
 
+/**
+ * Try to serve clean pre-generated markdown.
+ *
+ * Looks for `${sanitizedPath}.md` under CLEAN_MARKDOWN_DIR. Returns a 200
+ * text/markdown NextResponse when the file is found, and null when the
+ * containment check fails or the file cannot be accessed or read.
+ *
+ * @param request - Incoming request, passed on to trackMarkdownRequest.
+ * @param sanitizedPath - Request path without the `.md` extension.
+ */
+async function tryServeCleanMarkdown(
+  request: NextRequest,
+  sanitizedPath: string
+): Promise<NextResponse | null> {
+  const cleanMarkdownPath = join(CLEAN_MARKDOWN_DIR, `${sanitizedPath}.md`);
+
+  try {
+    // Run the containment check before touching the filesystem so a path that
+    // fails it is never probed with access()
+    if (!isPathWithinDirectory(cleanMarkdownPath, CLEAN_MARKDOWN_DIR)) {
+      return null;
+    }
+    await access(cleanMarkdownPath);
+
+    const content = await readFile(cleanMarkdownPath, "utf-8");
+    await trackMarkdownRequest(request, sanitizedPath);
+
+    return new NextResponse(content, {
+      status: 200,
+      headers: {
+        "Content-Type": "text/markdown; charset=utf-8",
+        "Content-Disposition": "inline",
+        "Cache-Control": "public, max-age=3600",
+        Vary: "Accept, User-Agent",
+      },
+    });
+  } catch {
+    // Missing or unreadable file: let the caller fall back
+    return null;
+  }
+}
+
 /**
  * Check if a path matches the toolkit documentation pattern.
  * Handles both actual toolkit IDs and the [toolkitId] dynamic route pattern.
@@ -176,26 +249,9 @@ export async function GET(
       }
     } else {
       // Try clean markdown first (preferred)
-      // e.g., /en/home/quickstart -> public/_markdown/en/home/quickstart.md
-      const cleanMarkdownPath = join(CLEAN_MARKDOWN_DIR, `${sanitizedPath}.md`);
-
-      try {
-        await access(cleanMarkdownPath);
-        if (isPathWithinDirectory(cleanMarkdownPath, CLEAN_MARKDOWN_DIR)) {
-          const content = await readFile(cleanMarkdownPath, "utf-8");
-
-          return new NextResponse(content, {
-            status: 200,
-            headers: {
-              "Content-Type": "text/markdown; charset=utf-8",
-              "Content-Disposition": "inline",
-              "Cache-Control": "public, max-age=3600",
-              Vary: "Accept, User-Agent",
-            },
-          });
-        }
-      } catch {
-        // Clean markdown not found, fall back to raw MDX
+      const cleanResponse = await tryServeCleanMarkdown(request, sanitizedPath);
+      if (cleanResponse) {
+        return cleanResponse;
       }
 
       // Fallback: raw MDX file
@@ -220,14 +276,21 @@ export async function GET(
 
     const content = await readFile(filePath, "utf-8");
 
+    // Track server-side pageview for AI agent analytics
+    await trackMarkdownRequest(request, sanitizedPath);
+
+    const contentSource = filePath.includes(TOOLKIT_MARKDOWN_ROOT)
+      ? "toolkit-markdown"
+      : "raw-mdx";
+
     // Return the raw markdown with proper headers
     return new NextResponse(content, {
       status: 200,
       headers: {
         "Content-Type": "text/markdown; charset=utf-8",
         "Content-Disposition": "inline",
         "Cache-Control": "public, max-age=3600, stale-while-revalidate=86400",
-        "X-Content-Source": "raw-mdx",
+        "X-Content-Source": contentSource,
         Vary: "Accept, User-Agent",
       },
     });

diff --git a/instrumentation-client.ts b/instrumentation-client.ts
@@ -4,6 +4,8 @@ posthog.init(process.env.NEXT_PUBLIC_POSTHOG_KEY as string, {
   api_host: process.env.NEXT_PUBLIC_POSTHOG_HOST || "https://us.i.posthog.com",
   ui_host: process.env.NEXT_PUBLIC_POSTHOG_UI_HOST || "https://us.posthog.com",
   defaults: "2025-11-30",
+  // Allow AI agent traffic to be captured (default filters out bots)
+  opt_out_useragent_filter: true,
   session_recording: {
     maskAllInputs: true,
     blockClass: "ph-no-capture",

diff --git a/package.json b/package.json
@@ -58,6 +58,7 @@
     "nextra": "4.6.0",
     "nextra-theme-docs": "4.6.0",
     "posthog-js": "1.321.2",
+    "posthog-node": "^5.24.15",
     "react": "19.2.3",
     "react-dom": "19.2.3",
     "react-hook-form": "7.65.0",

diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml