Skip to content

Commit e0ebf46

Browse files
authored
Markdown rendering update (#1989)
* Enable markdown rendering via web worker and improve code highlighting
* LanguageFn typing
1 parent aa82c43 commit e0ebf46

File tree

3 files changed

+140
-50
lines changed

3 files changed

+140
-50
lines changed

src/lib/components/chat/MarkdownRenderer.svelte

Lines changed: 35 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
<script lang="ts">
22
import { processBlocks, processBlocksSync, type BlockToken } from "$lib/utils/marked";
3-
// import MarkdownWorker from "$lib/workers/markdownWorker?worker";
3+
import MarkdownWorker from "$lib/workers/markdownWorker?worker";
44
import MarkdownBlock from "./MarkdownBlock.svelte";
55
import { browser } from "$app/environment";
66
7-
import DOMPurify from "isomorphic-dompurify";
8-
import { onMount } from "svelte";
7+
import { onMount, onDestroy } from "svelte";
98
import { updateDebouncer } from "$lib/utils/updates";
109
1110
interface Props {
@@ -17,53 +16,51 @@
1716
let { content, sources = [], loading = false }: Props = $props();
1817
1918
let blocks: BlockToken[] = $state(processBlocksSync(content, sources));
19+
let worker: Worker | null = null;
20+
let latestRequestId = 0;
2021
21-
async function processContent(
22-
content: string,
23-
sources: { title?: string; link: string }[]
24-
): Promise<BlockToken[]> {
25-
// Note: Worker support for blocks can be added later if needed
26-
// For now, use direct processing which is still efficient due to block memoization
27-
return processBlocks(content, sources);
22+
/**
 * Applies a finished processing result to the component.
 *
 * The result is ignored when `requestId` is not the most recently issued id
 * (`latestRequestId`), so a slower, older request cannot overwrite newer output.
 * Otherwise `blocks` is replaced and the render is reported as finished.
 *
 * NOTE(review): a superseded request returns before `endRender()` is called —
 * confirm that `updateDebouncer` tolerates `startRender()` calls without a
 * matching `endRender()`.
 */
function handleBlocks(result: BlockToken[], requestId: number) {
23+
if (requestId !== latestRequestId) return;
24+
blocks = result;
25+
updateDebouncer.endRender();
2826
}
2927
3028
$effect(() => {
3129
if (!browser) {
3230
blocks = processBlocksSync(content, sources);
33-
} else {
34-
(async () => {
35-
updateDebouncer.startRender();
36-
blocks = await processContent(content, sources).then(async (processedBlocks) =>
37-
Promise.all(
38-
processedBlocks.map(async (block) => ({
39-
...block,
40-
tokens: await Promise.all(
41-
block.tokens.map(async (token) => {
42-
if (token.type === "text") {
43-
token.html = DOMPurify.sanitize(await token.html);
44-
}
45-
return token;
46-
})
47-
),
48-
}))
49-
)
50-
);
31+
return;
32+
}
33+
34+
const requestId = ++latestRequestId;
5135
52-
updateDebouncer.endRender();
53-
})();
36+
if (worker) {
37+
updateDebouncer.startRender();
38+
worker.postMessage({ type: "process", content, sources, requestId });
39+
return;
5440
}
41+
42+
(async () => {
43+
updateDebouncer.startRender();
44+
const processed = await processBlocks(content, sources);
45+
// Only apply if this is still the latest request
46+
handleBlocks(processed, requestId);
47+
})();
5548
});
5649
5750
onMount(() => {
58-
// todo: fix worker, seems to be transmitting a lot of data
59-
// worker = browser && window.Worker ? new MarkdownWorker() : null;
51+
if (typeof Worker !== "undefined") {
52+
worker = new MarkdownWorker();
53+
worker.onmessage = (event: MessageEvent) => {
54+
const data = event.data as { type?: string; blocks?: BlockToken[]; requestId?: number };
55+
if (data?.type !== "processed" || !data.blocks || data.requestId === undefined) return;
56+
handleBlocks(data.blocks, data.requestId);
57+
};
58+
}
59+
});
6060
61-
DOMPurify.addHook("afterSanitizeAttributes", (node) => {
62-
if (node.tagName === "A") {
63-
node.setAttribute("target", "_blank");
64-
node.setAttribute("rel", "noreferrer");
65-
}
66-
});
61+
// Teardown: terminate the markdown worker if one was created and drop the
// reference so later effect runs take the non-worker path.
onDestroy(() => {
62+
worker?.terminate();
63+
worker = null;
6764
});
6865
</script>
6966

src/lib/utils/marked.ts

Lines changed: 96 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,55 @@ type SimpleSource = {
77
title?: string;
88
link: string;
99
};
10-
import hljs from "highlight.js";
10+
import hljs from "highlight.js/lib/core";
11+
import type { LanguageFn } from "highlight.js";
12+
import javascript from "highlight.js/lib/languages/javascript";
13+
import typescript from "highlight.js/lib/languages/typescript";
14+
import json from "highlight.js/lib/languages/json";
15+
import bash from "highlight.js/lib/languages/bash";
16+
import shell from "highlight.js/lib/languages/shell";
17+
import python from "highlight.js/lib/languages/python";
18+
import go from "highlight.js/lib/languages/go";
19+
import rust from "highlight.js/lib/languages/rust";
20+
import java from "highlight.js/lib/languages/java";
21+
import csharp from "highlight.js/lib/languages/csharp";
22+
import cpp from "highlight.js/lib/languages/cpp";
23+
import cLang from "highlight.js/lib/languages/c";
24+
import xml from "highlight.js/lib/languages/xml";
25+
import css from "highlight.js/lib/languages/css";
26+
import scss from "highlight.js/lib/languages/scss";
27+
import markdownLang from "highlight.js/lib/languages/markdown";
28+
import yaml from "highlight.js/lib/languages/yaml";
29+
import sql from "highlight.js/lib/languages/sql";
30+
import plaintext from "highlight.js/lib/languages/plaintext";
1131
import { parseIncompleteMarkdown } from "./parseIncompleteMarkdown";
1232
import { parseMarkdownIntoBlocks } from "./parseBlocks";
1333

34+
// Languages registered with the highlight.js core build, as [name, grammar] pairs.
// Only these grammars are imported above; "html" reuses the xml grammar.
const bundledLanguages: [string, LanguageFn][] = [
35+
["javascript", javascript],
36+
["typescript", typescript],
37+
["json", json],
38+
["bash", bash],
39+
["shell", shell],
40+
["python", python],
41+
["go", go],
42+
["rust", rust],
43+
["java", java],
44+
["csharp", csharp],
45+
["cpp", cpp],
46+
["c", cLang],
47+
["xml", xml],
48+
["html", xml],
49+
["css", css],
50+
["scss", scss],
51+
["markdown", markdownLang],
52+
["yaml", yaml],
53+
["sql", sql],
54+
["plaintext", plaintext],
55+
];
56+
57+
// Register every bundled grammar once, at module load.
bundledLanguages.forEach(([name, language]) => hljs.registerLanguage(name, language));
58+
1459
interface katexBlockToken extends Tokens.Generic {
1560
type: "katexBlock";
1661
raw: string;
@@ -159,15 +204,40 @@ function addInlineCitations(md: string, webSearchSources: SimpleSource[] = []):
159204
});
160205
}
161206

207+
/**
 * Validates and normalises a link target before it is interpolated into an
 * `href="…"` attribute.
 *
 * The scheme check runs on a normalised copy of the URL so that obfuscated
 * forms are caught as well: HTML character references are decoded
 * (`javascript&#58;…`, `&colon;`), tab/CR/LF are removed anywhere in the
 * string, and leading control characters are dropped, matching how browsers
 * pre-process a URL before parsing its scheme.
 *
 * @param href - Raw link destination; may be missing.
 * @returns `undefined` when `href` is empty or uses a script-capable scheme
 *   (`javascript:`, `vbscript:`, `data:text/html`). Otherwise the trimmed URL
 *   with a trailing `>` removed and any `"` encoded as `&quot;` so the value
 *   cannot terminate the surrounding attribute.
 */
function sanitizeHref(href?: string | null): string | undefined {
	if (!href) return undefined;
	const trimmed = href.trim();

	// Decode the character references a browser would resolve inside an attribute value.
	const decoded = trimmed
		.replace(/&#(?:x([0-9a-f]+)|(\d+));?/gi, (_match: string, hex?: string, dec?: string) => {
			const codePoint = hex !== undefined ? parseInt(hex, 16) : parseInt(dec ?? "", 10);
			// String.fromCodePoint throws above U+10FFFF; treat such references as empty.
			return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : "";
		})
		.replace(/&colon;/gi, ":")
		.replace(/&(?:tab|newline);/gi, "");

	// Browsers strip tab/CR/LF anywhere in a URL and leading C0 controls/space.
	const scheme = decoded
		.replace(/[\t\n\r]/g, "")
		// eslint-disable-next-line no-control-regex
		.replace(/^[\u0000-\u0020]+/, "")
		.toLowerCase();

	if (
		scheme.startsWith("javascript:") ||
		scheme.startsWith("vbscript:") ||
		scheme.startsWith("data:text/html")
	) {
		return undefined;
	}

	// A raw double quote would close the href attribute; encode it.
	return trimmed.replace(/>$/, "").replace(/"/g, "&quot;");
}
216+
217+
function highlightCode(text: string, lang?: string): string {
218+
if (lang && hljs.getLanguage(lang)) {
219+
try {
220+
return hljs.highlight(text, { language: lang, ignoreIllegals: true }).value;
221+
} catch {
222+
// fall through to auto-detect
223+
}
224+
}
225+
return hljs.highlightAuto(text).value;
226+
}
227+
162228
function createMarkedInstance(sources: SimpleSource[]): Marked {
163229
return new Marked({
164230
hooks: {
165231
postprocess: (html) => addInlineCitations(html, sources),
166232
},
167233
extensions: [katexBlockExtension, katexInlineExtension],
168234
renderer: {
169-
link: (href, title, text) =>
170-
`<a href="${href?.replace(/>$/, "")}" target="_blank" rel="noreferrer">${text}</a>`,
235+
link: (href, title, text) => {
236+
const safeHref = sanitizeHref(href);
237+
return safeHref
238+
? `<a href="${safeHref}" target="_blank" rel="noreferrer">${text}</a>`
239+
: `<span>${escapeHTML(text ?? "")}</span>`;
240+
},
171241
html: (html) => escapeHTML(html),
172242
},
173243
gfm: true,
@@ -200,6 +270,13 @@ type TextToken = {
200270
html: string | Promise<string>;
201271
};
202272

273+
const blockCache = new Map<string, BlockToken>();
274+
275+
/**
 * Builds the `blockCache` lookup key for one markdown block.
 *
 * The key combines the block's position (`index`), the result of
 * `hashString(blockContent)`, and the `link` of every source joined by "|".
 * Source titles are not part of the key.
 */
function cacheKey(index: number, blockContent: string, sources: SimpleSource[]) {
276+
const sourceKey = sources.map((s) => s.link).join("|");
277+
return `${index}-${hashString(blockContent)}|${sourceKey}`;
278+
}
279+
203280
export async function processTokens(content: string, sources: SimpleSource[]): Promise<Token[]> {
204281
// Apply incomplete markdown preprocessing for smooth streaming
205282
const processedContent = parseIncompleteMarkdown(content);
@@ -213,7 +290,7 @@ export async function processTokens(content: string, sources: SimpleSource[]): P
213290
return {
214291
type: "code" as const,
215292
lang: token.lang,
216-
code: hljs.highlightAuto(token.text, hljs.getLanguage(token.lang)?.aliases).value,
293+
code: highlightCode(token.text, token.lang),
217294
rawCode: token.text,
218295
isClosed: isFencedBlockClosed(token.raw ?? ""),
219296
};
@@ -240,7 +317,7 @@ export function processTokensSync(content: string, sources: SimpleSource[]): Tok
240317
return {
241318
type: "code" as const,
242319
lang: token.lang,
243-
code: hljs.highlightAuto(token.text, hljs.getLanguage(token.lang)?.aliases).value,
320+
code: highlightCode(token.text, token.lang),
244321
rawCode: token.text,
245322
isClosed: isFencedBlockClosed(token.raw ?? ""),
246323
};
@@ -282,12 +359,18 @@ export async function processBlocks(
282359

283360
return await Promise.all(
284361
blocks.map(async (blockContent, index) => {
362+
const key = cacheKey(index, blockContent, sources);
363+
const cached = blockCache.get(key);
364+
if (cached) return cached;
365+
285366
const tokens = await processTokens(blockContent, sources);
286-
return {
367+
const block: BlockToken = {
287368
id: `${index}-${hashString(blockContent)}`,
288369
content: blockContent,
289370
tokens,
290371
};
372+
blockCache.set(key, block);
373+
return block;
291374
})
292375
);
293376
}
@@ -299,11 +382,17 @@ export function processBlocksSync(content: string, sources: SimpleSource[] = [])
299382
const blocks = parseMarkdownIntoBlocks(content);
300383

301384
return blocks.map((blockContent, index) => {
385+
const key = cacheKey(index, blockContent, sources);
386+
const cached = blockCache.get(key);
387+
if (cached) return cached;
388+
302389
const tokens = processTokensSync(blockContent, sources);
303-
return {
390+
const block: BlockToken = {
304391
id: `${index}-${hashString(blockContent)}`,
305392
content: blockContent,
306393
tokens,
307394
};
395+
blockCache.set(key, block);
396+
return block;
308397
});
309398
}

src/lib/workers/markdownWorker.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,19 @@ type SimpleSource = {
33
title?: string;
44
link: string;
55
};
6-
import { processTokens, type Token } from "$lib/utils/marked";
6+
import { processBlocks, type BlockToken } from "$lib/utils/marked";
77

88
/**
 * Request message accepted by the markdown worker: the markdown `content` to
 * process, the `sources` passed along to processing, and a `requestId` that
 * is echoed back in the reply so the sender can match results to requests.
 */
export type IncomingMessage = {
99
type: "process";
1010
content: string;
1111
sources: SimpleSource[];
12+
requestId: number;
1213
};
1314

1415
export type OutgoingMessage = {
1516
type: "processed";
16-
tokens: Token[];
17+
blocks: BlockToken[];
18+
requestId: number;
1719
};
1820

1921
// Flag to track if the worker is currently processing a message
@@ -31,9 +33,11 @@ async function processMessage() {
3133
isProcessing = true;
3234

3335
try {
34-
const { content, sources } = nextMessage;
35-
const processedTokens = await processTokens(content, sources);
36-
postMessage(JSON.parse(JSON.stringify({ type: "processed", tokens: processedTokens })));
36+
const { content, sources, requestId } = nextMessage;
37+
const processedBlocks = await processBlocks(content, sources);
38+
postMessage(
39+
JSON.parse(JSON.stringify({ type: "processed", blocks: processedBlocks, requestId }))
40+
);
3741
} finally {
3842
isProcessing = false;
3943

0 commit comments

Comments
 (0)