fix: drop SanitizeXMLRx tag stripping from outline titles (#5)

condorheroblog · condorheroblog · commit fdea868f556c · 2023-05-10T18:52:08.000+08:00
diff --git a/src/core/outline.ts b/src/core/outline.ts
@@ -9,7 +9,7 @@
 import type { Page } from "puppeteer";
 import type { DictMap } from "pdf-lib/src/core/objects/PDFDict";
 import type { PDFContext, PDFDocument, PDFRef } from "pdf-lib";
-import { decode as htmlEntitiesDecode } from "html-entities";
+import { decode } from "html-entities";
 import { PDFArray, PDFDict, PDFHexString, PDFName, PDFNumber } from "pdf-lib";
 
 export interface RootOutlineNode {
@@ -43,15 +43,6 @@ export interface OutlineRef {
 	color?: number[]
 }
 
-const SanitizeXMLRx = /<[^>]+>/g;
-
-function sanitize(str: string) {
-	if (str.includes("<"))
-		str = str.replace(SanitizeXMLRx, "");
-
-	return htmlEntitiesDecode(str);
-}
-
 /**
  * Parses the outline of a webpage based on specified tags.
  * @param {Element[]} tagsToProcess - An array of HTML elements to process.
@@ -257,7 +248,7 @@ function buildPdfObjectsForOutline(outlinesWithRef: OutlineRef[], context: PDFCo
 		const next = outlinesWithRef[i + 1];
 
 		const pdfObject: DictMap = new Map([]);
-		pdfObject.set(PDFName.of("Title"), PDFHexString.fromText(sanitize(item.title)));
+		pdfObject.set(PDFName.of("Title"), PDFHexString.fromText(decode(item.title)));
 		pdfObject.set(PDFName.of("Dest"), PDFName.of(item.destination));
 		pdfObject.set(PDFName.of("Parent"), item.parentRef);