Skip to content

Commit 160fc39

Browse files
committed
[WIP] Update the page labels tree when a pdf is extracted (bug 1997379)
1 parent c4f05f4 commit 160fc39

File tree

5 files changed

+166
-5
lines changed

5 files changed

+166
-5
lines changed

src/core/catalog.js

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,16 @@ class Catalog {
735735
return rawDests;
736736
}
737737

738+
get rawPageLabels() {
739+
const obj = this.#catDict.getRaw("PageLabels");
740+
if (!obj) {
741+
return null;
742+
}
743+
744+
const numberTree = new NumberTree(obj, this.xref);
745+
return numberTree.getAll();
746+
}
747+
738748
get pageLabels() {
739749
let obj = null;
740750
try {
@@ -749,17 +759,15 @@ class Catalog {
749759
}
750760

751761
#readPageLabels() {
752-
const obj = this.#catDict.getRaw("PageLabels");
753-
if (!obj) {
762+
const nums = this.rawPageLabels;
763+
if (!nums) {
754764
return null;
755765
}
756766

757767
const pageLabels = new Array(this.numPages);
758768
let style = null,
759769
prefix = "";
760770

761-
const numberTree = new NumberTree(obj, this.xref);
762-
const nums = numberTree.getAll();
763771
let currentLabel = "",
764772
currentIndex = 1;
765773

src/core/editor/pdf_editor.js

Lines changed: 119 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import { StringStream } from "../stream.js";
2525
import { stringToAsciiOrUTF16BE } from "../core_utils.js";
2626

2727
const MAX_LEAVES_PER_PAGES_NODE = 16;
28+
const MAX_IN_NAME_TREE_NODE = 64;
2829

2930
class PageData {
3031
constructor(page, documentData) {
@@ -39,6 +40,7 @@ class PageData {
3940
class DocumentData {
4041
constructor(document) {
4142
this.document = document;
43+
this.pageLabels = null;
4244
this.pagesMap = new RefSetCache();
4345
this.oldRefMapping = new RefSetCache();
4446
}
@@ -61,6 +63,7 @@ class PDFEditor {
6163
this.version = "1.7";
6264
this.title = title;
6365
this.author = author;
66+
this.pageLabels = null;
6467
}
6568

6669
/**
@@ -253,6 +256,8 @@ class PDFEditor {
253256
await Promise.all(promises);
254257
promises.length = 0;
255258

259+
this.#collectPageLabels();
260+
256261
for (const page of this.oldPages) {
257262
promises.push(this.#postCollectPageData(page));
258263
}
@@ -270,7 +275,12 @@ class PDFEditor {
270275
* @param {DocumentData} documentData
271276
* @return {Promise<void>}
272277
*/
273-
async #collectDocumentData(documentData) {}
278+
async #collectDocumentData(documentData) {
279+
const { document } = documentData;
280+
await document.pdfManager
281+
.ensureCatalog("rawPageLabels")
282+
.then(pageLabels => (documentData.pageLabels = pageLabels));
283+
}
274284

275285
/**
276286
* Post process the collected page data.
@@ -306,6 +316,56 @@ class PDFEditor {
306316
pageData.annotations = newAnnotations.length > 0 ? newAnnotations : null;
307317
}
308318

319+
async #collectPageLabels() {
320+
// We can only preserve page labels when editing a single PDF file.
321+
// This is consistent with behavior in Adobe Acrobat.
322+
if (!this.hasSingleFile) {
323+
return;
324+
}
325+
const {
326+
documentData: { document, pageLabels },
327+
} = this.oldPages[0];
328+
if (!pageLabels) {
329+
return;
330+
}
331+
const numPages = document.numPages;
332+
const oldPageLabels = [];
333+
const oldPageIndices = new Set(
334+
this.oldPages.map(({ page: { pageIndex } }) => pageIndex)
335+
);
336+
let currentLabel = null;
337+
let stFirstIndex = -1;
338+
for (let i = 0; i < numPages; i++) {
339+
const newLabel = pageLabels.get(i);
340+
if (newLabel) {
341+
currentLabel = newLabel;
342+
stFirstIndex = currentLabel.has("St") ? i : -1;
343+
}
344+
if (!oldPageIndices.has(i)) {
345+
continue;
346+
}
347+
if (stFirstIndex !== -1) {
348+
const st = currentLabel.get("St");
349+
currentLabel = currentLabel.clone();
350+
currentLabel.set("St", st + (i - stFirstIndex));
351+
stFirstIndex = -1;
352+
}
353+
oldPageLabels.push(currentLabel);
354+
}
355+
currentLabel = oldPageLabels[0];
356+
let currentIndex = 0;
357+
const newPageLabels = (this.pageLabels = [[0, currentLabel]]);
358+
for (let i = 0, ii = oldPageLabels.length; i < ii; i++) {
359+
const label = oldPageLabels[i];
360+
if (label === currentLabel) {
361+
continue;
362+
}
363+
currentIndex = i;
364+
currentLabel = label;
365+
newPageLabels.push([currentIndex, currentLabel]);
366+
}
367+
}
368+
309369
/**
310370
* Create a copy of a page.
311371
* @param {number} pageIndex
@@ -423,6 +483,63 @@ class PDFEditor {
423483
}
424484
}
425485

486+
/**
487+
* Create a name or number tree from the given map.
488+
* @param {Array<[string, any]>} map
489+
* @returns {Ref}
490+
*/
491+
#makeNameNumTree(map, areNames) {
492+
const allEntries = map.sort(
493+
areNames
494+
? ([keyA], [keyB]) => keyA.localeCompare(keyB)
495+
: ([keyA], [keyB]) => keyA - keyB
496+
);
497+
const maxLeaves =
498+
MAX_IN_NAME_TREE_NODE <= 1 ? allEntries.length : MAX_IN_NAME_TREE_NODE;
499+
const [treeRef, treeDict] = this.newDict;
500+
const stack = [{ dict: treeDict, entries: allEntries }];
501+
const valueType = areNames ? "Names" : "Nums";
502+
503+
while (stack.length > 0) {
504+
const { dict, entries } = stack.pop();
505+
if (entries.length <= maxLeaves) {
506+
dict.set("Limits", [entries[0][0], entries.at(-1)[0]]);
507+
dict.set(valueType, entries.flat());
508+
continue;
509+
}
510+
const entriesChunks = [];
511+
const chunkSize = Math.ceil(entries.length / maxLeaves);
512+
for (let i = 0; i < entries.length; i += chunkSize) {
513+
entriesChunks.push(entries.slice(i, i + chunkSize));
514+
}
515+
const entriesRefs = [];
516+
dict.set("Kids", entriesRefs);
517+
for (const chunk of entriesChunks) {
518+
const [entriesRef, entriesDict] = this.newDict;
519+
entriesRefs.push(entriesRef);
520+
entriesDict.set("Limits", [chunk[0][0], chunk.at(-1)[0]]);
521+
stack.push({ dict: entriesDict, entries: chunk });
522+
}
523+
}
524+
return treeRef;
525+
}
526+
527+
/**
528+
* Create the page labels tree if it exists.
529+
*/
530+
#makePageLabelsTree() {
531+
const { pageLabels } = this;
532+
if (!pageLabels || pageLabels.length === 0) {
533+
return;
534+
}
535+
const { rootDict } = this;
536+
const pageLabelsRef = this.#makeNameNumTree(
537+
this.pageLabels,
538+
/* areNames = */ false
539+
);
540+
rootDict.set("PageLabels", pageLabelsRef);
541+
}
542+
426543
/**
427544
* Create the root dictionary.
428545
* @returns {Promise<void>}
@@ -432,6 +549,7 @@ class PDFEditor {
432549
rootDict.setIfName("Type", "Catalog");
433550
rootDict.set("Version", this.version);
434551
this.#makePageTree();
552+
this.#makePageLabelsTree();
435553
}
436554

437555
/**

test/pdfs/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -757,3 +757,4 @@
757757
!doc_1_3_pages.pdf
758758
!doc_2_3_pages.pdf
759759
!doc_3_3_pages.pdf
760+
!labelled_pages.pdf

test/pdfs/labelled_pages.pdf

6.56 KB
Binary file not shown.

test/unit/api_spec.js

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5495,5 +5495,39 @@ small scripts as well as for`);
54955495
await loadingTask.destroy();
54965496
});
54975497
});
5498+
5499+
describe("Page labels", function () {
  it("extract page and check labels", async function () {
    // Check the labels of the source document first.
    const sourceTask = getDocument(
      buildGetDocumentParams("labelled_pages.pdf")
    );
    const sourceDoc = await sourceTask.promise;
    const sourceLabels = await sourceDoc.getPageLabels();
    expect(sourceLabels).toEqual([
      "i" /* Page 0 */,
      "ii" /* Page 1 */,
      "iii" /* Page 2 */,
      "iv" /* Page 3 */,
      "1" /* Page 4 */,
      "2" /* Page 5 */,
      "3" /* Page 6 */,
      "a" /* Page 7 */,
      "b" /* Page 8 */,
      "4" /* Page 9 */,
      "5" /* Page 10 */,
    ]);

    // Extract a subset of the pages, then release the source document.
    const data = await sourceDoc.extractPages({
      document: null,
      includePages: [0, 1, 5, 7, 10],
    });
    await sourceTask.destroy();

    // Reload the extracted document and check the labels it carries.
    const extractedTask = getDocument(data);
    const extractedDoc = await extractedTask.promise;
    const extractedLabels = await extractedDoc.getPageLabels();
    expect(extractedLabels).toEqual(["i", "ii", "1", "a", "5"]);
    await extractedTask.destroy();
  });
});
54985532
});
54995533
});

0 commit comments

Comments
 (0)