Skip to content

Commit 57334bd

Browse files
authored
Merge pull request #20411 from calixteman/split_merge_p2
Update the page labels tree when a pdf is extracted (bug 1997379)
2 parents 85ed401 + ad97c5b commit 57334bd

File tree

5 files changed

+169
-5
lines changed

5 files changed

+169
-5
lines changed

src/core/catalog.js

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,16 @@ class Catalog {
735735
return rawDests;
736736
}
737737

738+
get rawPageLabels() {
739+
const obj = this.#catDict.getRaw("PageLabels");
740+
if (!obj) {
741+
return null;
742+
}
743+
744+
const numberTree = new NumberTree(obj, this.xref);
745+
return numberTree.getAll();
746+
}
747+
738748
get pageLabels() {
739749
let obj = null;
740750
try {
@@ -749,17 +759,15 @@ class Catalog {
749759
}
750760

751761
#readPageLabels() {
752-
const obj = this.#catDict.getRaw("PageLabels");
753-
if (!obj) {
762+
const nums = this.rawPageLabels;
763+
if (!nums) {
754764
return null;
755765
}
756766

757767
const pageLabels = new Array(this.numPages);
758768
let style = null,
759769
prefix = "";
760770

761-
const numberTree = new NumberTree(obj, this.xref);
762-
const nums = numberTree.getAll();
763771
let currentLabel = "",
764772
currentIndex = 1;
765773

src/core/editor/pdf_editor.js

Lines changed: 122 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import { StringStream } from "../stream.js";
2525
import { stringToAsciiOrUTF16BE } from "../core_utils.js";
2626

2727
const MAX_LEAVES_PER_PAGES_NODE = 16;
28+
const MAX_IN_NAME_TREE_NODE = 64;
2829

2930
class PageData {
3031
constructor(page, documentData) {
@@ -39,6 +40,7 @@ class PageData {
3940
class DocumentData {
4041
constructor(document) {
4142
this.document = document;
43+
this.pageLabels = null;
4244
this.pagesMap = new RefSetCache();
4345
this.oldRefMapping = new RefSetCache();
4446
}
@@ -61,6 +63,7 @@ class PDFEditor {
6163
this.version = "1.7";
6264
this.title = title;
6365
this.author = author;
66+
this.pageLabels = null;
6467
}
6568

6669
/**
@@ -253,6 +256,8 @@ class PDFEditor {
253256
await Promise.all(promises);
254257
promises.length = 0;
255258

259+
this.#collectPageLabels();
260+
256261
for (const page of this.oldPages) {
257262
promises.push(this.#postCollectPageData(page));
258263
}
@@ -270,7 +275,12 @@ class PDFEditor {
270275
* @param {DocumentData} documentData
271276
* @return {Promise<void>}
272277
*/
273-
async #collectDocumentData(documentData) {}
278+
async #collectDocumentData(documentData) {
279+
const { document } = documentData;
280+
await document.pdfManager
281+
.ensureCatalog("rawPageLabels")
282+
.then(pageLabels => (documentData.pageLabels = pageLabels));
283+
}
274284

275285
/**
276286
* Post process the collected page data.
@@ -306,6 +316,56 @@ class PDFEditor {
306316
pageData.annotations = newAnnotations.length > 0 ? newAnnotations : null;
307317
}
308318

319+
async #collectPageLabels() {
320+
// We can only preserve page labels when editing a single PDF file.
321+
// This is consistent with behavior in Adobe Acrobat.
322+
if (!this.hasSingleFile) {
323+
return;
324+
}
325+
const {
326+
documentData: { document, pageLabels },
327+
} = this.oldPages[0];
328+
if (!pageLabels) {
329+
return;
330+
}
331+
const numPages = document.numPages;
332+
const oldPageLabels = [];
333+
const oldPageIndices = new Set(
334+
this.oldPages.map(({ page: { pageIndex } }) => pageIndex)
335+
);
336+
let currentLabel = null;
337+
let stFirstIndex = -1;
338+
for (let i = 0; i < numPages; i++) {
339+
const newLabel = pageLabels.get(i);
340+
if (newLabel) {
341+
currentLabel = newLabel;
342+
stFirstIndex = currentLabel.has("St") ? i : -1;
343+
}
344+
if (!oldPageIndices.has(i)) {
345+
continue;
346+
}
347+
if (stFirstIndex !== -1) {
348+
const st = currentLabel.get("St");
349+
currentLabel = currentLabel.clone();
350+
currentLabel.set("St", st + (i - stFirstIndex));
351+
stFirstIndex = -1;
352+
}
353+
oldPageLabels.push(currentLabel);
354+
}
355+
currentLabel = oldPageLabels[0];
356+
let currentIndex = 0;
357+
const newPageLabels = (this.pageLabels = [[0, currentLabel]]);
358+
for (let i = 0, ii = oldPageLabels.length; i < ii; i++) {
359+
const label = oldPageLabels[i];
360+
if (label === currentLabel) {
361+
continue;
362+
}
363+
currentIndex = i;
364+
currentLabel = label;
365+
newPageLabels.push([currentIndex, currentLabel]);
366+
}
367+
}
368+
309369
/**
310370
* Create a copy of a page.
311371
* @param {number} pageIndex
@@ -423,6 +483,66 @@ class PDFEditor {
423483
}
424484
}
425485

486+
/**
487+
* Create a name or number tree from the given map.
488+
* @param {Array<[string, any]>} map
489+
* @returns {Ref}
490+
*/
491+
#makeNameNumTree(map, areNames) {
492+
const allEntries = map.sort(
493+
areNames
494+
? ([keyA], [keyB]) => keyA.localeCompare(keyB)
495+
: ([keyA], [keyB]) => keyA - keyB
496+
);
497+
const maxLeaves =
498+
MAX_IN_NAME_TREE_NODE <= 1 ? allEntries.length : MAX_IN_NAME_TREE_NODE;
499+
const [treeRef, treeDict] = this.newDict;
500+
const stack = [{ dict: treeDict, entries: allEntries }];
501+
const valueType = areNames ? "Names" : "Nums";
502+
503+
while (stack.length > 0) {
504+
const { dict, entries } = stack.pop();
505+
if (entries.length <= maxLeaves) {
506+
dict.set("Limits", [entries[0][0], entries.at(-1)[0]]);
507+
dict.set(valueType, entries.flat());
508+
continue;
509+
}
510+
const entriesChunks = [];
511+
const chunkSize = Math.max(
512+
maxLeaves,
513+
Math.ceil(entries.length / maxLeaves)
514+
);
515+
for (let i = 0; i < entries.length; i += chunkSize) {
516+
entriesChunks.push(entries.slice(i, i + chunkSize));
517+
}
518+
const entriesRefs = [];
519+
dict.set("Kids", entriesRefs);
520+
for (const chunk of entriesChunks) {
521+
const [entriesRef, entriesDict] = this.newDict;
522+
entriesRefs.push(entriesRef);
523+
entriesDict.set("Limits", [chunk[0][0], chunk.at(-1)[0]]);
524+
stack.push({ dict: entriesDict, entries: chunk });
525+
}
526+
}
527+
return treeRef;
528+
}
529+
530+
/**
531+
* Create the page labels tree if it exists.
532+
*/
533+
#makePageLabelsTree() {
534+
const { pageLabels } = this;
535+
if (!pageLabels || pageLabels.length === 0) {
536+
return;
537+
}
538+
const { rootDict } = this;
539+
const pageLabelsRef = this.#makeNameNumTree(
540+
this.pageLabels,
541+
/* areNames = */ false
542+
);
543+
rootDict.set("PageLabels", pageLabelsRef);
544+
}
545+
426546
/**
427547
* Create the root dictionary.
428548
* @returns {Promise<void>}
@@ -432,6 +552,7 @@ class PDFEditor {
432552
rootDict.setIfName("Type", "Catalog");
433553
rootDict.set("Version", this.version);
434554
this.#makePageTree();
555+
this.#makePageLabelsTree();
435556
}
436557

437558
/**

test/pdfs/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -757,3 +757,4 @@
757757
!doc_1_3_pages.pdf
758758
!doc_2_3_pages.pdf
759759
!doc_3_3_pages.pdf
760+
!labelled_pages.pdf

test/pdfs/labelled_pages.pdf

6.56 KB
Binary file not shown.

test/unit/api_spec.js

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5542,5 +5542,39 @@ small scripts as well as for`);
55425542
await loadingTask.destroy();
55435543
});
55445544
});
5545+
5546+
describe("Page labels", function () {
  it("extract page and check labels", async function () {
    // Check the labels of the source document first.
    const sourceTask = getDocument(
      buildGetDocumentParams("labelled_pages.pdf")
    );
    const sourceDoc = await sourceTask.promise;
    const sourceLabels = await sourceDoc.getPageLabels();
    expect(sourceLabels).toEqual([
      "i" /* Page 0 */,
      "ii" /* Page 1 */,
      "iii" /* Page 2 */,
      "iv" /* Page 3 */,
      "1" /* Page 4 */,
      "2" /* Page 5 */,
      "3" /* Page 6 */,
      "a" /* Page 7 */,
      "b" /* Page 8 */,
      "4" /* Page 9 */,
      "5" /* Page 10 */,
    ]);

    // Keep only a few pages of the source document.
    const extractedData = await sourceDoc.extractPages({
      document: null,
      includePages: [0, 1, 5, 7, 10],
    });
    await sourceTask.destroy();

    // Load the extracted document and check the labels of the kept pages.
    const extractedTask = getDocument(extractedData);
    const extractedDoc = await extractedTask.promise;
    const extractedLabels = await extractedDoc.getPageLabels();
    expect(extractedLabels).toEqual(["i", "ii", "1", "a", "5"]);
    await extractedTask.destroy();
  });
});
55455579
});
55465580
});

0 commit comments

Comments
 (0)