Skip to content

Commit f5bc919

Browse files
committed
[WIP] Update the named page destinations when some PDFs are combined (bug 1997379)
and remove link annotations pointing to a deleted page.
1 parent 160fc39 commit f5bc919

File tree

6 files changed

+383
-10
lines changed

6 files changed

+383
-10
lines changed

src/core/editor/pdf_editor.js

Lines changed: 224 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ class PageData {
3232
this.page = page;
3333
this.documentData = documentData;
3434
this.annotations = null;
35+
// Named destinations which point to this page.
36+
this.pointingNamedDestinations = null;
3537

3638
documentData.pagesMap.put(page.ref, this);
3739
}
@@ -40,9 +42,13 @@ class PageData {
4042
class DocumentData {
4143
constructor(document) {
4244
this.document = document;
45+
this.destinations = null;
4346
this.pageLabels = null;
4447
this.pagesMap = new RefSetCache();
4548
this.oldRefMapping = new RefSetCache();
49+
this.dedupNamedDestinations = new Map();
50+
this.usedNamedDestinations = new Set();
51+
this.postponedRefCopies = new RefSetCache();
4652
}
4753
}
4854

@@ -64,6 +70,7 @@ class PDFEditor {
6470
this.title = title;
6571
this.author = author;
6672
this.pageLabels = null;
73+
this.namedDestinations = new Map();
6774
}
6875

6976
/**
@@ -134,11 +141,20 @@ class PDFEditor {
134141
return newRef;
135142
}
136143
const promises = [];
144+
const {
145+
currentDocument: { postponedRefCopies },
146+
} = this;
137147
if (Array.isArray(obj)) {
138148
if (mustClone) {
139149
obj = obj.slice();
140150
}
141151
for (let i = 0, ii = obj.length; i < ii; i++) {
152+
const postponedActions = postponedRefCopies.get(obj[i]);
153+
if (postponedActions) {
154+
// The object is a reference that needs to be copied later.
155+
postponedActions.push(ref => (obj[i] = ref));
156+
continue;
157+
}
142158
promises.push(
143159
this.#collectDependencies(obj[i], true, xref).then(
144160
newObj => (obj[i] = newObj)
@@ -159,6 +175,12 @@ class PDFEditor {
159175
}
160176
if (dict) {
161177
for (const [key, rawObj] of dict.getRawEntries()) {
178+
const postponedActions = postponedRefCopies.get(rawObj);
179+
if (postponedActions) {
180+
// The object is a reference that needs to be copied later.
181+
postponedActions.push(ref => dict.set(key, ref));
182+
continue;
183+
}
162184
promises.push(
163185
this.#collectDependencies(rawObj, true, xref).then(newObj =>
164186
dict.set(key, newObj)
@@ -189,11 +211,13 @@ class PDFEditor {
189211
const promises = [];
190212
let newIndex = 0;
191213
this.hasSingleFile = pageInfos.length === 1;
214+
const allDocumentData = [];
192215
for (const { document, includePages, excludePages } of pageInfos) {
193216
if (!document) {
194217
continue;
195218
}
196219
const documentData = new DocumentData(document);
220+
allDocumentData.push(documentData);
197221
promises.push(this.#collectDocumentData(documentData));
198222
let keptIndices, keptRanges, deletedIndices, deletedRanges;
199223
for (const page of includePages || []) {
@@ -256,17 +280,23 @@ class PDFEditor {
256280
await Promise.all(promises);
257281
promises.length = 0;
258282

283+
this.#collectValidDestinations(allDocumentData);
259284
this.#collectPageLabels();
260285

261286
for (const page of this.oldPages) {
262287
promises.push(this.#postCollectPageData(page));
263288
}
264289
await Promise.all(promises);
265290

291+
this.#findDuplicateNamedDestinations();
292+
this.#setPostponedRefCopies(allDocumentData);
293+
266294
for (let i = 0, ii = this.oldPages.length; i < ii; i++) {
267295
this.newPages[i] = await this.#makePageCopy(i, null);
268296
}
269297

298+
this.#fixPostponedRefCopies(allDocumentData);
299+
270300
return this.writePDF();
271301
}
272302

@@ -277,9 +307,14 @@ class PDFEditor {
277307
*/
278308
async #collectDocumentData(documentData) {
279309
const { document } = documentData;
280-
await document.pdfManager
281-
.ensureCatalog("rawPageLabels")
282-
.then(pageLabels => (documentData.pageLabels = pageLabels));
310+
await Promise.all([
311+
document.pdfManager
312+
.ensureCatalog("destinations")
313+
.then(destinations => (documentData.destinations = destinations)),
314+
document.pdfManager
315+
.ensureCatalog("rawPageLabels")
316+
.then(pageLabels => (documentData.pageLabels = pageLabels)),
317+
]);
283318
}
284319

285320
/**
@@ -290,6 +325,7 @@ class PDFEditor {
290325
async #postCollectPageData(pageData) {
291326
const {
292327
page: { xref, annotations },
328+
documentData: { pagesMap, destinations, usedNamedDestinations },
293329
} = pageData;
294330

295331
if (!annotations) {
@@ -300,22 +336,166 @@ class PDFEditor {
300336
let newAnnotations = [];
301337
let newIndex = 0;
302338

303-
// TODO: remove only links to deleted pages.
304339
for (const annotationRef of annotations) {
305340
const newAnnotationIndex = newIndex++;
306341
promises.push(
307342
xref.fetchIfRefAsync(annotationRef).then(async annotationDict => {
308343
if (!isName(annotationDict.get("Subtype"), "Link")) {
309344
newAnnotations[newAnnotationIndex] = annotationRef;
345+
return;
346+
}
347+
const action = annotationDict.get("A");
348+
const dest =
349+
action instanceof Dict
350+
? action.get("D")
351+
: annotationDict.get("Dest");
352+
353+
if (Array.isArray(dest) && pagesMap.has(dest[0])) {
354+
newAnnotations[newAnnotationIndex] = annotationRef;
355+
} else if (typeof dest === "string") {
356+
const destString = stringToPDFString(
357+
dest,
358+
/* keepEscapeSequence = */ true
359+
);
360+
if (destinations.has(destString)) {
361+
newAnnotations[newAnnotationIndex] = annotationRef;
362+
usedNamedDestinations.add(destString);
363+
}
310364
}
311365
})
312366
);
313367
}
368+
314369
await Promise.all(promises);
315370
newAnnotations = newAnnotations.filter(annot => !!annot);
316371
pageData.annotations = newAnnotations.length > 0 ? newAnnotations : null;
317372
}
318373

374+
/**
375+
* Some references cannot be copied right away since they correspond to some
376+
* pages that haven't been processed yet. Postpone the copy of those
377+
* references.
378+
* @param {Array<DocumentData>} allDocumentData
379+
*/
380+
#setPostponedRefCopies(allDocumentData) {
381+
for (const { postponedRefCopies, pagesMap } of allDocumentData) {
382+
for (const oldPageRef of pagesMap.keys()) {
383+
postponedRefCopies.put(oldPageRef, []);
384+
}
385+
}
386+
}
387+
388+
/**
389+
* Fix all postponed reference copies.
390+
* @param {Array<DocumentData>} allDocumentData
391+
*/
392+
#fixPostponedRefCopies(allDocumentData) {
393+
for (const { postponedRefCopies, oldRefMapping } of allDocumentData) {
394+
for (const [oldRef, actions] of postponedRefCopies.items()) {
395+
const newRef = oldRefMapping.get(oldRef);
396+
for (const action of actions) {
397+
action(newRef);
398+
}
399+
}
400+
}
401+
}
402+
403+
/**
404+
* Collect named destinations that are still valid (i.e. pointing to kept
405+
* pages).
406+
* @param {Array<DocumentData>} allDocumentData
407+
*/
408+
#collectValidDestinations(allDocumentData) {
409+
for (const documentData of allDocumentData) {
410+
if (!documentData.destinations) {
411+
continue;
412+
}
413+
const { destinations, pagesMap } = documentData;
414+
const newDestinations = (documentData.destinations = new Map());
415+
for (const [key, dest] of Object.entries(destinations)) {
416+
const pageRef = dest[0];
417+
const pageData = pagesMap.get(pageRef);
418+
if (!pageData) {
419+
continue;
420+
}
421+
(pageData.pointingNamedDestinations ||= new Set()).add(key);
422+
newDestinations.set(key, dest);
423+
}
424+
}
425+
}
426+
427+
#findDuplicateNamedDestinations() {
428+
const { namedDestinations } = this;
429+
for (let i = 0, ii = this.oldPages.length; i < ii; i++) {
430+
const page = this.oldPages[i];
431+
const {
432+
documentData: {
433+
destinations,
434+
dedupNamedDestinations,
435+
usedNamedDestinations,
436+
},
437+
} = page;
438+
let { pointingNamedDestinations } = page;
439+
440+
if (!pointingNamedDestinations) {
441+
// No named destinations pointing to this page.
442+
continue;
443+
}
444+
// Keep only the named destinations that are still used.
445+
page.pointingNamedDestinations = pointingNamedDestinations =
446+
pointingNamedDestinations.intersection(usedNamedDestinations);
447+
448+
for (const pointingDest of pointingNamedDestinations) {
449+
if (!usedNamedDestinations.has(pointingDest)) {
450+
// If the named destination isn't used, we can keep it as is.
451+
continue;
452+
}
453+
const dest = destinations.get(pointingDest).slice();
454+
if (!namedDestinations.has(pointingDest)) {
455+
// If the named destination hasn't been used yet, we can keep it
456+
// as is.
457+
namedDestinations.set(pointingDest, dest);
458+
continue;
459+
}
460+
// Create a new unique named destination.
461+
const newName = `${pointingDest}_p${i + 1}`;
462+
dedupNamedDestinations.set(pointingDest, newName);
463+
namedDestinations.set(newName, dest);
464+
}
465+
}
466+
}
467+
468+
#fixNamedDestinations(annotations, dedupNamedDestinations) {
469+
if (dedupNamedDestinations.size === 0) {
470+
return;
471+
}
472+
const fixDestination = (dict, key, dest) => {
473+
if (typeof dest === "string") {
474+
dict.set(
475+
key,
476+
dedupNamedDestinations.get(
477+
stringToPDFString(dest, /* keepEscapeSequence = */ true)
478+
) || dest
479+
);
480+
}
481+
};
482+
483+
for (const annotRef of annotations) {
484+
const annotDict = this.xref[annotRef.num];
485+
if (!isName(annotDict.get("Subtype"), "Link")) {
486+
continue;
487+
}
488+
const action = annotDict.get("A");
489+
if (action instanceof Dict && action.has("D")) {
490+
const dest = action.get("D");
491+
fixDestination(action, "D", dest);
492+
continue;
493+
}
494+
const dest = annotDict.get("Dest");
495+
fixDestination(annotDict, "Dest", dest);
496+
}
497+
}
498+
319499
async #collectPageLabels() {
320500
// We can only preserve page labels when editing a single PDF file.
321501
// This is consistent with behavior in Adobe Acrobat.
@@ -372,14 +552,23 @@ class PDFEditor {
372552
* @returns {Promise<Ref>} the page reference in the new PDF document.
373553
*/
374554
async #makePageCopy(pageIndex) {
375-
const { page, documentData, annotations } = this.oldPages[pageIndex];
555+
const { page, documentData, annotations, pointingNamedDestinations } =
556+
this.oldPages[pageIndex];
376557
this.currentDocument = documentData;
377-
const { oldRefMapping } = documentData;
558+
const { dedupNamedDestinations, oldRefMapping } = documentData;
378559
const { xref, rotate, mediaBox, resources, ref: oldPageRef } = page;
379560
const pageRef = this.newRef;
380561
const pageDict = (this.xref[pageRef.num] = page.pageDict.clone());
381562
oldRefMapping.put(oldPageRef, pageRef);
382563

564+
if (pointingNamedDestinations) {
565+
for (const pointingDest of pointingNamedDestinations) {
566+
const name = dedupNamedDestinations.get(pointingDest) || pointingDest;
567+
const dest = this.namedDestinations.get(name);
568+
dest[0] = pageRef;
569+
}
570+
}
571+
383572
// No need to keep these entries as we'll set them again later.
384573
for (const key of [
385574
"Rotate",
@@ -416,10 +605,16 @@ class PDFEditor {
416605
"Resources",
417606
await this.#collectDependencies(resources, true, xref)
418607
);
419-
pageDict.setIfArray(
420-
"Annots",
421-
await this.#collectDependencies(annotations, true, xref)
422-
);
608+
609+
if (annotations) {
610+
const newAnnotations = await this.#collectDependencies(
611+
annotations,
612+
true,
613+
xref
614+
);
615+
this.#fixNamedDestinations(newAnnotations, dedupNamedDestinations);
616+
pageDict.setIfArray("Annots", newAnnotations);
617+
}
423618

424619
if (this.useObjectStreams) {
425620
const newLastRef = this.newRefCount;
@@ -540,6 +735,24 @@ class PDFEditor {
540735
rootDict.set("PageLabels", pageLabelsRef);
541736
}
542737

738+
#makeDestinationsTree() {
739+
const { namedDestinations } = this;
740+
if (namedDestinations.size === 0) {
741+
return;
742+
}
743+
if (!this.namesDict) {
744+
[this.namesRef, this.namesDict] = this.newDict;
745+
this.rootDict.set("Names", this.namesRef);
746+
}
747+
this.namesDict.set(
748+
"Dests",
749+
this.#makeNameNumTree(
750+
Array.from(namedDestinations.entries()),
751+
/* areNames = */ true
752+
)
753+
);
754+
}
755+
543756
/**
544757
* Create the root dictionary.
545758
* @returns {Promise<void>}
@@ -550,6 +763,7 @@ class PDFEditor {
550763
rootDict.set("Version", this.version);
551764
this.#makePageTree();
552765
this.#makePageLabelsTree();
766+
this.#makeDestinationsTree();
553767
}
554768

555769
/**

src/core/primitives.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,12 @@ class RefSetCache {
439439
yield [Ref.fromString(ref), value];
440440
}
441441
}
442+
443+
*keys() {
444+
for (const ref of this._map.keys()) {
445+
yield Ref.fromString(ref);
446+
}
447+
}
442448
}
443449

444450
function isName(v, name) {

test/pdfs/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,3 +758,4 @@
758758
!doc_2_3_pages.pdf
759759
!doc_3_3_pages.pdf
760760
!labelled_pages.pdf
761+
!extract_link.pdf

test/pdfs/extract_link.pdf

9.97 KB
Binary file not shown.

0 commit comments

Comments
 (0)