-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmerge_pdf.py
More file actions
executable file
·110 lines (92 loc) · 3.22 KB
/
merge_pdf.py
File metadata and controls
executable file
·110 lines (92 loc) · 3.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python3
"""
PDF Merger
Usage: ./merge_pdf.py /path/to/output.pdf /path/to/1.pdf /path/to/2.pdf ...
Behavior:
- Merge input PDFs in the exact order provided into a single output PDF
- Inputs are untouched; output must not already exist
- All-or-nothing: on any error, prints an error and exits with code 1 (no partial output)
"""
from pathlib import Path
import sys
import time
import shutil
try:
import fitz # PyMuPDF
except ImportError as e:
print(f"Error: Missing dependency - {e}")
sys.exit(1)
def _fail(message: str) -> None:
    """Print ``Error: <message>`` to stdout and exit the process with code 1."""
    print(f"Error: {message}")
    sys.exit(1)


def run_pdf_merge(output_path: str, input_paths: list[str]) -> None:
    """Merge PDFs, in the order given, into a new file at ``output_path``.

    The merged document is first saved to a hidden temporary file in the
    output directory and only moved to ``output_path`` after the save has
    finished, so a failed run leaves no partial output behind.

    Args:
        output_path: Destination path. Must end in ``.pdf``, must not exist
            yet, and its parent directory must already exist.
        input_paths: At least two paths to existing files ending in ``.pdf``.
            Inputs are only read, never modified.

    Raises:
        SystemExit: With code 1, after printing ``Error: ...``, whenever
            validation or merging fails.
    """
    import os  # function-scope import: only needed to build the temp name

    # Stays None until a temp path has been chosen, so the error handler
    # knows whether there is anything to clean up.
    temp_output = None
    try:
        out = Path(output_path)
        if out.suffix.lower() != ".pdf":
            _fail("Output must be a .pdf file")
        if out.exists():
            _fail("Output file already exists")
        if not out.parent.is_dir():
            _fail("Output directory does not exist")
        if len(input_paths) < 2:
            _fail("Provide at least two input PDF files to merge")

        inputs: list[Path] = []
        for p_str in input_paths:
            p = Path(p_str)
            if not p.is_file() or p.suffix.lower() != ".pdf":
                _fail(f"Invalid input PDF - {p}")
            inputs.append(p)

        # pid + nanosecond clock: a whole-second timestamp alone lets two
        # runs started in the same second (same output stem) share a temp
        # file and overwrite or delete each other's work.
        temp_output = (
            out.parent / f".{out.stem}.tmp-{os.getpid()}-{time.time_ns()}.pdf"
        )

        merged = fitz.open()
        try:
            for src in inputs:
                try:
                    doc = fitz.open(str(src))
                except Exception as e:
                    raise RuntimeError(f"Failed to open '{src}': {e}") from e
                try:
                    # Reject encrypted PDFs that require a password
                    if getattr(doc, "needs_pass", False):
                        raise RuntimeError(f"Encrypted PDF not supported: {src}")
                    # Insert all pages
                    merged.insert_pdf(doc, from_page=0, to_page=doc.page_count - 1)
                finally:
                    doc.close()
            # Save to temporary file first
            merged.save(str(temp_output), garbage=4, deflate=True)
        finally:
            merged.close()

        # The merge may have taken a while; re-check so a file that appeared
        # in the meantime is not silently replaced by the move. This narrows
        # the race window but does not close it completely.
        if out.exists():
            raise RuntimeError("Output file already exists")
        shutil.move(str(temp_output), str(out))
        print("Success: merge_pdf completed")
    except Exception as e:
        # Best-effort cleanup: a failure to remove the temp file must not
        # mask the original error, so OS errors here are deliberately ignored.
        if temp_output is not None:
            try:
                temp_output.unlink(missing_ok=True)
            except OSError:
                pass
        _fail(str(e))
def main():
    """Command-line entry point: read ``sys.argv`` and start the merge.

    Expects the output path followed by at least two input paths; otherwise
    prints the usage line and exits with code 1.
    """
    cli_args = sys.argv[1:]  # drop the script name
    if len(cli_args) < 3:
        print("Usage: merge_pdf.py /path/to/output.pdf /path/to/1.pdf /path/to/2.pdf ...")
        sys.exit(1)

    destination, *sources = cli_args
    run_pdf_merge(destination, sources)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()