-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdfsplice.py
executable file
·202 lines (167 loc) · 7.48 KB
/
pdfsplice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#!/usr/bin/env python3
# Tool to chop and splice PDFs.
# Takes ranges of pages from one or more PDFs and creates a new PDF from them.
# Requirements:
# pip3 install 'PyPDF2<3'   (this script uses the PdfFileReader/PdfFileWriter API, which PyPDF2 3.0 removed)
import os
import sys
import re
from PyPDF2 import PdfFileWriter, PdfFileReader
# Help text printed for --help / -h, or when too few arguments are given.
# {0} is replaced with the program name.
USAGE = """Splice PDF files
usage: {0} OUTFILE FILE1 [ROTATION] [PAGES] [ROTATION] [PAGES...] [ FILE2 [=] [ROTATION] [PAGES] [ROTATION] [PAGES...] ...]
{0} OUTFILE FILE1 SPREADFIX
ROTATION can be R0 (no rotation), R90 (90 deg clockwise), R180 or R-180 (180 degrees), R270 or R-90 (270 deg clockwise or 90deg anti-clockwise) and applies to all subsequent pages from the source file. Note that a page can only appear in the output with a single rotation.
PAGES can be a single page (e.g. 7) or a range (e.g. 2-5 7- -11)
SPREADFIX reassembles (reorders and rotates) a scanned booklet from "printer spread" order, which must already have been split into individual pages. (Google 'briss' tool for the splitting.)
To interleave pages from a second (or later) file, for example to reassemble a set of front-side and rear-side scans, specify = after the filename.
Examples:
{0} out.pdf input1.pdf 1-5 10 20-
Create out.pdf from input1.pdf pages 1-5, 10 and 20 onwards
{0} out.pdf input1.pdf 1-5 input2.pdf R90 2-2 R0 3-
Create out.pdf from input1.pdf pages 1-5 followed by input2.pdf page 2, rotated 90deg clockwise, and pages 3 onwards, not rotated.
{0} out.pdf fronts.pdf rears.pdf =
Create out.pdf by interleaving fronts.pdf and rears.pdf, page by page
{0} out.pdf scan1_split.pdf SPREADFIX
Restructure a pre-split scan in "printer spread" order, into normal reading order.
"""

# Exit statuses. The values are the ones this script has always returned.
EXIT_USAGE = 1
EXIT_PAGE_COUNT = 2
EXIT_INVALID_PAGE = 30
EXIT_NOT_A_FILE = 40
EXIT_UNEXPECTED_ARGUMENT = 50

# Argument shapes: a single page ("7"), a rotation ("R90", "R-90") and a page
# range whose ends are both optional ("2-5", "7-", "-11").
SINGLE_PAGE_RE = re.compile(r"^(\d+)$")
ROTATION_RE = re.compile(r"^R(-?\d+)$")
PAGE_RANGE_RE = re.compile(r"^(\d*)-(\d*)$")


class SpliceError(Exception):
    """A command-line problem: the message to print plus the exit status."""

    def __init__(self, message, status):
        super().__init__(message)
        self.status = status


def spreadfix_args(filename, num_pages):
    """Build the argument list that undoes "printer spread" page order.

    The page order (for e.g. a 36 page book) is 35, 33, 31 ... 1, 2, 4, 6 ... 36.
    Because of the printer layout, rotation switches after each page, except
    between 1 and 2.

    Args:
        filename: the pre-split scan; becomes the first returned argument.
        num_pages: number of pages in that file.

    Returns:
        [filename, "R90", "<page>", "R-90", "<page>", ...].

    Raises:
        SpliceError: if num_pages is not a multiple of 4.
    """
    if num_pages % 4:
        raise SpliceError(
            "Source file page count %d must be a multiple of 4" % num_pages,
            EXIT_PAGE_COUNT,
        )
    args = [filename]
    rotation = 90
    for page in range(num_pages - 1, 0, -2):
        args += ["R%d" % rotation, str(page)]
        rotation = -rotation
    # Pages 1 and 2 share a rotation, so undo the flip made after page 1.
    rotation = -rotation
    for page in range(2, num_pages + 1, 2):
        args += ["R%d" % rotation, str(page)]
        rotation = -rotation
    return args


def split_sections(args, is_file=os.path.isfile):
    """Group arguments into sections, each starting with an input file.

    An argument starts a new section when is_file() says it is an existing
    file; every other argument belongs to the section of the file before it.

    Raises:
        SpliceError: if the first argument is not a file.
    """
    sections = []
    for arg in args:
        if is_file(arg):
            sections.append([arg])
        elif sections:
            sections[-1].append(arg)
        else:
            raise SpliceError("%s is not a file" % arg, EXIT_NOT_A_FILE)
    return sections


def expand_range(start_text, end_text, num_pages):
    """Turn the two halves of a "START-END" argument into 0-based page indices.

    An empty START means the first page and an empty END the last page. Both
    ends are clamped to 1..num_pages so a range can never index outside the
    document. END < START yields the pages in reverse order (e.g. "7-3").
    """
    if num_pages < 1:
        return []
    start = max(1, min(num_pages, int(start_text or 1)))
    end = max(1, min(num_pages, int(end_text or num_pages)))
    step = 1 if start <= end else -1
    # Humans number pages starting at 1, but PDF numbers them starting at 0.
    return list(range(start - 1, end - 1 + step, step))


def parse_page_args(args, num_pages):
    """Interpret the arguments that follow one input file.

    Args:
        args: mix specs ("="), rotations ("R90"), pages ("7"), ranges ("2-5").
        num_pages: number of pages in the input file.

    Returns:
        (pages, append): pages is a list of (0-based index, rotation) in
        output order; append is False when "=" asked for interleaving. If no
        page was named, every page is selected with the last rotation given.

    Raises:
        SpliceError: for an out-of-range page number or an unknown argument.
    """
    append = True
    rotation = 0
    pages = []
    for arg in args:
        if arg == "=":
            append = False
            continue
        match = SINGLE_PAGE_RE.match(arg)
        if match:
            page_number = int(match.group(1))
            if page_number < 1 or page_number > num_pages:
                raise SpliceError(
                    "Invalid page number %d: document has %d pages."
                    % (page_number, num_pages),
                    EXIT_INVALID_PAGE,
                )
            print("Page %d, rotation %d" % (page_number, rotation))
            pages.append((page_number - 1, rotation))
            continue
        match = ROTATION_RE.match(arg)
        if match:
            # Applies to every page named after it, until the next rotation.
            rotation = int(match.group(1))
            continue
        match = PAGE_RANGE_RE.match(arg)
        if match:
            indices = expand_range(match.group(1), match.group(2), num_pages)
            if indices:
                print("Pages %d - %d, rotation %d"
                      % (indices[0] + 1, indices[-1] + 1, rotation))
            pages.extend((index, rotation) for index in indices)
            continue
        raise SpliceError("Unexpected argument %s" % arg,
                          EXIT_UNEXPECTED_ARGUMENT)
    if not pages:
        print("No pages specified. Using all pages.")
        pages = [(index, rotation) for index in range(num_pages)]
    return pages, append


def normalise_rotation(rotation):
    """Return rotation as 0, 90, 180 or 270 degrees clockwise.

    Returns None when rotation is not a multiple of 90.
    """
    if rotation % 90:
        return None
    return rotation % 360


def rotate_page(page, rotation):
    """Rotate a PyPDF2 page in place by rotation degrees clockwise."""
    turn = normalise_rotation(rotation)
    if turn is None:
        # Best effort: report it and keep the page unrotated.
        print("Invalid rotation: %d" % rotation)
    elif turn == 270:
        page.rotateCounterClockwise(90)
    elif turn:
        page.rotateClockwise(turn)


def merge_pages(existing, new_pages, append):
    """Return existing combined with new_pages.

    With append, new_pages follow existing. Otherwise they are interleaved:
    the first new page goes after the first existing page, the second after
    the second, and so on; surplus new pages end up at the end.
    """
    merged = list(existing)
    if append:
        merged.extend(new_pages)
    else:
        for offset, page in enumerate(new_pages):
            merged.insert(2 * offset + 1, page)
    return merged


def main(argv=None):
    """Run the tool on argv (default: sys.argv) and return the exit status."""
    if argv is None:
        argv = sys.argv
    # We need at least two parameters - an output file and an input file.
    if len(argv) < 3 or "--help" in argv or "-h" in argv:
        print(USAGE.format(argv[0]))
        return EXIT_USAGE

    outfile_name = argv[1]
    print("Generating %s" % outfile_name)

    # Inputs stay open until the output has been written, because the pages
    # taken from a reader may still be read from its stream at write time.
    open_files = []
    try:
        args = argv[2:]
        # In "spreadfix" mode, generate our own set of arguments.
        if len(argv) == 4 and argv[3].upper() == "SPREADFIX":
            args = spreadfix_args(argv[2], PdfFileReader(argv[2]).numPages)

        output_pages = []
        for section in split_sections(args):
            filename = section[0]
            print("Input file: %s" % filename)
            infile = open(filename, "rb")
            open_files.append(infile)
            source_pdf = PdfFileReader(infile)

            selection, append = parse_page_args(section[1:],
                                                source_pdf.numPages)
            print("Mode: " + ("Append" if append else "Interleave"))
            chosen = []
            for index, rotation in selection:
                page = source_pdf.getPage(index)
                rotate_page(page, rotation)
                chosen.append(page)
            output_pages = merge_pages(output_pages, chosen, append)

        # We have finished reading. Now compile a PDF from the list of pages.
        output_pdf = PdfFileWriter()
        for page in output_pages:
            output_pdf.addPage(page)
        with open(outfile_name, "wb") as out:
            output_pdf.write(out)
    except SpliceError as error:
        print(error)
        return error.status
    finally:
        for handle in open_files:
            handle.close()
    return 0


if __name__ == "__main__":
    sys.exit(main())