-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathadd-letterhead.sh
executable file
·338 lines (254 loc) · 11.3 KB
/
add-letterhead.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
#!/bin/bash
# This script adds extra content in the background (typically a letterhead or watermark)
# to all pages of a PDF document.
#
# The extra content comes from a second PDF file. The path to that second PDF file is hard-coded
# in this script, see LETTERHEAD_FILENAME. You will also need to adjust MAGIC_STRING_IN_LETTERHEAD.
#
# The resulting PDF either replaces the original file, or is placed next to it, see FINAL_FILENAME_SUFFIX.
#
# You need the pdftk tool installed on your system, which is no longer in the Ubuntu/Debian package repository.
# If you install the corresponding Ubuntu Snap package, see COPY_LETTERHEAD_TO_LOCAL for caveats.
#
# Alternatively, this script could use tool qpdf version 8.4 or later, see options --overlay and --underlay .
# But that is not implemented yet.
#
# Copyright (c) 2016-2025 R. Diez - Licensed under the GNU AGPLv3
# Strict mode: stop on a failing command, on the use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# This script's filename only, without any path components.
SCRIPT_NAME="${BASH_SOURCE[0]}"
SCRIPT_NAME="${SCRIPT_NAME##*/}"
readonly SCRIPT_NAME

# The same filename with its extension (if any) removed.
readonly SCRIPT_BASENAME="${SCRIPT_NAME%.*}"

# At the moment this version number is only used for documentation purposes:
#   declare -r SCRIPT_VERSION="2.03"

# Function return codes that stand for "true" and "false" (0 means success in the shell).
declare -r -i BOOLEAN_TRUE=0 BOOLEAN_FALSE=1

# Specify here your own PDF file with the background contents.
readonly LETTERHEAD_FILENAME="/full/path/to/letterhead.pdf"

# If this suffix is not empty, the original PDF will be left untouched, and a new one
# with the letterhead will be created next to it, but with the given suffix.
# For example, file.pdf -> file-with-letterhead.pdf
readonly FINAL_FILENAME_SUFFIX="-with-letterhead"
# On my Ubuntu 18.04.4 system, I installed pdftk as a Snap package, and then this
# pdftk version 2.02-4 could not open the letterhead file if it was located on a mounted network share.
# The error was:
# Error: Failed to open background PDF file
# It turns out that Snap packages are "confined" by default. The /tmp directory is also prohibited.
# Symbolic links that point to other filesystems do not work either.
# Rather than playing with Snap system permissions, I decided to add an option to copy the file to
# a local directory beforehand. This local directory must be under $HOME.
#
# Note that, if you use this option, you can only run one instance of this script at a time.
# Any second, concurrent instance will fail to acquire a lock file created next to
# the letterhead copy. Because this script is mainly designed for interactive usage,
# this limitation should not be important.
# If it is, consider removing option --nonblock from flock below. But keep in mind then
# that this script might hang for a long time, or even forever, if something is not quite right.
# Depending on how you are using this script, you may not see any error message.
# Set to 'true' to work from a copy of the letterhead placed under $HOME.
readonly COPY_LETTERHEAD_TO_LOCAL=false

# Where that copy is written; the name includes this script's base name.
readonly LOCAL_LETTERHEAD_FILENAME="${HOME}/letterhead-file-for-${SCRIPT_BASENAME}.pdf"
# Specify here the magic string to be found inside your letterhead PDF file.
#
# This script checks for the magic string in all 3 files: it should not be in the original document,
# but it should be in the letterhead, and in the resulting document. Therefore, it should be pretty difficult
# to make a mistake when the magic string is in place.
#
# - If your magic string is text:
#
# Use a tool like pdftotext in order to extract text from the letterhead PDF.
#
# Note that searching for a binary string is much faster, because tool 'grep' does not need to
# process the complex PDF file format like tool 'pdfgrep' does.
#
# - If your magic string is binary:
#
# In order to find a good magic string, manually use pdftk to add the letterhead to a document.
# Then use a text editor like Emacs in order to search for a suitable PDF data stream
# (look for 'stream' and 'endstream') that is present in both the letterhead PDF
# and the final document, but not in the original document.
# Then take the first bytes and place them in the variable below.
# For example, such a magic stream could look like this:
# declare -r MAGIC_STRING_IN_LETTERHEAD=$'abc\101\102\103def'
# Instead of using this convoluted method, it would be better to use PDF tags,
# but I need to do more research on this first.
#
# 'true': the magic string is looked up as text with pdfgrep.
# 'false': the magic string is looked up as raw bytes with grep.
readonly IS_MAGIC_STRING_TEXT=true

# The string that identifies the letterhead.
readonly MAGIC_STRING_IN_LETTERHEAD="TextThatIsOnlyInLetterhead"
# Prints an empty line followed by an error message on stderr, then terminates
# the script with exit code 1.
# Arguments: all arguments are joined into the text of the error message.
abort ()
{
  printf '\n%s\n' "Error in script \"$0\": $*" >&2
  exit 1
}
# Tells whether the shell can resolve the given command name.
# Arguments: $1 - name of the tool to look for
# Returns:   $BOOLEAN_TRUE if 'command -v' finds it, $BOOLEAN_FALSE otherwise
# Outputs:   nothing, the output of 'command -v' is discarded
is_tool_installed ()
{
  command -v "$1" >/dev/null 2>&1 || return "$BOOLEAN_FALSE"
  return "$BOOLEAN_TRUE"
}
# Aborts the script with an explanatory message if a required tool is missing.
# Arguments: $1 - name of the tool, as typed on the command line
#            $2 - Ubuntu/Debian package that provides it; pass "" to omit the hint
# Returns:   normally if the tool is available, otherwise does not return (calls abort)
verify_tool_is_installed ()
{
  local -r tool_name="$1"
  local -r package_name="$2"

  if ! is_tool_installed "$tool_name"; then
    local message="Tool '$tool_name' is not installed. You may have to install it with your Operating System's package manager."

    # The package hint is only appended when the caller supplied one.
    if [[ -n $package_name ]]; then
      message+=" For example, under Ubuntu/Debian the corresponding package is called \"$package_name\"."
    fi

    abort "$message"
  fi
}
# Exactly one argument is accepted: the PDF file that will receive the letterhead.
if [[ $# -ne 1 ]]; then
  abort "Invalid number of command-line arguments. See this script's source code for more information."
fi

readonly PDF_FILENAME="$1"
readonly PDFTK_TOOLNAME="pdftk"
readonly PDFGREP_TOOLNAME="pdfgrep"

# pdftk is always required. No package name is suggested because
# pdftk is no longer in the Ubuntu/Debian repositories.
verify_tool_is_installed "$PDFTK_TOOLNAME" ""

# pdfgrep is only required when the magic string is searched for as text.
if $IS_MAGIC_STRING_TEXT; then
  verify_tool_is_installed "$PDFGREP_TOOLNAME" "pdfgrep"
fi
# Work out the file extension, check that it is "pdf" (in any letter case),
# and derive the name of the file that will receive the result.

declare -r EXTENSION="PDF"
declare -r -i EXTENSION_LEN="${#EXTENSION}"

# The extension is the text after the last dot. A name without any dot has no
# extension at all: without this test, a file called just "pdf" would be taken
# to have the extension "pdf", and stripping ".pdf" from it would then fail.
if [[ $PDF_FILENAME == *.* ]]; then
  declare -r FILE_EXTENSION="${PDF_FILENAME##*.}"
else
  declare -r FILE_EXTENSION=""
fi

declare -r FILE_EXTENSION_UPPERCASE="${FILE_EXTENSION^^}"

if [[ $FILE_EXTENSION_UPPERCASE != "$EXTENSION" ]]; then
  abort "This script operates only on .pdf files"
fi

# The extension is known to follow the last dot, so removing the shortest
# ".*" suffix removes exactly the dot and the extension.
declare -r PDF_FILENAME_WITHOUT_EXT="${PDF_FILENAME%.*}"

# Look at the last path component only, so that "dir/.pdf" is rejected like ".pdf".
if [[ ${PDF_FILENAME_WITHOUT_EXT##*/} = "" ]]; then
  abort "The PDF filename contains nothing but the file extension."
fi

# An empty suffix means that the original file is replaced. Otherwise, the suffix
# is inserted before the extension, which keeps its original letter case.
if [[ $FINAL_FILENAME_SUFFIX = "" ]]; then
  declare -r FINAL_FILENAME="$PDF_FILENAME"
else
  declare -r FINAL_FILENAME="${PDF_FILENAME_WITHOUT_EXT}${FINAL_FILENAME_SUFFIX}.${FILE_EXTENSION}"
fi
# Sanity check: the letterhead file must contain the magic string.
#
# The search tool exits with 0 on a match, 1 on no match and 2 on error, so its
# exit code is captured with '||' instead of letting errexit stop the script.
# The '--' marks the end of the options: a magic string that starts with '-'
# (quite possible with a binary one) must not be parsed as an option.
GREP_EXIT_CODE=0

if $IS_MAGIC_STRING_TEXT; then
  "$PDFGREP_TOOLNAME" --quiet --fixed-strings -- "$MAGIC_STRING_IN_LETTERHEAD" "$LETTERHEAD_FILENAME" || GREP_EXIT_CODE="$?"
else
  grep --quiet --fixed-strings -- "$MAGIC_STRING_IN_LETTERHEAD" "$LETTERHEAD_FILENAME" || GREP_EXIT_CODE="$?"
fi

case "$GREP_EXIT_CODE" in
  0) ;;  # Nothing to do here.
  1) abort "The letterhead file does not contain the magic string.";;
  2) exit "$GREP_EXIT_CODE";;  # grep has printed an error message already.
  *) abort "Unexpected exit code $GREP_EXIT_CODE from grep.";;
esac
# Sanity check: the document to process must not contain the magic string yet,
# as finding it there means that the letterhead has already been added.
#
# The search tool exits with 0 on a match, 1 on no match and 2 on error, so its
# exit code is captured with '||' instead of letting errexit stop the script.
# The '--' marks the end of the options: neither a magic string nor a
# user-supplied filename that starts with '-' may be parsed as an option.
GREP_EXIT_CODE=0

if $IS_MAGIC_STRING_TEXT; then
  "$PDFGREP_TOOLNAME" --quiet --fixed-strings -- "$MAGIC_STRING_IN_LETTERHEAD" "$PDF_FILENAME" || GREP_EXIT_CODE="$?"
else
  grep --quiet --fixed-strings -- "$MAGIC_STRING_IN_LETTERHEAD" "$PDF_FILENAME" || GREP_EXIT_CODE="$?"
fi

case "$GREP_EXIT_CODE" in
  0) abort "The given file already has the letterhead.";;
  1) ;;  # Nothing to do here.
  2) exit "$GREP_EXIT_CODE";;  # grep has printed an error message already.
  *) abort "Unexpected exit code $GREP_EXIT_CODE from grep.";;
esac
# Decide which letterhead file is used from here on, LETTERHEAD_FILENAME_TO_USE:
# either a fresh copy at LOCAL_LETTERHEAD_FILENAME, made while holding an exclusive lock,
# or the configured LETTERHEAD_FILENAME itself.
if $COPY_LETTERHEAD_TO_LOCAL; then
echo "Copying $LETTERHEAD_FILENAME to $LOCAL_LETTERHEAD_FILENAME ..."
# Create a lock file in order to prevent 2 instances of this script overwriting
# the LOCAL_LETTERHEAD_FILENAME file at the same time.
declare -r LOCK_FILENAME="$LOCAL_LETTERHEAD_FILENAME.lock"
# Debug output, disabled. Change 'false' to 'true' in order to see it.
if false; then
echo "Creating lock file '$LOCK_FILENAME'..."
fi
# Open the lock file for writing on a file descriptor that Bash chooses and stores in LOCK_FILE_FD.
# errexit is switched off around the command so that a failure can be reported
# with our own error message below.
set +o errexit
exec {LOCK_FILE_FD}>"$LOCK_FILENAME"
EXIT_CODE="$?"
set -o errexit
if (( EXIT_CODE != 0 )); then
abort "Cannot create or write to lock file \"$LOCK_FILENAME\"."
fi
# We are using an advisory lock here, not a mandatory one, which means that a process
# can choose to ignore it. We always check whether the file is already locked,
# so this type of lock is fine for our purposes.
# With --nonblock, flock fails straight away instead of waiting if the lock is already taken.
set +o errexit
flock --exclusive --nonblock "$LOCK_FILE_FD"
EXIT_CODE="$?"
set -o errexit
if (( EXIT_CODE != 0 )); then
abort "Cannot lock file \"$LOCK_FILENAME\". Is there another instance of this script ($SCRIPT_NAME) already running?"
fi
# The descriptor is not closed in this section, so the lock is still held after the copy.
cp -- "$LETTERHEAD_FILENAME" "$LOCAL_LETTERHEAD_FILENAME"
declare -r LETTERHEAD_FILENAME_TO_USE="$LOCAL_LETTERHEAD_FILENAME"
else
# No local copy requested: use the configured letterhead file directly.
declare -r LETTERHEAD_FILENAME_TO_USE="$LETTERHEAD_FILENAME"
fi
# The pdftk installed as a Snap package cannot access the /tmp directory either.
# It is probably a good idea anyway to create the temporary file next to the output file.
# Choose the file that pdftk will write to. The final file is only created
# or replaced after the result has been checked.
declare -r USE_TMP_FILE_IN_TMP_DIR=false

if $USE_TMP_FILE_IN_TMP_DIR; then
  TMP_FILENAME="$(mktemp --tmpdir "tmp.$SCRIPT_BASENAME.doc-with-letterhead.XXXXXXXXXX.pdf")"

  # Debug output, disabled. Change 'false' to 'true' in order to see it.
  if false; then
    echo "TMP_FILENAME: $TMP_FILENAME"
  fi
else
  # Create the temporary file next to the document being processed.
  TMP_FILENAME="$PDF_FILENAME.$SCRIPT_BASENAME-in-progress"
fi

# Try to delete the temporary file on exit. It is no hard guarantee,
# but it usually works. This is done for both kinds of temporary file, so that
# a failed run does not leave a half-finished "-in-progress" file next to
# the document. If the temporary file has been moved to its final name
# by then, 'rm -f' finds nothing to delete and succeeds.
printf -v TRAP_DELETE_CMD "rm -f -- %q" "$TMP_FILENAME"
# shellcheck disable=SC2064
trap "$TRAP_DELETE_CMD" EXIT
# pdftk operation that combines the two files:
#   background = place the other PDF in the background (underneath)
#   stamp      = place the other PDF in the foreground (on top)
declare -r PDFTK_OPERATION="background"

# The complete pdftk command line, one array element per argument.
declare -a PDFTK_ARGV=(
  "$PDFTK_TOOLNAME"
  "$PDF_FILENAME"
  "$PDFTK_OPERATION"
  "$LETTERHEAD_FILENAME_TO_USE"
  output
  "$TMP_FILENAME"
)

# Show the command in shell-quoted form (without the trailing separator), then run it.
printf -v CMD '%q ' "${PDFTK_ARGV[@]}"
CMD="${CMD% }"
echo "$CMD"
"${PDFTK_ARGV[@]}"
# Release the lock. This only applies if a local copy of the letterhead was requested.
# pdftk has already run by now, so the local copy is no longer needed by this instance.
if $COPY_LETTERHEAD_TO_LOCAL; then
# Close the lock file, which releases the lock we have on it.
# LOCK_FILE_FD holds the number of the file descriptor to close.
exec {LOCK_FILE_FD}>&-
# Delete the lock file, which is actually an optional step, as this script will run fine
# next time around if the file already exists.
# The lock file survives if you kill the script with a signal like Ctrl+C, but that is a good thing,
# because the presence of the lock file will probably remind the user that the background process
# was abruptly interrupted.
# There is the usual trick of deleting the file upon creation, in order to make sure that it is
# always deleted, even if the process gets killed. However, it is not completely safe,
# as the process could get killed right after creating the file but before deleting it.
# Furthermore, it is confusing, for the file still exists but it is not visible. Finally, I am not sure
# whether flock will work properly if a second process attempts to create a new lock file with
# the same name as the deleted, hidden one.
# NOTE(review): the file is deleted after the lock has been released, so another instance
# could open and lock this same file in between, and would then hold a lock on a file
# that no longer has a name. TODO: confirm whether this window matters in practice.
rm -- "$LOCK_FILENAME"
fi
# Sanity check: the file that pdftk generated must contain the magic string,
# which shows that the letterhead has really been added.
#
# The search tool exits with 0 on a match, 1 on no match and 2 on error, so its
# exit code is captured with '||' instead of letting errexit stop the script.
# The '--' marks the end of the options: neither a magic string nor a filename
# that starts with '-' may be parsed as an option.
GREP_EXIT_CODE=0

if $IS_MAGIC_STRING_TEXT; then
  "$PDFGREP_TOOLNAME" --quiet --fixed-strings -- "$MAGIC_STRING_IN_LETTERHEAD" "$TMP_FILENAME" || GREP_EXIT_CODE="$?"
else
  grep --quiet --fixed-strings -- "$MAGIC_STRING_IN_LETTERHEAD" "$TMP_FILENAME" || GREP_EXIT_CODE="$?"
fi

case "$GREP_EXIT_CODE" in
  0) ;;  # Nothing to do here.
  1) abort "The generated PDF file does not contain the letterhead magic string.";;
  2) exit "$GREP_EXIT_CODE";;  # grep has printed an error message already.
  *) abort "Unexpected exit code $GREP_EXIT_CODE from grep.";;
esac
# The generated file has passed the check, so put it in its final place.
if ! $USE_TMP_FILE_IN_TMP_DIR; then
  # The temporary file sits next to the document: rename it.
  mv -- "$TMP_FILENAME" "$FINAL_FILENAME"
else
  # Copy instead of moving, so that the EXIT trap which deletes the file
  # in the temporary directory does not have to be cancelled.
  cp -- "$TMP_FILENAME" "$FINAL_FILENAME"
fi