forked from digital-preservation/pronom-research-week
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Issue digital-preservation#12- Added signature file and notes for PDF…
… Portfolio files
- Loading branch information
Showing
2 changed files
with
105 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
Format Name: | ||
PDF Portfolio | ||
|
||
Version Number: | ||
1.7 | ||
|
||
Extension: | ||
pdf | ||
|
||
MIME-Type: | ||
application/pdf | ||
|
||
Description: | ||
PDF Portfolio files are PDFs using a feature added to the PDF specification in version 1.7 called "Collections". Collections offer a way to embed file attachments that are related in structure or content, as well as information that a reader application should use to determine how to present these files. A simple metadata schema can also be defined for the collection, allowing metadata to be provided for each item in the collection. Additionally, folder structure can be provided for the collection items, allowing reader applications to present the attachments with a tree-view. This makes the PDF Portfolio a convenient format to export mailboxes as mail folder structure can be preserved, along with standard email metadata such as "To", "From" and "Subject" fields. | ||
|
||
PDF Portfolio files can specify a "default view", which is a file in the collection that a reader application should display when opening the Portfolio, but it is a container format more than a document format. | ||
|
||
|
||
Format Type: | ||
Container | ||
|
||
Vendor: | ||
Adobe | ||
https://www.adobe.com | ||
|
||
Example File Sources: | ||
Preservica customers, examples forwarded by direct email to the PRONOM team | ||
|
||
|
||
File Format identification signatures: | ||
See signature-file.xml with dummy PUID PRS-fmt/1. | ||
|
||
This signature extends the standard PDF 1.7 signature with two additional byte sequences with no BOF or EOF offset requirements. | ||
|
||
The first looks for the string "/Collection ", which should be found in the Catalog object of a PDF that makes use of the Collection feature. Technically, this could be extended to "/Collection {M} {N} R", since this is an indirect object reference to the Collection itself. However, M and N are string representations of numbers, there is technically no defined upper limit to the number of objects in a PDF document, and we cannot use a regex-style expression such as "[30:39]+", so the permutations would quickly become very complex. | ||
|
||
The second is looking for the string "<</CI<<", which is the dictionary entry for a Collection Item (which is itself a dictionary) that forms part of the Collection. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<FFSignatureFile xmlns="http://www.nationalarchives.gov.uk/pronom/SignatureFile" Version="1" DateCreated="2019-11-19T13:58:55+00:00"> | ||
<!-- Single internal signature (ID 1) made of four byte sequences: one referenced to BOF, two with no Reference attribute, one referenced to EOF. -->
<InternalSignatureCollection> | ||
<InternalSignature ID="1"> | ||
<!-- Reference="BOFoffset", offset 0: hex 25 50 44 46 2D 31 2E 37 is ASCII "%PDF-1.7" (8 bytes). -->
<ByteSequence Reference="BOFoffset" Endianness=""> | ||
<SubSequence Position="1" SubSeqMinOffset="0" SubSeqMaxOffset="0" MinFragLength="0"> | ||
<Sequence>255044462D312E37</Sequence> | ||
<!-- Shift table: DefaultShift is sequence length + 1; each listed byte's shift is its distance from the end of the sequence. -->
<DefaultShift>9</DefaultShift> | ||
<Shift Byte="25">8</Shift> | ||
<Shift Byte="2D">4</Shift> | ||
<Shift Byte="2E">2</Shift> | ||
<Shift Byte="31">3</Shift> | ||
<Shift Byte="37">1</Shift> | ||
<Shift Byte="44">6</Shift> | ||
<Shift Byte="46">5</Shift> | ||
<Shift Byte="50">7</Shift> | ||
</SubSequence> | ||
</ByteSequence> | ||
<!-- No Reference attribute on this sequence: hex decodes to ASCII "/Collection " (12 bytes, including the trailing space). -->
<ByteSequence Endianness=""> | ||
<SubSequence Position="1" SubSeqMinOffset="0" SubSeqMaxOffset="0" MinFragLength="0"> | ||
<Sequence>2F436F6C6C656374696F6E20</Sequence> | ||
<!-- For repeated bytes (6C "l", 6F "o") the shift reflects the last occurrence in the sequence. -->
<DefaultShift>13</DefaultShift> | ||
<Shift Byte="20">1</Shift> | ||
<Shift Byte="2F">12</Shift> | ||
<Shift Byte="43">11</Shift> | ||
<Shift Byte="63">6</Shift> | ||
<Shift Byte="65">7</Shift> | ||
<Shift Byte="69">4</Shift> | ||
<Shift Byte="6C">8</Shift> | ||
<Shift Byte="6F">3</Shift> | ||
<Shift Byte="6E">2</Shift> | ||
<Shift Byte="74">5</Shift> | ||
</SubSequence> | ||
</ByteSequence> | ||
<!-- No Reference attribute on this sequence: hex 3C 3C 2F 43 49 3C 3C is ASCII "<</CI<<" (7 bytes). -->
<ByteSequence Endianness=""> | ||
<SubSequence Position="1" SubSeqMinOffset="0" SubSeqMaxOffset="0" MinFragLength="0"> | ||
<Sequence>3C3C2F43493C3C</Sequence> | ||
<DefaultShift>8</DefaultShift> | ||
<Shift Byte="2F">5</Shift> | ||
<Shift Byte="3C">1</Shift> | ||
<Shift Byte="43">4</Shift> | ||
<Shift Byte="49">3</Shift> | ||
</SubSequence> | ||
</ByteSequence> | ||
<!-- Reference="EOFoffset", offset 0: hex 25 25 45 4F is ASCII "%%EO"; shifts are negative for this EOF-referenced sequence. -->
<ByteSequence Reference="EOFoffset" Endianness=""> | ||
<SubSequence Position="1" SubSeqMinOffset="0" SubSeqMaxOffset="0" MinFragLength="0"> | ||
<Sequence>2525454F</Sequence> | ||
<DefaultShift>-5</DefaultShift> | ||
<Shift Byte="25">-1</Shift> | ||
<Shift Byte="45">-3</Shift> | ||
<Shift Byte="4F">-4</Shift> | ||
<!-- Alternative right fragments, all at Position 1: 46 ("F") alone, or followed by 0A, 0D, 0D 0A, or 0D 00. -->
<RightFragment Position="1" MinOffset="0" MaxOffset="0">46</RightFragment> | ||
<RightFragment Position="1" MinOffset="0" MaxOffset="0">460A</RightFragment> | ||
<RightFragment Position="1" MinOffset="0" MaxOffset="0">460D</RightFragment> | ||
<RightFragment Position="1" MinOffset="0" MaxOffset="0">460D0A</RightFragment> | ||
<RightFragment Position="1" MinOffset="0" MaxOffset="0">460D00</RightFragment> | ||
</SubSequence> | ||
</ByteSequence> | ||
</InternalSignature> | ||
</InternalSignatureCollection> | ||
<!-- Declares the single format identified by this file and links it to internal signature 1. -->
<FileFormatCollection> | ||
<!-- MIMEType is application/pdf: a PDF Portfolio is a PDF 1.7 file, and the accompanying notes list application/pdf as its MIME type. -->
<FileFormat ID="1" Name="PDF Portfolio" PUID="PRS-fmt/1" Version="1.7" MIMEType="application/pdf"> | ||
<InternalSignatureID>1</InternalSignatureID> | ||
<Extension>pdf</Extension> | ||
<!-- NOTE(review): this value is a PUID-style string, while the element name suggests a FileFormat ID; confirm what the consuming tool expects here. -->
<HasPriorityOverFileFormatID>fmt/276</HasPriorityOverFileFormatID> | ||
</FileFormat> | ||
</FileFormatCollection> | ||
</FFSignatureFile> |