forked from mozilla/pdf.js
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Joscha Legewie
committed
Mar 31, 2014
1 parent
ab4bba0
commit 4815161
Showing
17 changed files
with
506 additions
and
95 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
## "Extract" overview | ||
|
||
This example is a minimal application that extracts annotations from PDF files using pdf.js. | ||
|
||
## Getting started | ||
|
||
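Serve the pdf.js folder over HTTP (for example, run `python -m SimpleHTTPServer 8888` inside it) and point your | ||
browser to `http://localhost:8888/examples/extract/index.html`. Voilà. Take a peek at `extract.js` to see | ||
how to make basic calls to `pdf.js`. | ||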
|
||
|
||
## Additional resources | ||
|
||
+ [GNUpdf - Introduction to PDF](http://gnupdf.org/Introduction_to_PDF) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ | ||
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ | ||
|
||
'use strict'; | ||
|
||
// example to extract annotations | ||
// cd to pdf.js folder | ||
// python -m SimpleHTTPServer 8888 | ||
// http://localhost:8888/examples/extract/index.html | ||
|
||
// Annotation types this example extracts; annotations of any other type are
// filtered out before rendering/logging.
const SUPPORTED_ANNOTS = ["Text", "Highlight", "Underline"];

// Fetch the PDF document from the URL using promises
PDFJS.getDocument('/examples/extract/pdf/test.pdf').then(function(pdf) {

  var numPages = pdf.numPages;
  // PDF data: pdf.pdfInfo, pdf.isEncrypted(), pdf.getMetadata().then(function(meta) {console.log('Metadata: ' + JSON.stringify(meta));});

  // Rejection handler shared by every pdf.getPage() call.
  function logPageError(err) {
    console.log('error getting the page:' + err);
  }

  // Continue the page-by-page walk: load the page after `page` and hand it to
  // extract(). Does nothing once the last page has been processed.
  function extractNextPage(page) {
    if (numPages > page.pageNumber) {
      pdf.getPage(page.pageNumber + 1).then(extract, logPageError);
    }
  }

  // Join the markup fragments of one annotation into a single string, expand
  // Latin ligatures (U+FB00..U+FB06) and map typographic quotes and the en
  // dash to plain ASCII.
  // NOTE(review): `anno.markup` is presumably an array of text fragments
  // supplied by this fork's annotation data; only `.join(' ')` is relied on.
  function normalizeMarkup(fragments) {
    return fragments.join(' ').trim()
      .replace(/\ufb00/g, 'ff').replace(/\ufb01/g, 'fi').replace(/\ufb02/g, 'fl')
      .replace(/\ufb03/g, 'ffi').replace(/\ufb04/g, 'ffl').replace(/\ufb05/g, 'ft')
      .replace(/\ufb06/g, 'st')
      .replace(/\u201D/g, '"').replace(/\u201C/g, '"').replace(/\u2019/g, "'")
      .replace(/\u2013/g, "-");
  }

  // Log the markup text and then the content of a single annotation, skipping
  // whichever of the two is undefined. Working per annotation keeps the two
  // values paired even when some annotations carry only one of them.
  function logAnnotation(anno) {
    if (typeof anno.markup !== 'undefined') {
      console.log(normalizeMarkup(anno.markup));
    }
    if (typeof anno.content !== 'undefined') {
      console.log(anno.content);
    }
  }

  // Handle one page: read its annotations, and if any supported ones exist,
  // render the page and log them. Always moves on to the next page afterwards,
  // including when reading annotations or rendering fails.
  function extract(page) {
    var scale = 1;
    var viewport = page.getViewport(scale);
    // Prepare canvas using PDF page dimensions
    var canvas = document.getElementById('the-canvas');
    var context = canvas.getContext('2d');
    canvas.height = viewport.height;
    canvas.width = viewport.width;
    // Render PDF page into canvas context
    var renderContext = {
      canvasContext: context,
      viewport: viewport
    };

    // get annotations
    page.getAnnotations().then(function extractAnno(annos) {
      console.log('Page: '+page.pageNumber + " ("+annos.length+" annotations " + JSON.stringify(annos.map(function(a) {return a.type;})) + ")");
      // filter for supported annotations
      var annotations = annos.filter(function(anno) {
        return SUPPORTED_ANNOTS.indexOf(anno.type) >= 0;
      });
      // skip page if there is nothing interesting
      if (annotations.length === 0) {
        extractNextPage(page);
        return;
      }
      // render page
      page.render(renderContext).then(function() {
        console.log('Page: '+page.pageNumber + " (page rendered)");
        // show annotations
        annotations.forEach(logAnnotation);
        // render next page
        extractNextPage(page);
      },
      // error handler for rendering
      function(error) {
        console.log('Error rendering the page: ', error);
        // continue with next page
        extractNextPage(page);
      });
    },
    // error handler for reading the annotations
    function(error) {
      console.log(error);
      // continue with next page
      extractNextPage(page);
    });
  }

  // Using promise to fetch the first page; extract() chains to the rest
  pdf.getPage(1).then(extract, logPageError);

},
function(err) {
  console.log('unable to open pdf: ' + err);
});
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
<!doctype html>
<html>

<head>
  <meta charset="utf-8">
  <title>pdf.js annotation extraction example</title>
  <!-- In production, only one script (pdf.js) is necessary -->
  <!-- In production, change the content of PDFJS.workerSrc below -->
  <script type="text/javascript" src="../../src/network.js"></script>
  <script type="text/javascript" src="../../src/chunked_stream.js"></script>
  <script type="text/javascript" src="../../src/pdf_manager.js"></script>
  <script type="text/javascript" src="../../src/core.js"></script>
  <script type="text/javascript" src="../../src/util.js"></script>
  <script type="text/javascript" src="../../src/api.js"></script>
  <script type="text/javascript" src="../../src/canvas.js"></script>
  <script type="text/javascript" src="../../src/obj.js"></script>
  <script type="text/javascript" src="../../src/function.js"></script>
  <script type="text/javascript" src="../../src/charsets.js"></script>
  <script type="text/javascript" src="../../src/cidmaps.js"></script>
  <script type="text/javascript" src="../../src/colorspace.js"></script>
  <script type="text/javascript" src="../../src/crypto.js"></script>
  <script type="text/javascript" src="../../src/evaluator.js"></script>
  <script type="text/javascript" src="../../src/fonts.js"></script>
  <script type="text/javascript" src="../../src/glyphlist.js"></script>
  <script type="text/javascript" src="../../src/image.js"></script>
  <script type="text/javascript" src="../../src/metrics.js"></script>
  <script type="text/javascript" src="../../src/parser.js"></script>
  <script type="text/javascript" src="../../src/pattern.js"></script>
  <script type="text/javascript" src="../../src/stream.js"></script>
  <script type="text/javascript" src="../../src/worker.js"></script>
  <script type="text/javascript" src="../../external/jpgjs/jpg.js"></script>
  <script type="text/javascript" src="../../src/jpx.js"></script>
  <script type="text/javascript" src="../../src/jbig2.js"></script>

  <script type="text/javascript">
    // Specify the main script used to create a new PDF.JS web worker.
    // In production, change this to point to the combined `pdf.js` file.
    PDFJS.workerSrc = '../../src/worker_loader.js';
  </script>
  <!-- extract.js looks up #the-canvas only after the PDF has loaded
       asynchronously, so it can stay in the head -->
  <script type="text/javascript" src="extract.js"></script>
</head>

<body>
  <!-- canvas is not a void element: it needs an explicit end tag -->
  <canvas id="the-canvas" style="border:1px solid black;"></canvas>
</body>

</html>
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.