Skip to content

Commit e9cdf6b

Browse files
committed
Merge pull request #4654 from yurydelendik/nodeexampe
Basic node.js example that demonstrates use of the pdf.combined.js file
2 parents 0845f90 + b8344a5 commit e9cdf6b

File tree

2 files changed

+183
-0
lines changed

2 files changed

+183
-0
lines changed

examples/node/domparsermock.js

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2+
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
3+
/* Any copyright is dedicated to the Public Domain.
4+
* http://creativecommons.org/publicdomain/zero/1.0/ */
5+
6+
// Dummy XML Parser
7+
8+
/**
 * Minimal stand-in for a DOM node.
 *
 * Element nodes are expected to receive a `childNodes` array from whoever
 * builds the tree; text nodes ('#text') keep their string in `nodeValue` and
 * have no `childNodes` property at all.
 *
 * @param {string} nodeName - Tag name, or '#text' for text nodes.
 * @param {string} [nodeValue] - Text carried by a '#text' node.
 */
function DOMNodeMock(nodeName, nodeValue) {
  this.nodeName = nodeName;
  this.nodeValue = nodeValue;
  // Defined through defineProperty so the back-reference to the parent is
  // non-enumerable; it stays writable so the tree builder can assign it.
  Object.defineProperty(this, 'parentNode', {value: null, writable: true});
}
DOMNodeMock.prototype = {
  /**
   * First child node, or null when the node has no children (including text
   * nodes, which have no `childNodes` array).
   */
  get firstChild() {
    var children = this.childNodes;
    return (children && children[0]) || null;
  },
  /**
   * Node that follows this one in its parent's `childNodes`, or null when
   * the node is detached, is the last child, or is not listed by its parent.
   */
  get nextSibling() {
    var parent = this.parentNode;
    if (!parent || !parent.childNodes) {
      return null;
    }
    var index = parent.childNodes.indexOf(this);
    if (index < 0) {
      // Without this guard `index + 1` would be 0 and the parent's first
      // child would be reported as the sibling.
      return null;
    }
    return parent.childNodes[index + 1] || null;
  },
  /**
   * Concatenated text of this node and all of its descendants. For a node
   * without `childNodes` this is its own `nodeValue` (or '').
   */
  get textContent() {
    if (!this.childNodes) {
      return this.nodeValue || '';
    }
    return this.childNodes.map(function (child) {
      return child.textContent;
    }).join('');
  },
  /**
   * @returns {boolean} true when the node has at least one child.
   */
  hasChildNodes: function () {
    return !!this.childNodes && this.childNodes.length > 0;
  }
};
33+
34+
/**
 * Replaces XML character references and the five predefined entities
 * (amp, lt, gt, quot, apos) in `text` with the characters they stand for.
 * Unknown named entities and numeric references outside the Unicode range
 * are left untouched.
 *
 * @param {string} text - Raw text taken from an XML document.
 * @returns {string} The decoded text.
 */
function decodeXML(text) {
  // Fast path: nothing to decode.
  if (text.indexOf('&') < 0) {
    return text;
  }
  return text.replace(/&(#(x[0-9a-f]+|\d+)|\w+);/gi, function (all, entityName, number) {
    if (number) {
      // The regexp is case-insensitive, so the hex marker may be 'x' or 'X'.
      var isHex = number[0] === 'x' || number[0] === 'X';
      var codePoint = isHex ? parseInt(number.substring(1), 16) :
                              parseInt(number, 10);
      if (codePoint > 0x10FFFF) {
        // Not a Unicode code point; keep the reference as written.
        return all;
      }
      if (codePoint > 0xFFFF) {
        // String.fromCharCode works on UTF-16 code units, so characters
        // outside the BMP have to be emitted as a surrogate pair.
        var offset = codePoint - 0x10000;
        return String.fromCharCode(0xD800 + (offset >> 10),
                                   0xDC00 + (offset & 0x3FF));
      }
      return String.fromCharCode(codePoint);
    }
    switch (entityName) {
      case 'amp':
        return '&';
      case 'lt':
        return '<';
      case 'gt':
        return '>';
      case 'quot':
        return '\"';
      case 'apos':
        return '\'';
    }
    // Unknown entity: emit it unchanged.
    return '&' + entityName + ';';
  });
}
57+
58+
/**
 * Very small, regexp-based replacement for the browser DOMParser. It builds
 * a tree of DOMNodeMock objects and understands only elements, text, CDATA
 * sections, comments and processing instructions. Attributes are skipped,
 * closing tag names are not checked against the opening tag, and attribute
 * values containing '<' or '>' are not supported.
 */
function DOMParserMock() {}
DOMParserMock.prototype = {
  /**
   * Parses an XML string.
   *
   * The document is reduced in place: every parsed piece is stored in `nodes`
   * and replaced in the string by its index followed by a comma ("12,"), so
   * an element whose children are all parsed looks like `<tag>3,7,</tag>` and
   * can itself be replaced on the next pass.
   *
   * @param {string} content - XML source text.
   * @returns {{documentElement: DOMNodeMock}} Object holding the last node
   *   created, which is the root element for a well-formed document
   *   (undefined when nothing could be parsed).
   */
  parseFromString: function (content) {
    var nodes = [];

    // Pass 1: drop processing instructions and comments, and lift CDATA
    // sections out of the string before anything else looks at their
    // content. A CDATA section becomes the pseudo-tag `<![index]>` so the
    // text pass below treats it as markup, not as text.
    content = content.replace(
      /<\?[\s\S]*?\?>|<!--[\s\S]*?-->|<!\[CDATA\[([\s\S]*?)\]\]>/g,
      function (all, cdata) {
        if (cdata === undefined) {
          return ''; // processing instruction or comment
        }
        var i = nodes.length;
        nodes.push(new DOMNodeMock('#text', cdata));
        return '<![' + i + ']>';
      }).trim();

    // Pass 2: text between tags. `[^<]+` (rather than a lazy "anything")
    // keeps two adjacent tags such as `<a><b/>` from being read as text.
    content = content.replace(/>([^<]+)</g, function (all, text) {
      var decoded = decodeXML(text);
      if (decoded.trim().length === 0) {
        return '><'; // ignoring whitespaces
      }
      var i = nodes.length;
      nodes.push(new DOMNodeMock('#text', decoded));
      return '>' + i + ',<';
    });

    // Pass 3: turn the CDATA pseudo-tags into ordinary child references.
    content = content.replace(/<!\[(\d+)\]>/g, '$1,');

    // Pass 4: repeatedly collapse elements whose content is already a list
    // of node references (or empty / self-closing) until nothing changes.
    var lastLength;
    do {
      lastLength = nodes.length;
      content = content.replace(
        /<([\w:.\-]+)((?:[\s\w:=.\-]|'[^']*'|"[^"]*")*)(?:\/>|>([\d,]*)<\/[^>]+>)/g,
        function (all, name, attrs, childList) {
          var i = nodes.length;
          var node = new DOMNodeMock(name);
          var children = [];
          if (childList) {
            var indices = childList.split(',');
            indices.pop(); // the list ends with a comma, drop the empty tail
            indices.forEach(function (child) {
              var childNode = nodes[+child];
              childNode.parentNode = node;
              children.push(childNode);
            });
          }
          node.childNodes = children;
          nodes.push(node);
          return i + ',';
        });
    } while (lastLength < nodes.length);

    return {
      documentElement: nodes.pop()
    };
  }
};
106+
107+
// Public entry point of this module: the DOMParser stand-in constructor.
exports.DOMParserMock = DOMParserMock;

examples/node/getinfo.js

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2+
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
3+
/* Any copyright is dedicated to the Public Domain.
4+
* http://creativecommons.org/publicdomain/zero/1.0/ */
5+
6+
//
7+
// Basic node example that prints document metadata and text content.
8+
// Requires single file built version of PDF.js -- please run
9+
// `node make singlefile` before running the example.
10+
//
11+
12+
var fs = require('fs');
var path = require('path');

// HACK: PDF.js is loaded here as a plain script in the global scope, not as
// a module, so the globals below are defined before it is required.
global.window = global;
global.navigator = { userAgent: "node" };
global.PDFJS = {};
global.DOMParser = require('./domparsermock.js').DOMParserMock;

require('../../build/singlefile/build/pdf.combined.js');

// Loading file from file system into typed array. The default sample is
// resolved against this script's directory (like the require() calls above)
// so the example works regardless of the current working directory.
var pdfPath = process.argv[2] ||
  path.join(__dirname, '../../web/compressed.tracemonkey-pldi-09.pdf');
var data = new Uint8Array(fs.readFileSync(pdfPath));

// Will be using promises to load document, pages and misc data instead of
// callback.
PDFJS.getDocument(data).then(function (doc) {
  var numPages = doc.numPages;
  console.log('# Document Loaded');
  console.log('Number of Pages: ' + numPages);
  console.log();

  // Tail of the promise chain; every step is appended to it so the output
  // is printed strictly in order (metadata first, then page 1, 2, ...).
  var lastPromise;
  lastPromise = doc.getMetadata().then(function (data) {
    console.log('# Metadata Is Loaded');
    console.log('## Info');
    console.log(JSON.stringify(data.info, null, 2));
    console.log();
    if (data.metadata) {
      console.log('## Metadata');
      console.log(JSON.stringify(data.metadata.metadata, null, 2));
      console.log();
    }
  });

  // Prints the size and the text of a single page; resolves when done.
  var loadPage = function (pageNum) {
    return doc.getPage(pageNum).then(function (page) {
      console.log('# Page ' + pageNum);
      var viewport = page.getViewport(1.0 /* scale */);
      console.log('Size: ' + viewport.width + 'x' + viewport.height);
      console.log();
      return page.getTextContent().then(function (content) {
        // Content contains lots of information about the text layout and
        // styles, but we need only strings at the moment
        var strings = content.items.map(function (item) {
          return item.str;
        });
        console.log('## Text Content');
        console.log(strings.join(' '));
      }).then(function () {
        console.log();
      });
    });
  };
  // Loading of the first page will wait on metadata and subsequent loadings
  // will wait on the previous pages.
  for (var i = 1; i <= numPages; i++) {
    lastPromise = lastPromise.then(loadPage.bind(null, i));
  }
  return lastPromise;
}).then(function () {
  console.log('# End of Document');
}, function (err) {
  console.error('Error: ' + err);
  // Report the failure to the calling shell as well.
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)