Skip to content

Commit 79df166

Browse files
Merge pull request #20846 from Snuffleupagus/internal-viewer-followup
A couple of small improvements of the new internal viewer
2 parents 9d093d9 + 60d6abd commit 79df166

File tree

7 files changed

+215
-161
lines changed

7 files changed

+215
-161
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ PDF.js is built into version 19+ of Firefox.
4444
Chrome, go to `Tools > Extension` and load the (unpackaged) extension from the
4545
directory `build/chromium`.
4646

47+
### PDF debugger
48+
49+
Browse the internal structure of a PDF document with https://mozilla.github.io/pdf.js/internal-viewer/web/pdf_internal_viewer.html
50+
4751
## Getting the Code
4852

4953
To get a local copy of the current code, clone it using git:

eslint.config.mjs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ export default [
8282
...globals.worker,
8383
PDFJSDev: "readonly",
8484
__raw_import__: "readonly",
85+
__eager_import__: "readonly",
8586
},
8687

8788
ecmaVersion: 2025,

external/builder/babel-plugin-pdfjs-preprocessor.mjs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,19 @@ function babelPluginPDFJSPreprocessor(babel, ctx) {
190190
},
191191
];
192192
path.replaceWith(t.importExpression(source));
193+
} else if (t.isIdentifier(node.callee, { name: "__eager_import__" })) {
194+
if (node.arguments.length !== 1) {
195+
throw new Error("Invalid `__eager_import__` usage.");
196+
}
197+
// Replace it with a standard `import`-call and inline the module.
198+
const source = node.arguments[0];
199+
source.leadingComments = [
200+
{
201+
type: "CommentBlock",
202+
value: "webpackMode: 'eager'",
203+
},
204+
];
205+
path.replaceWith(t.importExpression(source));
193206
}
194207
},
195208
"BlockStatement|StaticBlock": {

gulpfile.mjs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ const DEFINES = Object.freeze({
119119
COMPONENTS: false,
120120
LIB: false,
121121
IMAGE_DECODERS: false,
122+
INTERNAL_VIEWER: false,
122123
});
123124

124125
function transform(charEncoding, transformFunction) {
@@ -2410,7 +2411,7 @@ gulp.task(
24102411
"internal-viewer",
24112412
gulp.series(createBuildNumber, function createInternalViewer() {
24122413
console.log("\n### Creating internal viewer");
2413-
const defines = { ...DEFINES, GENERIC: true };
2414+
const defines = { ...DEFINES, GENERIC: true, INTERNAL_VIEWER: true };
24142415
return buildInternalViewer(defines, INTERNAL_VIEWER_DIR);
24152416
})
24162417
);

src/core/document.js

Lines changed: 24 additions & 145 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ import {
2222
isArrayEqual,
2323
makeArr,
2424
objectSize,
25-
OPS,
2625
PageActionEventType,
2726
RenderingIntentFlag,
2827
shadow,
@@ -38,17 +37,6 @@ import {
3837
PopupAnnotation,
3938
WidgetAnnotation,
4039
} from "./annotation.js";
41-
import {
42-
Cmd,
43-
Dict,
44-
EOF,
45-
isName,
46-
isRefsEqual,
47-
Name,
48-
Ref,
49-
RefSet,
50-
RefSetCache,
51-
} from "./primitives.js";
5240
import {
5341
collectActions,
5442
getInheritableProperty,
@@ -63,19 +51,28 @@ import {
6351
XRefEntryException,
6452
XRefParseException,
6553
} from "./core_utils.js";
66-
import { EvaluatorPreprocessor, PartialEvaluator } from "./evaluator.js";
54+
import {
55+
Dict,
56+
isName,
57+
isRefsEqual,
58+
Name,
59+
Ref,
60+
RefSet,
61+
RefSetCache,
62+
} from "./primitives.js";
6763
import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js";
68-
import { Lexer, Linearization, Parser } from "./parser.js";
6964
import { NullStream, Stream } from "./stream.js";
7065
import { BaseStream } from "./base_stream.js";
7166
import { calculateMD5 } from "./calculate_md5.js";
7267
import { Catalog } from "./catalog.js";
7368
import { clearGlobalCaches } from "./cleanup_helper.js";
7469
import { DatasetReader } from "./dataset_reader.js";
7570
import { Intersector } from "./intersector.js";
71+
import { Linearization } from "./parser.js";
7672
import { LocalColorSpaceCache } from "./image_utils.js";
7773
import { ObjectLoader } from "./object_loader.js";
7874
import { OperatorList } from "./operator_list.js";
75+
import { PartialEvaluator } from "./evaluator.js";
7976
import { PDFFunctionFactory } from "./function.js";
8077
import { PDFImage } from "./image.js";
8178
import { StreamsSequenceStream } from "./decode_stream.js";
@@ -2038,9 +2035,16 @@ class PDFDocument {
20382035
}
20392036

20402037
async toJSObject(value, firstCall = true) {
2041-
if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
2038+
if (
2039+
typeof PDFJSDev !== "undefined" &&
2040+
!PDFJSDev.test("TESTING || INTERNAL_VIEWER")
2041+
) {
20422042
throw new Error("Not implemented: toJSObject");
20432043
}
2044+
const { InternalViewerUtils } =
2045+
typeof PDFJSDev === "undefined"
2046+
? await import("./internal_viewer_utils.js")
2047+
: await __eager_import__("./internal_viewer_utils.js");
20442048

20452049
if (value === null && firstCall) {
20462050
return this.toJSObject(this.xref.trailer, false);
@@ -2051,7 +2055,7 @@ class PDFDocument {
20512055
for (const [key, val] of value.getRawEntries()) {
20522056
obj[key] =
20532057
isPage && key === "Contents"
2054-
? _getContentTokens(val, this.xref)
2058+
? InternalViewerUtils.getContentTokens(val, this.xref)
20552059
: await this.toJSObject(val, false);
20562060
}
20572061
return obj;
@@ -2109,9 +2113,10 @@ class PDFDocument {
21092113
if (isName(dict.get("Subtype"), "Form")) {
21102114
obj.bytes = value.getString();
21112115
value.reset();
2112-
const { instructions, cmdNames } = _groupIntoInstructions(
2113-
_tokenizeStream(value, this.xref)
2114-
);
2116+
const { instructions, cmdNames } =
2117+
InternalViewerUtils.groupIntoInstructions(
2118+
InternalViewerUtils.tokenizeStream(value, this.xref)
2119+
);
21152120
obj.contentStream = true;
21162121
obj.instructions = instructions;
21172122
obj.cmdNames = cmdNames;
@@ -2125,130 +2130,4 @@ class PDFDocument {
21252130
}
21262131
}
21272132

2128-
function _tokenizeStream(stream, xref) {
2129-
const tokens = [];
2130-
const parser = new Parser({
2131-
lexer: new Lexer(stream),
2132-
xref,
2133-
allowStreams: false,
2134-
});
2135-
while (true) {
2136-
let obj;
2137-
try {
2138-
obj = parser.getObj();
2139-
} catch {
2140-
break;
2141-
}
2142-
if (obj === EOF) {
2143-
break;
2144-
}
2145-
const token = _tokenToJSObject(obj);
2146-
if (token !== null) {
2147-
tokens.push(token);
2148-
}
2149-
}
2150-
return tokens;
2151-
}
2152-
2153-
function _getContentTokens(contentsVal, xref) {
2154-
const refs = Array.isArray(contentsVal) ? contentsVal : [contentsVal];
2155-
const rawContents = [];
2156-
const tokens = [];
2157-
for (const rawRef of refs) {
2158-
if (rawRef instanceof Ref) {
2159-
rawContents.push({ num: rawRef.num, gen: rawRef.gen });
2160-
}
2161-
const stream = xref.fetchIfRef(rawRef);
2162-
if (!(stream instanceof BaseStream)) {
2163-
continue;
2164-
}
2165-
tokens.push(..._tokenizeStream(stream, xref));
2166-
}
2167-
const { instructions, cmdNames } = _groupIntoInstructions(tokens);
2168-
return { contentStream: true, instructions, cmdNames, rawContents };
2169-
}
2170-
2171-
// Lazily-built reverse map: OPS numeric id → property name string.
2172-
let _opsIdToName = null;
2173-
2174-
function _getOpsIdToName() {
2175-
if (!_opsIdToName) {
2176-
_opsIdToName = Object.create(null);
2177-
for (const [name, id] of Object.entries(OPS)) {
2178-
_opsIdToName[id] = name;
2179-
}
2180-
}
2181-
return _opsIdToName;
2182-
}
2183-
2184-
function _groupIntoInstructions(tokens) {
2185-
const { opMap } = EvaluatorPreprocessor;
2186-
const opsIdToName = _getOpsIdToName();
2187-
const instructions = [];
2188-
const cmdNames = Object.create(null);
2189-
const argBuffer = [];
2190-
for (const token of tokens) {
2191-
if (token.type !== "cmd") {
2192-
argBuffer.push(token);
2193-
continue;
2194-
}
2195-
const op = opMap[token.value];
2196-
if (op && !(token.value in cmdNames)) {
2197-
cmdNames[token.value] = opsIdToName[op.id];
2198-
}
2199-
let args;
2200-
if (!op || op.variableArgs) {
2201-
// Unknown command or variable args: consume all pending args.
2202-
args = argBuffer.splice(0);
2203-
} else {
2204-
// Fixed args: consume exactly numArgs, orphan the rest.
2205-
const orphanCount = Math.max(0, argBuffer.length - op.numArgs);
2206-
for (let i = 0; i < orphanCount; i++) {
2207-
instructions.push({ cmd: null, args: [argBuffer.shift()] });
2208-
}
2209-
args = argBuffer.splice(0);
2210-
}
2211-
instructions.push({ cmd: token.value, args });
2212-
}
2213-
for (const t of argBuffer) {
2214-
instructions.push({ cmd: null, args: [t] });
2215-
}
2216-
return { instructions, cmdNames };
2217-
}
2218-
2219-
function _tokenToJSObject(obj) {
2220-
if (obj instanceof Cmd) {
2221-
return { type: "cmd", value: obj.cmd };
2222-
}
2223-
if (obj instanceof Name) {
2224-
return { type: "name", value: obj.name };
2225-
}
2226-
if (obj instanceof Ref) {
2227-
return { type: "ref", num: obj.num, gen: obj.gen };
2228-
}
2229-
if (Array.isArray(obj)) {
2230-
return { type: "array", value: obj.map(_tokenToJSObject) };
2231-
}
2232-
if (obj instanceof Dict) {
2233-
const result = Object.create(null);
2234-
for (const [key, val] of obj.getRawEntries()) {
2235-
result[key] = _tokenToJSObject(val);
2236-
}
2237-
return { type: "dict", value: result };
2238-
}
2239-
if (typeof obj === "number") {
2240-
return { type: "number", value: obj };
2241-
}
2242-
if (typeof obj === "string") {
2243-
return { type: "string", value: obj };
2244-
}
2245-
if (typeof obj === "boolean") {
2246-
return { type: "boolean", value: obj };
2247-
}
2248-
if (obj === null) {
2249-
return { type: "null" };
2250-
}
2251-
return null;
2252-
}
2253-
22542133
export { Page, PDFDocument };

0 commit comments

Comments
 (0)