parser.js 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * Javascript code in this page
  4. *
  5. * Copyright 2021 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * Javascript code in this page
  21. */
  22. "use strict";
  23. Object.defineProperty(exports, "__esModule", {
  24. value: true
  25. });
  26. exports.Parser = exports.Linearization = exports.Lexer = void 0;
  27. var _util = require("../shared/util.js");
  28. var _primitives = require("./primitives.js");
  29. var _core_utils = require("./core_utils.js");
  30. var _ascii_85_stream = require("./ascii_85_stream.js");
  31. var _ascii_hex_stream = require("./ascii_hex_stream.js");
  32. var _ccitt_stream = require("./ccitt_stream.js");
  33. var _flate_stream = require("./flate_stream.js");
  34. var _jbig2_stream = require("./jbig2_stream.js");
  35. var _jpeg_stream = require("./jpeg_stream.js");
  36. var _jpx_stream = require("./jpx_stream.js");
  37. var _lzw_stream = require("./lzw_stream.js");
  38. var _stream = require("./stream.js");
  39. var _predictor_stream = require("./predictor_stream.js");
  40. var _run_length_stream = require("./run_length_stream.js");
  // Inline-image data must be shorter than this (in bytes) for
  // `Parser.makeInlineImage` to compute a cache key and cache the stream.
  41. const MAX_LENGTH_TO_CACHE = 1000;
  // The serialized inline-image dictionary must be shorter than this (in
  // bytes) for `Parser.makeInlineImage` to checksum it with `computeAdler32`.
  42. const MAX_ADLER32_LENGTH = 5552;
  43. function computeAdler32(bytes) {
  44. const bytesLength = bytes.length;
  45. let a = 1,
  46. b = 0;
  47. for (let i = 0; i < bytesLength; ++i) {
  48. a += bytes[i] & 0xff;
  49. b += a;
  50. }
  51. return b % 65521 << 16 | a % 65521;
  52. }
  53. class Parser {
  54. constructor({
  55. lexer,
  56. xref,
  57. allowStreams = false,
  58. recoveryMode = false
  59. }) {
  60. this.lexer = lexer;
  61. this.xref = xref;
  62. this.allowStreams = allowStreams;
  63. this.recoveryMode = recoveryMode;
  64. this.imageCache = Object.create(null);
  65. this.refill();
  66. }
  67. refill() {
  68. this.buf1 = this.lexer.getObj();
  69. this.buf2 = this.lexer.getObj();
  70. }
  71. shift() {
  72. if (this.buf2 instanceof _primitives.Cmd && this.buf2.cmd === "ID") {
  73. this.buf1 = this.buf2;
  74. this.buf2 = null;
  75. } else {
  76. this.buf1 = this.buf2;
  77. this.buf2 = this.lexer.getObj();
  78. }
  79. }
  80. tryShift() {
  81. try {
  82. this.shift();
  83. return true;
  84. } catch (e) {
  85. if (e instanceof _core_utils.MissingDataException) {
  86. throw e;
  87. }
  88. return false;
  89. }
  90. }
  91. getObj(cipherTransform = null) {
  92. const buf1 = this.buf1;
  93. this.shift();
  94. if (buf1 instanceof _primitives.Cmd) {
  95. switch (buf1.cmd) {
  96. case "BI":
  97. return this.makeInlineImage(cipherTransform);
  98. case "[":
  99. const array = [];
  100. while (!(0, _primitives.isCmd)(this.buf1, "]") && this.buf1 !== _primitives.EOF) {
  101. array.push(this.getObj(cipherTransform));
  102. }
  103. if (this.buf1 === _primitives.EOF) {
  104. if (this.recoveryMode) {
  105. return array;
  106. }
  107. throw new _core_utils.ParserEOFException("End of file inside array.");
  108. }
  109. this.shift();
  110. return array;
  111. case "<<":
  112. const dict = new _primitives.Dict(this.xref);
  113. while (!(0, _primitives.isCmd)(this.buf1, ">>") && this.buf1 !== _primitives.EOF) {
  114. if (!(0, _primitives.isName)(this.buf1)) {
  115. (0, _util.info)("Malformed dictionary: key must be a name object");
  116. this.shift();
  117. continue;
  118. }
  119. const key = this.buf1.name;
  120. this.shift();
  121. if (this.buf1 === _primitives.EOF) {
  122. break;
  123. }
  124. dict.set(key, this.getObj(cipherTransform));
  125. }
  126. if (this.buf1 === _primitives.EOF) {
  127. if (this.recoveryMode) {
  128. return dict;
  129. }
  130. throw new _core_utils.ParserEOFException("End of file inside dictionary.");
  131. }
  132. if ((0, _primitives.isCmd)(this.buf2, "stream")) {
  133. return this.allowStreams ? this.makeStream(dict, cipherTransform) : dict;
  134. }
  135. this.shift();
  136. return dict;
  137. default:
  138. return buf1;
  139. }
  140. }
  141. if (Number.isInteger(buf1)) {
  142. if (Number.isInteger(this.buf1) && (0, _primitives.isCmd)(this.buf2, "R")) {
  143. const ref = _primitives.Ref.get(buf1, this.buf1);
  144. this.shift();
  145. this.shift();
  146. return ref;
  147. }
  148. return buf1;
  149. }
  150. if (typeof buf1 === "string") {
  151. if (cipherTransform) {
  152. return cipherTransform.decryptString(buf1);
  153. }
  154. return buf1;
  155. }
  156. return buf1;
  157. }
  158. findDefaultInlineStreamEnd(stream) {
  159. const E = 0x45,
  160. I = 0x49,
  161. SPACE = 0x20,
  162. LF = 0xa,
  163. CR = 0xd,
  164. NUL = 0x0;
  165. const lexer = this.lexer,
  166. startPos = stream.pos,
  167. n = 10;
  168. let state = 0,
  169. ch,
  170. maybeEIPos;
  171. while ((ch = stream.getByte()) !== -1) {
  172. if (state === 0) {
  173. state = ch === E ? 1 : 0;
  174. } else if (state === 1) {
  175. state = ch === I ? 2 : 0;
  176. } else {
  177. (0, _util.assert)(state === 2, "findDefaultInlineStreamEnd - invalid state.");
  178. if (ch === SPACE || ch === LF || ch === CR) {
  179. maybeEIPos = stream.pos;
  180. const followingBytes = stream.peekBytes(n);
  181. for (let i = 0, ii = followingBytes.length; i < ii; i++) {
  182. ch = followingBytes[i];
  183. if (ch === NUL && followingBytes[i + 1] !== NUL) {
  184. continue;
  185. }
  186. if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7f)) {
  187. state = 0;
  188. break;
  189. }
  190. }
  191. if (state !== 2) {
  192. continue;
  193. }
  194. if (lexer.knownCommands) {
  195. const nextObj = lexer.peekObj();
  196. if (nextObj instanceof _primitives.Cmd && !lexer.knownCommands[nextObj.cmd]) {
  197. state = 0;
  198. }
  199. } else {
  200. (0, _util.warn)("findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined.");
  201. }
  202. if (state === 2) {
  203. break;
  204. }
  205. } else {
  206. state = 0;
  207. }
  208. }
  209. }
  210. if (ch === -1) {
  211. (0, _util.warn)("findDefaultInlineStreamEnd: " + "Reached the end of the stream without finding a valid EI marker");
  212. if (maybeEIPos) {
  213. (0, _util.warn)('... trying to recover by using the last "EI" occurrence.');
  214. stream.skip(-(stream.pos - maybeEIPos));
  215. }
  216. }
  217. let endOffset = 4;
  218. stream.skip(-endOffset);
  219. ch = stream.peekByte();
  220. stream.skip(endOffset);
  221. if (!(0, _core_utils.isWhiteSpace)(ch)) {
  222. endOffset--;
  223. }
  224. return stream.pos - endOffset - startPos;
  225. }
  226. findDCTDecodeInlineStreamEnd(stream) {
  227. const startPos = stream.pos;
  228. let foundEOI = false,
  229. b,
  230. markerLength;
  231. while ((b = stream.getByte()) !== -1) {
  232. if (b !== 0xff) {
  233. continue;
  234. }
  235. switch (stream.getByte()) {
  236. case 0x00:
  237. break;
  238. case 0xff:
  239. stream.skip(-1);
  240. break;
  241. case 0xd9:
  242. foundEOI = true;
  243. break;
  244. case 0xc0:
  245. case 0xc1:
  246. case 0xc2:
  247. case 0xc3:
  248. case 0xc5:
  249. case 0xc6:
  250. case 0xc7:
  251. case 0xc9:
  252. case 0xca:
  253. case 0xcb:
  254. case 0xcd:
  255. case 0xce:
  256. case 0xcf:
  257. case 0xc4:
  258. case 0xcc:
  259. case 0xda:
  260. case 0xdb:
  261. case 0xdc:
  262. case 0xdd:
  263. case 0xde:
  264. case 0xdf:
  265. case 0xe0:
  266. case 0xe1:
  267. case 0xe2:
  268. case 0xe3:
  269. case 0xe4:
  270. case 0xe5:
  271. case 0xe6:
  272. case 0xe7:
  273. case 0xe8:
  274. case 0xe9:
  275. case 0xea:
  276. case 0xeb:
  277. case 0xec:
  278. case 0xed:
  279. case 0xee:
  280. case 0xef:
  281. case 0xfe:
  282. markerLength = stream.getUint16();
  283. if (markerLength > 2) {
  284. stream.skip(markerLength - 2);
  285. } else {
  286. stream.skip(-2);
  287. }
  288. break;
  289. }
  290. if (foundEOI) {
  291. break;
  292. }
  293. }
  294. const length = stream.pos - startPos;
  295. if (b === -1) {
  296. (0, _util.warn)("Inline DCTDecode image stream: " + "EOI marker not found, searching for /EI/ instead.");
  297. stream.skip(-length);
  298. return this.findDefaultInlineStreamEnd(stream);
  299. }
  300. this.inlineStreamSkipEI(stream);
  301. return length;
  302. }
  303. findASCII85DecodeInlineStreamEnd(stream) {
  304. const TILDE = 0x7e,
  305. GT = 0x3e;
  306. const startPos = stream.pos;
  307. let ch;
  308. while ((ch = stream.getByte()) !== -1) {
  309. if (ch === TILDE) {
  310. const tildePos = stream.pos;
  311. ch = stream.peekByte();
  312. while ((0, _core_utils.isWhiteSpace)(ch)) {
  313. stream.skip();
  314. ch = stream.peekByte();
  315. }
  316. if (ch === GT) {
  317. stream.skip();
  318. break;
  319. }
  320. if (stream.pos > tildePos) {
  321. const maybeEI = stream.peekBytes(2);
  322. if (maybeEI[0] === 0x45 && maybeEI[1] === 0x49) {
  323. break;
  324. }
  325. }
  326. }
  327. }
  328. const length = stream.pos - startPos;
  329. if (ch === -1) {
  330. (0, _util.warn)("Inline ASCII85Decode image stream: " + "EOD marker not found, searching for /EI/ instead.");
  331. stream.skip(-length);
  332. return this.findDefaultInlineStreamEnd(stream);
  333. }
  334. this.inlineStreamSkipEI(stream);
  335. return length;
  336. }
  337. findASCIIHexDecodeInlineStreamEnd(stream) {
  338. const GT = 0x3e;
  339. const startPos = stream.pos;
  340. let ch;
  341. while ((ch = stream.getByte()) !== -1) {
  342. if (ch === GT) {
  343. break;
  344. }
  345. }
  346. const length = stream.pos - startPos;
  347. if (ch === -1) {
  348. (0, _util.warn)("Inline ASCIIHexDecode image stream: " + "EOD marker not found, searching for /EI/ instead.");
  349. stream.skip(-length);
  350. return this.findDefaultInlineStreamEnd(stream);
  351. }
  352. this.inlineStreamSkipEI(stream);
  353. return length;
  354. }
  355. inlineStreamSkipEI(stream) {
  356. const E = 0x45,
  357. I = 0x49;
  358. let state = 0,
  359. ch;
  360. while ((ch = stream.getByte()) !== -1) {
  361. if (state === 0) {
  362. state = ch === E ? 1 : 0;
  363. } else if (state === 1) {
  364. state = ch === I ? 2 : 0;
  365. } else if (state === 2) {
  366. break;
  367. }
  368. }
  369. }
  370. makeInlineImage(cipherTransform) {
  371. const lexer = this.lexer;
  372. const stream = lexer.stream;
  373. const dict = new _primitives.Dict(this.xref);
  374. let dictLength;
  375. while (!(0, _primitives.isCmd)(this.buf1, "ID") && this.buf1 !== _primitives.EOF) {
  376. if (!(0, _primitives.isName)(this.buf1)) {
  377. throw new _util.FormatError("Dictionary key must be a name object");
  378. }
  379. const key = this.buf1.name;
  380. this.shift();
  381. if (this.buf1 === _primitives.EOF) {
  382. break;
  383. }
  384. dict.set(key, this.getObj(cipherTransform));
  385. }
  386. if (lexer.beginInlineImagePos !== -1) {
  387. dictLength = stream.pos - lexer.beginInlineImagePos;
  388. }
  389. const filter = dict.get("Filter", "F");
  390. let filterName;
  391. if ((0, _primitives.isName)(filter)) {
  392. filterName = filter.name;
  393. } else if (Array.isArray(filter)) {
  394. const filterZero = this.xref.fetchIfRef(filter[0]);
  395. if ((0, _primitives.isName)(filterZero)) {
  396. filterName = filterZero.name;
  397. }
  398. }
  399. const startPos = stream.pos;
  400. let length;
  401. if (filterName === "DCTDecode" || filterName === "DCT") {
  402. length = this.findDCTDecodeInlineStreamEnd(stream);
  403. } else if (filterName === "ASCII85Decode" || filterName === "A85") {
  404. length = this.findASCII85DecodeInlineStreamEnd(stream);
  405. } else if (filterName === "ASCIIHexDecode" || filterName === "AHx") {
  406. length = this.findASCIIHexDecodeInlineStreamEnd(stream);
  407. } else {
  408. length = this.findDefaultInlineStreamEnd(stream);
  409. }
  410. let imageStream = stream.makeSubStream(startPos, length, dict);
  411. let cacheKey;
  412. if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) {
  413. const imageBytes = imageStream.getBytes();
  414. imageStream.reset();
  415. const initialStreamPos = stream.pos;
  416. stream.pos = lexer.beginInlineImagePos;
  417. const dictBytes = stream.getBytes(dictLength);
  418. stream.pos = initialStreamPos;
  419. cacheKey = computeAdler32(imageBytes) + "_" + computeAdler32(dictBytes);
  420. const cacheEntry = this.imageCache[cacheKey];
  421. if (cacheEntry !== undefined) {
  422. this.buf2 = _primitives.Cmd.get("EI");
  423. this.shift();
  424. cacheEntry.reset();
  425. return cacheEntry;
  426. }
  427. }
  428. if (cipherTransform) {
  429. imageStream = cipherTransform.createStream(imageStream, length);
  430. }
  431. imageStream = this.filter(imageStream, dict, length);
  432. imageStream.dict = dict;
  433. if (cacheKey !== undefined) {
  434. imageStream.cacheKey = `inline_${length}_${cacheKey}`;
  435. this.imageCache[cacheKey] = imageStream;
  436. }
  437. this.buf2 = _primitives.Cmd.get("EI");
  438. this.shift();
  439. return imageStream;
  440. }
  441. _findStreamLength(startPos, signature) {
  442. const {
  443. stream
  444. } = this.lexer;
  445. stream.pos = startPos;
  446. const SCAN_BLOCK_LENGTH = 2048;
  447. const signatureLength = signature.length;
  448. while (stream.pos < stream.end) {
  449. const scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH);
  450. const scanLength = scanBytes.length - signatureLength;
  451. if (scanLength <= 0) {
  452. break;
  453. }
  454. let pos = 0;
  455. while (pos < scanLength) {
  456. let j = 0;
  457. while (j < signatureLength && scanBytes[pos + j] === signature[j]) {
  458. j++;
  459. }
  460. if (j >= signatureLength) {
  461. stream.pos += pos;
  462. return stream.pos - startPos;
  463. }
  464. pos++;
  465. }
  466. stream.pos += scanLength;
  467. }
  468. return -1;
  469. }
  470. makeStream(dict, cipherTransform) {
  471. const lexer = this.lexer;
  472. let stream = lexer.stream;
  473. lexer.skipToNextLine();
  474. const startPos = stream.pos - 1;
  475. let length = dict.get("Length");
  476. if (!Number.isInteger(length)) {
  477. (0, _util.info)(`Bad length "${length}" in stream`);
  478. length = 0;
  479. }
  480. stream.pos = startPos + length;
  481. lexer.nextChar();
  482. if (this.tryShift() && (0, _primitives.isCmd)(this.buf2, "endstream")) {
  483. this.shift();
  484. } else {
  485. const ENDSTREAM_SIGNATURE = new Uint8Array([0x65, 0x6e, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d]);
  486. let actualLength = this._findStreamLength(startPos, ENDSTREAM_SIGNATURE);
  487. if (actualLength < 0) {
  488. const MAX_TRUNCATION = 1;
  489. for (let i = 1; i <= MAX_TRUNCATION; i++) {
  490. const end = ENDSTREAM_SIGNATURE.length - i;
  491. const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);
  492. const maybeLength = this._findStreamLength(startPos, TRUNCATED_SIGNATURE);
  493. if (maybeLength >= 0) {
  494. const lastByte = stream.peekBytes(end + 1)[end];
  495. if (!(0, _core_utils.isWhiteSpace)(lastByte)) {
  496. break;
  497. }
  498. (0, _util.info)(`Found "${(0, _util.bytesToString)(TRUNCATED_SIGNATURE)}" when ` + "searching for endstream command.");
  499. actualLength = maybeLength;
  500. break;
  501. }
  502. }
  503. if (actualLength < 0) {
  504. throw new _util.FormatError("Missing endstream command.");
  505. }
  506. }
  507. length = actualLength;
  508. lexer.nextChar();
  509. this.shift();
  510. this.shift();
  511. }
  512. this.shift();
  513. stream = stream.makeSubStream(startPos, length, dict);
  514. if (cipherTransform) {
  515. stream = cipherTransform.createStream(stream, length);
  516. }
  517. stream = this.filter(stream, dict, length);
  518. stream.dict = dict;
  519. return stream;
  520. }
  521. filter(stream, dict, length) {
  522. let filter = dict.get("Filter", "F");
  523. let params = dict.get("DecodeParms", "DP");
  524. if ((0, _primitives.isName)(filter)) {
  525. if (Array.isArray(params)) {
  526. (0, _util.warn)("/DecodeParms should not contain an Array, " + "when /Filter contains a Name.");
  527. }
  528. return this.makeFilter(stream, filter.name, length, params);
  529. }
  530. let maybeLength = length;
  531. if (Array.isArray(filter)) {
  532. const filterArray = filter;
  533. const paramsArray = params;
  534. for (let i = 0, ii = filterArray.length; i < ii; ++i) {
  535. filter = this.xref.fetchIfRef(filterArray[i]);
  536. if (!(0, _primitives.isName)(filter)) {
  537. throw new _util.FormatError(`Bad filter name "${filter}"`);
  538. }
  539. params = null;
  540. if (Array.isArray(paramsArray) && i in paramsArray) {
  541. params = this.xref.fetchIfRef(paramsArray[i]);
  542. }
  543. stream = this.makeFilter(stream, filter.name, maybeLength, params);
  544. maybeLength = null;
  545. }
  546. }
  547. return stream;
  548. }
  549. makeFilter(stream, name, maybeLength, params) {
  550. if (maybeLength === 0) {
  551. (0, _util.warn)(`Empty "${name}" stream.`);
  552. return new _stream.NullStream();
  553. }
  554. try {
  555. const xrefStreamStats = this.xref.stats.streamTypes;
  556. if (name === "FlateDecode" || name === "Fl") {
  557. xrefStreamStats[_util.StreamType.FLATE] = true;
  558. if (params) {
  559. return new _predictor_stream.PredictorStream(new _flate_stream.FlateStream(stream, maybeLength), maybeLength, params);
  560. }
  561. return new _flate_stream.FlateStream(stream, maybeLength);
  562. }
  563. if (name === "LZWDecode" || name === "LZW") {
  564. xrefStreamStats[_util.StreamType.LZW] = true;
  565. let earlyChange = 1;
  566. if (params) {
  567. if (params.has("EarlyChange")) {
  568. earlyChange = params.get("EarlyChange");
  569. }
  570. return new _predictor_stream.PredictorStream(new _lzw_stream.LZWStream(stream, maybeLength, earlyChange), maybeLength, params);
  571. }
  572. return new _lzw_stream.LZWStream(stream, maybeLength, earlyChange);
  573. }
  574. if (name === "DCTDecode" || name === "DCT") {
  575. xrefStreamStats[_util.StreamType.DCT] = true;
  576. return new _jpeg_stream.JpegStream(stream, maybeLength, params);
  577. }
  578. if (name === "JPXDecode" || name === "JPX") {
  579. xrefStreamStats[_util.StreamType.JPX] = true;
  580. return new _jpx_stream.JpxStream(stream, maybeLength, params);
  581. }
  582. if (name === "ASCII85Decode" || name === "A85") {
  583. xrefStreamStats[_util.StreamType.A85] = true;
  584. return new _ascii_85_stream.Ascii85Stream(stream, maybeLength);
  585. }
  586. if (name === "ASCIIHexDecode" || name === "AHx") {
  587. xrefStreamStats[_util.StreamType.AHX] = true;
  588. return new _ascii_hex_stream.AsciiHexStream(stream, maybeLength);
  589. }
  590. if (name === "CCITTFaxDecode" || name === "CCF") {
  591. xrefStreamStats[_util.StreamType.CCF] = true;
  592. return new _ccitt_stream.CCITTFaxStream(stream, maybeLength, params);
  593. }
  594. if (name === "RunLengthDecode" || name === "RL") {
  595. xrefStreamStats[_util.StreamType.RLX] = true;
  596. return new _run_length_stream.RunLengthStream(stream, maybeLength);
  597. }
  598. if (name === "JBIG2Decode") {
  599. xrefStreamStats[_util.StreamType.JBIG] = true;
  600. return new _jbig2_stream.Jbig2Stream(stream, maybeLength, params);
  601. }
  602. (0, _util.warn)(`Filter "${name}" is not supported.`);
  603. return stream;
  604. } catch (ex) {
  605. if (ex instanceof _core_utils.MissingDataException) {
  606. throw ex;
  607. }
  608. (0, _util.warn)(`Invalid stream: "${ex}"`);
  609. return new _stream.NullStream();
  610. }
  611. }
  612. }
  613. exports.Parser = Parser;
  614. const specialChars = [1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
  615. function toHexDigit(ch) {
  616. if (ch >= 0x30 && ch <= 0x39) {
  617. return ch & 0x0f;
  618. }
  619. if (ch >= 0x41 && ch <= 0x46 || ch >= 0x61 && ch <= 0x66) {
  620. return (ch & 0x0f) + 9;
  621. }
  622. return -1;
  623. }
  624. class Lexer {
  625. constructor(stream, knownCommands = null) {
  626. this.stream = stream;
  627. this.nextChar();
  628. this.strBuf = [];
  629. this.knownCommands = knownCommands;
  630. this._hexStringNumWarn = 0;
  631. this.beginInlineImagePos = -1;
  632. }
  633. nextChar() {
  634. return this.currentChar = this.stream.getByte();
  635. }
  636. peekChar() {
  637. return this.stream.peekByte();
  638. }
  639. getNumber() {
  640. let ch = this.currentChar;
  641. let eNotation = false;
  642. let divideBy = 0;
  643. let sign = 0;
  644. if (ch === 0x2d) {
  645. sign = -1;
  646. ch = this.nextChar();
  647. if (ch === 0x2d) {
  648. ch = this.nextChar();
  649. }
  650. } else if (ch === 0x2b) {
  651. sign = 1;
  652. ch = this.nextChar();
  653. }
  654. if (ch === 0x0a || ch === 0x0d) {
  655. do {
  656. ch = this.nextChar();
  657. } while (ch === 0x0a || ch === 0x0d);
  658. }
  659. if (ch === 0x2e) {
  660. divideBy = 10;
  661. ch = this.nextChar();
  662. }
  663. if (ch < 0x30 || ch > 0x39) {
  664. if (divideBy === 10 && sign === 0 && ((0, _core_utils.isWhiteSpace)(ch) || ch === -1)) {
  665. (0, _util.warn)("Lexer.getNumber - treating a single decimal point as zero.");
  666. return 0;
  667. }
  668. throw new _util.FormatError(`Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`);
  669. }
  670. sign = sign || 1;
  671. let baseValue = ch - 0x30;
  672. let powerValue = 0;
  673. let powerValueSign = 1;
  674. while ((ch = this.nextChar()) >= 0) {
  675. if (ch >= 0x30 && ch <= 0x39) {
  676. const currentDigit = ch - 0x30;
  677. if (eNotation) {
  678. powerValue = powerValue * 10 + currentDigit;
  679. } else {
  680. if (divideBy !== 0) {
  681. divideBy *= 10;
  682. }
  683. baseValue = baseValue * 10 + currentDigit;
  684. }
  685. } else if (ch === 0x2e) {
  686. if (divideBy === 0) {
  687. divideBy = 1;
  688. } else {
  689. break;
  690. }
  691. } else if (ch === 0x2d) {
  692. (0, _util.warn)("Badly formatted number: minus sign in the middle");
  693. } else if (ch === 0x45 || ch === 0x65) {
  694. ch = this.peekChar();
  695. if (ch === 0x2b || ch === 0x2d) {
  696. powerValueSign = ch === 0x2d ? -1 : 1;
  697. this.nextChar();
  698. } else if (ch < 0x30 || ch > 0x39) {
  699. break;
  700. }
  701. eNotation = true;
  702. } else {
  703. break;
  704. }
  705. }
  706. if (divideBy !== 0) {
  707. baseValue /= divideBy;
  708. }
  709. if (eNotation) {
  710. baseValue *= 10 ** (powerValueSign * powerValue);
  711. }
  712. return sign * baseValue;
  713. }
  714. getString() {
  715. let numParen = 1;
  716. let done = false;
  717. const strBuf = this.strBuf;
  718. strBuf.length = 0;
  719. let ch = this.nextChar();
  720. while (true) {
  721. let charBuffered = false;
  722. switch (ch | 0) {
  723. case -1:
  724. (0, _util.warn)("Unterminated string");
  725. done = true;
  726. break;
  727. case 0x28:
  728. ++numParen;
  729. strBuf.push("(");
  730. break;
  731. case 0x29:
  732. if (--numParen === 0) {
  733. this.nextChar();
  734. done = true;
  735. } else {
  736. strBuf.push(")");
  737. }
  738. break;
  739. case 0x5c:
  740. ch = this.nextChar();
  741. switch (ch) {
  742. case -1:
  743. (0, _util.warn)("Unterminated string");
  744. done = true;
  745. break;
  746. case 0x6e:
  747. strBuf.push("\n");
  748. break;
  749. case 0x72:
  750. strBuf.push("\r");
  751. break;
  752. case 0x74:
  753. strBuf.push("\t");
  754. break;
  755. case 0x62:
  756. strBuf.push("\b");
  757. break;
  758. case 0x66:
  759. strBuf.push("\f");
  760. break;
  761. case 0x5c:
  762. case 0x28:
  763. case 0x29:
  764. strBuf.push(String.fromCharCode(ch));
  765. break;
  766. case 0x30:
  767. case 0x31:
  768. case 0x32:
  769. case 0x33:
  770. case 0x34:
  771. case 0x35:
  772. case 0x36:
  773. case 0x37:
  774. let x = ch & 0x0f;
  775. ch = this.nextChar();
  776. charBuffered = true;
  777. if (ch >= 0x30 && ch <= 0x37) {
  778. x = (x << 3) + (ch & 0x0f);
  779. ch = this.nextChar();
  780. if (ch >= 0x30 && ch <= 0x37) {
  781. charBuffered = false;
  782. x = (x << 3) + (ch & 0x0f);
  783. }
  784. }
  785. strBuf.push(String.fromCharCode(x));
  786. break;
  787. case 0x0d:
  788. if (this.peekChar() === 0x0a) {
  789. this.nextChar();
  790. }
  791. break;
  792. case 0x0a:
  793. break;
  794. default:
  795. strBuf.push(String.fromCharCode(ch));
  796. break;
  797. }
  798. break;
  799. default:
  800. strBuf.push(String.fromCharCode(ch));
  801. break;
  802. }
  803. if (done) {
  804. break;
  805. }
  806. if (!charBuffered) {
  807. ch = this.nextChar();
  808. }
  809. }
  810. return strBuf.join("");
  811. }
  812. getName() {
  813. let ch, previousCh;
  814. const strBuf = this.strBuf;
  815. strBuf.length = 0;
  816. while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
  817. if (ch === 0x23) {
  818. ch = this.nextChar();
  819. if (specialChars[ch]) {
  820. (0, _util.warn)("Lexer_getName: " + "NUMBER SIGN (#) should be followed by a hexadecimal number.");
  821. strBuf.push("#");
  822. break;
  823. }
  824. const x = toHexDigit(ch);
  825. if (x !== -1) {
  826. previousCh = ch;
  827. ch = this.nextChar();
  828. const x2 = toHexDigit(ch);
  829. if (x2 === -1) {
  830. (0, _util.warn)(`Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` + "in hexadecimal number.");
  831. strBuf.push("#", String.fromCharCode(previousCh));
  832. if (specialChars[ch]) {
  833. break;
  834. }
  835. strBuf.push(String.fromCharCode(ch));
  836. continue;
  837. }
  838. strBuf.push(String.fromCharCode(x << 4 | x2));
  839. } else {
  840. strBuf.push("#", String.fromCharCode(ch));
  841. }
  842. } else {
  843. strBuf.push(String.fromCharCode(ch));
  844. }
  845. }
  846. if (strBuf.length > 127) {
  847. (0, _util.warn)(`Name token is longer than allowed by the spec: ${strBuf.length}`);
  848. } else if (strBuf.length === 0) {
  849. (0, _util.warn)("Name token is empty.");
  850. }
  851. return _primitives.Name.get(strBuf.join(""));
  852. }
  853. _hexStringWarn(ch) {
  854. const MAX_HEX_STRING_NUM_WARN = 5;
  855. if (this._hexStringNumWarn++ === MAX_HEX_STRING_NUM_WARN) {
  856. (0, _util.warn)("getHexString - ignoring additional invalid characters.");
  857. return;
  858. }
  859. if (this._hexStringNumWarn > MAX_HEX_STRING_NUM_WARN) {
  860. return;
  861. }
  862. (0, _util.warn)(`getHexString - ignoring invalid character: ${ch}`);
  863. }
  864. getHexString() {
  865. const strBuf = this.strBuf;
  866. strBuf.length = 0;
  867. let ch = this.currentChar;
  868. let isFirstHex = true;
  869. let firstDigit, secondDigit;
  870. this._hexStringNumWarn = 0;
  871. while (true) {
  872. if (ch < 0) {
  873. (0, _util.warn)("Unterminated hex string");
  874. break;
  875. } else if (ch === 0x3e) {
  876. this.nextChar();
  877. break;
  878. } else if (specialChars[ch] === 1) {
  879. ch = this.nextChar();
  880. continue;
  881. } else {
  882. if (isFirstHex) {
  883. firstDigit = toHexDigit(ch);
  884. if (firstDigit === -1) {
  885. this._hexStringWarn(ch);
  886. ch = this.nextChar();
  887. continue;
  888. }
  889. } else {
  890. secondDigit = toHexDigit(ch);
  891. if (secondDigit === -1) {
  892. this._hexStringWarn(ch);
  893. ch = this.nextChar();
  894. continue;
  895. }
  896. strBuf.push(String.fromCharCode(firstDigit << 4 | secondDigit));
  897. }
  898. isFirstHex = !isFirstHex;
  899. ch = this.nextChar();
  900. }
  901. }
  902. return strBuf.join("");
  903. }
  904. getObj() {
  905. let comment = false;
  906. let ch = this.currentChar;
  907. while (true) {
  908. if (ch < 0) {
  909. return _primitives.EOF;
  910. }
  911. if (comment) {
  912. if (ch === 0x0a || ch === 0x0d) {
  913. comment = false;
  914. }
  915. } else if (ch === 0x25) {
  916. comment = true;
  917. } else if (specialChars[ch] !== 1) {
  918. break;
  919. }
  920. ch = this.nextChar();
  921. }
  922. switch (ch | 0) {
  923. case 0x30:
  924. case 0x31:
  925. case 0x32:
  926. case 0x33:
  927. case 0x34:
  928. case 0x35:
  929. case 0x36:
  930. case 0x37:
  931. case 0x38:
  932. case 0x39:
  933. case 0x2b:
  934. case 0x2d:
  935. case 0x2e:
  936. return this.getNumber();
  937. case 0x28:
  938. return this.getString();
  939. case 0x2f:
  940. return this.getName();
  941. case 0x5b:
  942. this.nextChar();
  943. return _primitives.Cmd.get("[");
  944. case 0x5d:
  945. this.nextChar();
  946. return _primitives.Cmd.get("]");
  947. case 0x3c:
  948. ch = this.nextChar();
  949. if (ch === 0x3c) {
  950. this.nextChar();
  951. return _primitives.Cmd.get("<<");
  952. }
  953. return this.getHexString();
  954. case 0x3e:
  955. ch = this.nextChar();
  956. if (ch === 0x3e) {
  957. this.nextChar();
  958. return _primitives.Cmd.get(">>");
  959. }
  960. return _primitives.Cmd.get(">");
  961. case 0x7b:
  962. this.nextChar();
  963. return _primitives.Cmd.get("{");
  964. case 0x7d:
  965. this.nextChar();
  966. return _primitives.Cmd.get("}");
  967. case 0x29:
  968. this.nextChar();
  969. throw new _util.FormatError(`Illegal character: ${ch}`);
  970. }
  971. let str = String.fromCharCode(ch);
  972. if (ch < 0x20 || ch > 0x7f) {
  973. const nextCh = this.peekChar();
  974. if (nextCh >= 0x20 && nextCh <= 0x7f) {
  975. this.nextChar();
  976. return _primitives.Cmd.get(str);
  977. }
  978. }
  979. const knownCommands = this.knownCommands;
  980. let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
  981. while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
  982. const possibleCommand = str + String.fromCharCode(ch);
  983. if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
  984. break;
  985. }
  986. if (str.length === 128) {
  987. throw new _util.FormatError(`Command token too long: ${str.length}`);
  988. }
  989. str = possibleCommand;
  990. knownCommandFound = knownCommands && knownCommands[str] !== undefined;
  991. }
  992. if (str === "true") {
  993. return true;
  994. }
  995. if (str === "false") {
  996. return false;
  997. }
  998. if (str === "null") {
  999. return null;
  1000. }
  1001. if (str === "BI") {
  1002. this.beginInlineImagePos = this.stream.pos;
  1003. }
  1004. return _primitives.Cmd.get(str);
  1005. }
  1006. peekObj() {
  1007. const streamPos = this.stream.pos,
  1008. currentChar = this.currentChar,
  1009. beginInlineImagePos = this.beginInlineImagePos;
  1010. let nextObj;
  1011. try {
  1012. nextObj = this.getObj();
  1013. } catch (ex) {
  1014. if (ex instanceof _core_utils.MissingDataException) {
  1015. throw ex;
  1016. }
  1017. (0, _util.warn)(`peekObj: ${ex}`);
  1018. }
  1019. this.stream.pos = streamPos;
  1020. this.currentChar = currentChar;
  1021. this.beginInlineImagePos = beginInlineImagePos;
  1022. return nextObj;
  1023. }
  1024. skipToNextLine() {
  1025. let ch = this.currentChar;
  1026. while (ch >= 0) {
  1027. if (ch === 0x0d) {
  1028. ch = this.nextChar();
  1029. if (ch === 0x0a) {
  1030. this.nextChar();
  1031. }
  1032. break;
  1033. } else if (ch === 0x0a) {
  1034. this.nextChar();
  1035. break;
  1036. }
  1037. ch = this.nextChar();
  1038. }
  1039. }
  1040. }
  1041. exports.Lexer = Lexer;
  /**
   * Reader for the linearization parameter dictionary at the start of a
   * document.
   */
  class Linearization {
    /**
     * Parses the first four tokens of `stream` (two integers, the `obj`
     * command and a dictionary) and extracts the linearization parameters.
     *
     * @param {Object} stream - The data to read; its `length` is compared
     *   with the "L" entry of the dictionary.
     * @returns {Object|null} `null` when the tokens do not form a
     *   dictionary with a positive numeric "Linearized" entry; otherwise an
     *   object with `length`, `hints`, `objectNumberFirst`, `endFirst`,
     *   `numPages`, `mainXRefEntriesOffset` and `pageFirst`.
     * @throws {Error} When a required entry is missing or invalid, or when
     *   "L" differs from `stream.length`.
     */
    static create(stream) {
      // Returns the integer stored under `name`; it must be positive, or
      // non-negative when `allowZeroValue` is set.
      const readInt = (linDict, name, allowZeroValue = false) => {
        const value = linDict.get(name);
        const inRange = allowZeroValue ? value >= 0 : value > 0;
        if (!Number.isInteger(value) || !inRange) {
          throw new Error(`The "${name}" parameter in the linearization ` + "dictionary is invalid.");
        }
        return value;
      };

      // Returns the "H" array; it must hold 2 or 4 positive integers.
      const readHints = linDict => {
        const hints = linDict.get("H");
        if (!Array.isArray(hints) || (hints.length !== 2 && hints.length !== 4)) {
          throw new Error("Hint array in the linearization dictionary is invalid.");
        }
        const index = hints.findIndex(hint => !(Number.isInteger(hint) && hint > 0));
        if (index !== -1) {
          throw new Error(`Hint (${index}) in the linearization dictionary is invalid.`);
        }
        return hints;
      };

      const parser = new Parser({
        lexer: new Lexer(stream),
        xref: null
      });
      const obj1 = parser.getObj();
      const obj2 = parser.getObj();
      const obj3 = parser.getObj();
      const linDict = parser.getObj();

      // Expect "<int> <int> obj" followed by a dictionary.
      const hasObjectHeader =
        Number.isInteger(obj1) &&
        Number.isInteger(obj2) &&
        (0, _primitives.isCmd)(obj3, "obj");
      if (!hasObjectHeader || !(0, _primitives.isDict)(linDict)) {
        return null;
      }
      const linearized = linDict.get("Linearized");
      if (!((0, _util.isNum)(linearized) && linearized > 0)) {
        return null;
      }

      const length = readInt(linDict, "L");
      if (length !== stream.length) {
        throw new Error('The "L" parameter in the linearization dictionary ' + "does not equal the stream length.");
      }

      // Entries are read in this order so that the first invalid one is the
      // one reported.
      const hints = readHints(linDict);
      const objectNumberFirst = readInt(linDict, "O");
      const endFirst = readInt(linDict, "E");
      const numPages = readInt(linDict, "N");
      const mainXRefEntriesOffset = readInt(linDict, "T");
      const pageFirst = linDict.has("P") ? readInt(linDict, "P", true) : 0;

      return {
        length,
        hints,
        objectNumberFirst,
        endFirst,
        numPages,
        mainXRefEntriesOffset,
        pageFirst
      };
    }
  }
  1090. exports.Linearization = Linearization;