parser.js 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * Javascript code in this page
  4. *
  5. * Copyright 2021 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * Javascript code in this page
  21. */
  22. "use strict";
  23. Object.defineProperty(exports, "__esModule", {
  24. value: true
  25. });
  26. exports.Parser = exports.Linearization = exports.Lexer = void 0;
  27. var _util = require("../shared/util.js");
  28. var _primitives = require("./primitives.js");
  29. var _core_utils = require("./core_utils.js");
  30. var _ascii_85_stream = require("./ascii_85_stream.js");
  31. var _ascii_hex_stream = require("./ascii_hex_stream.js");
  32. var _ccitt_stream = require("./ccitt_stream.js");
  33. var _flate_stream = require("./flate_stream.js");
  34. var _jbig2_stream = require("./jbig2_stream.js");
  35. var _jpeg_stream = require("./jpeg_stream.js");
  36. var _jpx_stream = require("./jpx_stream.js");
  37. var _lzw_stream = require("./lzw_stream.js");
  38. var _stream = require("./stream.js");
  39. var _predictor_stream = require("./predictor_stream.js");
  40. var _run_length_stream = require("./run_length_stream.js");
  // Inline image data must be shorter than this many bytes before
  // Parser.makeInlineImage will compute a cache key for it.
  41. const MAX_LENGTH_TO_CACHE = 1000;
  // Exclusive upper bound that Parser.makeInlineImage applies to the length of
  // the inline image dictionary before checksumming it with computeAdler32.
  42. const MAX_ADLER32_LENGTH = 5552;
  /**
   * Computes an Adler-32 style checksum over a byte sequence.
   *
   * The two running sums are only reduced modulo 65521 once, after the loop,
   * so the function relies on the caller keeping inputs short.
   *
   * @param {ArrayLike<number>} bytes - Values to checksum; only the low eight
   *   bits of each element are used.
   * @returns {number} `(high % 65521) << 16 | (low % 65521)` as a signed 32-bit
   *   integer.
   */
  function computeAdler32(bytes) {
    const count = bytes.length;
    let low = 1;
    let high = 0;
    let index = 0;
    while (index < count) {
      low += bytes[index++] & 0xff;
      high += low;
    }
    const highPart = (high % 65521) << 16;
    const lowPart = low % 65521;
    return highPart | lowPart;
  }
  /**
   * Builds higher-level objects (arrays, dictionaries, indirect references,
   * inline images and streams) out of the tokens returned by `lexer.getObj()`.
   *
   * Two tokens of lookahead are kept at all times: `buf1` is the current token
   * and `buf2` is the one that follows it.
   */
  class Parser {
    /**
     * @param {Object} options
     * @param {Object} options.lexer - Token source; `getObj()` is called on it.
     * @param {Object} options.xref - Handed to every `Dict` created here, and
     *   used to resolve filter entries and to record stream statistics.
     * @param {boolean} [options.allowStreams=false] - When true, a dictionary
     *   followed by the `stream` command is returned as a stream.
     * @param {boolean} [options.recoveryMode=false] - When true, an array or
     *   dictionary cut short by EOF is returned as-is instead of throwing.
     */
    constructor({ lexer, xref, allowStreams = false, recoveryMode = false }) {
      this.lexer = lexer;
      this.xref = xref;
      this.allowStreams = allowStreams;
      this.recoveryMode = recoveryMode;
      // Prototype-less object used as a map from cache key to inline image.
      this.imageCache = Object.create(null);
      this.refill();
    }

    /** Loads both lookahead slots from the lexer. */
    refill() {
      this.buf1 = this.lexer.getObj();
      this.buf2 = this.lexer.getObj();
    }

    /**
     * Advances the lookahead by one token. When the token moving into `buf1`
     * is the `ID` command the lexer is NOT asked for another token and `buf2`
     * becomes `null`.
     */
    shift() {
      const upcoming = this.buf2;
      this.buf1 = upcoming;
      if (upcoming instanceof _primitives.Cmd && upcoming.cmd === "ID") {
        this.buf2 = null;
        return;
      }
      this.buf2 = this.lexer.getObj();
    }

    /**
     * Like `shift()`, but reports failure instead of throwing.
     *
     * @returns {boolean} True when the shift succeeded, false when it threw.
     * @throws {MissingDataException} Always propagated to the caller.
     */
    tryShift() {
      try {
        this.shift();
      } catch (ex) {
        if (ex instanceof _core_utils.MissingDataException) {
          throw ex;
        }
        return false;
      }
      return true;
    }

    /**
     * Consumes and returns the next complete object.
     *
     * @param {Object} [cipherTransform] - When given, string tokens are passed
     *   through its `decryptString()`; it is also forwarded to nested objects,
     *   inline images and streams.
     * @returns {*} An array, `Dict`, stream, `Ref`, or the raw token.
     * @throws {ParserEOFException} On EOF inside an array or dictionary, unless
     *   `recoveryMode` is set.
     */
    getObj(cipherTransform = null) {
      const { Cmd, Dict, EOF, isCmd, isName } = _primitives;
      const token = this.buf1;
      this.shift();

      if (token instanceof Cmd) {
        if (token.cmd === "BI") {
          return this.makeInlineImage(cipherTransform);
        }

        if (token.cmd === "[") {
          const items = [];
          while (!isCmd(this.buf1, "]") && this.buf1 !== EOF) {
            items.push(this.getObj(cipherTransform));
          }
          if (this.buf1 === EOF) {
            if (this.recoveryMode) {
              return items;
            }
            throw new _core_utils.ParserEOFException("End of file inside array.");
          }
          this.shift(); // Step over "]".
          return items;
        }

        if (token.cmd === "<<") {
          const dict = new Dict(this.xref);
          while (!isCmd(this.buf1, ">>") && this.buf1 !== EOF) {
            if (!isName(this.buf1)) {
              (0, _util.info)("Malformed dictionary: key must be a name object");
              this.shift();
              continue;
            }
            const key = this.buf1.name;
            this.shift();
            if (this.buf1 === EOF) {
              break;
            }
            dict.set(key, this.getObj(cipherTransform));
          }
          if (this.buf1 === EOF) {
            if (this.recoveryMode) {
              return dict;
            }
            throw new _core_utils.ParserEOFException("End of file inside dictionary.");
          }
          // `buf1` is ">>" here, so `buf2` tells us whether a stream follows.
          if (isCmd(this.buf2, "stream")) {
            return this.allowStreams ? this.makeStream(dict, cipherTransform) : dict;
          }
          this.shift(); // Step over ">>".
          return dict;
        }

        // Any other command is handed back untouched.
        return token;
      }

      if (Number.isInteger(token)) {
        // "<int> <int> R" is an indirect reference.
        if (Number.isInteger(this.buf1) && isCmd(this.buf2, "R")) {
          const ref = _primitives.Ref.get(token, this.buf1);
          this.shift();
          this.shift();
          return ref;
        }
        return token;
      }

      if (typeof token === "string" && cipherTransform) {
        return cipherTransform.decryptString(token);
      }
      return token;
    }

    /**
     * Scans for an `EI` marker that is followed by white-space and then by
     * plausible (ASCII-looking) content, and returns the data length before it.
     *
     * @param {Object} stream - Positioned at the first byte of image data.
     * @returns {number} Number of image data bytes, measured from the position
     *   the stream had on entry.
     */
    findDefaultInlineStreamEnd(stream) {
      const { warn, assert: assertState } = _util;
      const E = 0x45;
      const I = 0x49;
      const SPACE = 0x20;
      const LF = 0xa;
      const CR = 0xd;
      const NUL = 0x0;
      const LOOKAHEAD_LENGTH = 10;
      const { lexer } = this;
      const startPos = stream.pos;
      // 0: nothing matched, 1: "E" seen, 2: "EI" seen.
      let state = 0;
      let ch;
      let maybeEIPos;

      while ((ch = stream.getByte()) !== -1) {
        if (state === 0) {
          state = ch === E ? 1 : 0;
          continue;
        }
        if (state === 1) {
          state = ch === I ? 2 : 0;
          continue;
        }
        assertState(state === 2, "findDefaultInlineStreamEnd - invalid state.");

        if (ch !== SPACE && ch !== LF && ch !== CR) {
          state = 0;
          continue;
        }
        maybeEIPos = stream.pos;

        // Reject the candidate when the bytes after it do not look like text.
        const followingBytes = stream.peekBytes(LOOKAHEAD_LENGTH);
        for (let i = 0, ii = followingBytes.length; i < ii; i++) {
          ch = followingBytes[i];
          if (ch === NUL && followingBytes[i + 1] !== NUL) {
            // A lone NUL byte is tolerated.
            continue;
          }
          if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7f)) {
            state = 0;
            break;
          }
        }
        if (state !== 2) {
          continue;
        }

        // Reject the candidate when it is followed by an unknown command.
        if (lexer.knownCommands) {
          const nextObj = lexer.peekObj();
          if (nextObj instanceof _primitives.Cmd && !lexer.knownCommands[nextObj.cmd]) {
            state = 0;
          }
        } else {
          warn("findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined.");
        }
        if (state === 2) {
          break;
        }
      }

      if (ch === -1) {
        warn("findDefaultInlineStreamEnd: Reached the end of the stream without finding a valid EI marker");
        if (maybeEIPos) {
          warn('... trying to recover by using the last "EI" occurrence.');
          stream.skip(-(stream.pos - maybeEIPos));
        }
      }

      // Look at the byte four positions back; when it is not white-space the
      // data is taken to end one byte later.
      let endOffset = 4;
      stream.skip(-endOffset);
      ch = stream.peekByte();
      stream.skip(endOffset);
      if (!(0, _core_utils.isWhiteSpace)(ch)) {
        endOffset--;
      }
      return stream.pos - endOffset - startPos;
    }

    /**
     * Walks JPEG markers until the 0xFFD9 marker is reached, then skips past
     * the following `EI`. Falls back to `findDefaultInlineStreamEnd` when the
     * stream ends first.
     *
     * @param {Object} stream - Positioned at the first byte of image data.
     * @returns {number} Number of image data bytes.
     */
    findDCTDecodeInlineStreamEnd(stream) {
      // Markers whose segment starts with a 16-bit length field.
      const hasSegmentLength = (marker) =>
        (marker >= 0xc0 && marker <= 0xcf && marker !== 0xc8) ||
        (marker >= 0xda && marker <= 0xef) ||
        marker === 0xfe;

      const startPos = stream.pos;
      let b;
      while ((b = stream.getByte()) !== -1) {
        if (b !== 0xff) {
          continue;
        }
        const marker = stream.getByte();
        if (marker === 0xd9) {
          break;
        }
        if (marker === 0xff) {
          // Re-read this 0xff as the start of the next marker.
          stream.skip(-1);
        } else if (hasSegmentLength(marker)) {
          const markerLength = stream.getUint16();
          if (markerLength > 2) {
            stream.skip(markerLength - 2);
          } else {
            // Implausible length: rewind over the two bytes just read.
            stream.skip(-2);
          }
        }
      }

      const length = stream.pos - startPos;
      if (b === -1) {
        (0, _util.warn)("Inline DCTDecode image stream: EOI marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Searches for the `~>` end-of-data marker (white-space is allowed between
     * the two characters), then skips past the following `EI`. Falls back to
     * `findDefaultInlineStreamEnd` when the stream ends first.
     *
     * @param {Object} stream - Positioned at the first byte of image data.
     * @returns {number} Number of image data bytes.
     */
    findASCII85DecodeInlineStreamEnd(stream) {
      const { isWhiteSpace } = _core_utils;
      const TILDE = 0x7e;
      const GT = 0x3e;
      const startPos = stream.pos;
      let ch;

      while ((ch = stream.getByte()) !== -1) {
        if (ch !== TILDE) {
          continue;
        }
        const tildePos = stream.pos;
        ch = stream.peekByte();
        while (isWhiteSpace(ch)) {
          stream.skip();
          ch = stream.peekByte();
        }
        if (ch === GT) {
          stream.skip();
          break;
        }
        // "~", white-space, then "EI": the ">" is missing but the image ended.
        if (stream.pos > tildePos) {
          const maybeEI = stream.peekBytes(2);
          if (maybeEI[0] === 0x45 && maybeEI[1] === 0x49) {
            break;
          }
        }
      }

      const length = stream.pos - startPos;
      if (ch === -1) {
        (0, _util.warn)("Inline ASCII85Decode image stream: EOD marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Searches for the `>` end-of-data marker, then skips past the following
     * `EI`. Falls back to `findDefaultInlineStreamEnd` when the stream ends
     * first.
     *
     * @param {Object} stream - Positioned at the first byte of image data.
     * @returns {number} Number of image data bytes, including the `>`.
     */
    findASCIIHexDecodeInlineStreamEnd(stream) {
      const GT = 0x3e;
      const startPos = stream.pos;
      let ch;
      do {
        ch = stream.getByte();
      } while (ch !== -1 && ch !== GT);

      const length = stream.pos - startPos;
      if (ch === -1) {
        (0, _util.warn)("Inline ASCIIHexDecode image stream: EOD marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Reads bytes until `EI` plus one further byte have been consumed, or the
     * stream ends.
     *
     * @param {Object} stream
     */
    inlineStreamSkipEI(stream) {
      const E = 0x45;
      const I = 0x49;
      // 0: nothing matched, 1: "E" seen, 2: "EI" seen.
      let state = 0;
      let ch;
      while ((ch = stream.getByte()) !== -1) {
        if (state === 2) {
          break;
        }
        if (state === 0) {
          state = ch === E ? 1 : 0;
        } else {
          state = ch === I ? 2 : 0;
        }
      }
    }

    /**
     * Parses an inline image: the key/value pairs up to `ID`, then the image
     * data, whose end is located according to the first filter name.
     * Sufficiently small images are cached by a checksum of data + dictionary.
     *
     * @param {Object} [cipherTransform]
     * @returns {Object} The (possibly cached) filtered image stream.
     * @throws {FormatError} When a dictionary key is not a name.
     */
    makeInlineImage(cipherTransform) {
      const { Cmd, Dict, EOF, isCmd, isName } = _primitives;
      const { lexer } = this;
      const { stream } = lexer;

      // Read the inline image dictionary.
      const dict = new Dict(this.xref);
      let dictLength;
      while (!isCmd(this.buf1, "ID") && this.buf1 !== EOF) {
        if (!isName(this.buf1)) {
          throw new _util.FormatError("Dictionary key must be a name object");
        }
        const key = this.buf1.name;
        this.shift();
        if (this.buf1 === EOF) {
          break;
        }
        dict.set(key, this.getObj(cipherTransform));
      }
      if (lexer.beginInlineImagePos !== -1) {
        dictLength = stream.pos - lexer.beginInlineImagePos;
      }

      // Only the first filter decides how the end of the data is located.
      const filter = dict.get("F", "Filter");
      let filterName;
      if (isName(filter)) {
        filterName = filter.name;
      } else if (Array.isArray(filter)) {
        const filterZero = this.xref.fetchIfRef(filter[0]);
        if (isName(filterZero)) {
          filterName = filterZero.name;
        }
      }

      const startPos = stream.pos;
      let length;
      if (filterName === "DCT" || filterName === "DCTDecode") {
        length = this.findDCTDecodeInlineStreamEnd(stream);
      } else if (filterName === "A85" || filterName === "ASCII85Decode") {
        length = this.findASCII85DecodeInlineStreamEnd(stream);
      } else if (filterName === "AHx" || filterName === "ASCIIHexDecode") {
        length = this.findASCIIHexDecodeInlineStreamEnd(stream);
      } else {
        length = this.findDefaultInlineStreamEnd(stream);
      }
      let imageStream = stream.makeSubStream(startPos, length, dict);

      // `dictLength` stays undefined when the start position is unknown, which
      // makes the comparison below false and disables caching.
      let cacheKey;
      if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) {
        const imageBytes = imageStream.getBytes();
        imageStream.reset();

        const initialStreamPos = stream.pos;
        // Temporarily rewind to read the raw dictionary bytes.
        stream.pos = lexer.beginInlineImagePos;
        const dictBytes = stream.getBytes(dictLength);
        stream.pos = initialStreamPos;

        cacheKey = `${computeAdler32(imageBytes)}_${computeAdler32(dictBytes)}`;
        const cacheEntry = this.imageCache[cacheKey];
        if (cacheEntry !== undefined) {
          this.buf2 = Cmd.get("EI");
          this.shift();
          cacheEntry.reset();
          return cacheEntry;
        }
      }

      if (cipherTransform) {
        imageStream = cipherTransform.createStream(imageStream, length);
      }
      imageStream = this.filter(imageStream, dict, length);
      imageStream.dict = dict;
      if (cacheKey !== undefined) {
        imageStream.cacheKey = `inline_${length}_${cacheKey}`;
        this.imageCache[cacheKey] = imageStream;
      }

      this.buf2 = Cmd.get("EI");
      this.shift();
      return imageStream;
    }

    /**
     * Searches the lexer's stream, from `startPos`, for `signature`.
     * On success the stream is left positioned at the match.
     *
     * @param {number} startPos - Position at which the search begins.
     * @param {Uint8Array} signature - Byte sequence to look for.
     * @returns {number} Offset of the match relative to `startPos`, or -1.
     */
    _findStreamLength(startPos, signature) {
      const { stream } = this.lexer;
      stream.pos = startPos;
      const SCAN_BLOCK_LENGTH = 2048;
      const signatureLength = signature.length;

      while (stream.pos < stream.end) {
        const scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH);
        const scanLength = scanBytes.length - signatureLength;
        if (scanLength <= 0) {
          break;
        }
        for (let pos = 0; pos < scanLength; pos++) {
          let matched = 0;
          while (matched < signatureLength && scanBytes[pos + matched] === signature[matched]) {
            matched++;
          }
          if (matched >= signatureLength) {
            stream.pos += pos;
            return stream.pos - startPos;
          }
        }
        // Advance by less than a full block so that a signature straddling
        // two blocks is still found.
        stream.pos += scanLength;
      }
      return -1;
    }

    /**
     * Creates the stream that follows `dict`. The `Length` entry is trusted
     * first; when `endstream` is not found where it should be, the data is
     * scanned for "endstream" (or that word with its last byte missing).
     *
     * @param {Dict} dict - The stream dictionary.
     * @param {Object} [cipherTransform]
     * @returns {Object} The filtered stream, with `dict` attached.
     * @throws {FormatError} When no `endstream` command can be located.
     */
    makeStream(dict, cipherTransform) {
      const { info } = _util;
      const { lexer } = this;
      let stream = lexer.stream;

      lexer.skipToNextLine();
      const startPos = stream.pos - 1;

      let length = dict.get("Length");
      if (!Number.isInteger(length)) {
        info(`Bad length "${length && length.toString()}" in stream.`);
        length = 0;
      }

      // Jump to where the data should end and check for "endstream".
      stream.pos = startPos + length;
      lexer.nextChar();

      if (this.tryShift() && (0, _primitives.isCmd)(this.buf2, "endstream")) {
        this.shift();
      } else {
        // Bytes of the word "endstream".
        const ENDSTREAM_SIGNATURE = new Uint8Array([0x65, 0x6e, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d]);
        let actualLength = this._findStreamLength(startPos, ENDSTREAM_SIGNATURE);
        if (actualLength < 0) {
          // Retry with the signature shortened by up to MAX_TRUNCATION bytes.
          const MAX_TRUNCATION = 1;
          for (let i = 1; i <= MAX_TRUNCATION; i++) {
            const end = ENDSTREAM_SIGNATURE.length - i;
            const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);
            const maybeLength = this._findStreamLength(startPos, TRUNCATED_SIGNATURE);
            if (maybeLength < 0) {
              continue;
            }
            // Only accept the match when white-space follows it.
            const lastByte = stream.peekBytes(end + 1)[end];
            if ((0, _core_utils.isWhiteSpace)(lastByte)) {
              info(`Found "${(0, _util.bytesToString)(TRUNCATED_SIGNATURE)}" when searching for endstream command.`);
              actualLength = maybeLength;
            }
            break;
          }
          if (actualLength < 0) {
            throw new _util.FormatError("Missing endstream command.");
          }
        }
        length = actualLength;
        lexer.nextChar();
        this.shift();
        this.shift();
      }
      this.shift();

      stream = stream.makeSubStream(startPos, length, dict);
      if (cipherTransform) {
        stream = cipherTransform.createStream(stream, length);
      }
      stream = this.filter(stream, dict, length);
      stream.dict = dict;
      return stream;
    }

    /**
     * Wraps `stream` in the decoder(s) named by the dictionary's `F`/`Filter`
     * entry, pairing each with its `DP`/`DecodeParms` entry.
     *
     * @param {Object} stream - The raw stream.
     * @param {Dict} dict - The stream dictionary.
     * @param {number} length - Length hint; only given to the first filter.
     * @returns {Object} The decoded stream, or `stream` itself when the filter
     *   entry is neither a name nor an array.
     * @throws {FormatError} When an array element is not a name.
     */
    filter(stream, dict, length) {
      const { isName } = _primitives;
      const filterEntry = dict.get("F", "Filter");
      const paramsEntry = dict.get("DP", "DecodeParms");
      const paramsIsArray = Array.isArray(paramsEntry);

      if (isName(filterEntry)) {
        if (paramsIsArray) {
          (0, _util.warn)("/DecodeParms should not be an Array, when /Filter is a Name.");
        }
        return this.makeFilter(stream, filterEntry.name, length, paramsEntry);
      }
      if (!Array.isArray(filterEntry)) {
        return stream;
      }

      let result = stream;
      let maybeLength = length;
      for (let i = 0, ii = filterEntry.length; i < ii; ++i) {
        const current = this.xref.fetchIfRef(filterEntry[i]);
        if (!isName(current)) {
          throw new _util.FormatError(`Bad filter name "${current}"`);
        }
        const currentParams =
          paramsIsArray && i in paramsEntry ? this.xref.fetchIfRef(paramsEntry[i]) : null;
        result = this.makeFilter(result, current.name, maybeLength, currentParams);
        // The length only describes the outermost (encoded) data.
        maybeLength = null;
      }
      return result;
    }

    /**
     * Wraps `stream` in the decoder for a single filter.
     *
     * @param {Object} stream - The stream to decode.
     * @param {string} name - Filter name, in full or abbreviated form.
     * @param {number|null} maybeLength - Length hint for the decoder.
     * @param {Object} [params] - Decode parameters for this filter.
     * @returns {Object} The decoding stream; `stream` itself for an unsupported
     *   filter; a `NullStream` for zero-length data or when construction fails.
     * @throws {MissingDataException} Always propagated to the caller.
     */
    makeFilter(stream, name, maybeLength, params) {
      const { warn } = _util;
      if (maybeLength === 0) {
        warn(`Empty "${name}" stream.`);
        return new _stream.NullStream();
      }
      const xrefStats = this.xref.stats;

      try {
        switch (name) {
          case "Fl":
          case "FlateDecode": {
            xrefStats.addStreamType(_util.StreamType.FLATE);
            const flate = new _flate_stream.FlateStream(stream, maybeLength);
            return params
              ? new _predictor_stream.PredictorStream(flate, maybeLength, params)
              : flate;
          }
          case "LZW":
          case "LZWDecode": {
            xrefStats.addStreamType(_util.StreamType.LZW);
            const earlyChange =
              params && params.has("EarlyChange") ? params.get("EarlyChange") : 1;
            const lzw = new _lzw_stream.LZWStream(stream, maybeLength, earlyChange);
            return params
              ? new _predictor_stream.PredictorStream(lzw, maybeLength, params)
              : lzw;
          }
          case "DCT":
          case "DCTDecode":
            xrefStats.addStreamType(_util.StreamType.DCT);
            return new _jpeg_stream.JpegStream(stream, maybeLength, params);
          case "JPX":
          case "JPXDecode":
            xrefStats.addStreamType(_util.StreamType.JPX);
            return new _jpx_stream.JpxStream(stream, maybeLength, params);
          case "A85":
          case "ASCII85Decode":
            xrefStats.addStreamType(_util.StreamType.A85);
            return new _ascii_85_stream.Ascii85Stream(stream, maybeLength);
          case "AHx":
          case "ASCIIHexDecode":
            xrefStats.addStreamType(_util.StreamType.AHX);
            return new _ascii_hex_stream.AsciiHexStream(stream, maybeLength);
          case "CCF":
          case "CCITTFaxDecode":
            xrefStats.addStreamType(_util.StreamType.CCF);
            return new _ccitt_stream.CCITTFaxStream(stream, maybeLength, params);
          case "RL":
          case "RunLengthDecode":
            xrefStats.addStreamType(_util.StreamType.RLX);
            return new _run_length_stream.RunLengthStream(stream, maybeLength);
          case "JBIG2Decode":
            xrefStats.addStreamType(_util.StreamType.JBIG);
            return new _jbig2_stream.Jbig2Stream(stream, maybeLength, params);
        }
        warn(`Filter "${name}" is not supported.`);
        return stream;
      } catch (ex) {
        if (ex instanceof _core_utils.MissingDataException) {
          throw ex;
        }
        warn(`Invalid stream: "${ex}"`);
        return new _stream.NullStream();
      }
    }
  }
  621. exports.Parser = Parser;
  622. const specialChars = [1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
  /**
   * Converts the character code of a hexadecimal digit to its numeric value.
   *
   * @param {number} ch - Character code to convert.
   * @returns {number} 0-15 for "0"-"9", "A"-"F" and "a"-"f"; -1 otherwise.
   */
  function toHexDigit(ch) {
    const isDecimal = ch >= 0x30 && ch <= 0x39;
    if (isDecimal) {
      return ch & 0x0f;
    }
    const isUpperHex = ch >= 0x41 && ch <= 0x46;
    const isLowerHex = ch >= 0x61 && ch <= 0x66;
    if (isUpperHex || isLowerHex) {
      // The low nibble of "A"/"a" is 1, so adding 9 yields 10.
      return (ch & 0x0f) + 9;
    }
    return -1;
  }
  632. class Lexer {
  633. constructor(stream, knownCommands = null) {
  634. this.stream = stream;
  635. this.nextChar();
  636. this.strBuf = [];
  637. this.knownCommands = knownCommands;
  638. this._hexStringNumWarn = 0;
  639. this.beginInlineImagePos = -1;
  640. }
  641. nextChar() {
  642. return this.currentChar = this.stream.getByte();
  643. }
  644. peekChar() {
  645. return this.stream.peekByte();
  646. }
  647. getNumber() {
  648. let ch = this.currentChar;
  649. let eNotation = false;
  650. let divideBy = 0;
  651. let sign = 0;
  652. if (ch === 0x2d) {
  653. sign = -1;
  654. ch = this.nextChar();
  655. if (ch === 0x2d) {
  656. ch = this.nextChar();
  657. }
  658. } else if (ch === 0x2b) {
  659. sign = 1;
  660. ch = this.nextChar();
  661. }
  662. if (ch === 0x0a || ch === 0x0d) {
  663. do {
  664. ch = this.nextChar();
  665. } while (ch === 0x0a || ch === 0x0d);
  666. }
  667. if (ch === 0x2e) {
  668. divideBy = 10;
  669. ch = this.nextChar();
  670. }
  671. if (ch < 0x30 || ch > 0x39) {
  672. if (divideBy === 10 && sign === 0 && ((0, _core_utils.isWhiteSpace)(ch) || ch === -1)) {
  673. (0, _util.warn)("Lexer.getNumber - treating a single decimal point as zero.");
  674. return 0;
  675. }
  676. throw new _util.FormatError(`Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`);
  677. }
  678. sign = sign || 1;
  679. let baseValue = ch - 0x30;
  680. let powerValue = 0;
  681. let powerValueSign = 1;
  682. while ((ch = this.nextChar()) >= 0) {
  683. if (ch >= 0x30 && ch <= 0x39) {
  684. const currentDigit = ch - 0x30;
  685. if (eNotation) {
  686. powerValue = powerValue * 10 + currentDigit;
  687. } else {
  688. if (divideBy !== 0) {
  689. divideBy *= 10;
  690. }
  691. baseValue = baseValue * 10 + currentDigit;
  692. }
  693. } else if (ch === 0x2e) {
  694. if (divideBy === 0) {
  695. divideBy = 1;
  696. } else {
  697. break;
  698. }
  699. } else if (ch === 0x2d) {
  700. (0, _util.warn)("Badly formatted number: minus sign in the middle");
  701. } else if (ch === 0x45 || ch === 0x65) {
  702. ch = this.peekChar();
  703. if (ch === 0x2b || ch === 0x2d) {
  704. powerValueSign = ch === 0x2d ? -1 : 1;
  705. this.nextChar();
  706. } else if (ch < 0x30 || ch > 0x39) {
  707. break;
  708. }
  709. eNotation = true;
  710. } else {
  711. break;
  712. }
  713. }
  714. if (divideBy !== 0) {
  715. baseValue /= divideBy;
  716. }
  717. if (eNotation) {
  718. baseValue *= 10 ** (powerValueSign * powerValue);
  719. }
  720. return sign * baseValue;
  721. }
  722. getString() {
  723. let numParen = 1;
  724. let done = false;
  725. const strBuf = this.strBuf;
  726. strBuf.length = 0;
  727. let ch = this.nextChar();
  728. while (true) {
  729. let charBuffered = false;
  730. switch (ch | 0) {
  731. case -1:
  732. (0, _util.warn)("Unterminated string");
  733. done = true;
  734. break;
  735. case 0x28:
  736. ++numParen;
  737. strBuf.push("(");
  738. break;
  739. case 0x29:
  740. if (--numParen === 0) {
  741. this.nextChar();
  742. done = true;
  743. } else {
  744. strBuf.push(")");
  745. }
  746. break;
  747. case 0x5c:
  748. ch = this.nextChar();
  749. switch (ch) {
  750. case -1:
  751. (0, _util.warn)("Unterminated string");
  752. done = true;
  753. break;
  754. case 0x6e:
  755. strBuf.push("\n");
  756. break;
  757. case 0x72:
  758. strBuf.push("\r");
  759. break;
  760. case 0x74:
  761. strBuf.push("\t");
  762. break;
  763. case 0x62:
  764. strBuf.push("\b");
  765. break;
  766. case 0x66:
  767. strBuf.push("\f");
  768. break;
  769. case 0x5c:
  770. case 0x28:
  771. case 0x29:
  772. strBuf.push(String.fromCharCode(ch));
  773. break;
  774. case 0x30:
  775. case 0x31:
  776. case 0x32:
  777. case 0x33:
  778. case 0x34:
  779. case 0x35:
  780. case 0x36:
  781. case 0x37:
  782. let x = ch & 0x0f;
  783. ch = this.nextChar();
  784. charBuffered = true;
  785. if (ch >= 0x30 && ch <= 0x37) {
  786. x = (x << 3) + (ch & 0x0f);
  787. ch = this.nextChar();
  788. if (ch >= 0x30 && ch <= 0x37) {
  789. charBuffered = false;
  790. x = (x << 3) + (ch & 0x0f);
  791. }
  792. }
  793. strBuf.push(String.fromCharCode(x));
  794. break;
  795. case 0x0d:
  796. if (this.peekChar() === 0x0a) {
  797. this.nextChar();
  798. }
  799. break;
  800. case 0x0a:
  801. break;
  802. default:
  803. strBuf.push(String.fromCharCode(ch));
  804. break;
  805. }
  806. break;
  807. default:
  808. strBuf.push(String.fromCharCode(ch));
  809. break;
  810. }
  811. if (done) {
  812. break;
  813. }
  814. if (!charBuffered) {
  815. ch = this.nextChar();
  816. }
  817. }
  818. return strBuf.join("");
  819. }
  820. getName() {
  821. let ch, previousCh;
  822. const strBuf = this.strBuf;
  823. strBuf.length = 0;
  824. while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
  825. if (ch === 0x23) {
  826. ch = this.nextChar();
  827. if (specialChars[ch]) {
  828. (0, _util.warn)("Lexer_getName: " + "NUMBER SIGN (#) should be followed by a hexadecimal number.");
  829. strBuf.push("#");
  830. break;
  831. }
  832. const x = toHexDigit(ch);
  833. if (x !== -1) {
  834. previousCh = ch;
  835. ch = this.nextChar();
  836. const x2 = toHexDigit(ch);
  837. if (x2 === -1) {
  838. (0, _util.warn)(`Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` + "in hexadecimal number.");
  839. strBuf.push("#", String.fromCharCode(previousCh));
  840. if (specialChars[ch]) {
  841. break;
  842. }
  843. strBuf.push(String.fromCharCode(ch));
  844. continue;
  845. }
  846. strBuf.push(String.fromCharCode(x << 4 | x2));
  847. } else {
  848. strBuf.push("#", String.fromCharCode(ch));
  849. }
  850. } else {
  851. strBuf.push(String.fromCharCode(ch));
  852. }
  853. }
  854. if (strBuf.length > 127) {
  855. (0, _util.warn)(`Name token is longer than allowed by the spec: ${strBuf.length}`);
  856. }
  857. return _primitives.Name.get(strBuf.join(""));
  858. }
  859. _hexStringWarn(ch) {
  860. const MAX_HEX_STRING_NUM_WARN = 5;
  861. if (this._hexStringNumWarn++ === MAX_HEX_STRING_NUM_WARN) {
  862. (0, _util.warn)("getHexString - ignoring additional invalid characters.");
  863. return;
  864. }
  865. if (this._hexStringNumWarn > MAX_HEX_STRING_NUM_WARN) {
  866. return;
  867. }
  868. (0, _util.warn)(`getHexString - ignoring invalid character: ${ch}`);
  869. }
  870. getHexString() {
  871. const strBuf = this.strBuf;
  872. strBuf.length = 0;
  873. let ch = this.currentChar;
  874. let isFirstHex = true;
  875. let firstDigit, secondDigit;
  876. this._hexStringNumWarn = 0;
  877. while (true) {
  878. if (ch < 0) {
  879. (0, _util.warn)("Unterminated hex string");
  880. break;
  881. } else if (ch === 0x3e) {
  882. this.nextChar();
  883. break;
  884. } else if (specialChars[ch] === 1) {
  885. ch = this.nextChar();
  886. continue;
  887. } else {
  888. if (isFirstHex) {
  889. firstDigit = toHexDigit(ch);
  890. if (firstDigit === -1) {
  891. this._hexStringWarn(ch);
  892. ch = this.nextChar();
  893. continue;
  894. }
  895. } else {
  896. secondDigit = toHexDigit(ch);
  897. if (secondDigit === -1) {
  898. this._hexStringWarn(ch);
  899. ch = this.nextChar();
  900. continue;
  901. }
  902. strBuf.push(String.fromCharCode(firstDigit << 4 | secondDigit));
  903. }
  904. isFirstHex = !isFirstHex;
  905. ch = this.nextChar();
  906. }
  907. }
  908. return strBuf.join("");
  909. }
  910. getObj() {
  911. let comment = false;
  912. let ch = this.currentChar;
  913. while (true) {
  914. if (ch < 0) {
  915. return _primitives.EOF;
  916. }
  917. if (comment) {
  918. if (ch === 0x0a || ch === 0x0d) {
  919. comment = false;
  920. }
  921. } else if (ch === 0x25) {
  922. comment = true;
  923. } else if (specialChars[ch] !== 1) {
  924. break;
  925. }
  926. ch = this.nextChar();
  927. }
  928. switch (ch | 0) {
  929. case 0x30:
  930. case 0x31:
  931. case 0x32:
  932. case 0x33:
  933. case 0x34:
  934. case 0x35:
  935. case 0x36:
  936. case 0x37:
  937. case 0x38:
  938. case 0x39:
  939. case 0x2b:
  940. case 0x2d:
  941. case 0x2e:
  942. return this.getNumber();
  943. case 0x28:
  944. return this.getString();
  945. case 0x2f:
  946. return this.getName();
  947. case 0x5b:
  948. this.nextChar();
  949. return _primitives.Cmd.get("[");
  950. case 0x5d:
  951. this.nextChar();
  952. return _primitives.Cmd.get("]");
  953. case 0x3c:
  954. ch = this.nextChar();
  955. if (ch === 0x3c) {
  956. this.nextChar();
  957. return _primitives.Cmd.get("<<");
  958. }
  959. return this.getHexString();
  960. case 0x3e:
  961. ch = this.nextChar();
  962. if (ch === 0x3e) {
  963. this.nextChar();
  964. return _primitives.Cmd.get(">>");
  965. }
  966. return _primitives.Cmd.get(">");
  967. case 0x7b:
  968. this.nextChar();
  969. return _primitives.Cmd.get("{");
  970. case 0x7d:
  971. this.nextChar();
  972. return _primitives.Cmd.get("}");
  973. case 0x29:
  974. this.nextChar();
  975. throw new _util.FormatError(`Illegal character: ${ch}`);
  976. }
  977. let str = String.fromCharCode(ch);
  978. if (ch < 0x20 || ch > 0x7f) {
  979. const nextCh = this.peekChar();
  980. if (nextCh >= 0x20 && nextCh <= 0x7f) {
  981. this.nextChar();
  982. return _primitives.Cmd.get(str);
  983. }
  984. }
  985. const knownCommands = this.knownCommands;
  986. let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
  987. while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
  988. const possibleCommand = str + String.fromCharCode(ch);
  989. if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
  990. break;
  991. }
  992. if (str.length === 128) {
  993. throw new _util.FormatError(`Command token too long: ${str.length}`);
  994. }
  995. str = possibleCommand;
  996. knownCommandFound = knownCommands && knownCommands[str] !== undefined;
  997. }
  998. if (str === "true") {
  999. return true;
  1000. }
  1001. if (str === "false") {
  1002. return false;
  1003. }
  1004. if (str === "null") {
  1005. return null;
  1006. }
  1007. if (str === "BI") {
  1008. this.beginInlineImagePos = this.stream.pos;
  1009. }
  1010. return _primitives.Cmd.get(str);
  1011. }
  1012. peekObj() {
  1013. const streamPos = this.stream.pos,
  1014. currentChar = this.currentChar,
  1015. beginInlineImagePos = this.beginInlineImagePos;
  1016. let nextObj;
  1017. try {
  1018. nextObj = this.getObj();
  1019. } catch (ex) {
  1020. if (ex instanceof _core_utils.MissingDataException) {
  1021. throw ex;
  1022. }
  1023. (0, _util.warn)(`peekObj: ${ex}`);
  1024. }
  1025. this.stream.pos = streamPos;
  1026. this.currentChar = currentChar;
  1027. this.beginInlineImagePos = beginInlineImagePos;
  1028. return nextObj;
  1029. }
  1030. skipToNextLine() {
  1031. let ch = this.currentChar;
  1032. while (ch >= 0) {
  1033. if (ch === 0x0d) {
  1034. ch = this.nextChar();
  1035. if (ch === 0x0a) {
  1036. this.nextChar();
  1037. }
  1038. break;
  1039. } else if (ch === 0x0a) {
  1040. this.nextChar();
  1041. break;
  1042. }
  1043. ch = this.nextChar();
  1044. }
  1045. }
  1046. }
  1047. exports.Lexer = Lexer;
  /**
   * Reader for the linearization parameter dictionary of a PDF file.
   */
  class Linearization {
    /**
     * Reads the first four objects from `stream` and, if they form
     * `<integer> <integer> obj <dictionary>` with a positive numeric
     * "Linearized" entry, validates and extracts the linearization parameters.
     *
     * @param {Object} stream - Input stream; its `length` must equal the "L"
     *   entry of the dictionary.
     * @returns {Object|null} The extracted parameters, or `null` when the
     *   stream does not start with a linearization dictionary.
     * @throws {Error} When a required entry is missing or invalid, or when "L"
     *   differs from `stream.length`.
     */
    static create(stream) {
      const parser = new Parser({
        lexer: new Lexer(stream),
        xref: null,
      });
      const objNumber = parser.getObj();
      const generation = parser.getObj();
      const keyword = parser.getObj();
      const linDict = parser.getObj();

      const hasObjHeader =
        Number.isInteger(objNumber) &&
        Number.isInteger(generation) &&
        (0, _primitives.isCmd)(keyword, "obj");
      if (!hasObjHeader || !(0, _primitives.isDict)(linDict)) {
        return null;
      }
      const linearized = linDict.get("Linearized");
      if (!((0, _util.isNum)(linearized) && linearized > 0)) {
        return null;
      }

      // Reads an integer entry; zero is accepted only when `allowZero` is set.
      const readInt = (name, allowZero = false) => {
        const value = linDict.get(name);
        const minimum = allowZero ? 0 : 1;
        if (!Number.isInteger(value) || value < minimum) {
          throw new Error(`The "${name}" parameter in the linearization dictionary is invalid.`);
        }
        return value;
      };

      // Reads the "H" entry: an array of two or four positive integers.
      const readHints = () => {
        const hints = linDict.get("H");
        if (!Array.isArray(hints) || (hints.length !== 2 && hints.length !== 4)) {
          throw new Error("Hint array in the linearization dictionary is invalid.");
        }
        const badIndex = hints.findIndex(hint => !(Number.isInteger(hint) && hint > 0));
        if (badIndex !== -1) {
          throw new Error(`Hint (${badIndex}) in the linearization dictionary is invalid.`);
        }
        return hints;
      };

      const length = readInt("L");
      if (length !== stream.length) {
        throw new Error('The "L" parameter in the linearization dictionary does not equal the stream length.');
      }

      // Entries are read in this order so that the first invalid one is the
      // one reported.
      return {
        length,
        hints: readHints(),
        objectNumberFirst: readInt("O"),
        endFirst: readInt("E"),
        numPages: readInt("N"),
        mainXRefEntriesOffset: readInt("T"),
        pageFirst: linDict.has("P") ? readInt("P", true) : 0,
      };
    }
  }
  1096. exports.Linearization = Linearization;