parser.js 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * JavaScript code in this page
  4. *
  5. * Copyright 2022 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * JavaScript code in this page
  21. */
  22. "use strict";
  23. Object.defineProperty(exports, "__esModule", {
  24. value: true
  25. });
  26. exports.Parser = exports.Linearization = exports.Lexer = void 0;
  27. var _util = require("../shared/util.js");
  28. var _primitives = require("./primitives.js");
  29. var _core_utils = require("./core_utils.js");
  30. var _ascii_85_stream = require("./ascii_85_stream.js");
  31. var _ascii_hex_stream = require("./ascii_hex_stream.js");
  32. var _ccitt_stream = require("./ccitt_stream.js");
  33. var _flate_stream = require("./flate_stream.js");
  34. var _jbig2_stream = require("./jbig2_stream.js");
  35. var _jpeg_stream = require("./jpeg_stream.js");
  36. var _jpx_stream = require("./jpx_stream.js");
  37. var _lzw_stream = require("./lzw_stream.js");
  38. var _stream = require("./stream.js");
  39. var _predictor_stream = require("./predictor_stream.js");
  40. var _run_length_stream = require("./run_length_stream.js");
  // Inline images whose data is at least this many bytes long are not cached.
  const MAX_LENGTH_TO_CACHE = 1000;
  // Inline-image dictionaries at least this many bytes long are not
  // checksummed, and hence the image is not cached.
  const MAX_ADLER32_LENGTH = 5552;

  /**
   * Computes an Adler-32 style checksum of `bytes`.
   *
   * The running sums are only reduced modulo 65521 once, after the loop, and
   * the two halves are combined with 32-bit bitwise operators, so the result
   * is a *signed* 32-bit integer (it is negative whenever the high half is
   * 0x8000 or larger).
   *
   * @param {ArrayLike<number>} bytes - Values to checksum; only the low eight
   *   bits of each element are used.
   * @returns {number} The checksum as a signed 32-bit integer.
   */
  function computeAdler32(bytes) {
    const MODULUS = 65521;
    let low = 1;
    let high = 0;
    for (let index = 0, count = bytes.length; index < count; index++) {
      low += bytes[index] & 0xff;
      high += low;
    }
    const highHalf = high % MODULUS;
    const lowHalf = low % MODULUS;
    return (highHalf << 16) | lowHalf;
  }
  /**
   * Object-level parser layered on top of a `Lexer`.
   *
   * The parser keeps a two-token look-ahead (`buf1`, `buf2`) and turns the
   * token sequence into arrays, dictionaries, references, streams and inline
   * images.
   */
  class Parser {
    /**
     * @param {Object} params
     * @param {Object} params.lexer - Token source; must provide `getObj()` and
     *   expose the underlying `stream`.
     * @param {Object} params.xref - Cross-reference table, handed to every
     *   created `Dict` and used to resolve indirect filter entries.
     * @param {boolean} [params.allowStreams] - When false, a dictionary that is
     *   followed by the `stream` keyword is returned as a plain dictionary.
     * @param {boolean} [params.recoveryMode] - When true, an array/dictionary
     *   that is cut short by EOF is returned as-is instead of throwing.
     */
    constructor({ lexer, xref, allowStreams = false, recoveryMode = false }) {
      this.lexer = lexer;
      this.xref = xref;
      this.allowStreams = allowStreams;
      this.recoveryMode = recoveryMode;
      // Inline images, keyed by a checksum of their data and dictionary bytes.
      this.imageCache = Object.create(null);
      this.refill();
    }

    /** Loads both look-ahead slots from the lexer. */
    refill() {
      this.buf1 = this.lexer.getObj();
      this.buf2 = this.lexer.getObj();
    }

    /**
     * Advances the look-ahead by one token.
     *
     * When the upcoming token is the `ID` command, nothing is read from the
     * lexer, since what follows `ID` is raw inline-image data.
     */
    shift() {
      if (this.buf2 instanceof _primitives.Cmd && this.buf2.cmd === "ID") {
        this.buf1 = this.buf2;
        this.buf2 = null;
      } else {
        this.buf1 = this.buf2;
        this.buf2 = this.lexer.getObj();
      }
    }

    /**
     * Like `shift`, but reports lexer failures through the return value.
     *
     * @returns {boolean} True when the shift succeeded, false when it threw.
     * @throws {MissingDataException} Always re-thrown.
     */
    tryShift() {
      try {
        this.shift();
        return true;
      } catch (e) {
        if (e instanceof _core_utils.MissingDataException) {
          throw e;
        }
        return false;
      }
    }

    /**
     * Parses and returns the next object.
     *
     * @param {Object} [cipherTransform] - When given, strings are passed
     *   through `decryptString` and stream data through `createStream`.
     * @returns {*} An array, dictionary, stream, reference, or the raw token.
     * @throws {ParserEOFException} On EOF inside an array or dictionary, unless
     *   `recoveryMode` is set.
     */
    getObj(cipherTransform = null) {
      const buf1 = this.buf1;
      this.shift();
      if (buf1 instanceof _primitives.Cmd) {
        switch (buf1.cmd) {
          case "BI":
            return this.makeInlineImage(cipherTransform);
          case "[": {
            const array = [];
            while (!(0, _primitives.isCmd)(this.buf1, "]") && this.buf1 !== _primitives.EOF) {
              array.push(this.getObj(cipherTransform));
            }
            if (this.buf1 === _primitives.EOF) {
              if (this.recoveryMode) {
                return array;
              }
              throw new _core_utils.ParserEOFException("End of file inside array.");
            }
            this.shift();
            return array;
          }
          case "<<": {
            const dict = new _primitives.Dict(this.xref);
            while (!(0, _primitives.isCmd)(this.buf1, ">>") && this.buf1 !== _primitives.EOF) {
              if (!(this.buf1 instanceof _primitives.Name)) {
                (0, _util.info)("Malformed dictionary: key must be a name object");
                this.shift();
                continue;
              }
              const key = this.buf1.name;
              this.shift();
              if (this.buf1 === _primitives.EOF) {
                break;
              }
              dict.set(key, this.getObj(cipherTransform));
            }
            if (this.buf1 === _primitives.EOF) {
              if (this.recoveryMode) {
                return dict;
              }
              throw new _core_utils.ParserEOFException("End of file inside dictionary.");
            }
            // `buf1` is ">>" here; a following `stream` keyword turns the
            // dictionary into a stream dictionary.
            if ((0, _primitives.isCmd)(this.buf2, "stream")) {
              return this.allowStreams ? this.makeStream(dict, cipherTransform) : dict;
            }
            this.shift();
            return dict;
          }
          default:
            return buf1;
        }
      }
      if (Number.isInteger(buf1)) {
        // Two integers followed by the `R` command form an indirect reference.
        if (Number.isInteger(this.buf1) && (0, _primitives.isCmd)(this.buf2, "R")) {
          const ref = _primitives.Ref.get(buf1, this.buf1);
          this.shift();
          this.shift();
          return ref;
        }
        return buf1;
      }
      if (typeof buf1 === "string") {
        if (cipherTransform) {
          return cipherTransform.decryptString(buf1);
        }
        return buf1;
      }
      return buf1;
    }

    /**
     * Locates the end of inline-image data by searching for an `EI` marker
     * that is followed by whitespace and then by plausible content.
     *
     * @param {Object} stream - Positioned at the first byte of the image data.
     * @returns {number} Length of the image data, in bytes.
     */
    findDefaultInlineStreamEnd(stream) {
      const E = 0x45,
        I = 0x49,
        SPACE = 0x20,
        LF = 0xa,
        CR = 0xd,
        NUL = 0x0;
      const lexer = this.lexer,
        startPos = stream.pos,
        n = 10;
      let state = 0,
        ch,
        maybeEIPos;
      while ((ch = stream.getByte()) !== -1) {
        if (state === 0) {
          state = ch === E ? 1 : 0;
        } else if (state === 1) {
          state = ch === I ? 2 : 0;
        } else {
          (0, _util.assert)(state === 2, "findDefaultInlineStreamEnd - invalid state.");
          if (ch === SPACE || ch === LF || ch === CR) {
            maybeEIPos = stream.pos;
            // Inspect the bytes after the candidate marker: anything that is
            // neither LF/CR nor in the 0x20-0x7f range means that this "EI"
            // is treated as part of the image data.
            const followingBytes = stream.peekBytes(n);
            for (let i = 0, ii = followingBytes.length; i < ii; i++) {
              ch = followingBytes[i];
              if (ch === NUL && followingBytes[i + 1] !== NUL) {
                // A NUL byte that is not followed by another NUL is tolerated.
                continue;
              }
              if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7f)) {
                state = 0;
                break;
              }
            }
            if (state !== 2) {
              continue;
            }
            if (lexer.knownCommands) {
              // The token following the marker must not be an unknown command.
              const nextObj = lexer.peekObj();
              if (nextObj instanceof _primitives.Cmd && !lexer.knownCommands[nextObj.cmd]) {
                state = 0;
              }
            } else {
              (0, _util.warn)("findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined.");
            }
            if (state === 2) {
              break;
            }
          } else {
            state = 0;
          }
        }
      }
      if (ch === -1) {
        (0, _util.warn)("findDefaultInlineStreamEnd: " + "Reached the end of the stream without finding a valid EI marker");
        if (maybeEIPos) {
          (0, _util.warn)('... trying to recover by using the last "EI" occurrence.');
          stream.skip(-(stream.pos - maybeEIPos));
        }
      }
      // Exclude the marker from the data: "EI" plus the byte after it, and
      // also the byte before it when that byte is whitespace.
      let endOffset = 4;
      stream.skip(-endOffset);
      ch = stream.peekByte();
      stream.skip(endOffset);
      if (!(0, _core_utils.isWhiteSpace)(ch)) {
        endOffset--;
      }
      return stream.pos - endOffset - startPos;
    }

    /**
     * Locates the end of DCT-encoded inline-image data by walking the 0xFF
     * markers until the 0xFFD9 marker is found.
     *
     * Falls back to `findDefaultInlineStreamEnd` when the stream ends first.
     *
     * @param {Object} stream - Positioned at the first byte of the image data.
     * @returns {number} Length of the image data, in bytes.
     */
    findDCTDecodeInlineStreamEnd(stream) {
      const startPos = stream.pos;
      let foundEOI = false,
        b,
        markerLength;
      while ((b = stream.getByte()) !== -1) {
        if (b !== 0xff) {
          continue;
        }
        switch (stream.getByte()) {
          case 0x00:
            break;
          case 0xff:
            // Step back so that this 0xFF is examined as a marker start.
            stream.skip(-1);
            break;
          case 0xd9:
            foundEOI = true;
            break;
          case 0xc0:
          case 0xc1:
          case 0xc2:
          case 0xc3:
          case 0xc5:
          case 0xc6:
          case 0xc7:
          case 0xc9:
          case 0xca:
          case 0xcb:
          case 0xcd:
          case 0xce:
          case 0xcf:
          case 0xc4:
          case 0xcc:
          case 0xda:
          case 0xdb:
          case 0xdc:
          case 0xdd:
          case 0xde:
          case 0xdf:
          case 0xe0:
          case 0xe1:
          case 0xe2:
          case 0xe3:
          case 0xe4:
          case 0xe5:
          case 0xe6:
          case 0xe7:
          case 0xe8:
          case 0xe9:
          case 0xea:
          case 0xeb:
          case 0xec:
          case 0xed:
          case 0xee:
          case 0xef:
          case 0xfe:
            // These markers carry a 16-bit length that includes the two
            // length bytes themselves.
            markerLength = stream.getUint16();
            if (markerLength > 2) {
              stream.skip(markerLength - 2);
            } else {
              // Implausible length: un-read it and keep scanning.
              stream.skip(-2);
            }
            break;
        }
        if (foundEOI) {
          break;
        }
      }
      const length = stream.pos - startPos;
      if (b === -1) {
        (0, _util.warn)("Inline DCTDecode image stream: " + "EOI marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Locates the end of ASCII85-encoded inline-image data by searching for
     * the `~>` marker (whitespace between the two characters is accepted), or
     * for a `~` that is followed by whitespace and then `EI`.
     *
     * Falls back to `findDefaultInlineStreamEnd` when the stream ends first.
     *
     * @param {Object} stream - Positioned at the first byte of the image data.
     * @returns {number} Length of the image data, in bytes.
     */
    findASCII85DecodeInlineStreamEnd(stream) {
      const TILDE = 0x7e,
        GT = 0x3e;
      const startPos = stream.pos;
      let ch;
      while ((ch = stream.getByte()) !== -1) {
        if (ch === TILDE) {
          const tildePos = stream.pos;
          ch = stream.peekByte();
          while ((0, _core_utils.isWhiteSpace)(ch)) {
            stream.skip();
            ch = stream.peekByte();
          }
          if (ch === GT) {
            stream.skip();
            break;
          }
          // Only when whitespace was skipped: accept "EI" right after it.
          if (stream.pos > tildePos) {
            const maybeEI = stream.peekBytes(2);
            if (maybeEI[0] === 0x45 && maybeEI[1] === 0x49) {
              break;
            }
          }
        }
      }
      const length = stream.pos - startPos;
      if (ch === -1) {
        (0, _util.warn)("Inline ASCII85Decode image stream: " + "EOD marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Locates the end of ASCIIHex-encoded inline-image data by searching for
     * the `>` marker.
     *
     * Falls back to `findDefaultInlineStreamEnd` when the stream ends first.
     *
     * @param {Object} stream - Positioned at the first byte of the image data.
     * @returns {number} Length of the image data, in bytes.
     */
    findASCIIHexDecodeInlineStreamEnd(stream) {
      const GT = 0x3e;
      const startPos = stream.pos;
      let ch;
      while ((ch = stream.getByte()) !== -1) {
        if (ch === GT) {
          break;
        }
      }
      const length = stream.pos - startPos;
      if (ch === -1) {
        (0, _util.warn)("Inline ASCIIHexDecode image stream: " + "EOD marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Advances `stream` past the next "EI" byte pair and the byte after it.
     *
     * @param {Object} stream
     */
    inlineStreamSkipEI(stream) {
      const E = 0x45,
        I = 0x49;
      let state = 0,
        ch;
      while ((ch = stream.getByte()) !== -1) {
        if (state === 0) {
          state = ch === E ? 1 : 0;
        } else if (state === 1) {
          state = ch === I ? 2 : 0;
        } else if (state === 2) {
          break;
        }
      }
    }

    /**
     * Parses an inline image; called once the `BI` command has been consumed.
     *
     * Reads the image dictionary up to `ID`, determines the length of the
     * image data according to the first filter, and wraps the data in the
     * requested filter streams. Small images are cached in `imageCache`.
     *
     * @param {Object} [cipherTransform]
     * @returns {Object} The (possibly cached) image stream.
     * @throws {FormatError} When a dictionary key is not a name object.
     */
    makeInlineImage(cipherTransform) {
      const lexer = this.lexer;
      const stream = lexer.stream;
      const dict = new _primitives.Dict(this.xref);
      let dictLength;
      while (!(0, _primitives.isCmd)(this.buf1, "ID") && this.buf1 !== _primitives.EOF) {
        if (!(this.buf1 instanceof _primitives.Name)) {
          throw new _util.FormatError("Dictionary key must be a name object");
        }
        const key = this.buf1.name;
        this.shift();
        if (this.buf1 === _primitives.EOF) {
          break;
        }
        dict.set(key, this.getObj(cipherTransform));
      }
      if (lexer.beginInlineImagePos !== -1) {
        dictLength = stream.pos - lexer.beginInlineImagePos;
      }

      // Only the first filter decides how the end of the data is located.
      const filter = dict.get("F", "Filter");
      let filterName;
      if (filter instanceof _primitives.Name) {
        filterName = filter.name;
      } else if (Array.isArray(filter)) {
        const filterZero = this.xref.fetchIfRef(filter[0]);
        if (filterZero instanceof _primitives.Name) {
          filterName = filterZero.name;
        }
      }

      const startPos = stream.pos;
      let length;
      switch (filterName) {
        case "DCT":
        case "DCTDecode":
          length = this.findDCTDecodeInlineStreamEnd(stream);
          break;
        case "A85":
        case "ASCII85Decode":
          length = this.findASCII85DecodeInlineStreamEnd(stream);
          break;
        case "AHx":
        case "ASCIIHexDecode":
          length = this.findASCIIHexDecodeInlineStreamEnd(stream);
          break;
        default:
          length = this.findDefaultInlineStreamEnd(stream);
      }
      let imageStream = stream.makeSubStream(startPos, length, dict);

      // When `dictLength` is undefined the comparison below is false, which
      // disables caching for this image.
      let cacheKey;
      if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) {
        const imageBytes = imageStream.getBytes();
        imageStream.reset();
        const initialStreamPos = stream.pos;
        // Temporarily rewind in order to read the raw dictionary bytes.
        stream.pos = lexer.beginInlineImagePos;
        const dictBytes = stream.getBytes(dictLength);
        stream.pos = initialStreamPos;
        cacheKey = computeAdler32(imageBytes) + "_" + computeAdler32(dictBytes);
        const cacheEntry = this.imageCache[cacheKey];
        if (cacheEntry !== undefined) {
          this.buf2 = _primitives.Cmd.get("EI");
          this.shift();
          cacheEntry.reset();
          return cacheEntry;
        }
      }
      if (cipherTransform) {
        imageStream = cipherTransform.createStream(imageStream, length);
      }
      imageStream = this.filter(imageStream, dict, length);
      imageStream.dict = dict;
      if (cacheKey !== undefined) {
        imageStream.cacheKey = `inline_${length}_${cacheKey}`;
        this.imageCache[cacheKey] = imageStream;
      }
      this.buf2 = _primitives.Cmd.get("EI");
      this.shift();
      return imageStream;
    }

    /**
     * Searches the lexer's stream, starting at `startPos`, for the first
     * occurrence of `signature`.
     *
     * On success `stream.pos` is left at the start of the match.
     *
     * @param {number} startPos - Absolute stream position to start from.
     * @param {Uint8Array} signature - Byte sequence to look for.
     * @returns {number} Offset of the match relative to `startPos`, or -1
     *   when the signature does not occur.
     */
    _findStreamLength(startPos, signature) {
      const { stream } = this.lexer;
      stream.pos = startPos;
      const SCAN_BLOCK_LENGTH = 2048;
      const signatureLength = signature.length;
      while (stream.pos < stream.end) {
        const scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH);
        // Last offset in this block at which a complete signature still fits.
        const lastCandidate = scanBytes.length - signatureLength;
        if (lastCandidate < 0) {
          break;
        }
        // The last candidate offset must be tested as well, otherwise a
        // signature that forms the very last bytes of the stream is missed.
        for (let pos = 0; pos <= lastCandidate; pos++) {
          let j = 0;
          while (j < signatureLength && scanBytes[pos + j] === signature[j]) {
            j++;
          }
          if (j >= signatureLength) {
            stream.pos += pos;
            return stream.pos - startPos;
          }
        }
        // Every offset up to `lastCandidate` has been ruled out; the next
        // block starts right after it, so matches straddling the block
        // boundary are still found.
        stream.pos += lastCandidate + 1;
      }
      return -1;
    }

    /**
     * Builds a stream object for `dict`; called while `buf2` is the `stream`
     * command.
     *
     * The /Length entry is trusted only if the `endstream` command is found
     * where it says; otherwise the data is scanned for "endstream" (or for
     * "endstrea" followed by whitespace) to determine the actual length.
     *
     * @param {Object} dict - The stream dictionary.
     * @param {Object} [cipherTransform]
     * @returns {Object} The (decrypting, decoding) stream.
     * @throws {FormatError} When no `endstream` command can be found.
     */
    makeStream(dict, cipherTransform) {
      const lexer = this.lexer;
      let stream = lexer.stream;
      lexer.skipToNextLine();
      const startPos = stream.pos - 1;
      let length = dict.get("Length");
      if (!Number.isInteger(length)) {
        (0, _util.info)(`Bad length "${length && length.toString()}" in stream.`);
        length = 0;
      }
      // Jump to where the data should end and check for `endstream`.
      stream.pos = startPos + length;
      lexer.nextChar();
      if (this.tryShift() && (0, _primitives.isCmd)(this.buf2, "endstream")) {
        this.shift();
      } else {
        // Bytes of the string "endstream".
        const ENDSTREAM_SIGNATURE = new Uint8Array([0x65, 0x6e, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d]);
        let actualLength = this._findStreamLength(startPos, ENDSTREAM_SIGNATURE);
        if (actualLength < 0) {
          // Retry with the last character(s) of the signature removed.
          const MAX_TRUNCATION = 1;
          for (let i = 1; i <= MAX_TRUNCATION; i++) {
            const end = ENDSTREAM_SIGNATURE.length - i;
            const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);
            const maybeLength = this._findStreamLength(startPos, TRUNCATED_SIGNATURE);
            if (maybeLength >= 0) {
              // Accept the truncated match only when whitespace follows it.
              const lastByte = stream.peekBytes(end + 1)[end];
              if (!(0, _core_utils.isWhiteSpace)(lastByte)) {
                break;
              }
              (0, _util.info)(`Found "${(0, _util.bytesToString)(TRUNCATED_SIGNATURE)}" when ` + "searching for endstream command.");
              actualLength = maybeLength;
              break;
            }
          }
          if (actualLength < 0) {
            throw new _util.FormatError("Missing endstream command.");
          }
        }
        length = actualLength;
        lexer.nextChar();
        this.shift();
        this.shift();
      }
      this.shift();
      stream = stream.makeSubStream(startPos, length, dict);
      if (cipherTransform) {
        stream = cipherTransform.createStream(stream, length);
      }
      stream = this.filter(stream, dict, length);
      stream.dict = dict;
      return stream;
    }

    /**
     * Wraps `stream` in the decoder(s) named by the dictionary's filter entry
     * ("F" or "Filter"), using the parameters from "DP" or "DecodeParms".
     *
     * @param {Object} stream - The raw stream.
     * @param {Object} dict - The stream dictionary.
     * @param {number} length - Length of the raw data; passed to the first
     *   filter only.
     * @returns {Object} The outermost decoding stream, or `stream` itself when
     *   the filter entry is neither a name nor an array.
     * @throws {FormatError} When an entry of a filter array is not a name.
     */
    filter(stream, dict, length) {
      let filter = dict.get("F", "Filter");
      let params = dict.get("DP", "DecodeParms");
      if (filter instanceof _primitives.Name) {
        if (Array.isArray(params)) {
          (0, _util.warn)("/DecodeParms should not be an Array, when /Filter is a Name.");
        }
        return this.makeFilter(stream, filter.name, length, params);
      }
      let maybeLength = length;
      if (Array.isArray(filter)) {
        const filterArray = filter;
        const paramsArray = params;
        for (let i = 0, ii = filterArray.length; i < ii; ++i) {
          filter = this.xref.fetchIfRef(filterArray[i]);
          if (!(filter instanceof _primitives.Name)) {
            throw new _util.FormatError(`Bad filter name "${filter}"`);
          }
          params = null;
          if (Array.isArray(paramsArray) && i in paramsArray) {
            params = this.xref.fetchIfRef(paramsArray[i]);
          }
          stream = this.makeFilter(stream, filter.name, maybeLength, params);
          // After the first filter, the length is no longer known.
          maybeLength = null;
        }
      }
      return stream;
    }

    /**
     * Creates the decoding stream for a single filter.
     *
     * @param {Object} stream - The stream to decode.
     * @param {string} name - Filter name, either abbreviated or full.
     * @param {number|null} maybeLength - Length of the data, if known.
     * @param {Object} [params] - Decode parameters for this filter.
     * @returns {Object} The decoding stream; `stream` itself for an
     *   unsupported filter; a `NullStream` when `maybeLength` is 0 or when
     *   creating the decoder throws.
     * @throws {MissingDataException} Always re-thrown.
     */
    makeFilter(stream, name, maybeLength, params) {
      if (maybeLength === 0) {
        (0, _util.warn)(`Empty "${name}" stream.`);
        return new _stream.NullStream();
      }
      const xrefStats = this.xref.stats;
      try {
        switch (name) {
          case "Fl":
          case "FlateDecode":
            xrefStats.addStreamType(_util.StreamType.FLATE);
            if (params) {
              return new _predictor_stream.PredictorStream(new _flate_stream.FlateStream(stream, maybeLength), maybeLength, params);
            }
            return new _flate_stream.FlateStream(stream, maybeLength);
          case "LZW":
          case "LZWDecode": {
            xrefStats.addStreamType(_util.StreamType.LZW);
            let earlyChange = 1;
            if (params) {
              if (params.has("EarlyChange")) {
                earlyChange = params.get("EarlyChange");
              }
              return new _predictor_stream.PredictorStream(new _lzw_stream.LZWStream(stream, maybeLength, earlyChange), maybeLength, params);
            }
            return new _lzw_stream.LZWStream(stream, maybeLength, earlyChange);
          }
          case "DCT":
          case "DCTDecode":
            xrefStats.addStreamType(_util.StreamType.DCT);
            return new _jpeg_stream.JpegStream(stream, maybeLength, params);
          case "JPX":
          case "JPXDecode":
            xrefStats.addStreamType(_util.StreamType.JPX);
            return new _jpx_stream.JpxStream(stream, maybeLength, params);
          case "A85":
          case "ASCII85Decode":
            xrefStats.addStreamType(_util.StreamType.A85);
            return new _ascii_85_stream.Ascii85Stream(stream, maybeLength);
          case "AHx":
          case "ASCIIHexDecode":
            xrefStats.addStreamType(_util.StreamType.AHX);
            return new _ascii_hex_stream.AsciiHexStream(stream, maybeLength);
          case "CCF":
          case "CCITTFaxDecode":
            xrefStats.addStreamType(_util.StreamType.CCF);
            return new _ccitt_stream.CCITTFaxStream(stream, maybeLength, params);
          case "RL":
          case "RunLengthDecode":
            xrefStats.addStreamType(_util.StreamType.RLX);
            return new _run_length_stream.RunLengthStream(stream, maybeLength);
          case "JBIG2Decode":
            xrefStats.addStreamType(_util.StreamType.JBIG);
            return new _jbig2_stream.Jbig2Stream(stream, maybeLength, params);
        }
        (0, _util.warn)(`Filter "${name}" is not supported.`);
        return stream;
      } catch (ex) {
        if (ex instanceof _core_utils.MissingDataException) {
          throw ex;
        }
        (0, _util.warn)(`Invalid stream: "${ex}"`);
        return new _stream.NullStream();
      }
    }
  }
  621. exports.Parser = Parser;
  // Character classes, indexed by byte value (0-255):
  //   1 - whitespace, 2 - delimiter, 0 - everything else.
  const specialChars = (() => {
    // NUL, TAB, LF, FF, CR, SPACE
    const WHITESPACE_CODES = [0x00, 0x09, 0x0a, 0x0c, 0x0d, 0x20];
    // % ( ) / < > [ ] { }
    const DELIMITER_CODES = [0x25, 0x28, 0x29, 0x2f, 0x3c, 0x3e, 0x5b, 0x5d, 0x7b, 0x7d];
    const table = new Array(256).fill(0);
    for (const code of WHITESPACE_CODES) {
      table[code] = 1;
    }
    for (const code of DELIMITER_CODES) {
      table[code] = 2;
    }
    return table;
  })();
  /**
   * Converts the character code of a hexadecimal digit to its numeric value.
   *
   * @param {number} ch - Character code.
   * @returns {number} 0-15 for "0"-"9", "A"-"F" and "a"-"f"; -1 otherwise.
   */
  function toHexDigit(ch) {
    const isDecimal = ch >= 0x30 && ch <= 0x39;
    if (isDecimal) {
      return ch & 0x0f;
    }
    const isUpperHex = ch >= 0x41 && ch <= 0x46;
    const isLowerHex = ch >= 0x61 && ch <= 0x66;
    // For both "A"-"F" and "a"-"f" the low nibble is 1-6.
    return isUpperHex || isLowerHex ? (ch & 0x0f) + 9 : -1;
  }
  632. class Lexer {
  633. constructor(stream, knownCommands = null) {
  634. this.stream = stream;
  635. this.nextChar();
  636. this.strBuf = [];
  637. this.knownCommands = knownCommands;
  638. this._hexStringNumWarn = 0;
  639. this.beginInlineImagePos = -1;
  640. }
  641. nextChar() {
  642. return this.currentChar = this.stream.getByte();
  643. }
  644. peekChar() {
  645. return this.stream.peekByte();
  646. }
  647. getNumber() {
  648. let ch = this.currentChar;
  649. let eNotation = false;
  650. let divideBy = 0;
  651. let sign = 0;
  652. if (ch === 0x2d) {
  653. sign = -1;
  654. ch = this.nextChar();
  655. if (ch === 0x2d) {
  656. ch = this.nextChar();
  657. }
  658. } else if (ch === 0x2b) {
  659. sign = 1;
  660. ch = this.nextChar();
  661. }
  662. if (ch === 0x0a || ch === 0x0d) {
  663. do {
  664. ch = this.nextChar();
  665. } while (ch === 0x0a || ch === 0x0d);
  666. }
  667. if (ch === 0x2e) {
  668. divideBy = 10;
  669. ch = this.nextChar();
  670. }
  671. if (ch < 0x30 || ch > 0x39) {
  672. const msg = `Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`;
  673. if ((0, _core_utils.isWhiteSpace)(ch) || ch === -1) {
  674. (0, _util.info)(`Lexer.getNumber - "${msg}".`);
  675. return 0;
  676. }
  677. throw new _util.FormatError(msg);
  678. }
  679. sign = sign || 1;
  680. let baseValue = ch - 0x30;
  681. let powerValue = 0;
  682. let powerValueSign = 1;
  683. while ((ch = this.nextChar()) >= 0) {
  684. if (ch >= 0x30 && ch <= 0x39) {
  685. const currentDigit = ch - 0x30;
  686. if (eNotation) {
  687. powerValue = powerValue * 10 + currentDigit;
  688. } else {
  689. if (divideBy !== 0) {
  690. divideBy *= 10;
  691. }
  692. baseValue = baseValue * 10 + currentDigit;
  693. }
  694. } else if (ch === 0x2e) {
  695. if (divideBy === 0) {
  696. divideBy = 1;
  697. } else {
  698. break;
  699. }
  700. } else if (ch === 0x2d) {
  701. (0, _util.warn)("Badly formatted number: minus sign in the middle");
  702. } else if (ch === 0x45 || ch === 0x65) {
  703. ch = this.peekChar();
  704. if (ch === 0x2b || ch === 0x2d) {
  705. powerValueSign = ch === 0x2d ? -1 : 1;
  706. this.nextChar();
  707. } else if (ch < 0x30 || ch > 0x39) {
  708. break;
  709. }
  710. eNotation = true;
  711. } else {
  712. break;
  713. }
  714. }
  715. if (divideBy !== 0) {
  716. baseValue /= divideBy;
  717. }
  718. if (eNotation) {
  719. baseValue *= 10 ** (powerValueSign * powerValue);
  720. }
  721. return sign * baseValue;
  722. }
  723. getString() {
  724. let numParen = 1;
  725. let done = false;
  726. const strBuf = this.strBuf;
  727. strBuf.length = 0;
  728. let ch = this.nextChar();
  729. while (true) {
  730. let charBuffered = false;
  731. switch (ch | 0) {
  732. case -1:
  733. (0, _util.warn)("Unterminated string");
  734. done = true;
  735. break;
  736. case 0x28:
  737. ++numParen;
  738. strBuf.push("(");
  739. break;
  740. case 0x29:
  741. if (--numParen === 0) {
  742. this.nextChar();
  743. done = true;
  744. } else {
  745. strBuf.push(")");
  746. }
  747. break;
  748. case 0x5c:
  749. ch = this.nextChar();
  750. switch (ch) {
  751. case -1:
  752. (0, _util.warn)("Unterminated string");
  753. done = true;
  754. break;
  755. case 0x6e:
  756. strBuf.push("\n");
  757. break;
  758. case 0x72:
  759. strBuf.push("\r");
  760. break;
  761. case 0x74:
  762. strBuf.push("\t");
  763. break;
  764. case 0x62:
  765. strBuf.push("\b");
  766. break;
  767. case 0x66:
  768. strBuf.push("\f");
  769. break;
  770. case 0x5c:
  771. case 0x28:
  772. case 0x29:
  773. strBuf.push(String.fromCharCode(ch));
  774. break;
  775. case 0x30:
  776. case 0x31:
  777. case 0x32:
  778. case 0x33:
  779. case 0x34:
  780. case 0x35:
  781. case 0x36:
  782. case 0x37:
  783. let x = ch & 0x0f;
  784. ch = this.nextChar();
  785. charBuffered = true;
  786. if (ch >= 0x30 && ch <= 0x37) {
  787. x = (x << 3) + (ch & 0x0f);
  788. ch = this.nextChar();
  789. if (ch >= 0x30 && ch <= 0x37) {
  790. charBuffered = false;
  791. x = (x << 3) + (ch & 0x0f);
  792. }
  793. }
  794. strBuf.push(String.fromCharCode(x));
  795. break;
  796. case 0x0d:
  797. if (this.peekChar() === 0x0a) {
  798. this.nextChar();
  799. }
  800. break;
  801. case 0x0a:
  802. break;
  803. default:
  804. strBuf.push(String.fromCharCode(ch));
  805. break;
  806. }
  807. break;
  808. default:
  809. strBuf.push(String.fromCharCode(ch));
  810. break;
  811. }
  812. if (done) {
  813. break;
  814. }
  815. if (!charBuffered) {
  816. ch = this.nextChar();
  817. }
  818. }
  819. return strBuf.join("");
  820. }
  821. getName() {
  822. let ch, previousCh;
  823. const strBuf = this.strBuf;
  824. strBuf.length = 0;
  825. while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
  826. if (ch === 0x23) {
  827. ch = this.nextChar();
  828. if (specialChars[ch]) {
  829. (0, _util.warn)("Lexer_getName: " + "NUMBER SIGN (#) should be followed by a hexadecimal number.");
  830. strBuf.push("#");
  831. break;
  832. }
  833. const x = toHexDigit(ch);
  834. if (x !== -1) {
  835. previousCh = ch;
  836. ch = this.nextChar();
  837. const x2 = toHexDigit(ch);
  838. if (x2 === -1) {
  839. (0, _util.warn)(`Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` + "in hexadecimal number.");
  840. strBuf.push("#", String.fromCharCode(previousCh));
  841. if (specialChars[ch]) {
  842. break;
  843. }
  844. strBuf.push(String.fromCharCode(ch));
  845. continue;
  846. }
  847. strBuf.push(String.fromCharCode(x << 4 | x2));
  848. } else {
  849. strBuf.push("#", String.fromCharCode(ch));
  850. }
  851. } else {
  852. strBuf.push(String.fromCharCode(ch));
  853. }
  854. }
  855. if (strBuf.length > 127) {
  856. (0, _util.warn)(`Name token is longer than allowed by the spec: ${strBuf.length}`);
  857. }
  858. return _primitives.Name.get(strBuf.join(""));
  859. }
  860. _hexStringWarn(ch) {
  861. const MAX_HEX_STRING_NUM_WARN = 5;
  862. if (this._hexStringNumWarn++ === MAX_HEX_STRING_NUM_WARN) {
  863. (0, _util.warn)("getHexString - ignoring additional invalid characters.");
  864. return;
  865. }
  866. if (this._hexStringNumWarn > MAX_HEX_STRING_NUM_WARN) {
  867. return;
  868. }
  869. (0, _util.warn)(`getHexString - ignoring invalid character: ${ch}`);
  870. }
  871. getHexString() {
  872. const strBuf = this.strBuf;
  873. strBuf.length = 0;
  874. let ch = this.currentChar;
  875. let isFirstHex = true;
  876. let firstDigit, secondDigit;
  877. this._hexStringNumWarn = 0;
  878. while (true) {
  879. if (ch < 0) {
  880. (0, _util.warn)("Unterminated hex string");
  881. break;
  882. } else if (ch === 0x3e) {
  883. this.nextChar();
  884. break;
  885. } else if (specialChars[ch] === 1) {
  886. ch = this.nextChar();
  887. continue;
  888. } else {
  889. if (isFirstHex) {
  890. firstDigit = toHexDigit(ch);
  891. if (firstDigit === -1) {
  892. this._hexStringWarn(ch);
  893. ch = this.nextChar();
  894. continue;
  895. }
  896. } else {
  897. secondDigit = toHexDigit(ch);
  898. if (secondDigit === -1) {
  899. this._hexStringWarn(ch);
  900. ch = this.nextChar();
  901. continue;
  902. }
  903. strBuf.push(String.fromCharCode(firstDigit << 4 | secondDigit));
  904. }
  905. isFirstHex = !isFirstHex;
  906. ch = this.nextChar();
  907. }
  908. }
  909. return strBuf.join("");
  910. }
  911. getObj() {
  912. let comment = false;
  913. let ch = this.currentChar;
  914. while (true) {
  915. if (ch < 0) {
  916. return _primitives.EOF;
  917. }
  918. if (comment) {
  919. if (ch === 0x0a || ch === 0x0d) {
  920. comment = false;
  921. }
  922. } else if (ch === 0x25) {
  923. comment = true;
  924. } else if (specialChars[ch] !== 1) {
  925. break;
  926. }
  927. ch = this.nextChar();
  928. }
  929. switch (ch | 0) {
  930. case 0x30:
  931. case 0x31:
  932. case 0x32:
  933. case 0x33:
  934. case 0x34:
  935. case 0x35:
  936. case 0x36:
  937. case 0x37:
  938. case 0x38:
  939. case 0x39:
  940. case 0x2b:
  941. case 0x2d:
  942. case 0x2e:
  943. return this.getNumber();
  944. case 0x28:
  945. return this.getString();
  946. case 0x2f:
  947. return this.getName();
  948. case 0x5b:
  949. this.nextChar();
  950. return _primitives.Cmd.get("[");
  951. case 0x5d:
  952. this.nextChar();
  953. return _primitives.Cmd.get("]");
  954. case 0x3c:
  955. ch = this.nextChar();
  956. if (ch === 0x3c) {
  957. this.nextChar();
  958. return _primitives.Cmd.get("<<");
  959. }
  960. return this.getHexString();
  961. case 0x3e:
  962. ch = this.nextChar();
  963. if (ch === 0x3e) {
  964. this.nextChar();
  965. return _primitives.Cmd.get(">>");
  966. }
  967. return _primitives.Cmd.get(">");
  968. case 0x7b:
  969. this.nextChar();
  970. return _primitives.Cmd.get("{");
  971. case 0x7d:
  972. this.nextChar();
  973. return _primitives.Cmd.get("}");
  974. case 0x29:
  975. this.nextChar();
  976. throw new _util.FormatError(`Illegal character: ${ch}`);
  977. }
  978. let str = String.fromCharCode(ch);
  979. if (ch < 0x20 || ch > 0x7f) {
  980. const nextCh = this.peekChar();
  981. if (nextCh >= 0x20 && nextCh <= 0x7f) {
  982. this.nextChar();
  983. return _primitives.Cmd.get(str);
  984. }
  985. }
  986. const knownCommands = this.knownCommands;
  987. let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
  988. while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
  989. const possibleCommand = str + String.fromCharCode(ch);
  990. if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
  991. break;
  992. }
  993. if (str.length === 128) {
  994. throw new _util.FormatError(`Command token too long: ${str.length}`);
  995. }
  996. str = possibleCommand;
  997. knownCommandFound = knownCommands && knownCommands[str] !== undefined;
  998. }
  999. if (str === "true") {
  1000. return true;
  1001. }
  1002. if (str === "false") {
  1003. return false;
  1004. }
  1005. if (str === "null") {
  1006. return null;
  1007. }
  1008. if (str === "BI") {
  1009. this.beginInlineImagePos = this.stream.pos;
  1010. }
  1011. return _primitives.Cmd.get(str);
  1012. }
  1013. peekObj() {
  1014. const streamPos = this.stream.pos,
  1015. currentChar = this.currentChar,
  1016. beginInlineImagePos = this.beginInlineImagePos;
  1017. let nextObj;
  1018. try {
  1019. nextObj = this.getObj();
  1020. } catch (ex) {
  1021. if (ex instanceof _core_utils.MissingDataException) {
  1022. throw ex;
  1023. }
  1024. (0, _util.warn)(`peekObj: ${ex}`);
  1025. }
  1026. this.stream.pos = streamPos;
  1027. this.currentChar = currentChar;
  1028. this.beginInlineImagePos = beginInlineImagePos;
  1029. return nextObj;
  1030. }
  1031. skipToNextLine() {
  1032. let ch = this.currentChar;
  1033. while (ch >= 0) {
  1034. if (ch === 0x0d) {
  1035. ch = this.nextChar();
  1036. if (ch === 0x0a) {
  1037. this.nextChar();
  1038. }
  1039. break;
  1040. } else if (ch === 0x0a) {
  1041. this.nextChar();
  1042. break;
  1043. }
  1044. ch = this.nextChar();
  1045. }
  1046. }
  1047. }
  1048. exports.Lexer = Lexer;
  /**
   * Reader for the linearization dictionary at the start of a stream.
   */
  class Linearization {
    /**
     * Parse the first four tokens of `stream` and, if they form
     * `<integer> <integer> obj <dictionary>` where the dictionary has a
     * positive numeric "Linearized" entry, validate and return its
     * parameters.
     *
     * @param {Object} stream - Stream handed to a new `Lexer`; its `length`
     *   property is compared against the dictionary's "L" entry.
     * @returns {Object|null} `null` when the stream does not start with a
     *   linearization dictionary; otherwise an object with `length` ("L"),
     *   `hints` ("H"), `objectNumberFirst` ("O"), `endFirst` ("E"),
     *   `numPages` ("N"), `mainXRefEntriesOffset` ("T") and `pageFirst`
     *   ("P", or 0 when the entry is absent).
     * @throws {Error} When "L" differs from `stream.length`, or when any of
     *   the entries above is missing or invalid.
     */
    static create(stream) {
      // Read an integer entry; it must be > 0, or >= 0 if `allowZeroValue`.
      const readInt = (dict, name, allowZeroValue = false) => {
        const value = dict.get(name);
        const minimum = allowZeroValue ? 0 : 1;
        if (Number.isInteger(value) && value >= minimum) {
          return value;
        }
        throw new Error(`The "${name}" parameter in the linearization dictionary is invalid.`);
      };

      // Read the "H" entry: an array of 2 or 4 positive integers.
      const readHints = dict => {
        const hints = dict.get("H");
        if (!Array.isArray(hints) || (hints.length !== 2 && hints.length !== 4)) {
          throw new Error("Hint array in the linearization dictionary is invalid.");
        }
        for (const [index, hint] of hints.entries()) {
          if (!Number.isInteger(hint) || hint <= 0) {
            throw new Error(`Hint (${index}) in the linearization dictionary is invalid.`);
          }
        }
        return hints;
      };

      const parser = new Parser({
        lexer: new Lexer(stream),
        xref: null
      });
      const first = parser.getObj();
      const second = parser.getObj();
      const keyword = parser.getObj();
      const linDict = parser.getObj();

      const hasObjectHeader =
        Number.isInteger(first) &&
        Number.isInteger(second) &&
        (0, _primitives.isCmd)(keyword, "obj") &&
        linDict instanceof _primitives.Dict;
      if (!hasObjectHeader) {
        return null;
      }
      const linearized = linDict.get("Linearized");
      if (typeof linearized !== "number" || !(linearized > 0)) {
        return null;
      }

      const length = readInt(linDict, "L");
      if (length !== stream.length) {
        throw new Error('The "L" parameter in the linearization dictionary does not equal the stream length.');
      }

      // The reads below run in a fixed order so that, when several entries
      // are invalid, the first one in this order is the one reported.
      const hints = readHints(linDict);
      const objectNumberFirst = readInt(linDict, "O");
      const endFirst = readInt(linDict, "E");
      const numPages = readInt(linDict, "N");
      const mainXRefEntriesOffset = readInt(linDict, "T");
      const pageFirst = linDict.has("P") ? readInt(linDict, "P", true) : 0;

      return {
        length,
        hints,
        objectNumberFirst,
        endFirst,
        numPages,
        mainXRefEntriesOffset,
        pageFirst
      };
    }
  }
  1097. exports.Linearization = Linearization;