parser.js 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * Javascript code in this page
  4. *
  5. * Copyright 2020 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * Javascript code in this page
  21. */
  22. "use strict";
  23. Object.defineProperty(exports, "__esModule", {
  24. value: true
  25. });
  26. exports.Parser = exports.Linearization = exports.Lexer = void 0;
  27. var _stream = require("./stream.js");
  28. var _util = require("../shared/util.js");
  29. var _primitives = require("./primitives.js");
  30. var _core_utils = require("./core_utils.js");
  31. var _ccitt_stream = require("./ccitt_stream.js");
  32. var _jbig2_stream = require("./jbig2_stream.js");
  33. var _jpeg_stream = require("./jpeg_stream.js");
  34. var _jpx_stream = require("./jpx_stream.js");
  // Inline images whose data is shorter than this many bytes are eligible for
  // caching in `Parser.makeInlineImage`.
  const MAX_LENGTH_TO_CACHE = 1000;
  // Upper bound on the length of an inline image dictionary for which
  // `Parser.makeInlineImage` computes an Adler-32 based cache key.
  const MAX_ADLER32_LENGTH = 5552;
  /**
   * Computes the Adler-32 checksum of a byte sequence.
   *
   * The two running sums are reduced modulo 65521 after every block of 5552
   * bytes, so they remain exactly representable no matter how long the input
   * is. For inputs shorter than one block the result is identical to summing
   * everything first and reducing once at the end.
   *
   * @param {ArrayLike<number>} bytes - Input data; only the low 8 bits of each
   *   entry are used.
   * @returns {number} `(b << 16) | a` as a signed 32-bit integer, i.e. the
   *   value is negative whenever the top bit of `b` is set.
   */
  function computeAdler32(bytes) {
    const MOD_ADLER = 65521;
    // Number of bytes summed between two modulo reductions; this is the block
    // size conventionally used by Adler-32 implementations.
    const BLOCK_LENGTH = 5552;
    const bytesLength = bytes.length;
    let a = 1;
    let b = 0;

    for (let start = 0; start < bytesLength; start += BLOCK_LENGTH) {
      const end = Math.min(start + BLOCK_LENGTH, bytesLength);
      for (let i = start; i < end; ++i) {
        a += bytes[i] & 0xff;
        b += a;
      }
      a %= MOD_ADLER;
      b %= MOD_ADLER;
    }
    return (b << 16) | a;
  }
  /**
   * Turns the token sequence produced by a lexer into PDF objects: arrays,
   * dictionaries, indirect references, streams and inline images.
   *
   * Two tokens of look-ahead are kept in `buf1` (the next token) and `buf2`
   * (the one after it).
   */
  class Parser {
    /**
     * @param {Object} options
     * @param {Object} options.lexer - Token source; `getObj()` is called on it
     *   and its `stream` is read directly when streams are built.
     * @param {Object} options.xref - Handed to every `Dict` created here and
     *   used to resolve references found in filter arrays.
     * @param {boolean} [options.allowStreams=false] - When false, a dictionary
     *   followed by the `stream` keyword is returned as a plain dictionary.
     * @param {boolean} [options.recoveryMode=false] - When true, an array or
     *   dictionary cut short by end-of-file is returned instead of throwing.
     */
    constructor({ lexer, xref, allowStreams = false, recoveryMode = false }) {
      this.lexer = lexer;
      this.xref = xref;
      this.allowStreams = allowStreams;
      this.recoveryMode = recoveryMode;
      // Inline images already parsed, keyed by a checksum of their bytes.
      this.imageCache = Object.create(null);
      this.refill();
    }

    /** Reloads both look-ahead tokens from the lexer. */
    refill() {
      this.buf1 = this.lexer.getObj();
      this.buf2 = this.lexer.getObj();
    }

    /**
     * Advances the look-ahead window by one token. When the token moving into
     * `buf1` is the `ID` command the lexer is NOT asked for another token and
     * `buf2` is set to `null` (presumably because raw inline image data
     * follows and must not be tokenized; see `makeInlineImage`).
     */
    shift() {
      if (this.buf2 instanceof _primitives.Cmd && this.buf2.cmd === "ID") {
        this.buf1 = this.buf2;
        this.buf2 = null;
      } else {
        this.buf1 = this.buf2;
        this.buf2 = this.lexer.getObj();
      }
    }

    /**
     * Like `shift()`, but reports failure instead of throwing.
     *
     * @returns {boolean} true when the shift succeeded, false when it threw.
     * @throws {MissingDataException} Always re-thrown.
     */
    tryShift() {
      try {
        this.shift();
        return true;
      } catch (e) {
        if (e instanceof _core_utils.MissingDataException) {
          throw e;
        }
        return false;
      }
    }

    /**
     * Parses and returns the next object.
     *
     * @param {Object} [cipherTransform=null] - When given, strings are passed
     *   through its `decryptString` and streams through its `createStream`.
     * @returns {*} An array, `Dict`, stream, `Ref`, decrypted string, or the
     *   raw token for everything else.
     * @throws {FormatError} On end-of-file inside an array or dictionary,
     *   unless `recoveryMode` is set.
     */
    getObj(cipherTransform = null) {
      const buf1 = this.buf1;
      this.shift();

      if (buf1 instanceof _primitives.Cmd) {
        switch (buf1.cmd) {
          case "BI":
            return this.makeInlineImage(cipherTransform);

          case "[": {
            const array = [];
            while (!(0, _primitives.isCmd)(this.buf1, "]") && !(0, _primitives.isEOF)(this.buf1)) {
              array.push(this.getObj(cipherTransform));
            }
            if ((0, _primitives.isEOF)(this.buf1)) {
              if (!this.recoveryMode) {
                throw new _util.FormatError("End of file inside array");
              }
              return array;
            }
            // Consume the closing "]".
            this.shift();
            return array;
          }

          case "<<": {
            const dict = new _primitives.Dict(this.xref);
            while (!(0, _primitives.isCmd)(this.buf1, ">>") && !(0, _primitives.isEOF)(this.buf1)) {
              if (!(0, _primitives.isName)(this.buf1)) {
                // Skip the offending token and keep going.
                (0, _util.info)("Malformed dictionary: key must be a name object");
                this.shift();
                continue;
              }
              const key = this.buf1.name;
              this.shift();
              if ((0, _primitives.isEOF)(this.buf1)) {
                break;
              }
              dict.set(key, this.getObj(cipherTransform));
            }
            if ((0, _primitives.isEOF)(this.buf1)) {
              if (!this.recoveryMode) {
                throw new _util.FormatError("End of file inside dictionary");
              }
              return dict;
            }
            // `buf1` is ">>" here; a following "stream" keyword means the
            // dictionary belongs to a stream object.
            if ((0, _primitives.isCmd)(this.buf2, "stream")) {
              return this.allowStreams ? this.makeStream(dict, cipherTransform) : dict;
            }
            // Consume the closing ">>".
            this.shift();
            return dict;
          }

          default:
            return buf1;
        }
      }

      if (Number.isInteger(buf1)) {
        // "<int> <int> R" is an indirect reference.
        if (Number.isInteger(this.buf1) && (0, _primitives.isCmd)(this.buf2, "R")) {
          const ref = _primitives.Ref.get(buf1, this.buf1);
          this.shift();
          this.shift();
          return ref;
        }
        return buf1;
      }

      if (typeof buf1 === "string") {
        if (cipherTransform) {
          return cipherTransform.decryptString(buf1);
        }
        return buf1;
      }

      return buf1;
    }

    /**
     * Scans `stream` for the "EI" marker ending an inline image whose filter
     * has no dedicated end-of-data search.
     *
     * An "EI" followed by SPACE/LF/CR is accepted only when the next ten bytes
     * look like text and, if `lexer.knownCommands` is available, the object
     * after it is not an unknown command.
     *
     * @param {Object} stream - Positioned at the start of the image data; on
     *   return it is positioned after the accepted marker.
     * @returns {number} Length of the image data in bytes.
     */
    findDefaultInlineStreamEnd(stream) {
      const E = 0x45;
      const I = 0x49;
      const SPACE = 0x20;
      const LF = 0xa;
      const CR = 0xd;
      const NUL = 0x0;
      const lexer = this.lexer;
      const startPos = stream.pos;
      const n = 10;
      // state: 0 = nothing matched, 1 = "E" seen, 2 = "EI" seen.
      let state = 0;
      let ch;
      let maybeEIPos;

      while ((ch = stream.getByte()) !== -1) {
        if (state === 0) {
          state = ch === E ? 1 : 0;
        } else if (state === 1) {
          state = ch === I ? 2 : 0;
        } else {
          (0, _util.assert)(state === 2, "findDefaultInlineStreamEnd - invalid state.");
          if (ch === SPACE || ch === LF || ch === CR) {
            maybeEIPos = stream.pos;
            // Bytes outside the printable range right after the marker mean
            // this "EI" is part of the image data.
            const followingBytes = stream.peekBytes(n);
            for (let i = 0, ii = followingBytes.length; i < ii; i++) {
              ch = followingBytes[i];
              if (ch === NUL && followingBytes[i + 1] !== NUL) {
                // A single NUL byte is tolerated.
                continue;
              }
              if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7f)) {
                state = 0;
                break;
              }
            }
            if (state !== 2) {
              continue;
            }
            if (lexer.knownCommands) {
              const nextObj = lexer.peekObj();
              if (nextObj instanceof _primitives.Cmd && !lexer.knownCommands[nextObj.cmd]) {
                // An unknown command follows, so keep searching.
                state = 0;
              }
            } else {
              (0, _util.warn)("findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined.");
            }
            if (state === 2) {
              break;
            }
          } else {
            state = 0;
          }
        }
      }

      if (ch === -1) {
        (0, _util.warn)("findDefaultInlineStreamEnd: " + "Reached the end of the stream without finding a valid EI marker");
        if (maybeEIPos) {
          (0, _util.warn)('... trying to recover by using the last "EI" occurrence.');
          stream.skip(-(stream.pos - maybeEIPos));
        }
      }

      // Exclude the white-space byte in front of "EI" from the image data,
      // when there is one.
      let endOffset = 4;
      stream.skip(-endOffset);
      ch = stream.peekByte();
      stream.skip(endOffset);
      if (!(0, _core_utils.isWhiteSpace)(ch)) {
        endOffset--;
      }
      return stream.pos - endOffset - startPos;
    }

    /**
     * Finds the end of an inline DCTDecode image by walking its 0xFF-prefixed
     * markers until the 0xFFD9 marker is reached. Falls back to
     * `findDefaultInlineStreamEnd` when the data ends first.
     *
     * @param {Object} stream - Positioned at the start of the image data.
     * @returns {number} Length of the image data in bytes.
     */
    findDCTDecodeInlineStreamEnd(stream) {
      const startPos = stream.pos;
      let foundEOI = false;
      let b;
      let markerLength;

      while ((b = stream.getByte()) !== -1) {
        if (b !== 0xff) {
          continue;
        }
        switch (stream.getByte()) {
          case 0x00:
            break;
          case 0xff:
            // Re-examine this 0xFF as a possible marker prefix.
            stream.skip(-1);
            break;
          case 0xd9:
            foundEOI = true;
            break;
          case 0xc0:
          case 0xc1:
          case 0xc2:
          case 0xc3:
          case 0xc5:
          case 0xc6:
          case 0xc7:
          case 0xc9:
          case 0xca:
          case 0xcb:
          case 0xcd:
          case 0xce:
          case 0xcf:
          case 0xc4:
          case 0xcc:
          case 0xda:
          case 0xdb:
          case 0xdc:
          case 0xdd:
          case 0xde:
          case 0xdf:
          case 0xe0:
          case 0xe1:
          case 0xe2:
          case 0xe3:
          case 0xe4:
          case 0xe5:
          case 0xe6:
          case 0xe7:
          case 0xe8:
          case 0xe9:
          case 0xea:
          case 0xeb:
          case 0xec:
          case 0xed:
          case 0xee:
          case 0xef:
          case 0xfe:
            // These markers are followed by a 16-bit length, which is used to
            // skip the segment.
            markerLength = stream.getUint16();
            if (markerLength > 2) {
              stream.skip(markerLength - 2);
            } else {
              // Implausible length: step back over the two bytes just read.
              stream.skip(-2);
            }
            break;
        }
        if (foundEOI) {
          break;
        }
      }

      const length = stream.pos - startPos;
      if (b === -1) {
        (0, _util.warn)("Inline DCTDecode image stream: " + "EOI marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Finds the end of an inline ASCII85Decode image by searching for "~>"
     * (white-space between the two characters is allowed), or for "~"
     * followed by white-space and "EI". Falls back to
     * `findDefaultInlineStreamEnd` when the data ends first.
     *
     * @param {Object} stream - Positioned at the start of the image data.
     * @returns {number} Length of the image data in bytes.
     */
    findASCII85DecodeInlineStreamEnd(stream) {
      const TILDE = 0x7e;
      const GT = 0x3e;
      const startPos = stream.pos;
      let ch;

      while ((ch = stream.getByte()) !== -1) {
        if (ch === TILDE) {
          const tildePos = stream.pos;
          ch = stream.peekByte();
          while ((0, _core_utils.isWhiteSpace)(ch)) {
            stream.skip();
            ch = stream.peekByte();
          }
          if (ch === GT) {
            stream.skip();
            break;
          }
          // "~" without ">" still ends the data when white-space and then
          // "EI" follow it.
          if (stream.pos > tildePos) {
            const maybeEI = stream.peekBytes(2);
            if (maybeEI[0] === 0x45 && maybeEI[1] === 0x49) {
              break;
            }
          }
        }
      }

      const length = stream.pos - startPos;
      if (ch === -1) {
        (0, _util.warn)("Inline ASCII85Decode image stream: " + "EOD marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Finds the end of an inline ASCIIHexDecode image by searching for ">".
     * Falls back to `findDefaultInlineStreamEnd` when the data ends first.
     *
     * @param {Object} stream - Positioned at the start of the image data.
     * @returns {number} Length of the image data in bytes, including the ">".
     */
    findASCIIHexDecodeInlineStreamEnd(stream) {
      const GT = 0x3e;
      const startPos = stream.pos;
      let ch;

      while ((ch = stream.getByte()) !== -1) {
        if (ch === GT) {
          break;
        }
      }

      const length = stream.pos - startPos;
      if (ch === -1) {
        (0, _util.warn)("Inline ASCIIHexDecode image stream: " + "EOD marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Advances `stream` past the next "EI" and the single byte following it,
     * or to the end of the data when no "EI" is found.
     *
     * @param {Object} stream
     */
    inlineStreamSkipEI(stream) {
      const E = 0x45;
      const I = 0x49;
      // state: 0 = nothing matched, 1 = "E" seen, 2 = "EI" seen.
      let state = 0;
      let ch;

      while ((ch = stream.getByte()) !== -1) {
        if (state === 0) {
          state = ch === E ? 1 : 0;
        } else if (state === 1) {
          state = ch === I ? 2 : 0;
        } else if (state === 2) {
          break;
        }
      }
    }

    /**
     * Parses an inline image; called after the `BI` command was consumed.
     * Reads the image dictionary up to `ID`, locates the end of the image
     * data according to the first filter, and wraps the data in a stream.
     *
     * Small images are cached in `this.imageCache`, keyed by checksums of the
     * image bytes and of the dictionary bytes.
     *
     * @param {Object} [cipherTransform]
     * @returns {Object} The (possibly cached) image stream.
     * @throws {FormatError} When a dictionary key is not a name.
     */
    makeInlineImage(cipherTransform) {
      const lexer = this.lexer;
      const stream = lexer.stream;
      const dict = new _primitives.Dict(this.xref);
      let dictLength;

      while (!(0, _primitives.isCmd)(this.buf1, "ID") && !(0, _primitives.isEOF)(this.buf1)) {
        if (!(0, _primitives.isName)(this.buf1)) {
          throw new _util.FormatError("Dictionary key must be a name object");
        }
        const key = this.buf1.name;
        this.shift();
        if ((0, _primitives.isEOF)(this.buf1)) {
          break;
        }
        dict.set(key, this.getObj(cipherTransform));
      }
      if (lexer.beginInlineImagePos !== -1) {
        dictLength = stream.pos - lexer.beginInlineImagePos;
      }

      // Only the first filter decides how the end of the data is located.
      const filter = dict.get("Filter", "F");
      let filterName;
      if ((0, _primitives.isName)(filter)) {
        filterName = filter.name;
      } else if (Array.isArray(filter)) {
        const filterZero = this.xref.fetchIfRef(filter[0]);
        if ((0, _primitives.isName)(filterZero)) {
          filterName = filterZero.name;
        }
      }

      const startPos = stream.pos;
      let length;
      if (filterName === "DCTDecode" || filterName === "DCT") {
        length = this.findDCTDecodeInlineStreamEnd(stream);
      } else if (filterName === "ASCII85Decode" || filterName === "A85") {
        length = this.findASCII85DecodeInlineStreamEnd(stream);
      } else if (filterName === "ASCIIHexDecode" || filterName === "AHx") {
        length = this.findASCIIHexDecodeInlineStreamEnd(stream);
      } else {
        length = this.findDefaultInlineStreamEnd(stream);
      }

      let imageStream = stream.makeSubStream(startPos, length, dict);
      let cacheKey;
      // When `dictLength` is undefined the comparison is false, so no cache
      // key is computed.
      if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) {
        const imageBytes = imageStream.getBytes();
        imageStream.reset();

        // Temporarily rewind to read the raw dictionary bytes.
        const initialStreamPos = stream.pos;
        stream.pos = lexer.beginInlineImagePos;
        const dictBytes = stream.getBytes(dictLength);
        stream.pos = initialStreamPos;

        cacheKey = computeAdler32(imageBytes) + "_" + computeAdler32(dictBytes);
        const cacheEntry = this.imageCache[cacheKey];
        if (cacheEntry !== undefined) {
          // Inject a synthetic "EI" so the token window is consistent again.
          this.buf2 = _primitives.Cmd.get("EI");
          this.shift();
          cacheEntry.reset();
          return cacheEntry;
        }
      }

      if (cipherTransform) {
        imageStream = cipherTransform.createStream(imageStream, length);
      }
      imageStream = this.filter(imageStream, dict, length);
      imageStream.dict = dict;
      if (cacheKey !== undefined) {
        imageStream.cacheKey = `inline_${length}_${cacheKey}`;
        this.imageCache[cacheKey] = imageStream;
      }

      this.buf2 = _primitives.Cmd.get("EI");
      this.shift();
      return imageStream;
    }

    /**
     * Searches the lexer's stream, starting at `startPos`, for `signature`.
     *
     * The data is examined in blocks of 2048 bytes. Every offset at which the
     * whole signature still fits into the current block is tested, including
     * the last one, so a signature that occupies the final bytes of the data
     * is found as well.
     *
     * @param {number} startPos - Absolute position at which to start.
     * @param {Uint8Array} signature - Byte sequence to look for.
     * @returns {number} Offset of the signature relative to `startPos` (with
     *   `stream.pos` left at the signature), or -1 when it is not found.
     */
    _findStreamLength(startPos, signature) {
      const { stream } = this.lexer;
      stream.pos = startPos;
      const SCAN_BLOCK_LENGTH = 2048;
      const signatureLength = signature.length;

      while (stream.pos < stream.end) {
        const scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH);
        // Last offset in this block at which the whole signature still fits.
        const lastStart = scanBytes.length - signatureLength;
        if (lastStart < 0) {
          // Fewer bytes remain than the signature is long.
          break;
        }
        for (let pos = 0; pos <= lastStart; pos++) {
          let j = 0;
          while (j < signatureLength && scanBytes[pos + j] === signature[j]) {
            j++;
          }
          if (j >= signatureLength) {
            stream.pos += pos;
            return stream.pos - startPos;
          }
        }
        // Continue right behind the last offset that was tested; the next
        // block then overlaps this one by `signatureLength - 1` bytes.
        stream.pos += lastStart + 1;
      }
      return -1;
    }

    /**
     * Builds the stream object that belongs to `dict`. Called while `buf2`
     * is the `stream` keyword.
     *
     * The `Length` entry is trusted first. When no `endstream` keyword is
     * found at the resulting position, the data is searched for "endstream"
     * and, failing that, for the signature truncated by one character
     * followed by white-space.
     *
     * @param {Dict} dict - The stream dictionary.
     * @param {Object} [cipherTransform]
     * @returns {Object} The decoded stream, with `dict` attached.
     * @throws {FormatError} When no endstream keyword can be located.
     */
    makeStream(dict, cipherTransform) {
      const lexer = this.lexer;
      let stream = lexer.stream;

      lexer.skipToNextLine();
      // NOTE(review): the -1 presumably compensates for the character the
      // lexer has already read into `currentChar` - confirm.
      const startPos = stream.pos - 1;

      let length = dict.get("Length");
      if (!Number.isInteger(length)) {
        (0, _util.info)(`Bad length "${length}" in stream`);
        length = 0;
      }

      // Jump to where the stream should end and check for "endstream".
      stream.pos = startPos + length;
      lexer.nextChar();

      if (this.tryShift() && (0, _primitives.isCmd)(this.buf2, "endstream")) {
        this.shift();
      } else {
        // Bytes of the ASCII string "endstream".
        const ENDSTREAM_SIGNATURE = new Uint8Array([0x65, 0x6E, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6D]);
        let actualLength = this._findStreamLength(startPos, ENDSTREAM_SIGNATURE);

        if (actualLength < 0) {
          // Also accept the keyword with its last character(s) missing, as
          // long as white-space follows it.
          const MAX_TRUNCATION = 1;
          for (let i = 1; i <= MAX_TRUNCATION; i++) {
            const end = ENDSTREAM_SIGNATURE.length - i;
            const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);
            const maybeLength = this._findStreamLength(startPos, TRUNCATED_SIGNATURE);

            if (maybeLength >= 0) {
              const lastByte = stream.peekBytes(end + 1)[end];
              if (!(0, _core_utils.isWhiteSpace)(lastByte)) {
                break;
              }
              (0, _util.info)(`Found "${(0, _util.bytesToString)(TRUNCATED_SIGNATURE)}" when ` + "searching for endstream command.");
              actualLength = maybeLength;
              break;
            }
          }
          if (actualLength < 0) {
            throw new _util.FormatError("Missing endstream command.");
          }
        }
        length = actualLength;

        lexer.nextChar();
        this.shift();
        this.shift();
      }
      this.shift();

      stream = stream.makeSubStream(startPos, length, dict);
      if (cipherTransform) {
        stream = cipherTransform.createStream(stream, length);
      }
      stream = this.filter(stream, dict, length);
      stream.dict = dict;
      return stream;
    }

    /**
     * Wraps `stream` in the decoder(s) named by the `Filter`/`F` entry of
     * `dict`, using the matching `DecodeParms`/`DP` entry.
     *
     * @param {Object} stream - The raw stream.
     * @param {Dict} dict - The stream dictionary.
     * @param {number} length - Length of the raw data; only passed to the
     *   first filter of a filter array.
     * @returns {Object} The outermost decoding stream, or `stream` itself
     *   when no filter is present.
     * @throws {FormatError} When a filter array entry is not a name.
     */
    filter(stream, dict, length) {
      let filter = dict.get("Filter", "F");
      let params = dict.get("DecodeParms", "DP");

      if ((0, _primitives.isName)(filter)) {
        if (Array.isArray(params)) {
          (0, _util.warn)("/DecodeParms should not contain an Array, " + "when /Filter contains a Name.");
        }
        return this.makeFilter(stream, filter.name, length, params);
      }

      let maybeLength = length;
      if (Array.isArray(filter)) {
        const filterArray = filter;
        const paramsArray = params;
        for (let i = 0, ii = filterArray.length; i < ii; ++i) {
          filter = this.xref.fetchIfRef(filterArray[i]);
          if (!(0, _primitives.isName)(filter)) {
            throw new _util.FormatError(`Bad filter name "${filter}"`);
          }
          params = null;
          if (Array.isArray(paramsArray) && i in paramsArray) {
            params = this.xref.fetchIfRef(paramsArray[i]);
          }
          stream = this.makeFilter(stream, filter.name, maybeLength, params);
          // After the first filter the length of the data is unknown.
          maybeLength = null;
        }
      }
      return stream;
    }

    /**
     * Creates the decoding stream for a single filter and records the filter
     * type in `this.xref.stats.streamTypes`.
     *
     * @param {Object} stream - The stream to decode.
     * @param {string} name - Filter name, full or abbreviated.
     * @param {number|null} maybeLength - Length of the data, if known.
     * @param {Dict} [params] - Decode parameters for this filter.
     * @returns {Object} The decoding stream; `stream` itself when the filter
     *   is unsupported; a `NullStream` when `maybeLength` is 0 or creating
     *   the decoder failed.
     * @throws {MissingDataException} Always re-thrown.
     */
    makeFilter(stream, name, maybeLength, params) {
      if (maybeLength === 0) {
        (0, _util.warn)(`Empty "${name}" stream.`);
        return new _stream.NullStream();
      }

      try {
        const xrefStreamStats = this.xref.stats.streamTypes;

        if (name === "FlateDecode" || name === "Fl") {
          xrefStreamStats[_util.StreamType.FLATE] = true;
          if (params) {
            return new _stream.PredictorStream(new _stream.FlateStream(stream, maybeLength), maybeLength, params);
          }
          return new _stream.FlateStream(stream, maybeLength);
        }
        if (name === "LZWDecode" || name === "LZW") {
          xrefStreamStats[_util.StreamType.LZW] = true;
          let earlyChange = 1;
          if (params) {
            if (params.has("EarlyChange")) {
              earlyChange = params.get("EarlyChange");
            }
            return new _stream.PredictorStream(new _stream.LZWStream(stream, maybeLength, earlyChange), maybeLength, params);
          }
          return new _stream.LZWStream(stream, maybeLength, earlyChange);
        }
        if (name === "DCTDecode" || name === "DCT") {
          xrefStreamStats[_util.StreamType.DCT] = true;
          return new _jpeg_stream.JpegStream(stream, maybeLength, stream.dict, params);
        }
        if (name === "JPXDecode" || name === "JPX") {
          xrefStreamStats[_util.StreamType.JPX] = true;
          return new _jpx_stream.JpxStream(stream, maybeLength, stream.dict, params);
        }
        if (name === "ASCII85Decode" || name === "A85") {
          xrefStreamStats[_util.StreamType.A85] = true;
          return new _stream.Ascii85Stream(stream, maybeLength);
        }
        if (name === "ASCIIHexDecode" || name === "AHx") {
          xrefStreamStats[_util.StreamType.AHX] = true;
          return new _stream.AsciiHexStream(stream, maybeLength);
        }
        if (name === "CCITTFaxDecode" || name === "CCF") {
          xrefStreamStats[_util.StreamType.CCF] = true;
          return new _ccitt_stream.CCITTFaxStream(stream, maybeLength, params);
        }
        if (name === "RunLengthDecode" || name === "RL") {
          xrefStreamStats[_util.StreamType.RLX] = true;
          return new _stream.RunLengthStream(stream, maybeLength);
        }
        if (name === "JBIG2Decode") {
          xrefStreamStats[_util.StreamType.JBIG] = true;
          return new _jbig2_stream.Jbig2Stream(stream, maybeLength, stream.dict, params);
        }
        (0, _util.warn)(`Filter "${name}" is not supported.`);
        return stream;
      } catch (ex) {
        if (ex instanceof _core_utils.MissingDataException) {
          throw ex;
        }
        (0, _util.warn)(`Invalid stream: "${ex}"`);
        return new _stream.NullStream();
      }
    }
  }
  607. exports.Parser = Parser;
  608. const specialChars = [1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
  /**
   * Converts the character code of a hexadecimal digit to its numeric value.
   *
   * @param {number} ch - Character code.
   * @returns {number} 0-15 for `0-9`, `A-F` and `a-f`; -1 for anything else.
   */
  function toHexDigit(ch) {
    const lowNibble = ch & 0x0f;

    const isDecimalDigit = ch >= 0x30 && ch <= 0x39;
    if (isDecimalDigit) {
      return lowNibble;
    }

    // "A"/"a" have a low nibble of 1, so adding 9 maps them to 10, and so on.
    const isUpperHexLetter = ch >= 0x41 && ch <= 0x46;
    const isLowerHexLetter = ch >= 0x61 && ch <= 0x66;
    return isUpperHexLetter || isLowerHexLetter ? lowNibble + 9 : -1;
  }
  618. class Lexer {
  619. constructor(stream, knownCommands = null) {
  620. this.stream = stream;
  621. this.nextChar();
  622. this.strBuf = [];
  623. this.knownCommands = knownCommands;
  624. this._hexStringNumWarn = 0;
  625. this.beginInlineImagePos = -1;
  626. }
  627. nextChar() {
  628. return this.currentChar = this.stream.getByte();
  629. }
  630. peekChar() {
  631. return this.stream.peekByte();
  632. }
  633. getNumber() {
  634. let ch = this.currentChar;
  635. let eNotation = false;
  636. let divideBy = 0;
  637. let sign = 0;
  638. if (ch === 0x2d) {
  639. sign = -1;
  640. ch = this.nextChar();
  641. if (ch === 0x2d) {
  642. ch = this.nextChar();
  643. }
  644. } else if (ch === 0x2b) {
  645. sign = 1;
  646. ch = this.nextChar();
  647. }
  648. if (ch === 0x0a || ch === 0x0d) {
  649. do {
  650. ch = this.nextChar();
  651. } while (ch === 0x0a || ch === 0x0d);
  652. }
  653. if (ch === 0x2e) {
  654. divideBy = 10;
  655. ch = this.nextChar();
  656. }
  657. if (ch < 0x30 || ch > 0x39) {
  658. if (divideBy === 10 && sign === 0 && ((0, _core_utils.isWhiteSpace)(ch) || ch === -1)) {
  659. (0, _util.warn)("Lexer.getNumber - treating a single decimal point as zero.");
  660. return 0;
  661. }
  662. throw new _util.FormatError(`Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`);
  663. }
  664. sign = sign || 1;
  665. let baseValue = ch - 0x30;
  666. let powerValue = 0;
  667. let powerValueSign = 1;
  668. while ((ch = this.nextChar()) >= 0) {
  669. if (ch >= 0x30 && ch <= 0x39) {
  670. const currentDigit = ch - 0x30;
  671. if (eNotation) {
  672. powerValue = powerValue * 10 + currentDigit;
  673. } else {
  674. if (divideBy !== 0) {
  675. divideBy *= 10;
  676. }
  677. baseValue = baseValue * 10 + currentDigit;
  678. }
  679. } else if (ch === 0x2e) {
  680. if (divideBy === 0) {
  681. divideBy = 1;
  682. } else {
  683. break;
  684. }
  685. } else if (ch === 0x2d) {
  686. (0, _util.warn)("Badly formatted number: minus sign in the middle");
  687. } else if (ch === 0x45 || ch === 0x65) {
  688. ch = this.peekChar();
  689. if (ch === 0x2b || ch === 0x2d) {
  690. powerValueSign = ch === 0x2d ? -1 : 1;
  691. this.nextChar();
  692. } else if (ch < 0x30 || ch > 0x39) {
  693. break;
  694. }
  695. eNotation = true;
  696. } else {
  697. break;
  698. }
  699. }
  700. if (divideBy !== 0) {
  701. baseValue /= divideBy;
  702. }
  703. if (eNotation) {
  704. baseValue *= 10 ** (powerValueSign * powerValue);
  705. }
  706. return sign * baseValue;
  707. }
  /**
   * Reads a literal string. The current character is expected to be the
   * opening "("; reading starts with the character after it. Nested, balanced
   * parentheses are kept, and backslash escapes (including one- to
   * three-digit octal codes and line continuations) are decoded.
   *
   * @returns {string} The decoded string contents.
   */
  getString() {
    // Nesting depth; the string ends when it drops to zero.
    let numParen = 1;
    let done = false;
    const strBuf = this.strBuf;
    strBuf.length = 0;
    let ch = this.nextChar();
    while (true) {
      // Set when `ch` already holds the next unprocessed character, so that
      // the bottom of the loop must not read another one.
      let charBuffered = false;
      switch (ch | 0) {
        case -1:
          (0, _util.warn)("Unterminated string");
          done = true;
          break;
        case 0x28:
          // "("
          ++numParen;
          strBuf.push("(");
          break;
        case 0x29:
          // ")"
          if (--numParen === 0) {
            // Consume the closing parenthesis.
            this.nextChar();
            done = true;
          } else {
            strBuf.push(")");
          }
          break;
        case 0x5c:
          // "\" - escape sequence
          ch = this.nextChar();
          switch (ch) {
            case -1:
              (0, _util.warn)("Unterminated string");
              done = true;
              break;
            case 0x6e:
              strBuf.push("\n");
              break;
            case 0x72:
              strBuf.push("\r");
              break;
            case 0x74:
              strBuf.push("\t");
              break;
            case 0x62:
              strBuf.push("\b");
              break;
            case 0x66:
              strBuf.push("\f");
              break;
            case 0x5c:
            case 0x28:
            case 0x29:
              // Escaped "\", "(" or ")" stand for themselves.
              strBuf.push(String.fromCharCode(ch));
              break;
            case 0x30:
            case 0x31:
            case 0x32:
            case 0x33:
            case 0x34:
            case 0x35:
            case 0x36:
            case 0x37:
              // Octal character code with one to three digits.
              let x = ch & 0x0f;
              ch = this.nextChar();
              charBuffered = true;
              if (ch >= 0x30 && ch <= 0x37) {
                x = (x << 3) + (ch & 0x0f);
                ch = this.nextChar();
                if (ch >= 0x30 && ch <= 0x37) {
                  // The third digit was consumed, so `ch` is not left over.
                  charBuffered = false;
                  x = (x << 3) + (ch & 0x0f);
                }
              }
              strBuf.push(String.fromCharCode(x));
              break;
            case 0x0d:
              // Backslash followed by CR (and an optional LF): line
              // continuation, nothing is added to the string.
              if (this.peekChar() === 0x0a) {
                this.nextChar();
              }
              break;
            case 0x0a:
              // Backslash followed by LF: line continuation.
              break;
            default:
              // Unknown escape: the backslash is dropped.
              strBuf.push(String.fromCharCode(ch));
              break;
          }
          break;
        default:
          strBuf.push(String.fromCharCode(ch));
          break;
      }
      if (done) {
        break;
      }
      if (!charBuffered) {
        ch = this.nextChar();
      }
    }
    return strBuf.join("");
  }
  806. getName() {
  807. let ch, previousCh;
  808. const strBuf = this.strBuf;
  809. strBuf.length = 0;
  810. while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
  811. if (ch === 0x23) {
  812. ch = this.nextChar();
  813. if (specialChars[ch]) {
  814. (0, _util.warn)("Lexer_getName: " + "NUMBER SIGN (#) should be followed by a hexadecimal number.");
  815. strBuf.push("#");
  816. break;
  817. }
  818. const x = toHexDigit(ch);
  819. if (x !== -1) {
  820. previousCh = ch;
  821. ch = this.nextChar();
  822. const x2 = toHexDigit(ch);
  823. if (x2 === -1) {
  824. (0, _util.warn)(`Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` + "in hexadecimal number.");
  825. strBuf.push("#", String.fromCharCode(previousCh));
  826. if (specialChars[ch]) {
  827. break;
  828. }
  829. strBuf.push(String.fromCharCode(ch));
  830. continue;
  831. }
  832. strBuf.push(String.fromCharCode(x << 4 | x2));
  833. } else {
  834. strBuf.push("#", String.fromCharCode(ch));
  835. }
  836. } else {
  837. strBuf.push(String.fromCharCode(ch));
  838. }
  839. }
  840. if (strBuf.length > 127) {
  841. (0, _util.warn)(`Name token is longer than allowed by the spec: ${strBuf.length}`);
  842. }
  843. return _primitives.Name.get(strBuf.join(""));
  844. }
  845. _hexStringWarn(ch) {
  846. const MAX_HEX_STRING_NUM_WARN = 5;
  847. if (this._hexStringNumWarn++ === MAX_HEX_STRING_NUM_WARN) {
  848. (0, _util.warn)("getHexString - ignoring additional invalid characters.");
  849. return;
  850. }
  851. if (this._hexStringNumWarn > MAX_HEX_STRING_NUM_WARN) {
  852. return;
  853. }
  854. (0, _util.warn)(`getHexString - ignoring invalid character: ${ch}`);
  855. }
  856. getHexString() {
  857. const strBuf = this.strBuf;
  858. strBuf.length = 0;
  859. let ch = this.currentChar;
  860. let isFirstHex = true;
  861. let firstDigit, secondDigit;
  862. this._hexStringNumWarn = 0;
  863. while (true) {
  864. if (ch < 0) {
  865. (0, _util.warn)("Unterminated hex string");
  866. break;
  867. } else if (ch === 0x3e) {
  868. this.nextChar();
  869. break;
  870. } else if (specialChars[ch] === 1) {
  871. ch = this.nextChar();
  872. continue;
  873. } else {
  874. if (isFirstHex) {
  875. firstDigit = toHexDigit(ch);
  876. if (firstDigit === -1) {
  877. this._hexStringWarn(ch);
  878. ch = this.nextChar();
  879. continue;
  880. }
  881. } else {
  882. secondDigit = toHexDigit(ch);
  883. if (secondDigit === -1) {
  884. this._hexStringWarn(ch);
  885. ch = this.nextChar();
  886. continue;
  887. }
  888. strBuf.push(String.fromCharCode(firstDigit << 4 | secondDigit));
  889. }
  890. isFirstHex = !isFirstHex;
  891. ch = this.nextChar();
  892. }
  893. }
  894. return strBuf.join("");
  895. }
  896. getObj() {
  897. let comment = false;
  898. let ch = this.currentChar;
  899. while (true) {
  900. if (ch < 0) {
  901. return _primitives.EOF;
  902. }
  903. if (comment) {
  904. if (ch === 0x0a || ch === 0x0d) {
  905. comment = false;
  906. }
  907. } else if (ch === 0x25) {
  908. comment = true;
  909. } else if (specialChars[ch] !== 1) {
  910. break;
  911. }
  912. ch = this.nextChar();
  913. }
  914. switch (ch | 0) {
  915. case 0x30:
  916. case 0x31:
  917. case 0x32:
  918. case 0x33:
  919. case 0x34:
  920. case 0x35:
  921. case 0x36:
  922. case 0x37:
  923. case 0x38:
  924. case 0x39:
  925. case 0x2b:
  926. case 0x2d:
  927. case 0x2e:
  928. return this.getNumber();
  929. case 0x28:
  930. return this.getString();
  931. case 0x2f:
  932. return this.getName();
  933. case 0x5b:
  934. this.nextChar();
  935. return _primitives.Cmd.get("[");
  936. case 0x5d:
  937. this.nextChar();
  938. return _primitives.Cmd.get("]");
  939. case 0x3c:
  940. ch = this.nextChar();
  941. if (ch === 0x3c) {
  942. this.nextChar();
  943. return _primitives.Cmd.get("<<");
  944. }
  945. return this.getHexString();
  946. case 0x3e:
  947. ch = this.nextChar();
  948. if (ch === 0x3e) {
  949. this.nextChar();
  950. return _primitives.Cmd.get(">>");
  951. }
  952. return _primitives.Cmd.get(">");
  953. case 0x7b:
  954. this.nextChar();
  955. return _primitives.Cmd.get("{");
  956. case 0x7d:
  957. this.nextChar();
  958. return _primitives.Cmd.get("}");
  959. case 0x29:
  960. this.nextChar();
  961. throw new _util.FormatError(`Illegal character: ${ch}`);
  962. }
  963. let str = String.fromCharCode(ch);
  964. const knownCommands = this.knownCommands;
  965. let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
  966. while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
  967. const possibleCommand = str + String.fromCharCode(ch);
  968. if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
  969. break;
  970. }
  971. if (str.length === 128) {
  972. throw new _util.FormatError(`Command token too long: ${str.length}`);
  973. }
  974. str = possibleCommand;
  975. knownCommandFound = knownCommands && knownCommands[str] !== undefined;
  976. }
  977. if (str === "true") {
  978. return true;
  979. }
  980. if (str === "false") {
  981. return false;
  982. }
  983. if (str === "null") {
  984. return null;
  985. }
  986. if (str === "BI") {
  987. this.beginInlineImagePos = this.stream.pos;
  988. }
  989. return _primitives.Cmd.get(str);
  990. }
  991. peekObj() {
  992. const streamPos = this.stream.pos,
  993. currentChar = this.currentChar,
  994. beginInlineImagePos = this.beginInlineImagePos;
  995. let nextObj;
  996. try {
  997. nextObj = this.getObj();
  998. } catch (ex) {
  999. if (ex instanceof _core_utils.MissingDataException) {
  1000. throw ex;
  1001. }
  1002. (0, _util.warn)(`peekObj: ${ex}`);
  1003. }
  1004. this.stream.pos = streamPos;
  1005. this.currentChar = currentChar;
  1006. this.beginInlineImagePos = beginInlineImagePos;
  1007. return nextObj;
  1008. }
  1009. skipToNextLine() {
  1010. let ch = this.currentChar;
  1011. while (ch >= 0) {
  1012. if (ch === 0x0d) {
  1013. ch = this.nextChar();
  1014. if (ch === 0x0a) {
  1015. this.nextChar();
  1016. }
  1017. break;
  1018. } else if (ch === 0x0a) {
  1019. this.nextChar();
  1020. break;
  1021. }
  1022. ch = this.nextChar();
  1023. }
  1024. }
  1025. }
  1026. exports.Lexer = Lexer;
  /**
   * Reader for the linearization parameter dictionary found at the start of
   * a linearized PDF file.
   */
  class Linearization {
    /**
     * Parses the first object of `stream` and, if it is a linearization
     * dictionary, returns its validated parameters.
     *
     * @param {Object} stream - Data to read from; its `length` is compared
     *   with the dictionary's "L" entry.
     * @returns {Object|null} `null` when the data does not start with
     *   `<int> <int> obj <dict>` where the dictionary has a numeric
     *   "Linearized" entry greater than zero; otherwise an object with
     *   `length`, `hints`, `objectNumberFirst`, `endFirst`, `numPages`,
     *   `mainXRefEntriesOffset` and `pageFirst`.
     * @throws {Error} When a required entry is missing or invalid, or when
     *   "L" differs from `stream.length`.
     */
    static create(stream) {
      // Reads an integer entry: strictly positive, or non-negative when
      // `allowZeroValue` is set.
      const getInt = (linDict, name, allowZeroValue = false) => {
        const value = linDict.get(name);
        if (!Number.isInteger(value) || (allowZeroValue ? value < 0 : value <= 0)) {
          throw new Error(`The "${name}" parameter in the linearization dictionary is invalid.`);
        }
        return value;
      };

      // Reads the "H" entry: an array of 2 or 4 strictly positive integers.
      const getHints = linDict => {
        const hints = linDict.get("H");
        if (!Array.isArray(hints) || (hints.length !== 2 && hints.length !== 4)) {
          throw new Error("Hint array in the linearization dictionary is invalid.");
        }
        const badIndex = hints.findIndex(hint => !(Number.isInteger(hint) && hint > 0));
        if (badIndex !== -1) {
          throw new Error(`Hint (${badIndex}) in the linearization dictionary is invalid.`);
        }
        return hints;
      };

      const parser = new Parser({
        lexer: new Lexer(stream),
        xref: null
      });
      // All four tokens are read up front, before anything is validated.
      const objectNumber = parser.getObj();
      const generationNumber = parser.getObj();
      const objKeyword = parser.getObj();
      const linDict = parser.getObj();

      const startsWithObjHeader = Number.isInteger(objectNumber) && Number.isInteger(generationNumber) && (0, _primitives.isCmd)(objKeyword, "obj");
      if (!startsWithObjHeader || !(0, _primitives.isDict)(linDict)) {
        return null;
      }
      const linearized = linDict.get("Linearized");
      if (!((0, _util.isNum)(linearized) && linearized > 0)) {
        return null;
      }

      const length = getInt(linDict, "L");
      if (length !== stream.length) {
        throw new Error('The "L" parameter in the linearization dictionary does not equal the stream length.');
      }

      // The entries are validated in this order, so the first invalid one
      // determines which error is thrown.
      return {
        length,
        hints: getHints(linDict),
        objectNumberFirst: getInt(linDict, "O"),
        endFirst: getInt(linDict, "E"),
        numPages: getInt(linDict, "N"),
        mainXRefEntriesOffset: getInt(linDict, "T"),
        pageFirst: linDict.has("P") ? getInt(linDict, "P", true) : 0
      };
    }
  }
  1075. exports.Linearization = Linearization;