parser.js 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * JavaScript code in this page
  4. *
  5. * Copyright 2022 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * JavaScript code in this page
  21. */
  22. "use strict";
  23. Object.defineProperty(exports, "__esModule", {
  24. value: true
  25. });
  26. exports.Parser = exports.Linearization = exports.Lexer = void 0;
  27. var _util = require("../shared/util.js");
  28. var _primitives = require("./primitives.js");
  29. var _core_utils = require("./core_utils.js");
  30. var _ascii_85_stream = require("./ascii_85_stream.js");
  31. var _ascii_hex_stream = require("./ascii_hex_stream.js");
  32. var _ccitt_stream = require("./ccitt_stream.js");
  33. var _flate_stream = require("./flate_stream.js");
  34. var _jbig2_stream = require("./jbig2_stream.js");
  35. var _jpeg_stream = require("./jpeg_stream.js");
  36. var _jpx_stream = require("./jpx_stream.js");
  37. var _lzw_stream = require("./lzw_stream.js");
  38. var _stream = require("./stream.js");
  39. var _predictor_stream = require("./predictor_stream.js");
  40. var _run_length_stream = require("./run_length_stream.js");
  // Upper bound (exclusive) on the data length of an inline image that
  // `Parser.makeInlineImage` will cache; longer images are never cached.
  const MAX_LENGTH_TO_CACHE = 1000;
  /**
   * Builds a string key for an inline image from its raw bytes.
   *
   * Bytes are packed pairwise (big-endian) into UTF-16 code units so the key
   * is roughly half as long as the input; a trailing unpaired byte becomes
   * its own code unit. The byte count is prefixed to the packed data.
   *
   * @param {Uint8Array|Array<number>} bytes - Dictionary + image data bytes.
   * @returns {string} Key of the form `<byteCount>_<packed characters>`.
   */
  function getInlineImageCacheKey(bytes) {
    const byteCount = bytes.length;
    // Index just past the last complete two-byte pair.
    const pairedEnd = byteCount - (byteCount % 2);
    const codeUnits = [];

    for (let offset = 0; offset < pairedEnd; offset += 2) {
      codeUnits.push((bytes[offset] << 8) | bytes[offset + 1]);
    }
    if (pairedEnd < byteCount) {
      codeUnits.push(bytes[pairedEnd]);
    }
    return byteCount + "_" + String.fromCharCode(...codeUnits);
  }
  /**
   * Parser that turns the token sequence produced by a `Lexer` into PDF
   * objects (arrays, dictionaries, references, streams and inline images).
   *
   * The parser keeps a two-token look-ahead window (`buf1`, `buf2`) that is
   * advanced with `shift()`.
   */
  class Parser {
    /**
     * @param {Object} params
     * @param {Lexer} params.lexer - Token source; its `stream` is also read
     *   directly when locating stream / inline-image data.
     * @param {Object} params.xref - Cross-reference table, used for
     *   `fetchIfRef`, `stats` and as the owner of created dictionaries.
     * @param {boolean} [params.allowStreams] - When false, a dictionary
     *   followed by `stream` is returned as a plain dictionary.
     * @param {boolean} [params.recoveryMode] - When true, an EOF inside an
     *   array/dictionary returns the partial object instead of throwing.
     */
    constructor({
      lexer,
      xref,
      allowStreams = false,
      recoveryMode = false
    }) {
      this.lexer = lexer;
      this.xref = xref;
      this.allowStreams = allowStreams;
      this.recoveryMode = recoveryMode;
      // Cache of small inline images, keyed by `getInlineImageCacheKey`.
      this.imageCache = Object.create(null);
      this._imageId = 0;
      this.refill();
    }

    /** Fills both look-ahead slots with the next two lexer tokens. */
    refill() {
      this.buf1 = this.lexer.getObj();
      this.buf2 = this.lexer.getObj();
    }

    /**
     * Advances the look-ahead window by one token.
     *
     * When the token moving into `buf1` is the `ID` command, no further
     * token is read from the lexer and `buf2` is set to null.
     * NOTE(review): presumably because raw inline-image data follows `ID`
     * and must not be tokenized - confirm against the lexer.
     */
    shift() {
      if (this.buf2 instanceof _primitives.Cmd && this.buf2.cmd === "ID") {
        this.buf1 = this.buf2;
        this.buf2 = null;
      } else {
        this.buf1 = this.buf2;
        this.buf2 = this.lexer.getObj();
      }
    }

    /**
     * Like `shift()`, but reports failure instead of throwing.
     *
     * @returns {boolean} true if the shift succeeded, false if it threw.
     * @throws {MissingDataException} Always re-thrown, never swallowed.
     */
    tryShift() {
      try {
        this.shift();
        return true;
      } catch (e) {
        if (e instanceof _core_utils.MissingDataException) {
          throw e;
        }
        return false;
      }
    }

    /**
     * Parses and returns the next object.
     *
     * @param {Object|null} [cipherTransform] - If given, strings are passed
     *   through `decryptString` and streams through `createStream`.
     * @returns {*} An array, Dict, stream, Ref, or the raw token.
     * @throws {ParserEOFException} On EOF inside an array or dictionary when
     *   not in recovery mode.
     */
    getObj(cipherTransform = null) {
      const buf1 = this.buf1;
      this.shift();
      if (buf1 instanceof _primitives.Cmd) {
        switch (buf1.cmd) {
          case "BI":
            return this.makeInlineImage(cipherTransform);
          case "[": {
            const array = [];
            while (!(0, _primitives.isCmd)(this.buf1, "]") && this.buf1 !== _primitives.EOF) {
              array.push(this.getObj(cipherTransform));
            }
            if (this.buf1 === _primitives.EOF) {
              if (this.recoveryMode) {
                return array;
              }
              throw new _core_utils.ParserEOFException("End of file inside array.");
            }
            // Consume the closing "]".
            this.shift();
            return array;
          }
          case "<<": {
            const dict = new _primitives.Dict(this.xref);
            while (!(0, _primitives.isCmd)(this.buf1, ">>") && this.buf1 !== _primitives.EOF) {
              if (!(this.buf1 instanceof _primitives.Name)) {
                // Skip the offending token and keep going.
                (0, _util.info)("Malformed dictionary: key must be a name object");
                this.shift();
                continue;
              }
              const key = this.buf1.name;
              this.shift();
              if (this.buf1 === _primitives.EOF) {
                break;
              }
              dict.set(key, this.getObj(cipherTransform));
            }
            if (this.buf1 === _primitives.EOF) {
              if (this.recoveryMode) {
                return dict;
              }
              throw new _core_utils.ParserEOFException("End of file inside dictionary.");
            }
            // `buf1` is ">>" here; a following "stream" keyword means this
            // dictionary describes a stream object.
            if ((0, _primitives.isCmd)(this.buf2, "stream")) {
              return this.allowStreams ? this.makeStream(dict, cipherTransform) : dict;
            }
            // Consume the closing ">>".
            this.shift();
            return dict;
          }
          default:
            return buf1;
        }
      }
      if (Number.isInteger(buf1)) {
        // "<int> <int> R" is an indirect reference.
        if (Number.isInteger(this.buf1) && (0, _primitives.isCmd)(this.buf2, "R")) {
          const ref = _primitives.Ref.get(buf1, this.buf1);
          this.shift();
          this.shift();
          return ref;
        }
        return buf1;
      }
      if (typeof buf1 === "string") {
        if (cipherTransform) {
          return cipherTransform.decryptString(buf1);
        }
        return buf1;
      }
      return buf1;
    }

    /**
     * Finds the end of an inline image by scanning for an "EI" marker that
     * is followed by whitespace and then by plausible (mostly printable)
     * bytes, and - when `lexer.knownCommands` is available - by a known
     * command. Leaves `stream.pos` after the marker.
     *
     * @param {Object} stream - Positioned at the start of the image data.
     * @returns {number} Length of the image data, in bytes.
     */
    findDefaultInlineStreamEnd(stream) {
      const E = 0x45,
        I = 0x49,
        SPACE = 0x20,
        LF = 0xa,
        CR = 0xd,
        NUL = 0x0;
      const lexer = this.lexer,
        startPos = stream.pos,
        n = 10; // Number of bytes inspected after a candidate "EI".
      // state: 0 = nothing matched, 1 = saw "E", 2 = saw "EI".
      let state = 0,
        ch,
        maybeEIPos;
      while ((ch = stream.getByte()) !== -1) {
        if (state === 0) {
          state = ch === E ? 1 : 0;
        } else if (state === 1) {
          state = ch === I ? 2 : 0;
        } else {
          if (ch === SPACE || ch === LF || ch === CR) {
            // Remember this candidate for the recovery path below.
            maybeEIPos = stream.pos;
            const followingBytes = stream.peekBytes(n);
            for (let i = 0, ii = followingBytes.length; i < ii; i++) {
              ch = followingBytes[i];
              if (ch === NUL && followingBytes[i + 1] !== NUL) {
                // Tolerate an isolated NUL byte.
                continue;
              }
              if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7f)) {
                // Non-printable data follows: this "EI" is image data.
                state = 0;
                break;
              }
            }
            if (state !== 2) {
              continue;
            }
            if (lexer.knownCommands) {
              const nextObj = lexer.peekObj();
              if (nextObj instanceof _primitives.Cmd && !lexer.knownCommands[nextObj.cmd]) {
                // An unknown command follows: reject this candidate.
                state = 0;
              }
            } else {
              (0, _util.warn)("findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined.");
            }
            if (state === 2) {
              break;
            }
          } else {
            state = 0;
          }
        }
      }
      if (ch === -1) {
        (0, _util.warn)("findDefaultInlineStreamEnd: " + "Reached the end of the stream without finding a valid EI marker");
        if (maybeEIPos) {
          (0, _util.warn)('... trying to recover by using the last "EI" occurrence.');
          stream.skip(-(stream.pos - maybeEIPos));
        }
      }
      // Exclude "EI" plus its surrounding whitespace from the length; if the
      // byte before "EI" is not whitespace it is counted as image data.
      let endOffset = 4;
      stream.skip(-endOffset);
      ch = stream.peekByte();
      stream.skip(endOffset);
      if (!(0, _core_utils.isWhiteSpace)(ch)) {
        endOffset--;
      }
      return stream.pos - endOffset - startPos;
    }

    /**
     * Finds the end of an inline DCTDecode (JPEG) image by walking its
     * markers until EOI (0xFFD9). Falls back to the default "EI" search if
     * no EOI marker is found.
     *
     * @param {Object} stream - Positioned at the start of the image data.
     * @returns {number} Length of the image data, in bytes.
     */
    findDCTDecodeInlineStreamEnd(stream) {
      const startPos = stream.pos;
      let foundEOI = false,
        b,
        markerLength;
      while ((b = stream.getByte()) !== -1) {
        if (b !== 0xff) {
          // Not a marker prefix.
          continue;
        }
        switch (stream.getByte()) {
          case 0x00:
            // Byte-stuffed 0xFF inside entropy-coded data.
            break;
          case 0xff:
            // Repeated 0xFF: re-examine the second one as a marker prefix.
            stream.skip(-1);
            break;
          case 0xd9:
            foundEOI = true;
            break;
          case 0xc0:
          case 0xc1:
          case 0xc2:
          case 0xc3:
          case 0xc5:
          case 0xc6:
          case 0xc7:
          case 0xc9:
          case 0xca:
          case 0xcb:
          case 0xcd:
          case 0xce:
          case 0xcf:
          case 0xc4:
          case 0xcc:
          case 0xda:
          case 0xdb:
          case 0xdc:
          case 0xdd:
          case 0xde:
          case 0xdf:
          case 0xe0:
          case 0xe1:
          case 0xe2:
          case 0xe3:
          case 0xe4:
          case 0xe5:
          case 0xe6:
          case 0xe7:
          case 0xe8:
          case 0xe9:
          case 0xea:
          case 0xeb:
          case 0xec:
          case 0xed:
          case 0xee:
          case 0xef:
          case 0xfe:
            // Markers that carry a 16-bit length: skip their payload.
            markerLength = stream.getUint16();
            if (markerLength > 2) {
              stream.skip(markerLength - 2);
            } else {
              // Implausible length: un-read it and keep scanning.
              stream.skip(-2);
            }
            break;
        }
        if (foundEOI) {
          break;
        }
      }
      const length = stream.pos - startPos;
      if (b === -1) {
        (0, _util.warn)("Inline DCTDecode image stream: " + "EOI marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Finds the end of an inline ASCII85Decode image by looking for the
     * "~>" end-of-data marker (whitespace between "~" and ">" is allowed),
     * or for "~" followed by whitespace and "EI". Falls back to the default
     * "EI" search if neither is found.
     *
     * @param {Object} stream - Positioned at the start of the image data.
     * @returns {number} Length of the image data, in bytes.
     */
    findASCII85DecodeInlineStreamEnd(stream) {
      const TILDE = 0x7e,
        GT = 0x3e;
      const startPos = stream.pos;
      let ch;
      while ((ch = stream.getByte()) !== -1) {
        if (ch === TILDE) {
          const tildePos = stream.pos;
          ch = stream.peekByte();
          while ((0, _core_utils.isWhiteSpace)(ch)) {
            stream.skip();
            ch = stream.peekByte();
          }
          if (ch === GT) {
            stream.skip();
            break;
          }
          // Only reached when whitespace followed the "~": accept a
          // directly following "EI" as the end of the data.
          if (stream.pos > tildePos) {
            const maybeEI = stream.peekBytes(2);
            if (maybeEI[0] === 0x45 && maybeEI[1] === 0x49) {
              break;
            }
          }
        }
      }
      const length = stream.pos - startPos;
      if (ch === -1) {
        (0, _util.warn)("Inline ASCII85Decode image stream: " + "EOD marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Finds the end of an inline ASCIIHexDecode image by looking for the
     * ">" end-of-data marker. Falls back to the default "EI" search if it
     * is not found.
     *
     * @param {Object} stream - Positioned at the start of the image data.
     * @returns {number} Length of the image data, in bytes.
     */
    findASCIIHexDecodeInlineStreamEnd(stream) {
      const GT = 0x3e;
      const startPos = stream.pos;
      let ch;
      while ((ch = stream.getByte()) !== -1) {
        if (ch === GT) {
          break;
        }
      }
      const length = stream.pos - startPos;
      if (ch === -1) {
        (0, _util.warn)("Inline ASCIIHexDecode image stream: " + "EOD marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Advances `stream` past the next "EI" sequence and the single byte
     * that follows it (or to the end of the stream if none is found).
     *
     * @param {Object} stream
     */
    inlineStreamSkipEI(stream) {
      const E = 0x45,
        I = 0x49;
      // state: 0 = nothing matched, 1 = saw "E", 2 = saw "EI".
      let state = 0,
        ch;
      while ((ch = stream.getByte()) !== -1) {
        if (state === 0) {
          state = ch === E ? 1 : 0;
        } else if (state === 1) {
          state = ch === I ? 2 : 0;
        } else if (state === 2) {
          break;
        }
      }
    }

    /**
     * Parses an inline image: the key/value pairs up to `ID`, then the
     * image data up to `EI`. Small images are cached by their raw bytes so
     * that repeated identical images reuse one stream object.
     *
     * @param {Object|null} cipherTransform
     * @returns {Object} The (possibly cached) decoded image stream.
     * @throws {FormatError} If a dictionary key is not a name object.
     */
    makeInlineImage(cipherTransform) {
      const lexer = this.lexer;
      const stream = lexer.stream;
      // Plain map first; a real Dict is only created on a cache miss.
      const dictMap = Object.create(null);
      let dictLength;
      while (!(0, _primitives.isCmd)(this.buf1, "ID") && this.buf1 !== _primitives.EOF) {
        if (!(this.buf1 instanceof _primitives.Name)) {
          throw new _util.FormatError("Dictionary key must be a name object");
        }
        const key = this.buf1.name;
        this.shift();
        if (this.buf1 === _primitives.EOF) {
          break;
        }
        dictMap[key] = this.getObj(cipherTransform);
      }
      if (lexer.beginInlineImagePos !== -1) {
        dictLength = stream.pos - lexer.beginInlineImagePos;
      }
      // Only the first filter decides how the end of the data is located.
      const filter = this.xref.fetchIfRef(dictMap.F || dictMap.Filter);
      let filterName;
      if (filter instanceof _primitives.Name) {
        filterName = filter.name;
      } else if (Array.isArray(filter)) {
        const filterZero = this.xref.fetchIfRef(filter[0]);
        if (filterZero instanceof _primitives.Name) {
          filterName = filterZero.name;
        }
      }
      const startPos = stream.pos;
      let length;
      switch (filterName) {
        case "DCT":
        case "DCTDecode":
          length = this.findDCTDecodeInlineStreamEnd(stream);
          break;
        case "A85":
        case "ASCII85Decode":
          length = this.findASCII85DecodeInlineStreamEnd(stream);
          break;
        case "AHx":
        case "ASCIIHexDecode":
          length = this.findASCIIHexDecodeInlineStreamEnd(stream);
          break;
        default:
          length = this.findDefaultInlineStreamEnd(stream);
      }
      let cacheKey;
      // `dictLength` is undefined when the dictionary start is unknown, in
      // which case the comparison is false and caching is skipped.
      if (length < MAX_LENGTH_TO_CACHE && dictLength > 0) {
        const initialStreamPos = stream.pos;
        // Key on dictionary bytes + data bytes, then restore the position.
        stream.pos = lexer.beginInlineImagePos;
        cacheKey = getInlineImageCacheKey(stream.getBytes(dictLength + length));
        stream.pos = initialStreamPos;
        const cacheEntry = this.imageCache[cacheKey];
        if (cacheEntry !== undefined) {
          this.buf2 = _primitives.Cmd.get("EI");
          this.shift();
          cacheEntry.reset();
          return cacheEntry;
        }
      }
      const dict = new _primitives.Dict(this.xref);
      for (const key in dictMap) {
        dict.set(key, dictMap[key]);
      }
      let imageStream = stream.makeSubStream(startPos, length, dict);
      if (cipherTransform) {
        imageStream = cipherTransform.createStream(imageStream, length);
      }
      imageStream = this.filter(imageStream, dict, length);
      imageStream.dict = dict;
      if (cacheKey !== undefined) {
        imageStream.cacheKey = `inline_img_${++this._imageId}`;
        this.imageCache[cacheKey] = imageStream;
      }
      // The data was consumed directly from the stream, so synthesize the
      // "EI" token for the look-ahead window.
      this.buf2 = _primitives.Cmd.get("EI");
      this.shift();
      return imageStream;
    }

    /**
     * Scans the lexer's stream, starting at `startPos`, for the first
     * occurrence of `signature`. On success `stream.pos` is left at the
     * start of the match.
     *
     * Every offset at which the complete signature fits is tested, including
     * a signature that ends exactly at the last available byte.
     *
     * @param {number} startPos - Absolute stream position to start from.
     * @param {Uint8Array} signature - Byte sequence to search for.
     * @returns {number} Offset of the match relative to `startPos`, or -1.
     */
    _findStreamLength(startPos, signature) {
      const {
        stream
      } = this.lexer;
      stream.pos = startPos;
      const SCAN_BLOCK_LENGTH = 2048;
      const signatureLength = signature.length;
      while (stream.pos < stream.end) {
        const scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH);
        // Last offset in this block where the full signature still fits.
        const scanLength = scanBytes.length - signatureLength;
        if (scanLength < 0) {
          break;
        }
        let pos = 0;
        while (pos <= scanLength) {
          let j = 0;
          while (j < signatureLength && scanBytes[pos + j] === signature[j]) {
            j++;
          }
          if (j >= signatureLength) {
            stream.pos += pos;
            return stream.pos - startPos;
          }
          pos++;
        }
        // Resume at the first offset that has not been tested yet; this
        // always advances by at least one byte.
        stream.pos += scanLength + 1;
      }
      return -1;
    }

    /**
     * Builds the stream object that follows `dict`. Uses the dictionary's
     * /Length when it leads to an `endstream` keyword; otherwise searches
     * for `endstream` (or the same keyword missing its last character) to
     * determine the actual length.
     *
     * @param {Dict} dict - The stream dictionary.
     * @param {Object|null} cipherTransform
     * @returns {Object} The decoded stream, with `dict` attached.
     * @throws {FormatError} If no `endstream` command can be found.
     */
    makeStream(dict, cipherTransform) {
      const lexer = this.lexer;
      let stream = lexer.stream;
      lexer.skipToNextLine();
      const startPos = stream.pos - 1;
      let length = dict.get("Length");
      if (!Number.isInteger(length)) {
        (0, _util.info)(`Bad length "${length && length.toString()}" in stream.`);
        length = 0;
      }
      // Jump to where the data should end and check for "endstream".
      stream.pos = startPos + length;
      lexer.nextChar();
      if (this.tryShift() && (0, _primitives.isCmd)(this.buf2, "endstream")) {
        this.shift();
      } else {
        // The declared length is wrong: search for the keyword instead.
        // Bytes of "endstream".
        const ENDSTREAM_SIGNATURE = new Uint8Array([0x65, 0x6e, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d]);
        let actualLength = this._findStreamLength(startPos, ENDSTREAM_SIGNATURE);
        if (actualLength < 0) {
          // Retry with the keyword truncated by up to MAX_TRUNCATION bytes,
          // accepting it only when whitespace follows.
          const MAX_TRUNCATION = 1;
          for (let i = 1; i <= MAX_TRUNCATION; i++) {
            const end = ENDSTREAM_SIGNATURE.length - i;
            const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);
            const maybeLength = this._findStreamLength(startPos, TRUNCATED_SIGNATURE);
            if (maybeLength >= 0) {
              const lastByte = stream.peekBytes(end + 1)[end];
              if (!(0, _core_utils.isWhiteSpace)(lastByte)) {
                break;
              }
              (0, _util.info)(`Found "${(0, _util.bytesToString)(TRUNCATED_SIGNATURE)}" when ` + "searching for endstream command.");
              actualLength = maybeLength;
              break;
            }
          }
          if (actualLength < 0) {
            throw new _util.FormatError("Missing endstream command.");
          }
        }
        length = actualLength;
        // Re-prime the lexer and look-ahead window at the found keyword.
        lexer.nextChar();
        this.shift();
        this.shift();
      }
      this.shift();
      stream = stream.makeSubStream(startPos, length, dict);
      if (cipherTransform) {
        stream = cipherTransform.createStream(stream, length);
      }
      stream = this.filter(stream, dict, length);
      stream.dict = dict;
      return stream;
    }

    /**
     * Wraps `stream` in the decoder(s) named by the dictionary's
     * /Filter (or /F) entry, using /DecodeParms (or /DP) as parameters.
     *
     * @param {Object} stream - The raw stream.
     * @param {Dict} dict - The stream dictionary.
     * @param {number} length - Length of the raw data.
     * @returns {Object} The outermost decoding stream, or `stream` itself
     *   when no filter applies.
     * @throws {FormatError} If an entry of a filter array is not a name.
     */
    filter(stream, dict, length) {
      let filter = dict.get("F", "Filter");
      let params = dict.get("DP", "DecodeParms");
      if (filter instanceof _primitives.Name) {
        if (Array.isArray(params)) {
          (0, _util.warn)("/DecodeParms should not be an Array, when /Filter is a Name.");
        }
        return this.makeFilter(stream, filter.name, length, params);
      }
      let maybeLength = length;
      if (Array.isArray(filter)) {
        const filterArray = filter;
        const paramsArray = params;
        for (let i = 0, ii = filterArray.length; i < ii; ++i) {
          filter = this.xref.fetchIfRef(filterArray[i]);
          if (!(filter instanceof _primitives.Name)) {
            throw new _util.FormatError(`Bad filter name "${filter}"`);
          }
          params = null;
          if (Array.isArray(paramsArray) && i in paramsArray) {
            params = this.xref.fetchIfRef(paramsArray[i]);
          }
          stream = this.makeFilter(stream, filter.name, maybeLength, params);
          // Only the first filter receives the raw data length.
          maybeLength = null;
        }
      }
      return stream;
    }

    /**
     * Creates the decoding stream for a single filter.
     *
     * @param {Object} stream - Input stream.
     * @param {string} name - Filter name (full or abbreviated).
     * @param {number|null} maybeLength - Input length, if known.
     * @param {Dict|null} params - Decode parameters, if any.
     * @returns {Object} The decoding stream; the unchanged input stream for
     *   an unsupported filter; a `NullStream` for empty input or when the
     *   decoder constructor throws.
     * @throws {MissingDataException} Always re-thrown, never swallowed.
     */
    makeFilter(stream, name, maybeLength, params) {
      if (maybeLength === 0) {
        (0, _util.warn)(`Empty "${name}" stream.`);
        return new _stream.NullStream();
      }
      const xrefStats = this.xref.stats;
      try {
        switch (name) {
          case "Fl":
          case "FlateDecode":
            xrefStats.addStreamType(_util.StreamType.FLATE);
            if (params) {
              return new _predictor_stream.PredictorStream(new _flate_stream.FlateStream(stream, maybeLength), maybeLength, params);
            }
            return new _flate_stream.FlateStream(stream, maybeLength);
          case "LZW":
          case "LZWDecode":
            xrefStats.addStreamType(_util.StreamType.LZW);
            let earlyChange = 1;
            if (params) {
              if (params.has("EarlyChange")) {
                earlyChange = params.get("EarlyChange");
              }
              return new _predictor_stream.PredictorStream(new _lzw_stream.LZWStream(stream, maybeLength, earlyChange), maybeLength, params);
            }
            return new _lzw_stream.LZWStream(stream, maybeLength, earlyChange);
          case "DCT":
          case "DCTDecode":
            xrefStats.addStreamType(_util.StreamType.DCT);
            return new _jpeg_stream.JpegStream(stream, maybeLength, params);
          case "JPX":
          case "JPXDecode":
            xrefStats.addStreamType(_util.StreamType.JPX);
            return new _jpx_stream.JpxStream(stream, maybeLength, params);
          case "A85":
          case "ASCII85Decode":
            xrefStats.addStreamType(_util.StreamType.A85);
            return new _ascii_85_stream.Ascii85Stream(stream, maybeLength);
          case "AHx":
          case "ASCIIHexDecode":
            xrefStats.addStreamType(_util.StreamType.AHX);
            return new _ascii_hex_stream.AsciiHexStream(stream, maybeLength);
          case "CCF":
          case "CCITTFaxDecode":
            xrefStats.addStreamType(_util.StreamType.CCF);
            return new _ccitt_stream.CCITTFaxStream(stream, maybeLength, params);
          case "RL":
          case "RunLengthDecode":
            xrefStats.addStreamType(_util.StreamType.RLX);
            return new _run_length_stream.RunLengthStream(stream, maybeLength);
          case "JBIG2Decode":
            xrefStats.addStreamType(_util.StreamType.JBIG);
            return new _jbig2_stream.Jbig2Stream(stream, maybeLength, params);
        }
        (0, _util.warn)(`Filter "${name}" is not supported.`);
        return stream;
      } catch (ex) {
        if (ex instanceof _core_utils.MissingDataException) {
          throw ex;
        }
        (0, _util.warn)(`Invalid stream: "${ex}"`);
        return new _stream.NullStream();
      }
    }
  }
  623. exports.Parser = Parser;
  624. const specialChars = [1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
  /**
   * Converts the character code of a hexadecimal digit to its numeric value.
   *
   * @param {number} ch - Character code.
   * @returns {number} 0-15 for "0"-"9", "A"-"F", "a"-"f"; otherwise -1.
   */
  function toHexDigit(ch) {
    const isDecimalDigit = ch >= 0x30 && ch <= 0x39;
    if (isDecimalDigit) {
      return ch & 0x0f;
    }
    const isUpperHexLetter = ch >= 0x41 && ch <= 0x46;
    const isLowerHexLetter = ch >= 0x61 && ch <= 0x66;
    // For "A"-"F" / "a"-"f" the low nibble is 1-6, so adding 9 gives 10-15.
    return isUpperHexLetter || isLowerHexLetter ? (ch & 0x0f) + 9 : -1;
  }
  634. class Lexer {
  635. constructor(stream, knownCommands = null) {
  636. this.stream = stream;
  637. this.nextChar();
  638. this.strBuf = [];
  639. this.knownCommands = knownCommands;
  640. this._hexStringNumWarn = 0;
  641. this.beginInlineImagePos = -1;
  642. }
  643. nextChar() {
  644. return this.currentChar = this.stream.getByte();
  645. }
  646. peekChar() {
  647. return this.stream.peekByte();
  648. }
  /**
   * Reads a numeric token starting at the current character.
   *
   * Accepts an optional sign, an optional fractional part and an optional
   * exponent ("e"/"E"). Several malformed inputs are tolerated: a doubled
   * leading minus sign, line breaks between the sign and the digits, and
   * minus signs in the middle of the number (ignored with a warning).
   *
   * @returns {number} The parsed value, or 0 when the first non-sign
   *   character is whitespace or end-of-data.
   * @throws {FormatError} If the number does not start with a digit (after
   *   the optional sign / decimal point) and that character is neither
   *   whitespace nor end-of-data.
   */
  getNumber() {
    let ch = this.currentChar;
    let eNotation = false;
    // 0 while no decimal point has been seen; otherwise the power of ten by
    // which the accumulated integer must be divided.
    let divideBy = 0;
    let sign = 0;
    if (ch === 0x2d) {
      // "-"
      sign = -1;
      ch = this.nextChar();
      if (ch === 0x2d) {
        // Ignore a second "-" ("--5" is read like "-5").
        ch = this.nextChar();
      }
    } else if (ch === 0x2b) {
      // "+"
      sign = 1;
      ch = this.nextChar();
    }
    if (ch === 0x0a || ch === 0x0d) {
      // Skip LF/CR characters before the digits.
      do {
        ch = this.nextChar();
      } while (ch === 0x0a || ch === 0x0d);
    }
    if (ch === 0x2e) {
      // Leading ".": the first digit read below is already fractional.
      divideBy = 10;
      ch = this.nextChar();
    }
    if (ch < 0x30 || ch > 0x39) {
      // Not a digit.
      const msg = `Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`;
      if ((0, _core_utils.isWhiteSpace)(ch) || ch === -1) {
        (0, _util.info)(`Lexer.getNumber - "${msg}".`);
        return 0;
      }
      throw new _util.FormatError(msg);
    }
    sign = sign || 1;
    let baseValue = ch - 0x30;
    let powerValue = 0;
    let powerValueSign = 1;
    while ((ch = this.nextChar()) >= 0) {
      if (ch >= 0x30 && ch <= 0x39) {
        const currentDigit = ch - 0x30;
        if (eNotation) {
          // Digit of the exponent.
          powerValue = powerValue * 10 + currentDigit;
        } else {
          if (divideBy !== 0) {
            // Digit after the decimal point.
            divideBy *= 10;
          }
          baseValue = baseValue * 10 + currentDigit;
        }
      } else if (ch === 0x2e) {
        // "."
        if (divideBy === 0) {
          divideBy = 1;
        } else {
          // A second decimal point ends the number.
          break;
        }
      } else if (ch === 0x2d) {
        // "-" inside the number: warn and skip it.
        (0, _util.warn)("Badly formatted number: minus sign in the middle");
      } else if (ch === 0x45 || ch === 0x65) {
        // "E" or "e": only an exponent if a sign or digit follows.
        ch = this.peekChar();
        if (ch === 0x2b || ch === 0x2d) {
          powerValueSign = ch === 0x2d ? -1 : 1;
          // Consume the exponent sign.
          this.nextChar();
        } else if (ch < 0x30 || ch > 0x39) {
          // The "E"/"e" is not part of the number.
          break;
        }
        eNotation = true;
      } else {
        // Any other character ends the number.
        break;
      }
    }
    if (divideBy !== 0) {
      baseValue /= divideBy;
    }
    if (eNotation) {
      baseValue *= 10 ** (powerValueSign * powerValue);
    }
    return sign * baseValue;
  }
  /**
   * Reads a literal string; the current character is expected to be the
   * opening "(".
   *
   * Handles balanced nested parentheses, backslash escapes (\n, \r, \t, \b,
   * \f, \\, \(, \)), octal escapes of one to three digits, and a backslash
   * followed by a line break (which produces no output). An unknown escape
   * yields the escaped character itself.
   *
   * @returns {string} The decoded string. If the data ends before the
   *   closing ")", a warning is logged and the partial string is returned.
   */
  getString() {
    // Nesting depth of unescaped parentheses; 1 for the opening "(".
    let numParen = 1;
    let done = false;
    const strBuf = this.strBuf;
    strBuf.length = 0;
    let ch = this.nextChar();
    while (true) {
      // Set when `ch` already holds the next unprocessed character, so the
      // loop must not advance before handling it.
      let charBuffered = false;
      switch (ch | 0) {
        case -1:
          (0, _util.warn)("Unterminated string");
          done = true;
          break;
        case 0x28:
          // "("
          ++numParen;
          strBuf.push("(");
          break;
        case 0x29:
          // ")"
          if (--numParen === 0) {
            // Consume the closing ")".
            this.nextChar();
            done = true;
          } else {
            strBuf.push(")");
          }
          break;
        case 0x5c:
          // "\": escape sequence.
          ch = this.nextChar();
          switch (ch) {
            case -1:
              (0, _util.warn)("Unterminated string");
              done = true;
              break;
            case 0x6e:
              strBuf.push("\n");
              break;
            case 0x72:
              strBuf.push("\r");
              break;
            case 0x74:
              strBuf.push("\t");
              break;
            case 0x62:
              strBuf.push("\b");
              break;
            case 0x66:
              strBuf.push("\f");
              break;
            case 0x5c:
            case 0x28:
            case 0x29:
              // Escaped "\", "(" or ")".
              strBuf.push(String.fromCharCode(ch));
              break;
            case 0x30:
            case 0x31:
            case 0x32:
            case 0x33:
            case 0x34:
            case 0x35:
            case 0x36:
            case 0x37:
              // Octal escape: up to three digits "0"-"7".
              let x = ch & 0x0f;
              ch = this.nextChar();
              // `ch` is a look-ahead until proven to be another digit.
              charBuffered = true;
              if (ch >= 0x30 && ch <= 0x37) {
                x = (x << 3) + (ch & 0x0f);
                ch = this.nextChar();
                if (ch >= 0x30 && ch <= 0x37) {
                  // Third digit consumed; nothing is left buffered.
                  charBuffered = false;
                  x = (x << 3) + (ch & 0x0f);
                }
              }
              strBuf.push(String.fromCharCode(x));
              break;
            case 0x0d:
              // Backslash + CR (optionally followed by LF): no output.
              if (this.peekChar() === 0x0a) {
                this.nextChar();
              }
              break;
            case 0x0a:
              // Backslash + LF: no output.
              break;
            default:
              strBuf.push(String.fromCharCode(ch));
              break;
          }
          break;
        default:
          strBuf.push(String.fromCharCode(ch));
          break;
      }
      if (done) {
        break;
      }
      if (!charBuffered) {
        ch = this.nextChar();
      }
    }
    return strBuf.join("");
  }
  /**
   * Reads a name token; the current character is expected to be the
   * leading "/".
   *
   * Characters are collected until end-of-data or a character flagged in
   * `specialChars`. A "#" followed by two hexadecimal digits is decoded to
   * the corresponding character; malformed "#" sequences are kept literally
   * and a warning is logged.
   *
   * @returns {Name} The name object for the collected characters.
   */
  getName() {
    let ch, previousCh;
    const strBuf = this.strBuf;
    strBuf.length = 0;
    while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
      if (ch === 0x23) {
        // "#": start of a hexadecimal escape.
        ch = this.nextChar();
        if (specialChars[ch]) {
          (0, _util.warn)("Lexer_getName: " + "NUMBER SIGN (#) should be followed by a hexadecimal number.");
          strBuf.push("#");
          break;
        }
        const x = toHexDigit(ch);
        if (x !== -1) {
          previousCh = ch;
          ch = this.nextChar();
          const x2 = toHexDigit(ch);
          if (x2 === -1) {
            (0, _util.warn)(`Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` + "in hexadecimal number.");
            // Keep "#" and the first digit literally.
            strBuf.push("#", String.fromCharCode(previousCh));
            if (specialChars[ch]) {
              // The offending character ends the name.
              break;
            }
            strBuf.push(String.fromCharCode(ch));
            continue;
          }
          strBuf.push(String.fromCharCode(x << 4 | x2));
        } else {
          // "#" not followed by a hex digit: keep both characters.
          strBuf.push("#", String.fromCharCode(ch));
        }
      } else {
        strBuf.push(String.fromCharCode(ch));
      }
    }
    if (strBuf.length > 127) {
      (0, _util.warn)(`Name token is longer than allowed by the spec: ${strBuf.length}`);
    }
    return _primitives.Name.get(strBuf.join(""));
  }
  862. _hexStringWarn(ch) {
  863. const MAX_HEX_STRING_NUM_WARN = 5;
  864. if (this._hexStringNumWarn++ === MAX_HEX_STRING_NUM_WARN) {
  865. (0, _util.warn)("getHexString - ignoring additional invalid characters.");
  866. return;
  867. }
  868. if (this._hexStringNumWarn > MAX_HEX_STRING_NUM_WARN) {
  869. return;
  870. }
  871. (0, _util.warn)(`getHexString - ignoring invalid character: ${ch}`);
  872. }
  873. getHexString() {
  874. const strBuf = this.strBuf;
  875. strBuf.length = 0;
  876. let ch = this.currentChar;
  877. let isFirstHex = true;
  878. let firstDigit, secondDigit;
  879. this._hexStringNumWarn = 0;
  880. while (true) {
  881. if (ch < 0) {
  882. (0, _util.warn)("Unterminated hex string");
  883. break;
  884. } else if (ch === 0x3e) {
  885. this.nextChar();
  886. break;
  887. } else if (specialChars[ch] === 1) {
  888. ch = this.nextChar();
  889. continue;
  890. } else {
  891. if (isFirstHex) {
  892. firstDigit = toHexDigit(ch);
  893. if (firstDigit === -1) {
  894. this._hexStringWarn(ch);
  895. ch = this.nextChar();
  896. continue;
  897. }
  898. } else {
  899. secondDigit = toHexDigit(ch);
  900. if (secondDigit === -1) {
  901. this._hexStringWarn(ch);
  902. ch = this.nextChar();
  903. continue;
  904. }
  905. strBuf.push(String.fromCharCode(firstDigit << 4 | secondDigit));
  906. }
  907. isFirstHex = !isFirstHex;
  908. ch = this.nextChar();
  909. }
  910. }
  911. return strBuf.join("");
  912. }
  913. getObj() {
  914. let comment = false;
  915. let ch = this.currentChar;
  916. while (true) {
  917. if (ch < 0) {
  918. return _primitives.EOF;
  919. }
  920. if (comment) {
  921. if (ch === 0x0a || ch === 0x0d) {
  922. comment = false;
  923. }
  924. } else if (ch === 0x25) {
  925. comment = true;
  926. } else if (specialChars[ch] !== 1) {
  927. break;
  928. }
  929. ch = this.nextChar();
  930. }
  931. switch (ch | 0) {
  932. case 0x30:
  933. case 0x31:
  934. case 0x32:
  935. case 0x33:
  936. case 0x34:
  937. case 0x35:
  938. case 0x36:
  939. case 0x37:
  940. case 0x38:
  941. case 0x39:
  942. case 0x2b:
  943. case 0x2d:
  944. case 0x2e:
  945. return this.getNumber();
  946. case 0x28:
  947. return this.getString();
  948. case 0x2f:
  949. return this.getName();
  950. case 0x5b:
  951. this.nextChar();
  952. return _primitives.Cmd.get("[");
  953. case 0x5d:
  954. this.nextChar();
  955. return _primitives.Cmd.get("]");
  956. case 0x3c:
  957. ch = this.nextChar();
  958. if (ch === 0x3c) {
  959. this.nextChar();
  960. return _primitives.Cmd.get("<<");
  961. }
  962. return this.getHexString();
  963. case 0x3e:
  964. ch = this.nextChar();
  965. if (ch === 0x3e) {
  966. this.nextChar();
  967. return _primitives.Cmd.get(">>");
  968. }
  969. return _primitives.Cmd.get(">");
  970. case 0x7b:
  971. this.nextChar();
  972. return _primitives.Cmd.get("{");
  973. case 0x7d:
  974. this.nextChar();
  975. return _primitives.Cmd.get("}");
  976. case 0x29:
  977. this.nextChar();
  978. throw new _util.FormatError(`Illegal character: ${ch}`);
  979. }
  980. let str = String.fromCharCode(ch);
  981. if (ch < 0x20 || ch > 0x7f) {
  982. const nextCh = this.peekChar();
  983. if (nextCh >= 0x20 && nextCh <= 0x7f) {
  984. this.nextChar();
  985. return _primitives.Cmd.get(str);
  986. }
  987. }
  988. const knownCommands = this.knownCommands;
  989. let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
  990. while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
  991. const possibleCommand = str + String.fromCharCode(ch);
  992. if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
  993. break;
  994. }
  995. if (str.length === 128) {
  996. throw new _util.FormatError(`Command token too long: ${str.length}`);
  997. }
  998. str = possibleCommand;
  999. knownCommandFound = knownCommands && knownCommands[str] !== undefined;
  1000. }
  1001. if (str === "true") {
  1002. return true;
  1003. }
  1004. if (str === "false") {
  1005. return false;
  1006. }
  1007. if (str === "null") {
  1008. return null;
  1009. }
  1010. if (str === "BI") {
  1011. this.beginInlineImagePos = this.stream.pos;
  1012. }
  1013. return _primitives.Cmd.get(str);
  1014. }
  1015. peekObj() {
  1016. const streamPos = this.stream.pos,
  1017. currentChar = this.currentChar,
  1018. beginInlineImagePos = this.beginInlineImagePos;
  1019. let nextObj;
  1020. try {
  1021. nextObj = this.getObj();
  1022. } catch (ex) {
  1023. if (ex instanceof _core_utils.MissingDataException) {
  1024. throw ex;
  1025. }
  1026. (0, _util.warn)(`peekObj: ${ex}`);
  1027. }
  1028. this.stream.pos = streamPos;
  1029. this.currentChar = currentChar;
  1030. this.beginInlineImagePos = beginInlineImagePos;
  1031. return nextObj;
  1032. }
  1033. skipToNextLine() {
  1034. let ch = this.currentChar;
  1035. while (ch >= 0) {
  1036. if (ch === 0x0d) {
  1037. ch = this.nextChar();
  1038. if (ch === 0x0a) {
  1039. this.nextChar();
  1040. }
  1041. break;
  1042. } else if (ch === 0x0a) {
  1043. this.nextChar();
  1044. break;
  1045. }
  1046. ch = this.nextChar();
  1047. }
  1048. }
  1049. }
  1050. exports.Lexer = Lexer;
  /**
   * Reads the linearization parameter dictionary from the start of a stream.
   */
  class Linearization {
    /**
     * Parses the first four tokens of `stream` and, when they have the shape
     * `<integer> <integer> obj <dictionary>` and the dictionary has a positive
     * numeric "Linearized" entry, extracts the linearization parameters.
     *
     * @param {Object} stream - Stream to read; its `length` must equal the
     *   dictionary's "L" entry.
     * @returns {Object|null} `null` when the stream does not start with a
     *   linearization dictionary, otherwise an object with `length`, `hints`,
     *   `objectNumberFirst`, `endFirst`, `numPages`, `mainXRefEntriesOffset`
     *   and `pageFirst`.
     * @throws {Error} When a required entry is missing or invalid, or when
     *   "L" differs from `stream.length`.
     */
    static create(stream) {
      // Fetch an integer entry; it must be > 0, or >= 0 if allowZeroValue.
      function getInt(linDict, name, allowZeroValue = false) {
        const value = linDict.get(name);
        const lowerBound = allowZeroValue ? 0 : 1;
        if (Number.isInteger(value) && value >= lowerBound) {
          return value;
        }
        throw new Error(`The "${name}" parameter in the linearization ` + "dictionary is invalid.");
      }

      // Fetch the "H" array: 2 or 4 entries, each a positive integer.
      function getHints(linDict) {
        const hints = linDict.get("H");
        if (!Array.isArray(hints) || (hints.length !== 2 && hints.length !== 4)) {
          throw new Error("Hint array in the linearization dictionary is invalid.");
        }
        for (const [index, hint] of hints.entries()) {
          if (!Number.isInteger(hint) || hint <= 0) {
            throw new Error(`Hint (${index}) in the linearization dictionary is invalid.`);
          }
        }
        return hints;
      }

      const parser = new Parser({
        lexer: new Lexer(stream),
        xref: null
      });
      const leadingInt = parser.getObj();
      const trailingInt = parser.getObj();
      const objKeyword = parser.getObj();
      const linDict = parser.getObj();

      const hasObjectHeader =
        Number.isInteger(leadingInt) &&
        Number.isInteger(trailingInt) &&
        (0, _primitives.isCmd)(objKeyword, "obj") &&
        linDict instanceof _primitives.Dict;
      if (!hasObjectHeader) {
        return null;
      }
      const linearized = linDict.get("Linearized");
      if (typeof linearized !== "number" || !(linearized > 0)) {
        return null;
      }

      const length = getInt(linDict, "L");
      if (length !== stream.length) {
        throw new Error('The "L" parameter in the linearization dictionary ' + "does not equal the stream length.");
      }

      // Entries are read in this order, so the first invalid one is reported.
      return {
        length,
        hints: getHints(linDict),
        objectNumberFirst: getInt(linDict, "O"),
        endFirst: getInt(linDict, "E"),
        numPages: getInt(linDict, "N"),
        mainXRefEntriesOffset: getInt(linDict, "T"),
        pageFirst: linDict.has("P") ? getInt(linDict, "P", true) : 0
      };
    }
  }
  1099. exports.Linearization = Linearization;