2
0

parser.js 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * Javascript code in this page
  4. *
  5. * Copyright 2020 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * Javascript code in this page
  21. */
  22. "use strict";
  23. Object.defineProperty(exports, "__esModule", {
  24. value: true
  25. });
  26. exports.Parser = exports.Linearization = exports.Lexer = void 0;
  27. var _stream = require("./stream.js");
  28. var _util = require("../shared/util.js");
  29. var _primitives = require("./primitives.js");
  30. var _core_utils = require("./core_utils.js");
  31. var _ccitt_stream = require("./ccitt_stream.js");
  32. var _jbig2_stream = require("./jbig2_stream.js");
  33. var _jpeg_stream = require("./jpeg_stream.js");
  34. var _jpx_stream = require("./jpx_stream.js");
  // Upper bound (exclusive) on the byte length of inline image data that
  // `Parser.makeInlineImage` will consider for its per-parser image cache.
  35. const MAX_LENGTH_TO_CACHE = 1000;
  // Upper bound (exclusive) on the byte length of an inline image dictionary
  // that `Parser.makeInlineImage` will checksum with `computeAdler32`.
  36. const MAX_ADLER32_LENGTH = 5552;
  /**
   * Computes a 32-bit checksum of `bytes` from two running sums: `low` starts
   * at 1 and accumulates every byte, `high` accumulates every intermediate
   * value of `low`. Both sums are reduced modulo 65521 only once, at the end,
   * and packed as `(high << 16) | low`.
   *
   * @param {ArrayLike<number>} bytes - Values to hash; only the low 8 bits of
   *   each entry are used.
   * @returns {number} The packed checksum as a signed 32-bit integer.
   */
  function computeAdler32(bytes) {
    const total = bytes.length;
    let low = 1;
    let high = 0;
    let index = 0;
    while (index < total) {
      low += bytes[index] & 0xff;
      high += low;
      index++;
    }
    return ((high % 65521) << 16) | (low % 65521);
  }
  /**
   * Builds PDF objects (arrays, dictionaries, references, streams, inline
   * images) out of the tokens produced by a `Lexer`.
   *
   * The parser keeps a two-token lookahead in `buf1` (current token) and
   * `buf2` (next token); `shift()` advances that window by one token.
   */
  class Parser {
    /**
     * @param {Object} params
     * @param {Lexer} params.lexer - Token source; two tokens are read from it
     *   immediately to fill the lookahead buffers.
     * @param {Object} params.xref - Passed to every `Dict` created here and
     *   used to resolve filter references and to record stream statistics.
     * @param {boolean} [params.allowStreams] - When true, a dictionary that
     *   is followed by the `stream` command is returned as a stream.
     * @param {boolean} [params.recoveryMode] - When true, reaching EOF inside
     *   an array or dictionary returns the partial object instead of throwing.
     */
    constructor({ lexer, xref, allowStreams = false, recoveryMode = false }) {
      this.lexer = lexer;
      this.xref = xref;
      this.allowStreams = allowStreams;
      this.recoveryMode = recoveryMode;
      // Prototype-less object so that cache keys never collide with
      // inherited property names.
      this.imageCache = Object.create(null);
      this.refill();
    }

    /** Reads two fresh tokens from the lexer into `buf1` and `buf2`. */
    refill() {
      this.buf1 = this.lexer.getObj();
      this.buf2 = this.lexer.getObj();
    }

    /**
     * Advances the lookahead window by one token. When the token moving into
     * `buf1` is the `ID` command, the lexer is NOT asked for another token,
     * because what follows is raw inline image data rather than tokens.
     */
    shift() {
      if (this.buf2 instanceof _primitives.Cmd && this.buf2.cmd === "ID") {
        this.buf1 = this.buf2;
        this.buf2 = null;
      } else {
        this.buf1 = this.buf2;
        this.buf2 = this.lexer.getObj();
      }
    }

    /**
     * Like `shift()`, but reports lexer failures through the return value.
     *
     * @returns {boolean} True when the shift succeeded, false when it threw.
     * @throws {MissingDataException} Always re-thrown, never swallowed.
     */
    tryShift() {
      try {
        this.shift();
        return true;
      } catch (e) {
        if (e instanceof _core_utils.MissingDataException) {
          throw e;
        }
        return false;
      }
    }

    /**
     * Consumes and returns the next complete object.
     *
     * @param {Object} [cipherTransform] - When given, string objects are
     *   passed through its `decryptString` and it is forwarded to nested
     *   objects, streams and inline images.
     * @returns {*} An array, `Dict`, stream, `Ref`, decrypted string, or the
     *   raw token when it needs no further assembly.
     * @throws {FormatError} On EOF inside an array or dictionary, unless
     *   `recoveryMode` is enabled.
     */
    getObj(cipherTransform = null) {
      const buf1 = this.buf1;
      this.shift();
      if (buf1 instanceof _primitives.Cmd) {
        switch (buf1.cmd) {
          case "BI":
            return this.makeInlineImage(cipherTransform);
          case "[": {
            const array = [];
            while (!(0, _primitives.isCmd)(this.buf1, "]") && !(0, _primitives.isEOF)(this.buf1)) {
              array.push(this.getObj(cipherTransform));
            }
            if ((0, _primitives.isEOF)(this.buf1)) {
              if (!this.recoveryMode) {
                throw new _util.FormatError("End of file inside array");
              }
              return array;
            }
            // Consume the closing "]".
            this.shift();
            return array;
          }
          case "<<": {
            const dict = new _primitives.Dict(this.xref);
            while (!(0, _primitives.isCmd)(this.buf1, ">>") && !(0, _primitives.isEOF)(this.buf1)) {
              if (!(0, _primitives.isName)(this.buf1)) {
                // Skip the offending token and keep looking for a valid key.
                (0, _util.info)("Malformed dictionary: key must be a name object");
                this.shift();
                continue;
              }
              const key = this.buf1.name;
              this.shift();
              if ((0, _primitives.isEOF)(this.buf1)) {
                break;
              }
              dict.set(key, this.getObj(cipherTransform));
            }
            if ((0, _primitives.isEOF)(this.buf1)) {
              if (!this.recoveryMode) {
                throw new _util.FormatError("End of file inside dictionary");
              }
              return dict;
            }
            // Stream objects consist of a dictionary followed by the
            // "stream" command; `makeStream` handles the remaining tokens.
            if ((0, _primitives.isCmd)(this.buf2, "stream")) {
              return this.allowStreams ? this.makeStream(dict, cipherTransform) : dict;
            }
            // Consume the closing ">>".
            this.shift();
            return dict;
          }
          default:
            return buf1;
        }
      }
      if (Number.isInteger(buf1)) {
        // "<int> <int> R" is an indirect reference.
        if (Number.isInteger(this.buf1) && (0, _primitives.isCmd)(this.buf2, "R")) {
          const ref = _primitives.Ref.get(buf1, this.buf1);
          this.shift();
          this.shift();
          return ref;
        }
        return buf1;
      }
      if (typeof buf1 === "string") {
        if (cipherTransform) {
          return cipherTransform.decryptString(buf1);
        }
        return buf1;
      }
      return buf1;
    }

    /**
     * Scans `stream` for an "EI" marker followed by whitespace and returns
     * the length of the inline image data preceding it.
     *
     * An "EI" + whitespace candidate is only accepted when the next `n`
     * bytes are all LF, CR or in the range 0x20-0x7f (a lone NUL is
     * tolerated), which filters out "EI" byte pairs inside binary data.
     *
     * @param {Object} stream - Positioned at the first byte of image data;
     *   left positioned after the accepted marker.
     * @returns {number} Number of image data bytes.
     */
    findDefaultInlineStreamEnd(stream) {
      const E = 0x45;
      const I = 0x49;
      const SPACE = 0x20;
      const LF = 0xa;
      const CR = 0xd;
      const n = 10;
      const NUL = 0x0;
      const startPos = stream.pos;
      let state = 0;
      let ch;
      let maybeEIPos;
      while ((ch = stream.getByte()) !== -1) {
        if (state === 0) {
          state = ch === E ? 1 : 0;
        } else if (state === 1) {
          state = ch === I ? 2 : 0;
        } else {
          (0, _util.assert)(state === 2, "findDefaultInlineStreamEnd - invalid state.");
          if (ch === SPACE || ch === LF || ch === CR) {
            // Remember this candidate so it can be used as a fallback if no
            // candidate passes the look-ahead check below.
            maybeEIPos = stream.pos;
            const followingBytes = stream.peekBytes(n);
            for (let i = 0, ii = followingBytes.length; i < ii; i++) {
              ch = followingBytes[i];
              if (ch === NUL && followingBytes[i + 1] !== NUL) {
                // A single NUL byte is tolerated; two in a row are not.
                continue;
              }
              if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7f)) {
                // Binary-looking byte: this "EI" is part of the image data.
                state = 0;
                break;
              }
            }
            if (state === 2) {
              break;
            }
          } else {
            state = 0;
          }
        }
      }
      if (ch === -1) {
        (0, _util.warn)("findDefaultInlineStreamEnd: " + "Reached the end of the stream without finding a valid EI marker");
        if (maybeEIPos) {
          (0, _util.warn)('... trying to recover by using the last "EI" occurrence.');
          // Rewind to just after the last candidate marker.
          stream.skip(-(stream.pos - maybeEIPos));
        }
      }
      // The marker is "EI" plus the whitespace after it, optionally preceded
      // by one whitespace byte that is not part of the image data either.
      let endOffset = 4;
      stream.skip(-endOffset);
      ch = stream.peekByte();
      stream.skip(endOffset);
      if (!(0, _core_utils.isWhiteSpace)(ch)) {
        endOffset--;
      }
      return stream.pos - endOffset - startPos;
    }

    /**
     * Finds the end of DCT (JPEG) inline image data by walking 0xff-prefixed
     * markers until marker 0xd9 is seen, skipping over the payload of every
     * marker that carries a 16-bit length. Falls back to
     * `findDefaultInlineStreamEnd` when the stream ends first.
     *
     * @param {Object} stream - Positioned at the first byte of image data.
     * @returns {number} Number of image data bytes.
     */
    findDCTDecodeInlineStreamEnd(stream) {
      const startPos = stream.pos;
      let foundEOI = false;
      let b;
      let markerLength;
      while ((b = stream.getByte()) !== -1) {
        if (b !== 0xff) {
          continue;
        }
        switch (stream.getByte()) {
          case 0x00:
            // 0xff 0x00 is not a marker.
            break;
          case 0xff:
            // Re-examine this 0xff as the possible start of a marker.
            stream.skip(-1);
            break;
          case 0xd9:
            foundEOI = true;
            break;
          // Markers that are followed by a 16-bit segment length.
          case 0xc0:
          case 0xc1:
          case 0xc2:
          case 0xc3:
          case 0xc5:
          case 0xc6:
          case 0xc7:
          case 0xc9:
          case 0xca:
          case 0xcb:
          case 0xcd:
          case 0xce:
          case 0xcf:
          case 0xc4:
          case 0xcc:
          case 0xda:
          case 0xdb:
          case 0xdc:
          case 0xdd:
          case 0xde:
          case 0xdf:
          case 0xe0:
          case 0xe1:
          case 0xe2:
          case 0xe3:
          case 0xe4:
          case 0xe5:
          case 0xe6:
          case 0xe7:
          case 0xe8:
          case 0xe9:
          case 0xea:
          case 0xeb:
          case 0xec:
          case 0xed:
          case 0xee:
          case 0xef:
          case 0xfe:
            markerLength = stream.getUint16();
            if (markerLength > 2) {
              // The length includes its own two bytes, already consumed.
              stream.skip(markerLength - 2);
            } else {
              // Implausible length: un-read it and keep scanning byte-wise.
              stream.skip(-2);
            }
            break;
        }
        if (foundEOI) {
          break;
        }
      }
      const length = stream.pos - startPos;
      if (b === -1) {
        (0, _util.warn)("Inline DCTDecode image stream: " + "EOI marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Finds the end of ASCII85 inline image data, which is terminated by
     * "~>" (whitespace between the two characters is tolerated). A "~"
     * followed by whitespace and then "EI" is also accepted as the end.
     * Falls back to `findDefaultInlineStreamEnd` when the stream ends first.
     *
     * @param {Object} stream - Positioned at the first byte of image data.
     * @returns {number} Number of image data bytes.
     */
    findASCII85DecodeInlineStreamEnd(stream) {
      const TILDE = 0x7e;
      const GT = 0x3e;
      const startPos = stream.pos;
      let ch;
      while ((ch = stream.getByte()) !== -1) {
        if (ch === TILDE) {
          const tildePos = stream.pos;
          ch = stream.peekByte();
          while ((0, _core_utils.isWhiteSpace)(ch)) {
            stream.skip();
            ch = stream.peekByte();
          }
          if (ch === GT) {
            stream.skip();
            break;
          }
          // Only look for "EI" when at least one whitespace byte was skipped.
          if (stream.pos > tildePos) {
            const maybeEI = stream.peekBytes(2);
            if (maybeEI[0] === 0x45 && maybeEI[1] === 0x49) {
              break;
            }
          }
        }
      }
      const length = stream.pos - startPos;
      if (ch === -1) {
        (0, _util.warn)("Inline ASCII85Decode image stream: " + "EOD marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Finds the end of ASCIIHex inline image data, which is terminated by
     * ">". Falls back to `findDefaultInlineStreamEnd` when the stream ends
     * first.
     *
     * @param {Object} stream - Positioned at the first byte of image data.
     * @returns {number} Number of image data bytes, including the ">".
     */
    findASCIIHexDecodeInlineStreamEnd(stream) {
      const GT = 0x3e;
      const startPos = stream.pos;
      let ch;
      while ((ch = stream.getByte()) !== -1) {
        if (ch === GT) {
          break;
        }
      }
      const length = stream.pos - startPos;
      if (ch === -1) {
        (0, _util.warn)("Inline ASCIIHexDecode image stream: " + "EOD marker not found, searching for /EI/ instead.");
        stream.skip(-length);
        return this.findDefaultInlineStreamEnd(stream);
      }
      this.inlineStreamSkipEI(stream);
      return length;
    }

    /**
     * Advances `stream` past the next "EI" byte pair and the single byte
     * that follows it, or to the end of the stream if no "EI" is found.
     *
     * @param {Object} stream
     */
    inlineStreamSkipEI(stream) {
      const E = 0x45;
      const I = 0x49;
      let state = 0;
      let ch;
      while ((ch = stream.getByte()) !== -1) {
        if (state === 0) {
          state = ch === E ? 1 : 0;
        } else if (state === 1) {
          state = ch === I ? 2 : 0;
        } else if (state === 2) {
          break;
        }
      }
    }

    /**
     * Parses an inline image (called after the "BI" command was consumed):
     * reads key/value pairs up to "ID", locates the end of the raw image
     * data according to the first filter, and wraps the data in a filtered
     * stream. Small images are cached, keyed by checksums of their data and
     * dictionary bytes.
     *
     * @param {Object} [cipherTransform] - Applied to dictionary values and,
     *   via `createStream`, to the image data.
     * @returns {Object} The (possibly cached) image stream.
     * @throws {FormatError} When a dictionary key is not a name.
     */
    makeInlineImage(cipherTransform) {
      const lexer = this.lexer;
      const stream = lexer.stream;
      const dict = new _primitives.Dict(this.xref);
      let dictLength;
      while (!(0, _primitives.isCmd)(this.buf1, "ID") && !(0, _primitives.isEOF)(this.buf1)) {
        if (!(0, _primitives.isName)(this.buf1)) {
          throw new _util.FormatError("Dictionary key must be a name object");
        }
        const key = this.buf1.name;
        this.shift();
        if ((0, _primitives.isEOF)(this.buf1)) {
          break;
        }
        dict.set(key, this.getObj(cipherTransform));
      }
      if (lexer.beginInlineImagePos !== -1) {
        dictLength = stream.pos - lexer.beginInlineImagePos;
      }

      // Only the first filter determines how the end of the data is found.
      const filter = dict.get("Filter", "F");
      let filterName;
      if ((0, _primitives.isName)(filter)) {
        filterName = filter.name;
      } else if (Array.isArray(filter)) {
        const filterZero = this.xref.fetchIfRef(filter[0]);
        if ((0, _primitives.isName)(filterZero)) {
          filterName = filterZero.name;
        }
      }

      const startPos = stream.pos;
      let length;
      if (filterName === "DCTDecode" || filterName === "DCT") {
        length = this.findDCTDecodeInlineStreamEnd(stream);
      } else if (filterName === "ASCII85Decode" || filterName === "A85") {
        length = this.findASCII85DecodeInlineStreamEnd(stream);
      } else if (filterName === "ASCIIHexDecode" || filterName === "AHx") {
        length = this.findASCIIHexDecodeInlineStreamEnd(stream);
      } else {
        length = this.findDefaultInlineStreamEnd(stream);
      }
      let imageStream = stream.makeSubStream(startPos, length, dict);

      // `dictLength` stays undefined when the lexer did not record where the
      // image began; the comparison is then false and caching is skipped.
      let cacheKey;
      if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) {
        const imageBytes = imageStream.getBytes();
        imageStream.reset();
        // Temporarily rewind to read the raw dictionary bytes.
        const initialStreamPos = stream.pos;
        stream.pos = lexer.beginInlineImagePos;
        const dictBytes = stream.getBytes(dictLength);
        stream.pos = initialStreamPos;
        cacheKey = computeAdler32(imageBytes) + "_" + computeAdler32(dictBytes);
        const cacheEntry = this.imageCache[cacheKey];
        if (cacheEntry !== undefined) {
          // Re-prime the lookahead as if "EI" had just been lexed.
          this.buf2 = _primitives.Cmd.get("EI");
          this.shift();
          cacheEntry.reset();
          return cacheEntry;
        }
      }
      if (cipherTransform) {
        imageStream = cipherTransform.createStream(imageStream, length);
      }
      imageStream = this.filter(imageStream, dict, length);
      imageStream.dict = dict;
      if (cacheKey !== undefined) {
        imageStream.cacheKey = `inline_${length}_${cacheKey}`;
        this.imageCache[cacheKey] = imageStream;
      }
      // Re-prime the lookahead as if "EI" had just been lexed.
      this.buf2 = _primitives.Cmd.get("EI");
      this.shift();
      return imageStream;
    }

    /**
     * Searches the lexer's stream, starting at `startPos`, for the first
     * occurrence of `signature`, reading the stream in blocks.
     *
     * On success the stream is left positioned at the start of the match.
     * Every candidate offset is examined, including the one where the
     * signature occupies the very last bytes of the stream.
     *
     * @param {number} startPos - Absolute stream position to start from.
     * @param {Uint8Array} signature - Byte sequence to look for.
     * @returns {number} Offset of the match relative to `startPos`, or -1.
     */
    _findStreamLength(startPos, signature) {
      const { stream } = this.lexer;
      stream.pos = startPos;
      const SCAN_BLOCK_LENGTH = 2048;
      const signatureLength = signature.length;
      while (stream.pos < stream.end) {
        const scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH);
        // Highest offset in this block at which a full signature still fits.
        const scanLength = scanBytes.length - signatureLength;
        if (scanLength < 0 || scanBytes.length === 0) {
          break;
        }
        for (let pos = 0; pos <= scanLength; pos++) {
          let j = 0;
          while (j < signatureLength && scanBytes[pos + j] === signature[j]) {
            j++;
          }
          if (j >= signatureLength) {
            stream.pos += pos;
            return stream.pos - startPos;
          }
        }
        if (scanLength === 0) {
          // Nothing left to advance over; avoid looping forever.
          break;
        }
        // Advance so that the next block starts at the last offset examined,
        // which keeps matches straddling a block boundary detectable.
        stream.pos += scanLength;
      }
      return -1;
    }

    /**
     * Creates a stream object for `dict` (called when the token after the
     * dictionary is the "stream" command). The dictionary's `Length` entry
     * is trusted first; when "endstream" is not found where it points, the
     * data is scanned for an "endstream" signature (or, failing that, the
     * truncated "endstrea" followed by whitespace).
     *
     * @param {Dict} dict - The stream dictionary.
     * @param {Object} [cipherTransform] - Applied via `createStream`.
     * @returns {Object} The filtered stream with `dict` attached.
     * @throws {FormatError} When no endstream command can be located.
     */
    makeStream(dict, cipherTransform) {
      const lexer = this.lexer;
      let stream = lexer.stream;
      lexer.skipToNextLine();
      const startPos = stream.pos - 1;
      let length = dict.get("Length");
      if (!Number.isInteger(length)) {
        (0, _util.info)(`Bad length "${length}" in stream`);
        length = 0;
      }
      // Jump over the stream data and restart the lexer there.
      stream.pos = startPos + length;
      lexer.nextChar();
      if (this.tryShift() && (0, _primitives.isCmd)(this.buf2, "endstream")) {
        this.shift();
      } else {
        // The declared length is wrong; search for the end marker instead.
        const ENDSTREAM_SIGNATURE = new Uint8Array([0x65, 0x6E, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6D]);
        let actualLength = this._findStreamLength(startPos, ENDSTREAM_SIGNATURE);
        if (actualLength < 0) {
          const MAX_TRUNCATION = 1;
          for (let i = 1; i <= MAX_TRUNCATION; i++) {
            const end = ENDSTREAM_SIGNATURE.length - i;
            const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);
            const maybeLength = this._findStreamLength(startPos, TRUNCATED_SIGNATURE);
            if (maybeLength >= 0) {
              // Accept the truncated marker only when whitespace follows it.
              const lastByte = stream.peekBytes(end + 1)[end];
              if (!(0, _core_utils.isWhiteSpace)(lastByte)) {
                break;
              }
              (0, _util.info)(`Found "${(0, _util.bytesToString)(TRUNCATED_SIGNATURE)}" when ` + "searching for endstream command.");
              actualLength = maybeLength;
              break;
            }
          }
          if (actualLength < 0) {
            throw new _util.FormatError("Missing endstream command.");
          }
        }
        length = actualLength;
        lexer.nextChar();
        this.shift();
        this.shift();
      }
      this.shift();
      stream = stream.makeSubStream(startPos, length, dict);
      if (cipherTransform) {
        stream = cipherTransform.createStream(stream, length);
      }
      stream = this.filter(stream, dict, length);
      stream.dict = dict;
      return stream;
    }

    /**
     * Wraps `stream` in the decoder(s) named by the dictionary's
     * `Filter`/`F` entry, using `DecodeParms`/`DP` as parameters. A single
     * name yields one decoder; an array yields a chain applied in order.
     *
     * @param {Object} stream - The raw stream.
     * @param {Dict} dict - The stream dictionary.
     * @param {number} length - Length of the raw data; passed only to the
     *   first decoder of a chain.
     * @returns {Object} The decoding stream, or `stream` itself when the
     *   dictionary names no filter.
     * @throws {FormatError} When a filter array entry is not a name.
     */
    filter(stream, dict, length) {
      let filter = dict.get("Filter", "F");
      let params = dict.get("DecodeParms", "DP");
      if ((0, _primitives.isName)(filter)) {
        if (Array.isArray(params)) {
          (0, _util.warn)("/DecodeParms should not contain an Array, " + "when /Filter contains a Name.");
        }
        return this.makeFilter(stream, filter.name, length, params);
      }
      let maybeLength = length;
      if (Array.isArray(filter)) {
        const filterArray = filter;
        const paramsArray = params;
        for (let i = 0, ii = filterArray.length; i < ii; ++i) {
          filter = this.xref.fetchIfRef(filterArray[i]);
          if (!(0, _primitives.isName)(filter)) {
            throw new _util.FormatError(`Bad filter name "${filter}"`);
          }
          params = null;
          if (Array.isArray(paramsArray) && i in paramsArray) {
            params = this.xref.fetchIfRef(paramsArray[i]);
          }
          stream = this.makeFilter(stream, filter.name, maybeLength, params);
          // The output length of a decoder is unknown to the next one.
          maybeLength = null;
        }
      }
      return stream;
    }

    /**
     * Creates the decoding stream for one filter name (long or abbreviated
     * form) and records the stream type in `xref.stats.streamTypes`.
     *
     * @param {Object} stream - The stream to decode.
     * @param {string} name - Filter name.
     * @param {number|null} maybeLength - Length of the encoded data, if known.
     * @param {Dict|null} params - Decode parameters for this filter.
     * @returns {Object} The decoding stream; `stream` itself for unsupported
     *   filters; a `NullStream` for empty data or when construction fails.
     * @throws {MissingDataException} Always re-thrown, never swallowed.
     */
    makeFilter(stream, name, maybeLength, params) {
      if (maybeLength === 0) {
        (0, _util.warn)(`Empty "${name}" stream.`);
        return new _stream.NullStream();
      }
      try {
        const xrefStreamStats = this.xref.stats.streamTypes;
        if (name === "FlateDecode" || name === "Fl") {
          xrefStreamStats[_util.StreamType.FLATE] = true;
          if (params) {
            return new _stream.PredictorStream(new _stream.FlateStream(stream, maybeLength), maybeLength, params);
          }
          return new _stream.FlateStream(stream, maybeLength);
        }
        if (name === "LZWDecode" || name === "LZW") {
          xrefStreamStats[_util.StreamType.LZW] = true;
          let earlyChange = 1;
          if (params) {
            if (params.has("EarlyChange")) {
              earlyChange = params.get("EarlyChange");
            }
            return new _stream.PredictorStream(new _stream.LZWStream(stream, maybeLength, earlyChange), maybeLength, params);
          }
          return new _stream.LZWStream(stream, maybeLength, earlyChange);
        }
        if (name === "DCTDecode" || name === "DCT") {
          xrefStreamStats[_util.StreamType.DCT] = true;
          return new _jpeg_stream.JpegStream(stream, maybeLength, stream.dict, params);
        }
        if (name === "JPXDecode" || name === "JPX") {
          xrefStreamStats[_util.StreamType.JPX] = true;
          return new _jpx_stream.JpxStream(stream, maybeLength, stream.dict, params);
        }
        if (name === "ASCII85Decode" || name === "A85") {
          xrefStreamStats[_util.StreamType.A85] = true;
          return new _stream.Ascii85Stream(stream, maybeLength);
        }
        if (name === "ASCIIHexDecode" || name === "AHx") {
          xrefStreamStats[_util.StreamType.AHX] = true;
          return new _stream.AsciiHexStream(stream, maybeLength);
        }
        if (name === "CCITTFaxDecode" || name === "CCF") {
          xrefStreamStats[_util.StreamType.CCF] = true;
          return new _ccitt_stream.CCITTFaxStream(stream, maybeLength, params);
        }
        if (name === "RunLengthDecode" || name === "RL") {
          xrefStreamStats[_util.StreamType.RLX] = true;
          return new _stream.RunLengthStream(stream, maybeLength);
        }
        if (name === "JBIG2Decode") {
          xrefStreamStats[_util.StreamType.JBIG] = true;
          return new _jbig2_stream.Jbig2Stream(stream, maybeLength, stream.dict, params);
        }
        (0, _util.warn)(`Filter "${name}" is not supported.`);
        return stream;
      } catch (ex) {
        if (ex instanceof _core_utils.MissingDataException) {
          throw ex;
        }
        // Any other failure degrades to an empty stream instead of aborting.
        (0, _util.warn)(`Invalid stream: "${ex}"`);
        return new _stream.NullStream();
      }
    }
  }
  595. exports.Parser = Parser;
  596. const specialChars = [1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
  /**
   * Converts the character code of a hexadecimal digit to its value.
   *
   * @param {number} ch - Character code.
   * @returns {number} 0-15 for "0"-"9", "A"-"F" and "a"-"f"; -1 otherwise.
   */
  function toHexDigit(ch) {
    const isDecimalDigit = ch >= 0x30 && ch <= 0x39;
    if (isDecimalDigit) {
      return ch & 0x0f;
    }
    const isUpperHex = ch >= 0x41 && ch <= 0x46;
    const isLowerHex = ch >= 0x61 && ch <= 0x66;
    // For both letter ranges the low nibble is 1-6, so adding 9 gives 10-15.
    return isUpperHex || isLowerHex ? (ch & 0x0f) + 9 : -1;
  }
  606. class Lexer {
  607. constructor(stream, knownCommands = null) {
  608. this.stream = stream;
  609. this.nextChar();
  610. this.strBuf = [];
  611. this.knownCommands = knownCommands;
  612. this._hexStringNumWarn = 0;
  613. this.beginInlineImagePos = -1;
  614. }
  615. nextChar() {
  616. return this.currentChar = this.stream.getByte();
  617. }
  618. peekChar() {
  619. return this.stream.peekByte();
  620. }
  621. getNumber() {
  622. let ch = this.currentChar;
  623. let eNotation = false;
  624. let divideBy = 0;
  625. let sign = 0;
  626. if (ch === 0x2d) {
  627. sign = -1;
  628. ch = this.nextChar();
  629. if (ch === 0x2d) {
  630. ch = this.nextChar();
  631. }
  632. } else if (ch === 0x2b) {
  633. sign = 1;
  634. ch = this.nextChar();
  635. }
  636. if (ch === 0x0a || ch === 0x0d) {
  637. do {
  638. ch = this.nextChar();
  639. } while (ch === 0x0a || ch === 0x0d);
  640. }
  641. if (ch === 0x2e) {
  642. divideBy = 10;
  643. ch = this.nextChar();
  644. }
  645. if (ch < 0x30 || ch > 0x39) {
  646. if (divideBy === 10 && sign === 0 && ((0, _core_utils.isWhiteSpace)(ch) || ch === -1)) {
  647. (0, _util.warn)("Lexer.getNumber - treating a single decimal point as zero.");
  648. return 0;
  649. }
  650. throw new _util.FormatError(`Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`);
  651. }
  652. sign = sign || 1;
  653. let baseValue = ch - 0x30;
  654. let powerValue = 0;
  655. let powerValueSign = 1;
  656. while ((ch = this.nextChar()) >= 0) {
  657. if (ch >= 0x30 && ch <= 0x39) {
  658. const currentDigit = ch - 0x30;
  659. if (eNotation) {
  660. powerValue = powerValue * 10 + currentDigit;
  661. } else {
  662. if (divideBy !== 0) {
  663. divideBy *= 10;
  664. }
  665. baseValue = baseValue * 10 + currentDigit;
  666. }
  667. } else if (ch === 0x2e) {
  668. if (divideBy === 0) {
  669. divideBy = 1;
  670. } else {
  671. break;
  672. }
  673. } else if (ch === 0x2d) {
  674. (0, _util.warn)("Badly formatted number: minus sign in the middle");
  675. } else if (ch === 0x45 || ch === 0x65) {
  676. ch = this.peekChar();
  677. if (ch === 0x2b || ch === 0x2d) {
  678. powerValueSign = ch === 0x2d ? -1 : 1;
  679. this.nextChar();
  680. } else if (ch < 0x30 || ch > 0x39) {
  681. break;
  682. }
  683. eNotation = true;
  684. } else {
  685. break;
  686. }
  687. }
  688. if (divideBy !== 0) {
  689. baseValue /= divideBy;
  690. }
  691. if (eNotation) {
  692. baseValue *= 10 ** (powerValueSign * powerValue);
  693. }
  694. return sign * baseValue;
  695. }
  696. getString() {
  697. let numParen = 1;
  698. let done = false;
  699. const strBuf = this.strBuf;
  700. strBuf.length = 0;
  701. let ch = this.nextChar();
  702. while (true) {
  703. let charBuffered = false;
  704. switch (ch | 0) {
  705. case -1:
  706. (0, _util.warn)("Unterminated string");
  707. done = true;
  708. break;
  709. case 0x28:
  710. ++numParen;
  711. strBuf.push("(");
  712. break;
  713. case 0x29:
  714. if (--numParen === 0) {
  715. this.nextChar();
  716. done = true;
  717. } else {
  718. strBuf.push(")");
  719. }
  720. break;
  721. case 0x5c:
  722. ch = this.nextChar();
  723. switch (ch) {
  724. case -1:
  725. (0, _util.warn)("Unterminated string");
  726. done = true;
  727. break;
  728. case 0x6e:
  729. strBuf.push("\n");
  730. break;
  731. case 0x72:
  732. strBuf.push("\r");
  733. break;
  734. case 0x74:
  735. strBuf.push("\t");
  736. break;
  737. case 0x62:
  738. strBuf.push("\b");
  739. break;
  740. case 0x66:
  741. strBuf.push("\f");
  742. break;
  743. case 0x5c:
  744. case 0x28:
  745. case 0x29:
  746. strBuf.push(String.fromCharCode(ch));
  747. break;
  748. case 0x30:
  749. case 0x31:
  750. case 0x32:
  751. case 0x33:
  752. case 0x34:
  753. case 0x35:
  754. case 0x36:
  755. case 0x37:
  756. let x = ch & 0x0f;
  757. ch = this.nextChar();
  758. charBuffered = true;
  759. if (ch >= 0x30 && ch <= 0x37) {
  760. x = (x << 3) + (ch & 0x0f);
  761. ch = this.nextChar();
  762. if (ch >= 0x30 && ch <= 0x37) {
  763. charBuffered = false;
  764. x = (x << 3) + (ch & 0x0f);
  765. }
  766. }
  767. strBuf.push(String.fromCharCode(x));
  768. break;
  769. case 0x0d:
  770. if (this.peekChar() === 0x0a) {
  771. this.nextChar();
  772. }
  773. break;
  774. case 0x0a:
  775. break;
  776. default:
  777. strBuf.push(String.fromCharCode(ch));
  778. break;
  779. }
  780. break;
  781. default:
  782. strBuf.push(String.fromCharCode(ch));
  783. break;
  784. }
  785. if (done) {
  786. break;
  787. }
  788. if (!charBuffered) {
  789. ch = this.nextChar();
  790. }
  791. }
  792. return strBuf.join("");
  793. }
  794. getName() {
  795. let ch, previousCh;
  796. const strBuf = this.strBuf;
  797. strBuf.length = 0;
  798. while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
  799. if (ch === 0x23) {
  800. ch = this.nextChar();
  801. if (specialChars[ch]) {
  802. (0, _util.warn)("Lexer_getName: " + "NUMBER SIGN (#) should be followed by a hexadecimal number.");
  803. strBuf.push("#");
  804. break;
  805. }
  806. const x = toHexDigit(ch);
  807. if (x !== -1) {
  808. previousCh = ch;
  809. ch = this.nextChar();
  810. const x2 = toHexDigit(ch);
  811. if (x2 === -1) {
  812. (0, _util.warn)(`Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` + "in hexadecimal number.");
  813. strBuf.push("#", String.fromCharCode(previousCh));
  814. if (specialChars[ch]) {
  815. break;
  816. }
  817. strBuf.push(String.fromCharCode(ch));
  818. continue;
  819. }
  820. strBuf.push(String.fromCharCode(x << 4 | x2));
  821. } else {
  822. strBuf.push("#", String.fromCharCode(ch));
  823. }
  824. } else {
  825. strBuf.push(String.fromCharCode(ch));
  826. }
  827. }
  828. if (strBuf.length > 127) {
  829. (0, _util.warn)(`Name token is longer than allowed by the spec: ${strBuf.length}`);
  830. }
  831. return _primitives.Name.get(strBuf.join(""));
  832. }
  833. _hexStringWarn(ch) {
  834. const MAX_HEX_STRING_NUM_WARN = 5;
  835. if (this._hexStringNumWarn++ === MAX_HEX_STRING_NUM_WARN) {
  836. (0, _util.warn)("getHexString - ignoring additional invalid characters.");
  837. return;
  838. }
  839. if (this._hexStringNumWarn > MAX_HEX_STRING_NUM_WARN) {
  840. return;
  841. }
  842. (0, _util.warn)(`getHexString - ignoring invalid character: ${ch}`);
  843. }
  844. getHexString() {
  845. const strBuf = this.strBuf;
  846. strBuf.length = 0;
  847. let ch = this.currentChar;
  848. let isFirstHex = true;
  849. let firstDigit, secondDigit;
  850. this._hexStringNumWarn = 0;
  851. while (true) {
  852. if (ch < 0) {
  853. (0, _util.warn)("Unterminated hex string");
  854. break;
  855. } else if (ch === 0x3e) {
  856. this.nextChar();
  857. break;
  858. } else if (specialChars[ch] === 1) {
  859. ch = this.nextChar();
  860. continue;
  861. } else {
  862. if (isFirstHex) {
  863. firstDigit = toHexDigit(ch);
  864. if (firstDigit === -1) {
  865. this._hexStringWarn(ch);
  866. ch = this.nextChar();
  867. continue;
  868. }
  869. } else {
  870. secondDigit = toHexDigit(ch);
  871. if (secondDigit === -1) {
  872. this._hexStringWarn(ch);
  873. ch = this.nextChar();
  874. continue;
  875. }
  876. strBuf.push(String.fromCharCode(firstDigit << 4 | secondDigit));
  877. }
  878. isFirstHex = !isFirstHex;
  879. ch = this.nextChar();
  880. }
  881. }
  882. return strBuf.join("");
  883. }
  884. getObj() {
  885. let comment = false;
  886. let ch = this.currentChar;
  887. while (true) {
  888. if (ch < 0) {
  889. return _primitives.EOF;
  890. }
  891. if (comment) {
  892. if (ch === 0x0a || ch === 0x0d) {
  893. comment = false;
  894. }
  895. } else if (ch === 0x25) {
  896. comment = true;
  897. } else if (specialChars[ch] !== 1) {
  898. break;
  899. }
  900. ch = this.nextChar();
  901. }
  902. switch (ch | 0) {
  903. case 0x30:
  904. case 0x31:
  905. case 0x32:
  906. case 0x33:
  907. case 0x34:
  908. case 0x35:
  909. case 0x36:
  910. case 0x37:
  911. case 0x38:
  912. case 0x39:
  913. case 0x2b:
  914. case 0x2d:
  915. case 0x2e:
  916. return this.getNumber();
  917. case 0x28:
  918. return this.getString();
  919. case 0x2f:
  920. return this.getName();
  921. case 0x5b:
  922. this.nextChar();
  923. return _primitives.Cmd.get("[");
  924. case 0x5d:
  925. this.nextChar();
  926. return _primitives.Cmd.get("]");
  927. case 0x3c:
  928. ch = this.nextChar();
  929. if (ch === 0x3c) {
  930. this.nextChar();
  931. return _primitives.Cmd.get("<<");
  932. }
  933. return this.getHexString();
  934. case 0x3e:
  935. ch = this.nextChar();
  936. if (ch === 0x3e) {
  937. this.nextChar();
  938. return _primitives.Cmd.get(">>");
  939. }
  940. return _primitives.Cmd.get(">");
  941. case 0x7b:
  942. this.nextChar();
  943. return _primitives.Cmd.get("{");
  944. case 0x7d:
  945. this.nextChar();
  946. return _primitives.Cmd.get("}");
  947. case 0x29:
  948. this.nextChar();
  949. throw new _util.FormatError(`Illegal character: ${ch}`);
  950. }
  951. let str = String.fromCharCode(ch);
  952. const knownCommands = this.knownCommands;
  953. let knownCommandFound = knownCommands && knownCommands[str] !== undefined;
  954. while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
  955. const possibleCommand = str + String.fromCharCode(ch);
  956. if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
  957. break;
  958. }
  959. if (str.length === 128) {
  960. throw new _util.FormatError(`Command token too long: ${str.length}`);
  961. }
  962. str = possibleCommand;
  963. knownCommandFound = knownCommands && knownCommands[str] !== undefined;
  964. }
  965. if (str === "true") {
  966. return true;
  967. }
  968. if (str === "false") {
  969. return false;
  970. }
  971. if (str === "null") {
  972. return null;
  973. }
  974. if (str === "BI") {
  975. this.beginInlineImagePos = this.stream.pos;
  976. }
  977. return _primitives.Cmd.get(str);
  978. }
  979. skipToNextLine() {
  980. let ch = this.currentChar;
  981. while (ch >= 0) {
  982. if (ch === 0x0d) {
  983. ch = this.nextChar();
  984. if (ch === 0x0a) {
  985. this.nextChar();
  986. }
  987. break;
  988. } else if (ch === 0x0a) {
  989. this.nextChar();
  990. break;
  991. }
  992. ch = this.nextChar();
  993. }
  994. }
  995. }
  996. exports.Lexer = Lexer;
  /**
   * Helper for reading the linearization parameter dictionary found at the
   * start of a stream.
   */
  class Linearization {
    /**
     * Parses the first indirect object of `stream` and, if it is a
     * linearization dictionary, returns its validated parameters.
     *
     * The object must look like `<int> <int> obj <dict>` and the
     * dictionary must carry a numeric "Linearized" entry greater than
     * zero; otherwise `null` is returned.
     *
     * @param {Object} stream - Source stream; it is handed to a new `Lexer`
     *   and its `length` is compared with the "L" entry.
     * @returns {Object|null} `null` when the stream does not start with a
     *   linearization dictionary, otherwise an object with `length`,
     *   `hints`, `objectNumberFirst`, `endFirst`, `numPages`,
     *   `mainXRefEntriesOffset` and `pageFirst`.
     * @throws {Error} When "L" differs from `stream.length`, or when any of
     *   the "L", "H", "O", "E", "N", "T" or "P" entries is invalid.
     */
    static create(stream) {
      // Fetches an integer entry; it must be > 0, or >= 0 if zero is allowed.
      const readInt = (dict, key, zeroIsValid = false) => {
        const value = dict.get(key);
        const minimum = zeroIsValid ? 0 : 1;
        if (Number.isInteger(value) && value >= minimum) {
          return value;
        }
        throw new Error(`The "${key}" parameter in the linearization dictionary is invalid.`);
      };

      // Fetches the "H" entry: an array of 2 or 4 positive integers.
      const readHints = dict => {
        const hints = dict.get("H");
        if (!Array.isArray(hints) || (hints.length !== 2 && hints.length !== 4)) {
          throw new Error("Hint array in the linearization dictionary is invalid.");
        }
        const badIndex = hints.findIndex(hint => !Number.isInteger(hint) || hint <= 0);
        if (badIndex !== -1) {
          throw new Error(`Hint (${badIndex}) in the linearization dictionary is invalid.`);
        }
        return hints;
      };

      const parser = new Parser({
        lexer: new Lexer(stream),
        xref: null
      });
      // Expected layout: "<number> <number> obj <dictionary>".
      const firstNumber = parser.getObj();
      const secondNumber = parser.getObj();
      const objKeyword = parser.getObj();
      const linDict = parser.getObj();

      const looksLikeDictObject = Number.isInteger(firstNumber) && Number.isInteger(secondNumber) && (0, _primitives.isCmd)(objKeyword, "obj") && (0, _primitives.isDict)(linDict);
      if (!looksLikeDictObject) {
        return null;
      }
      const linearized = linDict.get("Linearized");
      if (!((0, _util.isNum)(linearized) && linearized > 0)) {
        return null;
      }

      const length = readInt(linDict, "L");
      if (length !== stream.length) {
        throw new Error('The "L" parameter in the linearization dictionary does not equal the stream length.');
      }

      // Entries are read in this order, so the first invalid one is reported.
      return {
        length,
        hints: readHints(linDict),
        objectNumberFirst: readInt(linDict, "O"),
        endFirst: readInt(linDict, "E"),
        numPages: readInt(linDict, "N"),
        mainXRefEntriesOffset: readInt(linDict, "T"),
        pageFirst: linDict.has("P") ? readInt(linDict, "P", true) : 0
      };
    }
  }
  1045. exports.Linearization = Linearization;