2
0

parser.js 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986
  1. /* Copyright 2017 Mozilla Foundation
  2. *
  3. * Licensed under the Apache License, Version 2.0 (the "License");
  4. * you may not use this file except in compliance with the License.
  5. * You may obtain a copy of the License at
  6. *
  7. * http://www.apache.org/licenses/LICENSE-2.0
  8. *
  9. * Unless required by applicable law or agreed to in writing, software
  10. * distributed under the License is distributed on an "AS IS" BASIS,
  11. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. * See the License for the specific language governing permissions and
  13. * limitations under the License.
  14. */
  15. 'use strict';
  16. var sharedUtil = require('../shared/util.js');
  17. var corePrimitives = require('./primitives.js');
  18. var coreStream = require('./stream.js');
  19. var MissingDataException = sharedUtil.MissingDataException;
  20. var StreamType = sharedUtil.StreamType;
  21. var assert = sharedUtil.assert;
  22. var error = sharedUtil.error;
  23. var info = sharedUtil.info;
  24. var isArray = sharedUtil.isArray;
  25. var isInt = sharedUtil.isInt;
  26. var isNum = sharedUtil.isNum;
  27. var isString = sharedUtil.isString;
  28. var warn = sharedUtil.warn;
  29. var EOF = corePrimitives.EOF;
  30. var Cmd = corePrimitives.Cmd;
  31. var Dict = corePrimitives.Dict;
  32. var Name = corePrimitives.Name;
  33. var Ref = corePrimitives.Ref;
  34. var isEOF = corePrimitives.isEOF;
  35. var isCmd = corePrimitives.isCmd;
  36. var isDict = corePrimitives.isDict;
  37. var isName = corePrimitives.isName;
  38. var Ascii85Stream = coreStream.Ascii85Stream;
  39. var AsciiHexStream = coreStream.AsciiHexStream;
  40. var CCITTFaxStream = coreStream.CCITTFaxStream;
  41. var FlateStream = coreStream.FlateStream;
  42. var Jbig2Stream = coreStream.Jbig2Stream;
  43. var JpegStream = coreStream.JpegStream;
  44. var JpxStream = coreStream.JpxStream;
  45. var LZWStream = coreStream.LZWStream;
  46. var NullStream = coreStream.NullStream;
  47. var PredictorStream = coreStream.PredictorStream;
  48. var RunLengthStream = coreStream.RunLengthStream;
  49. var MAX_LENGTH_TO_CACHE = 1000;
  /**
   * Parser builds PDF objects (arrays, dictionaries, references, streams and
   * inline images) out of the tokens produced by a Lexer.
   *
   * It keeps a two-token look-ahead in `buf1` (the current token) and `buf2`
   * (the next token); the look-ahead is what allows `num gen R` references and
   * `<< ... >> stream` constructs to be recognised.
   */
  var Parser = function ParserClosure() {
    /**
     * @param {Object} lexer - Token source; must expose `getObj()` and `stream`.
     * @param {boolean} allowStreams - When false, a dictionary followed by the
     *   `stream` keyword is returned as a plain dictionary.
     * @param {Object} xref - Cross-reference table handed to created Dicts and
     *   used for `fetchIfRef` / stream-type statistics.
     * @param {boolean} [recoveryMode] - When truthy, hitting EOF inside an
     *   array or dictionary returns the partial object instead of raising.
     */
    function Parser(lexer, allowStreams, xref, recoveryMode) {
      this.lexer = lexer;
      this.allowStreams = allowStreams;
      this.xref = xref;
      this.recoveryMode = recoveryMode || false;
      // Prototype-less map: Adler-32 checksum -> decoded inline image stream.
      this.imageCache = Object.create(null);
      this.refill();
    }

    Parser.prototype = {
      /** Loads both look-ahead slots from the lexer. */
      refill: function Parser_refill() {
        this.buf1 = this.lexer.getObj();
        this.buf2 = this.lexer.getObj();
      },

      /** Advances the look-ahead window by one token. */
      shift: function Parser_shift() {
        if (isCmd(this.buf2, 'ID')) {
          // Binary inline image data follows `ID`; do not let the lexer
          // tokenize it.
          this.buf1 = this.buf2;
          this.buf2 = null;
        } else {
          this.buf1 = this.buf2;
          this.buf2 = this.lexer.getObj();
        }
      },

      /**
       * Like `shift`, but reports lexer failures instead of propagating them.
       * MissingDataException is always re-thrown.
       * @returns {boolean} true when the shift succeeded.
       */
      tryShift: function Parser_tryShift() {
        try {
          this.shift();
          return true;
        } catch (e) {
          if (e instanceof MissingDataException) {
            throw e;
          }
          return false;
        }
      },

      /**
       * Reads the next complete object.
       * @param {Object} [cipherTransform] - When given, strings are passed
       *   through `decryptString` and streams through `createStream`.
       * @returns {*} The parsed object (array, Dict, Ref, stream, number,
       *   string, or the raw token for anything else).
       */
      getObj: function Parser_getObj(cipherTransform) {
        var buf1 = this.buf1;
        this.shift();

        if (buf1 instanceof Cmd) {
          switch (buf1.cmd) {
            case 'BI': // inline image
              return this.makeInlineImage(cipherTransform);
            case '[': // array
              var array = [];
              while (!isCmd(this.buf1, ']') && !isEOF(this.buf1)) {
                array.push(this.getObj(cipherTransform));
              }
              if (isEOF(this.buf1)) {
                if (!this.recoveryMode) {
                  error('End of file inside array');
                }
                return array;
              }
              this.shift();
              return array;
            case '<<': // dictionary or stream
              var dict = new Dict(this.xref);
              while (!isCmd(this.buf1, '>>') && !isEOF(this.buf1)) {
                if (!isName(this.buf1)) {
                  info('Malformed dictionary: key must be a name object');
                  this.shift();
                  continue;
                }
                var key = this.buf1.name;
                this.shift();
                if (isEOF(this.buf1)) {
                  break;
                }
                dict.set(key, this.getObj(cipherTransform));
              }
              if (isEOF(this.buf1)) {
                if (!this.recoveryMode) {
                  error('End of file inside dictionary');
                }
                return dict;
              }
              // buf1 is '>>' here, so the `stream` keyword (if any) is in buf2.
              if (isCmd(this.buf2, 'stream')) {
                return this.allowStreams ?
                  this.makeStream(dict, cipherTransform) : dict;
              }
              this.shift();
              return dict;
            default: // simple object
              return buf1;
          }
        }

        if (isInt(buf1)) { // indirect reference or integer
          var num = buf1;
          if (isInt(this.buf1) && isCmd(this.buf2, 'R')) {
            var ref = new Ref(num, this.buf1);
            this.shift();
            this.shift();
            return ref;
          }
          return num;
        }

        if (isString(buf1)) { // string
          var str = buf1;
          if (cipherTransform) {
            str = cipherTransform.decryptString(str);
          }
          return str;
        }

        // simple object
        return buf1;
      },

      /**
       * Finds the end of inline image data by searching for `EI` followed by a
       * whitespace byte and then up to five bytes that are LF, CR or in the
       * range 0x20-0x7F (a heuristic for "looks like operators again").
       * Leaves the stream positioned after the byte that follows `EI`.
       * @returns {number} Length of the image data, never negative.
       */
      findDefaultInlineStreamEnd:
          function Parser_findDefaultInlineStreamEnd(stream) {
        var E = 0x45,
            I = 0x49,
            SPACE = 0x20,
            LF = 0xA,
            CR = 0xD;
        var startPos = stream.pos,
            state = 0,
            ch,
            i,
            n,
            followingBytes;
        while ((ch = stream.getByte()) !== -1) {
          if (state === 0) {
            state = ch === E ? 1 : 0;
          } else if (state === 1) {
            state = ch === I ? 2 : 0;
          } else {
            assert(state === 2);
            if (ch === SPACE || ch === LF || ch === CR) {
              // Check that the bytes following "EI " are plausible content.
              n = 5;
              followingBytes = stream.peekBytes(n);
              for (i = 0; i < n; i++) {
                ch = followingBytes[i];
                if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7F)) {
                  // Not printable ASCII: this "EI" was part of the image data.
                  state = 0;
                  break;
                }
              }
              if (state === 2) {
                break; // Finished.
              }
            } else {
              state = 0;
            }
          }
        }
        // The 4 accounts for "<whitespace>EI<whitespace>". When "EI" sits at the
        // very start of the data (or the data is shorter than 4 bytes) the
        // subtraction would go negative, so clamp to an empty image instead of
        // handing a negative length to makeSubStream.
        return Math.max(stream.pos - 4 - startPos, 0);
      },

      /**
       * Finds the end of inline DCT (JPEG) data by walking the JPEG markers up
       * to EOI (0xFFD9). Falls back to `findDefaultInlineStreamEnd` when the
       * stream ends before EOI is seen.
       * @returns {number} Length of the image data.
       */
      findDCTDecodeInlineStreamEnd:
          function Parser_findDCTDecodeInlineStreamEnd(stream) {
        var startPos = stream.pos,
            foundEOI = false,
            b,
            markerLength,
            length;
        while ((b = stream.getByte()) !== -1) {
          if (b !== 0xFF) { // Not a valid marker.
            continue;
          }
          switch (stream.getByte()) {
            case 0x00: // Byte stuffing.
              break;

            case 0xFF: // Fill byte.
              // Step back so the second 0xFF is re-examined as a marker start.
              stream.skip(-1);
              break;

            case 0xD9: // EOI
              foundEOI = true;
              break;

            case 0xC0:
            case 0xC1:
            case 0xC2:
            case 0xC3:
            case 0xC5:
            case 0xC6:
            case 0xC7:
            case 0xC9:
            case 0xCA:
            case 0xCB:
            case 0xCD:
            case 0xCE:
            case 0xCF:
            case 0xC4:
            case 0xCC:
            case 0xDA:
            case 0xDB:
            case 0xDC:
            case 0xDD:
            case 0xDE:
            case 0xDF:
            case 0xE0:
            case 0xE1:
            case 0xE2:
            case 0xE3:
            case 0xE4:
            case 0xE5:
            case 0xE6:
            case 0xE7:
            case 0xE8:
            case 0xE9:
            case 0xEA:
            case 0xEB:
            case 0xEC:
            case 0xED:
            case 0xEE:
            case 0xEF:
            case 0xFE:
              // These markers carry a two-byte length that includes itself.
              markerLength = stream.getUint16();
              if (markerLength > 2) {
                // Skip the rest of the marker segment.
                stream.skip(markerLength - 2);
              } else {
                // Implausible length: rewind over the two length bytes.
                stream.skip(-2);
              }
              break;
          }
          if (foundEOI) {
            break;
          }
        }
        length = stream.pos - startPos;
        if (b === -1) {
          warn('Inline DCTDecode image stream: ' +
               'EOI marker not found, searching for /EI/ instead.');
          stream.skip(-length); // Reset the stream position.
          return this.findDefaultInlineStreamEnd(stream);
        }
        this.inlineStreamSkipEI(stream);
        return length;
      },

      /**
       * Finds the end of inline ASCII85 data by searching for the `~>` EOD
       * marker. Falls back to `findDefaultInlineStreamEnd` when it is missing.
       * @returns {number} Length of the image data, including `~>`.
       */
      findASCII85DecodeInlineStreamEnd:
          function Parser_findASCII85DecodeInlineStreamEnd(stream) {
        var TILDE = 0x7E,
            GT = 0x3E;
        var startPos = stream.pos,
            ch,
            length;
        while ((ch = stream.getByte()) !== -1) {
          if (ch === TILDE && stream.peekByte() === GT) {
            stream.skip();
            break;
          }
        }
        length = stream.pos - startPos;
        if (ch === -1) {
          warn('Inline ASCII85Decode image stream: ' +
               'EOD marker not found, searching for /EI/ instead.');
          stream.skip(-length); // Reset the stream position.
          return this.findDefaultInlineStreamEnd(stream);
        }
        this.inlineStreamSkipEI(stream);
        return length;
      },

      /**
       * Finds the end of inline ASCIIHex data by searching for the `>` EOD
       * marker. Falls back to `findDefaultInlineStreamEnd` when it is missing.
       * @returns {number} Length of the image data, including `>`.
       */
      findASCIIHexDecodeInlineStreamEnd:
          function Parser_findASCIIHexDecodeInlineStreamEnd(stream) {
        var GT = 0x3E;
        var startPos = stream.pos,
            ch,
            length;
        while ((ch = stream.getByte()) !== -1) {
          if (ch === GT) {
            break;
          }
        }
        length = stream.pos - startPos;
        if (ch === -1) {
          warn('Inline ASCIIHexDecode image stream: ' +
               'EOD marker not found, searching for /EI/ instead.');
          stream.skip(-length); // Reset the stream position.
          return this.findDefaultInlineStreamEnd(stream);
        }
        this.inlineStreamSkipEI(stream);
        return length;
      },

      /**
       * Consumes bytes up to and including the byte that follows the next
       * `EI` sequence (or up to the end of the stream).
       */
      inlineStreamSkipEI: function Parser_inlineStreamSkipEI(stream) {
        var E = 0x45,
            I = 0x49;
        var state = 0,
            ch;
        while ((ch = stream.getByte()) !== -1) {
          if (state === 0) {
            state = ch === E ? 1 : 0;
          } else if (state === 1) {
            state = ch === I ? 2 : 0;
          } else if (state === 2) {
            break;
          }
        }
      },

      /**
       * Parses an inline image: the key/value pairs between `BI` and `ID`,
       * followed by the raw image data up to `EI`.
       *
       * Images shorter than MAX_LENGTH_TO_CACHE bytes are cached per parser,
       * keyed by the Adler-32 checksum of their raw bytes.
       *
       * @param {Object} [cipherTransform]
       * @returns {Object} The (possibly cached) decoded image stream.
       */
      makeInlineImage: function Parser_makeInlineImage(cipherTransform) {
        var lexer = this.lexer;
        var stream = lexer.stream;

        // Parse the dictionary.
        var dict = new Dict(this.xref);
        while (!isCmd(this.buf1, 'ID') && !isEOF(this.buf1)) {
          if (!isName(this.buf1)) {
            error('Dictionary key must be a name object');
          }
          var key = this.buf1.name;
          this.shift();
          if (isEOF(this.buf1)) {
            break;
          }
          dict.set(key, this.getObj(cipherTransform));
        }

        // Extract the name of the first (i.e. the outermost) filter.
        var filter = dict.get('Filter', 'F'),
            filterName;
        if (isName(filter)) {
          filterName = filter.name;
        } else if (isArray(filter)) {
          var filterZero = this.xref.fetchIfRef(filter[0]);
          if (isName(filterZero)) {
            filterName = filterZero.name;
          }
        }

        // Locate the end of the image data with a filter-aware scanner where
        // one exists, since the raw data may itself contain "EI".
        var startPos = stream.pos,
            length,
            i,
            ii;
        if (filterName === 'DCTDecode' || filterName === 'DCT') {
          length = this.findDCTDecodeInlineStreamEnd(stream);
        } else if (filterName === 'ASCII85Decode' || filterName === 'A85') {
          // The full filter name used to be misspelled here ('ASCII85Decide'),
          // which sent such images to the generic EI scanner.
          length = this.findASCII85DecodeInlineStreamEnd(stream);
        } else if (filterName === 'ASCIIHexDecode' || filterName === 'AHx') {
          length = this.findASCIIHexDecodeInlineStreamEnd(stream);
        } else {
          length = this.findDefaultInlineStreamEnd(stream);
        }
        var imageStream = stream.makeSubStream(startPos, length, dict);

        // Cache all images below the MAX_LENGTH_TO_CACHE threshold by their
        // Adler-32 checksum.
        var adler32;
        if (length < MAX_LENGTH_TO_CACHE) {
          var imageBytes = imageStream.getBytes();
          imageStream.reset();

          var a = 1;
          var b = 0;
          for (i = 0, ii = imageBytes.length; i < ii; ++i) {
            // No modulo is needed inside the loop: with fewer than
            // MAX_LENGTH_TO_CACHE bytes neither sum can overflow.
            a += imageBytes[i] & 0xff;
            b += a;
          }
          adler32 = ((b % 65521) << 16) | (a % 65521);

          // Index the cache by the checksum value (the previous code read a
          // property literally named "adler32", so the cache never hit).
          var cacheEntry = this.imageCache[adler32];
          if (cacheEntry !== undefined) {
            this.buf2 = Cmd.get('EI');
            this.shift();

            cacheEntry.reset();
            return cacheEntry;
          }
        }

        if (cipherTransform) {
          imageStream = cipherTransform.createStream(imageStream, length);
        }

        imageStream = this.filter(imageStream, dict, length);
        imageStream.dict = dict;
        if (adler32 !== undefined) {
          imageStream.cacheKey = 'inline_' + length + '_' + adler32;
          this.imageCache[adler32] = imageStream;
        }

        // The scanners already consumed "EI"; re-inject it as the next token.
        this.buf2 = Cmd.get('EI');
        this.shift();

        return imageStream;
      },

      /**
       * Builds a stream object for a dictionary that is followed by the
       * `stream` keyword. Uses the dictionary's `Length` entry when it leads
       * to an `endstream` keyword, and otherwise scans forward for the
       * `endstream` signature to determine the real length.
       *
       * @param {Dict} dict - The stream dictionary.
       * @param {Object} [cipherTransform]
       * @returns {Object} The filtered stream with `dict` attached.
       */
      makeStream: function Parser_makeStream(dict, cipherTransform) {
        var lexer = this.lexer;
        var stream = lexer.stream;

        // Get the stream start position.
        lexer.skipToNextLine();
        var pos = stream.pos - 1;

        // Get the length.
        var length = dict.get('Length');
        if (!isInt(length)) {
          info('Bad ' + length + ' attribute in stream');
          length = 0;
        }

        // Skip over the stream data.
        stream.pos = pos + length;
        lexer.nextChar();

        // Shift '>>' and check whether the new object marks the end of stream.
        if (this.tryShift() && isCmd(this.buf2, 'endstream')) {
          this.shift(); // 'stream'
        } else {
          // Bad stream length: scan for the 'endstream' signature instead.
          stream.pos = pos;
          var SCAN_BLOCK_SIZE = 2048;
          var ENDSTREAM_SIGNATURE_LENGTH = 9;
          // The bytes of "endstream".
          var ENDSTREAM_SIGNATURE = [0x65, 0x6E, 0x64, 0x73, 0x74, 0x72, 0x65,
                                     0x61, 0x6D];
          var skipped = 0,
              found = false,
              i,
              j;
          while (stream.pos < stream.end) {
            var scanBytes = stream.peekBytes(SCAN_BLOCK_SIZE);
            // Stop short of the block end so that a signature straddling two
            // blocks is found at the start of the next iteration.
            var scanLength = scanBytes.length - ENDSTREAM_SIGNATURE_LENGTH;
            if (scanLength <= 0) {
              break;
            }
            found = false;
            i = 0;
            while (i < scanLength) {
              j = 0;
              while (j < ENDSTREAM_SIGNATURE_LENGTH &&
                     scanBytes[i + j] === ENDSTREAM_SIGNATURE[j]) {
                j++;
              }
              if (j >= ENDSTREAM_SIGNATURE_LENGTH) {
                found = true;
                break;
              }
              i++;
            }
            if (found) {
              skipped += i;
              stream.pos += i;
              break;
            }
            skipped += scanLength;
            stream.pos += scanLength;
          }
          if (!found) {
            error('Missing endstream');
          }
          length = skipped;

          lexer.nextChar();
          this.shift();
          this.shift();
        }
        this.shift(); // 'endstream'

        stream = stream.makeSubStream(pos, length, dict);
        if (cipherTransform) {
          stream = cipherTransform.createStream(stream, length);
        }
        stream = this.filter(stream, dict, length);
        stream.dict = dict;
        return stream;
      },

      /**
       * Wraps `stream` in the decoder(s) named by the dictionary's
       * `Filter`/`F` entry, pairing each with its `DecodeParms`/`DP` entry.
       *
       * @param {Object} stream - The raw stream.
       * @param {Dict} dict - The stream (or inline image) dictionary.
       * @param {number} length - Raw length; only passed to the first filter.
       * @returns {Object} The decoded stream, or `stream` when no filter is set.
       */
      filter: function Parser_filter(stream, dict, length) {
        var filter = dict.get('Filter', 'F');
        var params = dict.get('DecodeParms', 'DP');
        if (isName(filter)) {
          if (isArray(params)) {
            params = this.xref.fetchIfRef(params[0]);
          }
          return this.makeFilter(stream, filter.name, length, params);
        }

        var maybeLength = length;
        if (isArray(filter)) {
          var filterArray = filter;
          var paramsArray = params;
          for (var i = 0, ii = filterArray.length; i < ii; ++i) {
            filter = this.xref.fetchIfRef(filterArray[i]);
            if (!isName(filter)) {
              error('Bad filter name: ' + filter);
            }

            params = null;
            if (isArray(paramsArray) && (i in paramsArray)) {
              params = this.xref.fetchIfRef(paramsArray[i]);
            }
            stream = this.makeFilter(stream, filter.name, maybeLength, params);
            // After the first filter the length is no longer known.
            maybeLength = null;
          }
        }
        return stream;
      },

      /**
       * Creates the decoder stream for a single filter name and records the
       * stream type in `xref.stats.streamTypes`.
       *
       * @param {Object} stream - The stream to decode.
       * @param {string} name - Filter name or its abbreviation.
       * @param {number|null} maybeLength - Raw length, if known.
       * @param {Object} [params] - Decode parameters for this filter.
       * @returns {Object} The decoder stream; a NullStream for empty or invalid
       *   streams; the input stream itself for unsupported filters.
       */
      makeFilter:
          function Parser_makeFilter(stream, name, maybeLength, params) {
        // A stream with no data cannot be decoded, so skip the decoder and
        // return an empty stream.
        if (maybeLength === 0) {
          warn('Empty "' + name + '" stream.');
          return new NullStream(stream);
        }
        try {
          var xrefStreamStats = this.xref.stats.streamTypes;
          if (name === 'FlateDecode' || name === 'Fl') {
            xrefStreamStats[StreamType.FLATE] = true;
            if (params) {
              return new PredictorStream(new FlateStream(stream, maybeLength),
                                         maybeLength, params);
            }
            return new FlateStream(stream, maybeLength);
          }
          if (name === 'LZWDecode' || name === 'LZW') {
            xrefStreamStats[StreamType.LZW] = true;
            var earlyChange = 1;
            if (params) {
              if (params.has('EarlyChange')) {
                earlyChange = params.get('EarlyChange');
              }
              return new PredictorStream(
                new LZWStream(stream, maybeLength, earlyChange),
                maybeLength, params);
            }
            return new LZWStream(stream, maybeLength, earlyChange);
          }
          if (name === 'DCTDecode' || name === 'DCT') {
            xrefStreamStats[StreamType.DCT] = true;
            return new JpegStream(stream, maybeLength, stream.dict, params);
          }
          if (name === 'JPXDecode' || name === 'JPX') {
            xrefStreamStats[StreamType.JPX] = true;
            return new JpxStream(stream, maybeLength, stream.dict, params);
          }
          if (name === 'ASCII85Decode' || name === 'A85') {
            xrefStreamStats[StreamType.A85] = true;
            return new Ascii85Stream(stream, maybeLength);
          }
          if (name === 'ASCIIHexDecode' || name === 'AHx') {
            xrefStreamStats[StreamType.AHX] = true;
            return new AsciiHexStream(stream, maybeLength);
          }
          if (name === 'CCITTFaxDecode' || name === 'CCF') {
            xrefStreamStats[StreamType.CCF] = true;
            return new CCITTFaxStream(stream, maybeLength, params);
          }
          if (name === 'RunLengthDecode' || name === 'RL') {
            xrefStreamStats[StreamType.RL] = true;
            return new RunLengthStream(stream, maybeLength);
          }
          if (name === 'JBIG2Decode') {
            xrefStreamStats[StreamType.JBIG] = true;
            return new Jbig2Stream(stream, maybeLength, stream.dict, params);
          }
          warn('filter "' + name + '" not supported yet');
          return stream;
        } catch (ex) {
          if (ex instanceof MissingDataException) {
            throw ex;
          }
          warn('Invalid stream: \"' + ex + '\"');
          return new NullStream(stream);
        }
      }
    };

    return Parser;
  }();
  /**
   * Lexer splits a byte stream into PDF tokens: numbers, literal and
   * hexadecimal strings, names, the booleans/null keywords and commands.
   */
  var Lexer = function LexerClosure() {
    /**
     * @param {Object} stream - Byte source exposing `getByte()`/`peekByte()`.
     * @param {Object} [knownCommands] - Optional map of valid command names.
     *   When provided, a command token is cut as soon as extending it would
     *   turn a known command into an unknown one, which lets the lexer split
     *   operators that are written without separating whitespace.
     */
    function Lexer(stream, knownCommands) {
      this.stream = stream;
      this.nextChar();

      // Scratch buffer shared by the string/name readers; each of them
      // truncates it before use.
      this.strBuf = [];

      this.knownCommands = knownCommands;
    }

    // Character classes indexed by byte value:
    //   1 = whitespace, 2 = delimiter ( % ( ) / < > [ ] { } ), 0 = regular.
    var specialChars = [
      1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
      1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, // 3x
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 5x
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6x
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 7x
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ax
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // bx
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // cx
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // dx
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ex
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // fx
    ];

    /**
     * @param {number} ch - Character code.
     * @returns {number} The value 0-15 of a hex digit, or -1 otherwise.
     */
    function toHexDigit(ch) {
      if (ch >= 0x30 && ch <= 0x39) { // '0'-'9'
        return ch & 0x0F;
      }
      if ((ch >= 0x41 && ch <= 0x46) || (ch >= 0x61 && ch <= 0x66)) {
        // 'A'-'F', 'a'-'f'
        return (ch & 0x0F) + 9;
      }
      return -1;
    }

    Lexer.prototype = {
      /** Reads the next byte into `currentChar` and returns it (-1 at EOF). */
      nextChar: function Lexer_nextChar() {
        return (this.currentChar = this.stream.getByte());
      },

      /** Returns the byte after `currentChar` without consuming it. */
      peekChar: function Lexer_peekChar() {
        return this.stream.peekByte();
      },

      /**
       * Reads a number starting at `currentChar`. Tolerates a doubled leading
       * minus sign, line breaks between the sign/point and the digits, stray
       * minus signs inside the number, and exponent notation.
       * @returns {number}
       */
      getNumber: function Lexer_getNumber() {
        var ch = this.currentChar;
        var eNotation = false;
        var divideBy = 0; // Sentinel: zero until a decimal point is seen.
        var sign = 1;

        if (ch === 0x2D) { // '-'
          sign = -1;
          ch = this.nextChar();

          if (ch === 0x2D) { // '-'
            // Ignore double negative.
            ch = this.nextChar();
          }
        } else if (ch === 0x2B) { // '+'
          ch = this.nextChar();
        }
        if (ch === 0x2E) { // '.'
          divideBy = 10;
          ch = this.nextChar();
        }
        if (ch === 0x0A || ch === 0x0D) { // LF, CR
          // Ignore line breaks.
          do {
            ch = this.nextChar();
          } while (ch === 0x0A || ch === 0x0D);
        }
        if (ch < 0x30 || ch > 0x39) { // not '0'-'9'
          error('Invalid number: ' + String.fromCharCode(ch) +
                ' (charCode ' + ch + ')');
        }

        var baseValue = ch - 0x30; // '0'
        var powerValue = 0;
        var powerValueSign = 1;

        while ((ch = this.nextChar()) >= 0) {
          if (0x30 <= ch && ch <= 0x39) { // '0'-'9'
            var currentDigit = ch - 0x30; // '0'
            if (eNotation) { // We are after an 'e' or 'E'.
              powerValue = powerValue * 10 + currentDigit;
            } else {
              if (divideBy !== 0) { // We are after a point.
                divideBy *= 10;
              }
              baseValue = baseValue * 10 + currentDigit;
            }
          } else if (ch === 0x2E) { // '.'
            if (divideBy === 0) {
              divideBy = 1;
            } else {
              // A number can have only one '.'.
              break;
            }
          } else if (ch === 0x2D) { // '-'
            // Ignore minus signs in the middle of numbers.
            warn('Badly formatted number');
          } else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
            // 'E' can be either scientific notation or the beginning of a new
            // operator; decide by looking at the following character.
            ch = this.peekChar();
            if (ch === 0x2B || ch === 0x2D) { // '+', '-'
              powerValueSign = (ch === 0x2D) ? -1 : 1;
              this.nextChar(); // Consume the sign character.
            } else if (ch < 0x30 || ch > 0x39) { // not '0'-'9'
              // The 'E' must be the beginning of a new operator.
              break;
            }
            eNotation = true;
          } else {
            // The last character doesn't belong to us.
            break;
          }
        }

        if (divideBy !== 0) {
          baseValue /= divideBy;
        }
        if (eNotation) {
          baseValue *= Math.pow(10, powerValueSign * powerValue);
        }
        return sign * baseValue;
      },

      /**
       * Reads a literal string; `currentChar` must be the opening '('.
       * Handles balanced nested parentheses, backslash escapes, up to three
       * octal digits, and backslash line continuations.
       * @returns {string}
       */
      getString: function Lexer_getString() {
        var numParen = 1;
        var done = false;
        var strBuf = this.strBuf;
        strBuf.length = 0;

        var ch = this.nextChar();
        while (true) {
          // Set when `ch` already holds the next unprocessed character, so the
          // loop must not read another one.
          var charBuffered = false;
          switch (ch | 0) {
            case -1:
              warn('Unterminated string');
              done = true;
              break;
            case 0x28: // '('
              ++numParen;
              strBuf.push('(');
              break;
            case 0x29: // ')'
              if (--numParen === 0) {
                this.nextChar(); // Consume the string's closing ')'.
                done = true;
              } else {
                strBuf.push(')');
              }
              break;
            case 0x5C: // '\\'
              ch = this.nextChar();
              switch (ch) {
                case -1:
                  warn('Unterminated string');
                  done = true;
                  break;
                case 0x6E: // 'n'
                  strBuf.push('\n');
                  break;
                case 0x72: // 'r'
                  strBuf.push('\r');
                  break;
                case 0x74: // 't'
                  strBuf.push('\t');
                  break;
                case 0x62: // 'b'
                  strBuf.push('\b');
                  break;
                case 0x66: // 'f'
                  strBuf.push('\f');
                  break;
                case 0x5C: // '\'
                case 0x28: // '('
                case 0x29: // ')'
                  strBuf.push(String.fromCharCode(ch));
                  break;
                case 0x30: case 0x31: case 0x32: case 0x33: // '0'-'3'
                case 0x34: case 0x35: case 0x36: case 0x37: // '4'-'7'
                  var x = ch & 0x0F;
                  ch = this.nextChar();
                  charBuffered = true;
                  if (ch >= 0x30 && ch <= 0x37) { // '0'-'7'
                    x = (x << 3) + (ch & 0x0F);
                    ch = this.nextChar();
                    if (ch >= 0x30 && ch <= 0x37) { // '0'-'7'
                      // Third octal digit consumed; nothing is buffered.
                      charBuffered = false;
                      x = (x << 3) + (ch & 0x0F);
                    }
                  }
                  strBuf.push(String.fromCharCode(x));
                  break;
                case 0x0D: // CR
                  if (this.peekChar() === 0x0A) { // LF
                    this.nextChar();
                  }
                  break;
                case 0x0A: // LF
                  break;
                default:
                  strBuf.push(String.fromCharCode(ch));
                  break;
              }
              break;
            default:
              strBuf.push(String.fromCharCode(ch));
              break;
          }
          if (done) {
            break;
          }
          if (!charBuffered) {
            ch = this.nextChar();
          }
        }
        return strBuf.join('');
      },

      /**
       * Reads a name; `currentChar` must be the leading '/'. `#xx` sequences
       * are decoded; malformed ones are kept literally with a warning.
       * @returns {Name}
       */
      getName: function Lexer_getName() {
        var ch, previousCh;
        var strBuf = this.strBuf;
        strBuf.length = 0;
        while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
          if (ch === 0x23) { // '#'
            ch = this.nextChar();
            // `ch < 0` is end of data; without this check the EOF marker (-1)
            // would be appended to the name as the character U+FFFF.
            if (ch < 0 || specialChars[ch]) {
              warn('Lexer_getName: ' +
                   'NUMBER SIGN (#) should be followed by a hexadecimal number.');
              strBuf.push('#');
              break;
            }
            var x = toHexDigit(ch);
            if (x !== -1) {
              previousCh = ch;
              ch = this.nextChar();
              var x2 = toHexDigit(ch);
              if (x2 === -1) {
                warn('Lexer_getName: Illegal digit (' +
                     String.fromCharCode(ch) + ') in hexadecimal number.');
                strBuf.push('#', String.fromCharCode(previousCh));
                // Same end-of-data guard as above.
                if (ch < 0 || specialChars[ch]) {
                  break;
                }
                strBuf.push(String.fromCharCode(ch));
                continue;
              }
              strBuf.push(String.fromCharCode((x << 4) | x2));
            } else {
              strBuf.push('#', String.fromCharCode(ch));
            }
          } else {
            strBuf.push(String.fromCharCode(ch));
          }
        }
        if (strBuf.length > 127) {
          warn('name token is longer than allowed by the spec: ' +
               strBuf.length);
        }
        return Name.get(strBuf.join(''));
      },

      /**
       * Reads a hexadecimal string; `currentChar` must be the first character
       * after '<'. Whitespace is skipped and invalid characters are ignored
       * with a warning.
       * @returns {string} One character per decoded byte.
       */
      getHexString: function Lexer_getHexString() {
        var strBuf = this.strBuf;
        strBuf.length = 0;
        var ch = this.currentChar;
        var isFirstHex = true;
        var firstDigit;
        var secondDigit;
        while (true) {
          if (ch < 0) {
            warn('Unterminated hex string');
            break;
          } else if (ch === 0x3E) { // '>'
            this.nextChar();
            break;
          } else if (specialChars[ch] === 1) { // whitespace
            ch = this.nextChar();
            continue;
          } else {
            if (isFirstHex) {
              firstDigit = toHexDigit(ch);
              if (firstDigit === -1) {
                warn('Ignoring invalid character "' + ch + '" in hex string');
                ch = this.nextChar();
                continue;
              }
            } else {
              secondDigit = toHexDigit(ch);
              if (secondDigit === -1) {
                warn('Ignoring invalid character "' + ch + '" in hex string');
                ch = this.nextChar();
                continue;
              }
              strBuf.push(String.fromCharCode((firstDigit << 4) | secondDigit));
            }
            isFirstHex = !isFirstHex;
            ch = this.nextChar();
          }
        }
        // PDF spec, "Hexadecimal Strings": if there is an odd number of
        // digits, the missing final digit is assumed to be 0. Previously the
        // dangling digit was silently dropped.
        if (!isFirstHex) {
          strBuf.push(String.fromCharCode(firstDigit << 4));
        }
        return strBuf.join('');
      },

      /**
       * Reads the next token, skipping whitespace and comments.
       * @returns {*} EOF, a number, a string, a Name, `true`/`false`/`null`,
       *   or a Cmd for everything else.
       */
      getObj: function Lexer_getObj() {
        // Skip whitespace and comments.
        var comment = false;
        var ch = this.currentChar;
        while (true) {
          if (ch < 0) {
            return EOF;
          }
          if (comment) {
            if (ch === 0x0A || ch === 0x0D) { // LF, CR
              comment = false;
            }
          } else if (ch === 0x25) { // '%'
            comment = true;
          } else if (specialChars[ch] !== 1) {
            break;
          }
          ch = this.nextChar();
        }

        // Start reading a token.
        switch (ch | 0) {
          case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: // '0'-'4'
          case 0x35: case 0x36: case 0x37: case 0x38: case 0x39: // '5'-'9'
          case 0x2B: case 0x2D: case 0x2E: // '+', '-', '.'
            return this.getNumber();
          case 0x28: // '('
            return this.getString();
          case 0x2F: // '/'
            return this.getName();
          // array punctuation
          case 0x5B: // '['
            this.nextChar();
            return Cmd.get('[');
          case 0x5D: // ']'
            this.nextChar();
            return Cmd.get(']');
          // hex string or dict punctuation
          case 0x3C: // '<'
            ch = this.nextChar();
            if (ch === 0x3C) {
              // dict punctuation
              this.nextChar();
              return Cmd.get('<<');
            }
            return this.getHexString();
          // dict punctuation
          case 0x3E: // '>'
            ch = this.nextChar();
            if (ch === 0x3E) {
              this.nextChar();
              return Cmd.get('>>');
            }
            return Cmd.get('>');
          case 0x7B: // '{'
            this.nextChar();
            return Cmd.get('{');
          case 0x7D: // '}'
            this.nextChar();
            return Cmd.get('}');
          case 0x29: // ')'
            // Consume the current character so that error recovery in the
            // caller does not see the same ')' again.
            this.nextChar();
            error('Illegal character: ' + ch);
            break;
        }

        // Command.
        var str = String.fromCharCode(ch);
        var knownCommands = this.knownCommands;
        var knownCommandFound = knownCommands && knownCommands[str] !== undefined;
        while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
          // Stop if a known command is found and the next character does not
          // make the string a longer known command.
          var possibleCommand = str + String.fromCharCode(ch);
          if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
            break;
          }
          if (str.length === 128) {
            error('Command token too long: ' + str.length);
          }
          str = possibleCommand;
          knownCommandFound = knownCommands && knownCommands[str] !== undefined;
        }
        if (str === 'true') {
          return true;
        }
        if (str === 'false') {
          return false;
        }
        if (str === 'null') {
          return null;
        }
        return Cmd.get(str);
      },

      /**
       * Advances past the next end-of-line marker (CR, LF or CR LF), leaving
       * `currentChar` on the first character of the following line.
       */
      skipToNextLine: function Lexer_skipToNextLine() {
        var ch = this.currentChar;
        while (ch >= 0) {
          if (ch === 0x0D) { // CR
            ch = this.nextChar();
            if (ch === 0x0A) { // LF
              this.nextChar();
            }
            break;
          } else if (ch === 0x0A) { // LF
            this.nextChar();
            break;
          }
          ch = this.nextChar();
        }
      }
    };

    return Lexer;
  }();
  /**
   * Reader for the linearization parameter dictionary found at the start of
   * a linearized document.
   */
  var Linearization = {
    /**
     * Parses `<int> <int> obj <dict>` from the start of `stream` and validates
     * the linearization dictionary.
     *
     * @param {Object} stream - Byte stream; its `length` must match the `L`
     *   entry of the dictionary.
     * @returns {Object|null} The linearization parameters, or null when the
     *   stream does not start with a linearization dictionary.
     * @throws {Error} When the dictionary is present but an entry is invalid.
     */
    create: function LinearizationCreate(stream) {
      var parser = new Parser(new Lexer(stream), false, null);
      var objNumber = parser.getObj();
      var genNumber = parser.getObj();
      var objKeyword = parser.getObj();
      var linDict = parser.getObj();

      // Reads an integer entry; zero is only accepted when `zeroAllowed`.
      function readInt(key, zeroAllowed) {
        var value = linDict.get(key);
        var inRange = isInt(value) && (zeroAllowed ? value >= 0 : value > 0);
        if (!inRange) {
          throw new Error('The "' + key +
                          '" parameter in the linearization dictionary is invalid.');
        }
        return value;
      }

      // Reads the hint array: two or four positive integers.
      function readHints() {
        var hintArray = linDict.get('H');
        var count;
        var wellFormed = isArray(hintArray) &&
                         ((count = hintArray.length) === 2 || count === 4);
        if (!wellFormed) {
          throw new Error('Hint array in the linearization dictionary is invalid.');
        }
        for (var k = 0; k < count; k++) {
          var entry = hintArray[k];
          if (!isInt(entry) || !(entry > 0)) {
            throw new Error('Hint (' + k +
                            ') in the linearization dictionary is invalid.');
          }
        }
        return hintArray;
      }

      // Anything other than "<int> <int> obj <dict>" is not linearized.
      if (!isInt(objNumber) || !isInt(genNumber) ||
          !isCmd(objKeyword, 'obj') || !isDict(linDict)) {
        return null;
      }
      var linearized = linDict.get('Linearized');
      if (!isNum(linearized) || !(linearized > 0)) {
        return null;
      }

      var length = readInt('L');
      if (length !== stream.length) {
        throw new Error('The "L" parameter in the linearization dictionary ' +
                        'does not equal the stream length.');
      }

      // The entries are read in this order, which determines which invalid
      // entry is reported first.
      var hints = readHints();
      var objectNumberFirst = readInt('O');
      var endFirst = readInt('E');
      var numPages = readInt('N');
      var mainXRefEntriesOffset = readInt('T');
      var pageFirst = linDict.has('P') ? readInt('P', true) : 0;

      return {
        length: length,
        hints: hints,
        objectNumberFirst: objectNumberFirst,
        endFirst: endFirst,
        numPages: numPages,
        mainXRefEntriesOffset: mainXRefEntriesOffset,
        pageFirst: pageFirst
      };
    }
  };
  983. exports.Lexer = Lexer;
  984. exports.Linearization = Linearization;
  985. exports.Parser = Parser;