pdf_find_controller.js 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * Javascript code in this page
  4. *
  5. * Copyright 2020 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * Javascript code in this page
  21. */
  22. "use strict";
  23. Object.defineProperty(exports, "__esModule", {
  24. value: true
  25. });
  26. exports.PDFFindController = exports.FindState = void 0;
  27. var _pdf = require("../pdf");
  28. var _pdf_find_utils = require("./pdf_find_utils.js");
  29. var _ui_utils = require("./ui_utils.js");
  /**
   * Search status codes reported to the UI through the
   * "updatefindcontrolstate" event.
   *
   * @enum {number}
   */
  const FindState = {
    // A match was selected (also sent when the query is empty or the find bar
    // closes).
    FOUND: 0,
    // No match was found.
    NOT_FOUND: 1,
    // A match was selected after the search wrapped around the document.
    WRAPPED: 2,
    // A search command has been accepted and is still being processed.
    PENDING: 3,
  };
  36. exports.FindState = FindState;
  // Delay passed to `setTimeout` before a "find" command actually runs the
  // search, so rapid successive commands replace each other's timer.
  const FIND_TIMEOUT = 250;

  // Offsets handed to `scrollIntoView` as the `top`/`left` of the scroll spot
  // when the selected match is brought into view.
  const MATCH_SCROLL_OFFSET_TOP = -50;
  const MATCH_SCROLL_OFFSET_LEFT = -400;
  // Typographic characters that are replaced by plain ASCII equivalents before
  // searching, so that e.g. a straight quote in the query matches a curly one
  // in the document.
  const CHARACTERS_TO_NORMALIZE = {
    "\u2018": "'", // Left single quotation mark
    "\u2019": "'", // Right single quotation mark
    "\u201A": "'", // Single low-9 quotation mark
    "\u201B": "'", // Single high-reversed-9 quotation mark
    "\u201C": '"', // Left double quotation mark
    "\u201D": '"', // Right double quotation mark
    "\u201E": '"', // Double low-9 quotation mark
    "\u201F": '"', // Double high-reversed-9 quotation mark
    "\u00BC": "1/4", // Vulgar fraction one quarter
    "\u00BD": "1/2", // Vulgar fraction one half
    "\u00BE": "3/4", // Vulgar fraction three quarters
  };

  // Character-class regex built from the keys above; created lazily on the
  // first `normalize` call and reused afterwards.
  let normalizationRegex = null;

  /**
   * Replaces every character listed in `CHARACTERS_TO_NORMALIZE` by its ASCII
   * equivalent.
   *
   * @param {string} text - The text to normalize.
   * @returns {Array} A two-element array `[normalizedText, diffs]`. `diffs` is
   *   `null` when no replacement changed the text length; otherwise it is a
   *   list of `[index, diff]` pairs, where `index` is the position of the
   *   replaced character in the *original* text and `diff` is how many
   *   characters longer its replacement is.
   */
  function normalize(text) {
    if (!normalizationRegex) {
      const characterClass = Object.keys(CHARACTERS_TO_NORMALIZE).join("");
      normalizationRegex = new RegExp(`[${characterClass}]`, "g");
    }

    const lengthChanges = [];
    const normalizedText = text.replace(
      normalizationRegex,
      (original, position) => {
        const replacement = CHARACTERS_TO_NORMALIZE[original];
        const delta = replacement.length - original.length;
        // Same-length replacements (the quotes) do not shift later indices.
        if (delta !== 0) {
          lengthChanges.push([position, delta]);
        }
        return replacement;
      }
    );

    return [normalizedText, lengthChanges.length > 0 ? lengthChanges : null];
  }
  /**
   * Maps an index in normalized text back to the corresponding index in the
   * original (un-normalized) text.
   *
   * @param {number} matchIndex - Index into the normalized text.
   * @param {Array|null} [diffs] - The `[index, diff]` pairs produced by
   *   `normalize`, or `null` when normalization did not change any lengths.
   * @returns {number} The index into the original text. An index that falls
   *   inside an expanded replacement maps to the replaced character itself.
   */
  function getOriginalIndex(matchIndex, diffs = null) {
    if (!diffs) {
      return matchIndex;
    }

    // Number of extra characters that normalization inserted before
    // `matchIndex`.
    let shift = 0;
    for (let i = 0; i < diffs.length; i++) {
      const [originalPos, growth] = diffs[i];
      const normalizedPos = originalPos + shift;

      // This and all later replacements start at or after the match index.
      if (normalizedPos >= matchIndex) {
        break;
      }
      // The match index lies inside this replacement's expansion: only count
      // the part of the expansion that precedes it.
      if (normalizedPos + growth > matchIndex) {
        shift += matchIndex - normalizedPos;
        break;
      }
      shift += growth;
    }
    return matchIndex - shift;
  }
  /**
   * Drives text search over the pages of a PDF document: it extracts and
   * normalizes the text of every page, computes the match positions per page,
   * keeps track of the currently selected match, and reports progress through
   * events dispatched on the event bus ("updatetextlayermatches",
   * "updatefindmatchescount" and "updatefindcontrolstate").
   */
  class PDFFindController {
    /**
     * @param {Object} options
     * @param {Object} options.linkService - Navigation service; this class
     *   reads and writes its `page` property, reads `pagesCount` and calls
     *   `isPageVisible(pageNumber)`.
     * @param {Object} options.eventBus - Event bus; this class calls its
     *   `_on()` and `dispatch()` methods.
     */
    constructor({ linkService, eventBus }) {
      this._linkService = linkService;
      this._eventBus = eventBus;

      this._reset();
      eventBus._on("findbarclose", this._onFindBarClose.bind(this));
    }

    /** @type {boolean} Whether matches should currently be highlighted. */
    get highlightMatches() {
      return this._highlightMatches;
    }

    /** @type {Array} Per page, the start indices of the matches found. */
    get pageMatches() {
      return this._pageMatches;
    }

    /** @type {Array} Per page, the lengths of the matches found. */
    get pageMatchesLength() {
      return this._pageMatchesLength;
    }

    /** @type {Object} The selected match, as `{ pageIdx, matchIdx }`. */
    get selected() {
      return this._selected;
    }

    /** @type {Object|null} The state object of the last executed command. */
    get state() {
      return this._state;
    }

    /**
     * Sets the document to search in. Any state belonging to a previously set
     * document is discarded first.
     *
     * @param {Object} pdfDocument - The document; a falsy value only clears
     *   the previous document.
     */
    setDocument(pdfDocument) {
      if (this._pdfDocument) {
        this._reset();
      }
      if (!pdfDocument) {
        return;
      }
      this._pdfDocument = pdfDocument;
      // Unblocks commands that were issued before a document was available.
      this._firstPageCapability.resolve();
    }

    /**
     * Executes a find command.
     *
     * @param {string} cmd - The command name. "find", "findagain" and
     *   "findhighlightallchange" are handled specifically; any other value
     *   simply advances to the next match.
     * @param {Object} state - The search parameters. This class reads its
     *   `query`, `caseSensitive`, `entireWord`, `phraseSearch`,
     *   `highlightAll` and `findPrevious` properties. Nothing happens when
     *   `state` is falsy.
     */
    executeCommand(cmd, state) {
      if (!state) {
        return;
      }
      const pdfDocument = this._pdfDocument;

      if (this._state === null || this._shouldDirtyMatch(cmd, state)) {
        this._dirtyMatch = true;
      }
      this._state = state;
      if (cmd !== "findhighlightallchange") {
        this._updateUIState(FindState.PENDING);
      }

      this._firstPageCapability.promise.then(() => {
        // Bail out if the document was closed, or replaced by another one,
        // before the search could start.
        if (
          !this._pdfDocument ||
          (pdfDocument && this._pdfDocument !== pdfDocument)
        ) {
          return;
        }
        this._extractText();

        const findbarClosed = !this._highlightMatches;
        const pendingTimeout = !!this._findTimeout;

        if (this._findTimeout) {
          clearTimeout(this._findTimeout);
          this._findTimeout = null;
        }
        if (cmd === "find") {
          // Delay the search so that a newer "find" command arriving within
          // FIND_TIMEOUT replaces this one instead of running after it.
          this._findTimeout = setTimeout(() => {
            this._nextMatch();
            this._findTimeout = null;
          }, FIND_TIMEOUT);
        } else if (this._dirtyMatch) {
          // The matches are outdated: search immediately.
          this._nextMatch();
        } else if (cmd === "findagain") {
          this._nextMatch();

          // The find bar was closed in the meantime: re-apply highlighting.
          if (findbarClosed && this._state.highlightAll) {
            this._updateAllPages();
          }
        } else if (cmd === "findhighlightallchange") {
          // A delayed "find" was cancelled above: run it now.
          if (pendingTimeout) {
            this._nextMatch();
          } else {
            this._highlightMatches = true;
          }
          this._updateAllPages();
        } else {
          this._nextMatch();
        }
      });
    }

    /**
     * Scrolls the given element into view, provided that a scroll was
     * requested by a match update and the element belongs to the currently
     * selected match.
     *
     * @param {Object} params
     * @param {Object|null} [params.element] - The element to scroll to.
     * @param {number} [params.pageIndex] - Page index of the match.
     * @param {number} [params.matchIndex] - Index of the match on its page.
     */
    scrollMatchIntoView({ element = null, pageIndex = -1, matchIndex = -1 }) {
      if (!this._scrollMatches || !element) {
        return;
      } else if (matchIndex === -1 || matchIndex !== this._selected.matchIdx) {
        return;
      } else if (pageIndex === -1 || pageIndex !== this._selected.pageIdx) {
        return;
      }
      // Only scroll once per match update.
      this._scrollMatches = false;

      const spot = {
        top: MATCH_SCROLL_OFFSET_TOP,
        left: MATCH_SCROLL_OFFSET_LEFT,
      };
      (0, _ui_utils.scrollIntoView)(element, spot, true);
    }

    /**
     * Restores the initial state: no document, no matches, no selection, no
     * pending timer, and a fresh capability that is resolved by `setDocument`.
     */
    _reset() {
      this._highlightMatches = false;
      this._scrollMatches = false;
      this._pdfDocument = null;
      this._pageMatches = [];
      this._pageMatchesLength = [];
      this._state = null;
      // The currently selected match.
      this._selected = {
        pageIdx: -1,
        matchIdx: -1,
      };
      // The position the search continues from.
      this._offset = {
        pageIdx: null,
        matchIdx: null,
        wrapped: false,
      };
      this._extractTextPromises = [];
      this._pageContents = []; // Normalized text of each page.
      this._pageDiffs = []; // Normalization diffs of each page.
      this._matchesCountTotal = 0;
      this._pagesToSearch = null;
      this._pendingFindMatches = Object.create(null);
      this._resumePageIdx = null;
      this._dirtyMatch = false;
      clearTimeout(this._findTimeout);
      this._findTimeout = null;

      this._firstPageCapability = (0, _pdf.createPromiseCapability)();
    }

    /**
     * @type {string} The normalized query of the current state. The result is
     *   cached and only recomputed when the raw query changes.
     */
    get _query() {
      if (this._state.query !== this._rawQuery) {
        this._rawQuery = this._state.query;
        [this._normalizedQuery] = normalize(this._state.query);
      }
      return this._normalizedQuery;
    }

    /**
     * Decides whether a command invalidates the current matches.
     * NOTE: reads `this._state.query`, so it must only be called once a
     * previous state exists (as `executeCommand` does).
     *
     * @param {string} cmd - The command name.
     * @param {Object} state - The new search parameters.
     * @returns {boolean} `true` when the search has to start over.
     */
    _shouldDirtyMatch(cmd, state) {
      // A changed query always requires a new search.
      if (state.query !== this._state.query) {
        return true;
      }
      switch (cmd) {
        case "findagain": {
          const pageNumber = this._selected.pageIdx + 1;
          const linkService = this._linkService;

          // Start over when the page holding the selected match is neither
          // the current page nor visible, so that the search continues from
          // the page the user is looking at.
          if (
            pageNumber >= 1 &&
            pageNumber <= linkService.pagesCount &&
            pageNumber !== linkService.page &&
            !linkService.isPageVisible(pageNumber)
          ) {
            return true;
          }
          return false;
        }
        case "findhighlightallchange":
          return false;
      }
      return true;
    }

    /**
     * Sorts the word matches of a page by position and copies them into the
     * output arrays, leaving out duplicates and matches that are entirely
     * covered by an earlier match.
     *
     * @param {Array<Object>} matchesWithLength - Entries of the form
     *   `{ match, matchLength, skipped }`; sorted in place, and `skipped` is
     *   set on the entries that are left out.
     * @param {Array<number>} matches - Receives the start indices.
     * @param {Array<number>} matchesLength - Receives the lengths.
     */
    _prepareMatches(matchesWithLength, matches, matchesLength) {
      function isSubTerm(currentIndex) {
        const currentElem = matchesWithLength[currentIndex];
        const nextElem = matchesWithLength[currentIndex + 1];

        // Same start as the next entry: since the sort puts shorter entries
        // first, the next one is at least as long, so skip this one.
        if (
          currentIndex < matchesWithLength.length - 1 &&
          currentElem.match === nextElem.match
        ) {
          currentElem.skipped = true;
          return true;
        }

        // Look backwards for a kept entry that covers the current one.
        for (let i = currentIndex - 1; i >= 0; i--) {
          const prevElem = matchesWithLength[i];
          if (prevElem.skipped) {
            continue;
          }
          // The previous entry ends before the current one starts.
          if (prevElem.match + prevElem.matchLength < currentElem.match) {
            break;
          }
          // The previous entry extends at least to the end of the current one.
          if (
            prevElem.match + prevElem.matchLength >=
            currentElem.match + currentElem.matchLength
          ) {
            currentElem.skipped = true;
            return true;
          }
        }
        return false;
      }

      // Sort by start index; entries with the same start by ascending length.
      matchesWithLength.sort(function (a, b) {
        return a.match === b.match
          ? a.matchLength - b.matchLength
          : a.match - b.match;
      });
      for (let i = 0, len = matchesWithLength.length; i < len; i++) {
        if (isSubTerm(i)) {
          continue;
        }
        matches.push(matchesWithLength[i].match);
        matchesLength.push(matchesWithLength[i].matchLength);
      }
    }

    /**
     * Checks whether a match is delimited on both sides, i.e. whether the
     * characters just outside the match have a different character type (as
     * reported by `getCharacterType`) than the adjacent characters inside it.
     *
     * @param {string} content - The page text.
     * @param {number} startIdx - Start index of the match in `content`.
     * @param {number} length - Length of the match.
     * @returns {boolean}
     */
    _isEntireWord(content, startIdx, length) {
      if (startIdx > 0) {
        const first = content.charCodeAt(startIdx);
        const limit = content.charCodeAt(startIdx - 1);
        if (
          (0, _pdf_find_utils.getCharacterType)(first) ===
          (0, _pdf_find_utils.getCharacterType)(limit)
        ) {
          return false;
        }
      }
      const endIdx = startIdx + length - 1;
      if (endIdx < content.length - 1) {
        const last = content.charCodeAt(endIdx);
        const limit = content.charCodeAt(endIdx + 1);
        if (
          (0, _pdf_find_utils.getCharacterType)(last) ===
          (0, _pdf_find_utils.getCharacterType)(limit)
        ) {
          return false;
        }
      }
      return true;
    }

    /**
     * Finds all non-overlapping occurrences of the complete query on a page
     * and stores them, mapped back to positions in the original page text, in
     * `_pageMatches` / `_pageMatchesLength`.
     *
     * @param {string} query - The (normalized) query.
     * @param {number} pageIndex - Index of the page.
     * @param {string} pageContent - The normalized page text.
     * @param {Array|null} pageDiffs - The normalization diffs of the page.
     * @param {boolean} entireWord - Only accept whole-word matches.
     */
    _calculatePhraseMatch(query, pageIndex, pageContent, pageDiffs, entireWord) {
      const matches = [],
        matchesLength = [];
      const queryLen = query.length;

      // Start at -queryLen so that the first search begins at index 0.
      let matchIdx = -queryLen;
      while (true) {
        matchIdx = pageContent.indexOf(query, matchIdx + queryLen);
        if (matchIdx === -1) {
          break;
        }
        if (entireWord && !this._isEntireWord(pageContent, matchIdx, queryLen)) {
          continue;
        }
        // Translate the start and the (inclusive) end of the match back to
        // the original text, since normalization may have changed lengths.
        const originalMatchIdx = getOriginalIndex(matchIdx, pageDiffs),
          matchEnd = matchIdx + queryLen - 1,
          originalQueryLen =
            getOriginalIndex(matchEnd, pageDiffs) - originalMatchIdx + 1;

        matches.push(originalMatchIdx);
        matchesLength.push(originalQueryLen);
      }
      this._pageMatches[pageIndex] = matches;
      this._pageMatchesLength[pageIndex] = matchesLength;
    }

    /**
     * Finds the occurrences of each whitespace-separated word of the query on
     * a page and stores the de-duplicated result, mapped back to positions in
     * the original page text, in `_pageMatches` / `_pageMatchesLength`.
     *
     * @param {string} query - The (normalized) query.
     * @param {number} pageIndex - Index of the page.
     * @param {string} pageContent - The normalized page text.
     * @param {Array|null} pageDiffs - The normalization diffs of the page.
     * @param {boolean} entireWord - Only accept whole-word matches.
     */
    _calculateWordMatch(query, pageIndex, pageContent, pageDiffs, entireWord) {
      const matchesWithLength = [];

      // `String.prototype.match` returns `null` when the query holds nothing
      // but whitespace; treat that as "no words" rather than throwing.
      const queryArray = query.match(/\S+/g) || [];
      for (let i = 0, len = queryArray.length; i < len; i++) {
        const subquery = queryArray[i];
        const subqueryLen = subquery.length;

        // Start at -subqueryLen so that the first search begins at index 0.
        let matchIdx = -subqueryLen;
        while (true) {
          matchIdx = pageContent.indexOf(subquery, matchIdx + subqueryLen);
          if (matchIdx === -1) {
            break;
          }
          if (
            entireWord &&
            !this._isEntireWord(pageContent, matchIdx, subqueryLen)
          ) {
            continue;
          }
          const originalMatchIdx = getOriginalIndex(matchIdx, pageDiffs),
            matchEnd = matchIdx + subqueryLen - 1,
            originalQueryLen =
              getOriginalIndex(matchEnd, pageDiffs) - originalMatchIdx + 1;

          // Other words may match at overlapping positions; those are sorted
          // out by `_prepareMatches` below.
          matchesWithLength.push({
            match: originalMatchIdx,
            matchLength: originalQueryLen,
            skipped: false,
          });
        }
      }

      this._pageMatchesLength[pageIndex] = [];
      this._pageMatches[pageIndex] = [];
      this._prepareMatches(
        matchesWithLength,
        this._pageMatches[pageIndex],
        this._pageMatchesLength[pageIndex]
      );
    }

    /**
     * Computes the matches of the current query on one page, refreshes the
     * page's highlighting if requested, resumes a search that was waiting for
     * this page, and updates the total match count.
     *
     * @param {number} pageIndex - Index of the page.
     */
    _calculateMatch(pageIndex) {
      let pageContent = this._pageContents[pageIndex];
      const pageDiffs = this._pageDiffs[pageIndex];
      let query = this._query;
      const { caseSensitive, entireWord, phraseSearch } = this._state;

      if (query.length === 0) {
        // Nothing to search for.
        return;
      }

      if (!caseSensitive) {
        pageContent = pageContent.toLowerCase();
        query = query.toLowerCase();
      }

      if (phraseSearch) {
        this._calculatePhraseMatch(
          query,
          pageIndex,
          pageContent,
          pageDiffs,
          entireWord
        );
      } else {
        this._calculateWordMatch(
          query,
          pageIndex,
          pageContent,
          pageDiffs,
          entireWord
        );
      }

      if (this._state.highlightAll) {
        this._updatePage(pageIndex);
      }
      // The search was suspended until the matches of this page were known.
      if (this._resumePageIdx === pageIndex) {
        this._resumePageIdx = null;
        this._nextPageMatch();
      }

      const pageMatchesCount = this._pageMatches[pageIndex].length;
      if (pageMatchesCount > 0) {
        this._matchesCountTotal += pageMatchesCount;
        this._updateUIResultsCount();
      }
    }

    /**
     * Starts extracting the text of all pages, unless that was already
     * started. The pages are processed one after another; for every page a
     * promise is stored in `_extractTextPromises` that resolves with the page
     * index once the page text is available (an empty text is stored when the
     * extraction fails).
     */
    _extractText() {
      // Perform text extraction once.
      if (this._extractTextPromises.length > 0) {
        return;
      }

      // Each page is chained onto the previous one.
      let promise = Promise.resolve();
      for (let i = 0, ii = this._linkService.pagesCount; i < ii; i++) {
        const extractTextCapability = (0, _pdf.createPromiseCapability)();
        this._extractTextPromises[i] = extractTextCapability.promise;

        promise = promise.then(() => {
          return this._pdfDocument
            .getPage(i + 1)
            .then(pdfPage => {
              return pdfPage.getTextContent({
                normalizeWhitespace: true,
              });
            })
            .then(
              textContent => {
                const textItems = textContent.items;
                const strBuf = [];

                for (let j = 0, jj = textItems.length; j < jj; j++) {
                  strBuf.push(textItems[j].str);
                }

                // Store the normalized page text and the normalization diffs.
                [this._pageContents[i], this._pageDiffs[i]] = normalize(
                  strBuf.join("")
                );
                extractTextCapability.resolve(i);
              },
              reason => {
                console.error(
                  `Unable to get text content for page ${i + 1}`,
                  reason
                );
                // Page error: treat the page as empty so that the search can
                // still complete.
                this._pageContents[i] = "";
                this._pageDiffs[i] = null;
                extractTextCapability.resolve(i);
              }
            );
        });
      }
    }

    /**
     * Requests a refresh of the highlighted matches of one page. If a scroll
     * is pending and the page holds the selected match, the page is made the
     * current page first.
     *
     * @param {number} index - Index of the page.
     */
    _updatePage(index) {
      if (this._scrollMatches && this._selected.pageIdx === index) {
        this._linkService.page = index + 1;
      }

      this._eventBus.dispatch("updatetextlayermatches", {
        source: this,
        pageIndex: index,
      });
    }

    /**
     * Requests a refresh of the highlighted matches, with a `pageIndex` of -1
     * instead of a specific page.
     */
    _updateAllPages() {
      this._eventBus.dispatch("updatetextlayermatches", {
        source: this,
        pageIndex: -1,
      });
    }

    /**
     * Moves the selection to the next (or, with `findPrevious`, the previous)
     * match. If the matches are outdated, the search is restarted from the
     * current page first.
     */
    _nextMatch() {
      const previous = this._state.findPrevious;
      const currentPageIndex = this._linkService.page - 1;
      const numPages = this._linkService.pagesCount;

      this._highlightMatches = true;

      if (this._dirtyMatch) {
        // Need to recalculate the matches: reset everything.
        this._dirtyMatch = false;
        this._selected.pageIdx = this._selected.matchIdx = -1;
        this._offset.pageIdx = currentPageIndex;
        this._offset.matchIdx = null;
        this._offset.wrapped = false;
        this._resumePageIdx = null;
        this._pageMatches.length = 0;
        this._pageMatchesLength.length = 0;
        this._matchesCountTotal = 0;

        this._updateAllPages(); // Wipe out any previously highlighted matches.

        for (let i = 0; i < numPages; i++) {
          // Start finding the matches as soon as the text is extracted, but
          // never queue more than one calculation per page.
          if (this._pendingFindMatches[i] === true) {
            continue;
          }
          this._pendingFindMatches[i] = true;
          this._extractTextPromises[i].then(pageIdx => {
            delete this._pendingFindMatches[pageIdx];
            this._calculateMatch(pageIdx);
          });
        }
      }

      // With an empty query there is nothing to select.
      if (this._query === "") {
        this._updateUIState(FindState.FOUND);
        return;
      }
      // Still waiting for the matches of a page: nothing can be done until
      // `_calculateMatch` resumes the search. The comparison must be against
      // `null`, since page index 0 is a valid pending page.
      if (this._resumePageIdx !== null) {
        return;
      }

      const offset = this._offset;
      // Keep track of how many pages should maximally be searched.
      this._pagesToSearch = numPages;
      // A match is currently selected: try to stay on the same page.
      if (offset.matchIdx !== null) {
        const numPageMatches = this._pageMatches[offset.pageIdx].length;
        if (
          (!previous && offset.matchIdx + 1 < numPageMatches) ||
          (previous && offset.matchIdx > 0)
        ) {
          // The page has another match in the search direction.
          offset.matchIdx = previous ? offset.matchIdx - 1 : offset.matchIdx + 1;
          this._updateMatch(/* found = */ true);
          return;
        }
        // No more matches on this page: continue on the adjacent page.
        this._advanceOffsetPage(previous);
      }
      // Start searching through the pages.
      this._nextPageMatch();
    }

    /**
     * Handles the matches of the page at the current search offset.
     *
     * @param {Array<number>} matches - The matches of that page.
     * @returns {boolean} `true` when the search is finished (a match was
     *   selected, or every page was visited without finding one); `false`
     *   when it should continue with the next page.
     */
    _matchesReady(matches) {
      const offset = this._offset;
      const numMatches = matches.length;
      const previous = this._state.findPrevious;

      if (numMatches) {
        // There were matches for the page, so select the first (or last) one.
        offset.matchIdx = previous ? numMatches - 1 : 0;
        this._updateMatch(/* found = */ true);
        return true;
      }
      // No matches on this page: move on.
      this._advanceOffsetPage(previous);
      if (offset.wrapped) {
        offset.matchIdx = null;
        if (this._pagesToSearch < 0) {
          // No point in wrapping around again: there are no matches.
          this._updateMatch(/* found = */ false);
          // Returning `true` stops the loop in `_nextPageMatch`, even though
          // nothing was found.
          return true;
        }
      }
      // The matches were not ready, and the search needs to keep going.
      return false;
    }

    /**
     * Walks through the pages, starting at the current search offset, until a
     * match is selected, all pages were visited, or a page is reached whose
     * matches are not available yet. In the last case the page is recorded in
     * `_resumePageIdx` and `_calculateMatch` continues the search later.
     */
    _nextPageMatch() {
      if (this._resumePageIdx !== null) {
        console.error("There can only be one pending page.");
      }

      let matches = null;
      do {
        const pageIdx = this._offset.pageIdx;
        matches = this._pageMatches[pageIdx];
        if (!matches) {
          // The matches don't exist yet for processing by `_matchesReady`, so
          // set a resume point for when they do exist.
          this._resumePageIdx = pageIdx;
          break;
        }
      } while (!this._matchesReady(matches));
    }

    /**
     * Moves the search offset to the adjacent page, wrapping around at the
     * first/last page, and counts down the number of pages left to search.
     *
     * @param {boolean} previous - Move backwards instead of forwards.
     */
    _advanceOffsetPage(previous) {
      const offset = this._offset;
      const numPages = this._linkService.pagesCount;
      offset.pageIdx = previous ? offset.pageIdx - 1 : offset.pageIdx + 1;
      offset.matchIdx = null;

      this._pagesToSearch--;

      if (offset.pageIdx >= numPages || offset.pageIdx < 0) {
        offset.pageIdx = previous ? numPages - 1 : 0;
        offset.wrapped = true;
      }
    }

    /**
     * Finishes a search step: selects the match at the current offset (when
     * one was found), reports the resulting state to the UI and refreshes the
     * affected pages.
     *
     * @param {boolean} [found] - Whether a match was found.
     */
    _updateMatch(found = false) {
      let state = FindState.NOT_FOUND;
      const wrapped = this._offset.wrapped;
      this._offset.wrapped = false;

      if (found) {
        const previousPage = this._selected.pageIdx;
        this._selected.pageIdx = this._offset.pageIdx;
        this._selected.matchIdx = this._offset.matchIdx;
        state = wrapped ? FindState.WRAPPED : FindState.FOUND;

        // Update the page that is no longer holding the selected match.
        if (previousPage !== -1 && previousPage !== this._selected.pageIdx) {
          this._updatePage(previousPage);
        }
      }

      this._updateUIState(state, this._state.findPrevious);
      if (this._selected.pageIdx !== -1) {
        // Ensure that the selected match will be scrolled into view.
        this._scrollMatches = true;

        this._updatePage(this._selected.pageIdx);
      }
    }

    /**
     * Handler for the "findbarclose" event: cancels a delayed or suspended
     * search and removes the highlighting.
     *
     * @param {Object} evt - The event data (unused).
     */
    _onFindBarClose(evt) {
      const pdfDocument = this._pdfDocument;
      // Since searching is asynchronous, ensure that the removal of the
      // highlighting happens after a search that is still being set up.
      this._firstPageCapability.promise.then(() => {
        // Only update the UI if the document is open and unchanged.
        if (
          !this._pdfDocument ||
          (pdfDocument && this._pdfDocument !== pdfDocument)
        ) {
          return;
        }
        // Cancel a search that has not started yet.
        if (this._findTimeout) {
          clearTimeout(this._findTimeout);
          this._findTimeout = null;
        }
        // Abort a search that is waiting for a page, and make sure that the
        // next command starts over. The comparison must be against `null`,
        // since page index 0 is a valid pending page.
        if (this._resumePageIdx !== null) {
          this._resumePageIdx = null;
          this._dirtyMatch = true;
        }
        // Reset the "pending"/"not found" status shown by the find bar.
        this._updateUIState(FindState.FOUND);

        this._highlightMatches = false;
        this._updateAllPages(); // Wipe out any previously highlighted matches.
      });
    }

    /**
     * Computes the 1-based position of the selected match among all matches
     * of the document.
     *
     * @returns {Object} `{ current, total }`; both are 0 when no match is
     *   selected or when the counts are not consistent (yet).
     */
    _requestMatchesCount() {
      const { pageIdx, matchIdx } = this._selected;
      let current = 0,
        total = this._matchesCountTotal;

      if (matchIdx !== -1) {
        // Count the matches on all pages before the selected one; pages whose
        // matches are not known yet count as zero.
        for (let i = 0; i < pageIdx; i++) {
          current += (this._pageMatches[i] && this._pageMatches[i].length) || 0;
        }
        current += matchIdx + 1;
      }
      // When searching starts, not all pages have been processed yet, so the
      // position may temporarily exceed the known total; report zero then.
      if (current < 1 || current > total) {
        current = total = 0;
      }
      return {
        current,
        total,
      };
    }

    /** Dispatches "updatefindmatchescount" with the current match counts. */
    _updateUIResultsCount() {
      this._eventBus.dispatch("updatefindmatchescount", {
        source: this,
        matchesCount: this._requestMatchesCount(),
      });
    }

    /**
     * Dispatches "updatefindcontrolstate" with the given search state.
     *
     * @param {number} state - One of the `FindState` values.
     * @param {boolean} [previous] - Whether the search ran backwards.
     */
    _updateUIState(state, previous) {
      this._eventBus.dispatch("updatefindcontrolstate", {
        source: this,
        state,
        previous,
        matchesCount: this._requestMatchesCount(),
        rawQuery: this._state ? this._state.query : null,
      });
    }
  }
  589. exports.PDFFindController = PDFFindController;