2
0

pdf_find_controller.js 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * Javascript code in this page
  4. *
  5. * Copyright 2021 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * Javascript code in this page
  21. */
  22. "use strict";
  23. Object.defineProperty(exports, "__esModule", {
  24. value: true
  25. });
  26. exports.PDFFindController = exports.FindState = void 0;
  27. var _pdf = require("../pdf");
  28. var _pdf_find_utils = require("./pdf_find_utils.js");
  29. var _ui_utils = require("./ui_utils.js");
  /**
   * Result codes reported to the UI through the "updatefindcontrolstate"
   * event.
   */
  const FindState = {
    // A match was selected without wrapping around the document.
    FOUND: 0,
    // The search finished without selecting any match.
    NOT_FOUND: 1,
    // A match was selected after the search wrapped past either end of the
    // document.
    WRAPPED: 2,
    // A find request was received and its result is not known yet.
    PENDING: 3,
  };
  36. exports.FindState = FindState;
  // Delay, in milliseconds, before a find request that carries no `type` is
  // actually run (see the `setTimeout` call in `PDFFindController._onFind`).
  const FIND_TIMEOUT = 250;

  // Offsets applied to the scroll target when the selected match is scrolled
  // into view; negative values leave space above / to the left of the match.
  // NOTE(review): presumably CSS pixels -- confirm against `scrollIntoView`
  // in ui_utils.js.
  const MATCH_SCROLL_OFFSET_TOP = -50;
  const MATCH_SCROLL_OFFSET_LEFT = -400;
  // Typographic characters that are replaced by a plain-ASCII equivalent
  // before searching, so that e.g. a query typed with straight quotes matches
  // curly quotes in the page text (and vice versa).
  const CHARACTERS_TO_NORMALIZE = {
    "\u2010": "-", // Hyphen
    "\u2018": "'", // Left single quotation mark
    "\u2019": "'", // Right single quotation mark
    "\u201A": "'", // Single low-9 quotation mark
    "\u201B": "'", // Single high-reversed-9 quotation mark
    "\u201C": '"', // Left double quotation mark
    "\u201D": '"', // Right double quotation mark
    "\u201E": '"', // Double low-9 quotation mark
    "\u201F": '"', // Double high-reversed-9 quotation mark
    "\u00BC": "1/4", // Vulgar fraction one quarter
    "\u00BD": "1/2", // Vulgar fraction one half
    "\u00BE": "3/4", // Vulgar fraction three quarters
  };

  // Character-class regex matching every key above; built on first use.
  let normalizationRegex = null;

  /**
   * Replaces every character listed in `CHARACTERS_TO_NORMALIZE` with its
   * plain-text equivalent.
   *
   * @param {string} text - The text to normalize.
   * @returns {Array} A two-element array `[normalizedText, diffs]`. `diffs` is
   *   `null` when no replacement changed the length of the text; otherwise it
   *   is a list of `[index, diff]` pairs where `index` is the position of the
   *   replaced character in the *original* text and `diff` is how many
   *   characters longer the replacement is.
   */
  function normalize(text) {
    if (normalizationRegex === null) {
      const charClass = Object.keys(CHARACTERS_TO_NORMALIZE).join("");
      normalizationRegex = new RegExp(`[${charClass}]`, "g");
    }

    let diffs = null;
    const normalizedText = text.replace(
      normalizationRegex,
      (original, position) => {
        const replacement = CHARACTERS_TO_NORMALIZE[original];
        const growth = replacement.length - original.length;
        // Only length-changing replacements need to be remembered; same-length
        // ones do not shift any index.
        if (growth !== 0) {
          if (diffs === null) {
            diffs = [];
          }
          diffs.push([position, growth]);
        }
        return replacement;
      }
    );

    return [normalizedText, diffs];
  }
  /**
   * Maps an index in normalized text back to the corresponding index in the
   * original (pre-normalization) text.
   *
   * @param {number} matchIndex - Index into the normalized text.
   * @param {Array|null} [diffs] - The `[index, diff]` pairs produced while
   *   normalizing the text, or `null` when normalization did not change any
   *   lengths.
   * @returns {number} The index into the original text. An index that falls
   *   inside a multi-character replacement maps to the single original
   *   character that was replaced.
   */
  function getOriginalIndex(matchIndex, diffs = null) {
    if (!diffs) {
      return matchIndex;
    }

    // Number of extra characters inserted before `matchIndex`.
    let shift = 0;
    for (let k = 0; k < diffs.length; k++) {
      const [originalPos, growth] = diffs[k];
      // Where this replacement starts in the normalized text.
      const normalizedPos = originalPos + shift;

      if (normalizedPos >= matchIndex) {
        // This replacement (and all later ones) starts at or after the index.
        break;
      }
      if (normalizedPos + growth > matchIndex) {
        // The index lies inside this replacement: collapse onto its start.
        shift += matchIndex - normalizedPos;
        break;
      }
      shift += growth;
    }
    return matchIndex - shift;
  }
  /**
   * Searches the text of a PDF document and keeps track of the matches on
   * each page, the currently selected match and the state reported to the UI.
   *
   * Find requests arrive as "find" events on the event bus; results are
   * published through the "updatefindcontrolstate", "updatefindmatchescount"
   * and "updatetextlayermatches" events.
   */
  class PDFFindController {
    /**
     * @param {Object} options
     * @param {Object} options.linkService - Object exposing `page`,
     *   `pagesCount` and `isPageVisible(pageNumber)`.
     * @param {Object} options.eventBus - Object exposing `_on(name, listener)`
     *   and `dispatch(name, data)`.
     */
    constructor({ linkService, eventBus }) {
      this._linkService = linkService;
      this._eventBus = eventBus;

      this._reset();
      eventBus._on("find", this._onFind.bind(this));
      eventBus._on("findbarclose", this._onFindBarClose.bind(this));

      // Backwards-compatibility shim: logs a deprecation error and forwards
      // the call as if a "find" event had been dispatched.
      this.executeCommand = (cmd, state) => {
        console.error("Deprecated method `PDFFindController.executeCommand` called, " + 'please dispatch a "find"-event using the EventBus instead.');
        const eventState = Object.assign(Object.create(null), state, {
          type: cmd.substring("find".length)
        });
        this._onFind(eventState);
      };
    }

    /** @returns {boolean} Whether matches should currently be highlighted. */
    get highlightMatches() {
      return this._highlightMatches;
    }

    /** @returns {Array} Per-page arrays of match start indices. */
    get pageMatches() {
      return this._pageMatches;
    }

    /** @returns {Array} Per-page arrays of match lengths. */
    get pageMatchesLength() {
      return this._pageMatchesLength;
    }

    /** @returns {Object} The selected match as `{ pageIdx, matchIdx }`. */
    get selected() {
      return this._selected;
    }

    /** @returns {Object|null} The state of the most recent find request. */
    get state() {
      return this._state;
    }

    /**
     * Sets (or clears) the document to search. Any previous document's search
     * data is discarded first.
     *
     * @param {Object|null} pdfDocument - The new document, or a falsy value to
     *   only reset.
     */
    setDocument(pdfDocument) {
      if (this._pdfDocument) {
        this._reset();
      }
      if (!pdfDocument) {
        return;
      }
      this._pdfDocument = pdfDocument;
      this._firstPageCapability.resolve();
    }

    /**
     * Handles a find request.
     *
     * @param {Object} state - The request; this class reads `type`, `query`,
     *   `caseSensitive`, `entireWord`, `phraseSearch`, `highlightAll` and
     *   `findPrevious` from it.
     */
    _onFind(state) {
      if (!state) {
        return;
      }
      const pdfDocument = this._pdfDocument;
      const { type } = state;

      if (this._state === null || this._shouldDirtyMatch(state)) {
        this._dirtyMatch = true;
      }
      this._state = state;
      if (type !== "highlightallchange") {
        this._updateUIState(FindState.PENDING);
      }

      this._firstPageCapability.promise.then(() => {
        // Bail out if the document was closed, or replaced, before the search
        // could start.
        if (!this._pdfDocument || (pdfDocument && this._pdfDocument !== pdfDocument)) {
          return;
        }
        this._extractText();

        const findbarClosed = !this._highlightMatches;
        const pendingTimeout = !!this._findTimeout;

        if (this._findTimeout) {
          clearTimeout(this._findTimeout);
          this._findTimeout = null;
        }
        if (!type) {
          // Requests without a type are delayed, so that a burst of them only
          // triggers a single search.
          this._findTimeout = setTimeout(() => {
            this._nextMatch();
            this._findTimeout = null;
          }, FIND_TIMEOUT);
        } else if (this._dirtyMatch) {
          // The previous results are stale; search immediately.
          this._nextMatch();
        } else if (type === "again") {
          this._nextMatch();

          // Highlights were removed when the find bar closed; restore them.
          if (findbarClosed && this._state.highlightAll) {
            this._updateAllPages();
          }
        } else if (type === "highlightallchange") {
          // Run the delayed search (if one was cancelled above), otherwise
          // just make sure highlighting is enabled.
          if (pendingTimeout) {
            this._nextMatch();
          } else {
            this._highlightMatches = true;
          }
          this._updateAllPages();
        } else {
          this._nextMatch();
        }
      });
    }

    /**
     * Scrolls the given element into view, provided it belongs to the
     * currently selected match and a scroll is pending.
     *
     * @param {Object} params
     * @param {Object|null} [params.element] - The element to scroll to.
     * @param {number} [params.selectedLeft] - Added to the horizontal offset.
     * @param {number} [params.pageIndex] - Page index of the match.
     * @param {number} [params.matchIndex] - Index of the match on its page.
     */
    scrollMatchIntoView({
      element = null,
      selectedLeft = 0,
      pageIndex = -1,
      matchIndex = -1
    }) {
      if (!this._scrollMatches || !element) {
        return;
      } else if (matchIndex === -1 || matchIndex !== this._selected.matchIdx) {
        return;
      } else if (pageIndex === -1 || pageIndex !== this._selected.pageIdx) {
        return;
      }
      // Only scroll once per selected match.
      this._scrollMatches = false;

      const spot = {
        top: MATCH_SCROLL_OFFSET_TOP,
        left: selectedLeft + MATCH_SCROLL_OFFSET_LEFT
      };
      (0, _ui_utils.scrollIntoView)(element, spot, true);
    }

    /** Restores every piece of search state to its initial value. */
    _reset() {
      this._highlightMatches = false;
      this._scrollMatches = false;
      this._pdfDocument = null;
      this._pageMatches = [];
      this._pageMatchesLength = [];
      this._state = null;
      // Currently selected match.
      this._selected = {
        pageIdx: -1,
        matchIdx: -1
      };
      // Where the find algorithm currently is in the document.
      this._offset = {
        pageIdx: null,
        matchIdx: null,
        wrapped: false
      };
      this._extractTextPromises = [];
      this._pageContents = []; // Normalized text of each page.
      this._pageDiffs = []; // Normalization diffs of each page.
      this._matchesCountTotal = 0;
      this._pagesToSearch = null;
      this._pendingFindMatches = new Set();
      this._resumePageIdx = null; // Page whose matches the search waits for.
      this._dirtyMatch = false;
      clearTimeout(this._findTimeout);
      this._findTimeout = null;
      this._firstPageCapability = (0, _pdf.createPromiseCapability)();
    }

    /**
     * @returns {string} The normalized query of the current state. The
     *   normalized form is cached until the raw query changes.
     */
    get _query() {
      if (this._state.query !== this._rawQuery) {
        this._rawQuery = this._state.query;
        [this._normalizedQuery] = normalize(this._state.query);
      }
      return this._normalizedQuery;
    }

    /**
     * Decides whether a new find request invalidates the existing matches.
     *
     * @param {Object} state - The incoming find request.
     * @returns {boolean} `true` when the search must be restarted.
     */
    _shouldDirtyMatch(state) {
      // A different query always restarts the search.
      if (state.query !== this._state.query) {
        return true;
      }
      switch (state.type) {
        case "again": {
          const pageNumber = this._selected.pageIdx + 1;
          const linkService = this._linkService;
          // Restart from the current page when the page holding the selected
          // match is neither the current page nor visible.
          if (pageNumber >= 1 && pageNumber <= linkService.pagesCount && pageNumber !== linkService.page && !linkService.isPageVisible(pageNumber)) {
            return true;
          }
          return false;
        }
        case "highlightallchange":
          return false;
      }
      return true;
    }

    /**
     * Sorts the word matches of a page and removes redundant ones, appending
     * the survivors to `matches` / `matchesLength`.
     *
     * @param {Array} matchesWithLength - Objects of the form
     *   `{ match, matchLength, skipped }`; sorted in place and flagged.
     * @param {Array} matches - Receives the start index of each kept match.
     * @param {Array} matchesLength - Receives the length of each kept match.
     */
    _prepareMatches(matchesWithLength, matches, matchesLength) {
      // Returns true (and flags the entry as skipped) when the entry at
      // `currentIndex` is redundant.
      function isSubTerm(currentIndex) {
        const currentElem = matchesWithLength[currentIndex];
        const nextElem = matchesWithLength[currentIndex + 1];

        // The next entry starts at the same position; because of the sort
        // order it is at least as long, so the current one is dropped.
        if (currentIndex < matchesWithLength.length - 1 && currentElem.match === nextElem.match) {
          currentElem.skipped = true;
          return true;
        }

        // Look backwards for a kept entry that fully covers the current one.
        for (let i = currentIndex - 1; i >= 0; i--) {
          const prevElem = matchesWithLength[i];
          if (prevElem.skipped) {
            continue;
          }
          if (prevElem.match + prevElem.matchLength < currentElem.match) {
            break;
          }
          if (prevElem.match + prevElem.matchLength >= currentElem.match + currentElem.matchLength) {
            currentElem.skipped = true;
            return true;
          }
        }
        return false;
      }

      // Sort by start position, then by length.
      matchesWithLength.sort(function (a, b) {
        return a.match === b.match ? a.matchLength - b.matchLength : a.match - b.match;
      });
      for (let i = 0, len = matchesWithLength.length; i < len; i++) {
        if (isSubTerm(i)) {
          continue;
        }
        matches.push(matchesWithLength[i].match);
        matchesLength.push(matchesWithLength[i].matchLength);
      }
    }

    /**
     * Checks whether the given range is delimited, on both sides, by a
     * character of a different type than the adjacent character inside the
     * range (as classified by `getCharacterType`).
     *
     * @param {string} content - The text containing the range.
     * @param {number} startIdx - Start of the range.
     * @param {number} length - Length of the range.
     * @returns {boolean}
     */
    _isEntireWord(content, startIdx, length) {
      if (startIdx > 0) {
        const first = content.charCodeAt(startIdx);
        const limit = content.charCodeAt(startIdx - 1);
        if ((0, _pdf_find_utils.getCharacterType)(first) === (0, _pdf_find_utils.getCharacterType)(limit)) {
          return false;
        }
      }
      const endIdx = startIdx + length - 1;
      if (endIdx < content.length - 1) {
        const last = content.charCodeAt(endIdx);
        const limit = content.charCodeAt(endIdx + 1);
        if ((0, _pdf_find_utils.getCharacterType)(last) === (0, _pdf_find_utils.getCharacterType)(limit)) {
          return false;
        }
      }
      return true;
    }

    /**
     * Finds every non-overlapping occurrence of the whole query on a page and
     * stores the result in `_pageMatches` / `_pageMatchesLength`.
     *
     * @param {string} query - The (normalized) query.
     * @param {number} pageIndex - Index of the page being searched.
     * @param {string} pageContent - The (normalized) page text.
     * @param {Array|null} pageDiffs - Normalization diffs of the page, used to
     *   map positions back to the original text.
     * @param {boolean} entireWord - Only accept whole-word occurrences.
     */
    _calculatePhraseMatch(query, pageIndex, pageContent, pageDiffs, entireWord) {
      const matches = [],
        matchesLength = [];
      const queryLen = query.length;

      let matchIdx = -queryLen;
      while (true) {
        matchIdx = pageContent.indexOf(query, matchIdx + queryLen);
        if (matchIdx === -1) {
          break;
        }
        if (entireWord && !this._isEntireWord(pageContent, matchIdx, queryLen)) {
          continue;
        }
        // Translate start and (inclusive) end back to the original text.
        const originalMatchIdx = getOriginalIndex(matchIdx, pageDiffs),
          matchEnd = matchIdx + queryLen - 1,
          originalQueryLen = getOriginalIndex(matchEnd, pageDiffs) - originalMatchIdx + 1;

        matches.push(originalMatchIdx);
        matchesLength.push(originalQueryLen);
      }
      this._pageMatches[pageIndex] = matches;
      this._pageMatchesLength[pageIndex] = matchesLength;
    }

    /**
     * Finds the occurrences of every whitespace-separated word of the query
     * on a page and stores the de-duplicated result in `_pageMatches` /
     * `_pageMatchesLength`.
     *
     * @param {string} query - The (normalized) query.
     * @param {number} pageIndex - Index of the page being searched.
     * @param {string} pageContent - The (normalized) page text.
     * @param {Array|null} pageDiffs - Normalization diffs of the page.
     * @param {boolean} entireWord - Only accept whole-word occurrences.
     */
    _calculateWordMatch(query, pageIndex, pageContent, pageDiffs, entireWord) {
      const matchesWithLength = [];

      // `String.prototype.match` returns `null` (not an empty array) when a
      // query consists solely of whitespace; treat that as "no words".
      const queryArray = query.match(/\S+/g) || [];
      for (let i = 0, len = queryArray.length; i < len; i++) {
        const subquery = queryArray[i];
        const subqueryLen = subquery.length;

        let matchIdx = -subqueryLen;
        while (true) {
          matchIdx = pageContent.indexOf(subquery, matchIdx + subqueryLen);
          if (matchIdx === -1) {
            break;
          }
          if (entireWord && !this._isEntireWord(pageContent, matchIdx, subqueryLen)) {
            continue;
          }
          // Translate start and (inclusive) end back to the original text.
          const originalMatchIdx = getOriginalIndex(matchIdx, pageDiffs),
            matchEnd = matchIdx + subqueryLen - 1,
            originalQueryLen = getOriginalIndex(matchEnd, pageDiffs) - originalMatchIdx + 1;

          matchesWithLength.push({
            match: originalMatchIdx,
            matchLength: originalQueryLen,
            skipped: false
          });
        }
      }

      this._pageMatchesLength[pageIndex] = [];
      this._pageMatches[pageIndex] = [];
      this._prepareMatches(matchesWithLength, this._pageMatches[pageIndex], this._pageMatchesLength[pageIndex]);
    }

    /**
     * Computes the matches of the current query on one page, then refreshes
     * the page highlights, resumes a search waiting for this page and updates
     * the total match count.
     *
     * @param {number} pageIndex - Index of the page to search.
     */
    _calculateMatch(pageIndex) {
      let pageContent = this._pageContents[pageIndex];
      const pageDiffs = this._pageDiffs[pageIndex];
      let query = this._query;
      const { caseSensitive, entireWord, phraseSearch } = this._state;

      if (query.length === 0) {
        // Nothing to search for.
        return;
      }
      if (!caseSensitive) {
        pageContent = pageContent.toLowerCase();
        query = query.toLowerCase();
      }

      if (phraseSearch) {
        this._calculatePhraseMatch(query, pageIndex, pageContent, pageDiffs, entireWord);
      } else {
        this._calculateWordMatch(query, pageIndex, pageContent, pageDiffs, entireWord);
      }

      if (this._state.highlightAll) {
        this._updatePage(pageIndex);
      }
      // The search was paused waiting for this page; continue it.
      if (this._resumePageIdx === pageIndex) {
        this._resumePageIdx = null;
        this._nextPageMatch();
      }

      const pageMatchesCount = this._pageMatches[pageIndex].length;
      if (pageMatchesCount > 0) {
        this._matchesCountTotal += pageMatchesCount;
        this._updateUIResultsCount();
      }
    }

    /**
     * Starts extracting the text of every page, one page after the other.
     * `_extractTextPromises[i]` resolves with `i` once page `i` has been
     * processed; a page whose text cannot be read gets empty content. Does
     * nothing if extraction has already been started.
     */
    _extractText() {
      if (this._extractTextPromises.length > 0) {
        return;
      }

      let promise = Promise.resolve();
      for (let i = 0, ii = this._linkService.pagesCount; i < ii; i++) {
        const extractTextCapability = (0, _pdf.createPromiseCapability)();
        this._extractTextPromises[i] = extractTextCapability.promise;

        // Chain onto the previous page so pages are processed in order.
        promise = promise.then(() => {
          return this._pdfDocument.getPage(i + 1).then(pdfPage => {
            return pdfPage.getTextContent({
              normalizeWhitespace: true
            });
          }).then(textContent => {
            const textItems = textContent.items;
            const strBuf = [];
            for (let j = 0, jj = textItems.length; j < jj; j++) {
              strBuf.push(textItems[j].str);
            }
            // Store the normalized text together with its diffs.
            [this._pageContents[i], this._pageDiffs[i]] = normalize(strBuf.join(""));
            extractTextCapability.resolve(i);
          }, reason => {
            console.error(`Unable to get text content for page ${i + 1}`, reason);
            // Page error -- treat the page as having no text.
            this._pageContents[i] = "";
            this._pageDiffs[i] = null;
            extractTextCapability.resolve(i);
          });
        });
      }
    }

    /**
     * Requests a refresh of the match highlights on one page. If a scroll is
     * pending and the page holds the selected match, the page is made current
     * first.
     *
     * @param {number} index - Index of the page to refresh.
     */
    _updatePage(index) {
      if (this._scrollMatches && this._selected.pageIdx === index) {
        this._linkService.page = index + 1;
      }
      this._eventBus.dispatch("updatetextlayermatches", {
        source: this,
        pageIndex: index
      });
    }

    /** Requests a refresh of the match highlights on every page. */
    _updateAllPages() {
      this._eventBus.dispatch("updatetextlayermatches", {
        source: this,
        pageIndex: -1
      });
    }

    /**
     * Moves the selection to the next (or previous) match. When the current
     * matches are dirty, all search results are discarded and the per-page
     * match calculation is scheduled again first.
     */
    _nextMatch() {
      const previous = this._state.findPrevious;
      const currentPageIndex = this._linkService.page - 1;
      const numPages = this._linkService.pagesCount;

      this._highlightMatches = true;

      if (this._dirtyMatch) {
        // Restart the search from the current page.
        this._dirtyMatch = false;
        this._selected.pageIdx = this._selected.matchIdx = -1;
        this._offset.pageIdx = currentPageIndex;
        this._offset.matchIdx = null;
        this._offset.wrapped = false;
        this._resumePageIdx = null;
        this._pageMatches.length = 0;
        this._pageMatchesLength.length = 0;
        this._matchesCountTotal = 0;

        this._updateAllPages(); // Wipe the existing highlights.

        for (let i = 0; i < numPages; i++) {
          // Skip pages whose match calculation is already scheduled.
          if (this._pendingFindMatches.has(i)) {
            continue;
          }
          this._pendingFindMatches.add(i);
          this._extractTextPromises[i].then(pageIdx => {
            this._pendingFindMatches.delete(pageIdx);
            this._calculateMatch(pageIdx);
          });
        }
      }

      // An empty query never has matches; report that right away.
      if (this._query === "") {
        this._updateUIState(FindState.FOUND);
        return;
      }
      // Still waiting for a page's matches; `_calculateMatch` resumes the
      // search. Compare against `null` explicitly, since page index 0 is a
      // valid (but falsy) pending page.
      if (this._resumePageIdx !== null) {
        return;
      }

      const offset = this._offset;
      // Number of pages that may be visited before giving up.
      this._pagesToSearch = numPages;
      if (offset.matchIdx !== null) {
        // A match is already selected; try to stay on the same page.
        const numPageMatches = this._pageMatches[offset.pageIdx].length;
        if ((!previous && offset.matchIdx + 1 < numPageMatches) || (previous && offset.matchIdx > 0)) {
          offset.matchIdx = previous ? offset.matchIdx - 1 : offset.matchIdx + 1;
          this._updateMatch(true);
          return;
        }
        // No further match on this page; move on to the adjacent page.
        this._advanceOffsetPage(previous);
      }
      this._nextPageMatch();
    }

    /**
     * Processes the matches of the page at the current offset.
     *
     * @param {Array} matches - Match start indices of that page.
     * @returns {boolean} `true` when the search is finished (a match was
     *   selected, or every page was visited without success); `false` when
     *   the next page has to be examined.
     */
    _matchesReady(matches) {
      const offset = this._offset;
      const numMatches = matches.length;
      const previous = this._state.findPrevious;

      if (numMatches) {
        // Select the first (or, searching backwards, the last) match.
        offset.matchIdx = previous ? numMatches - 1 : 0;
        this._updateMatch(true);
        return true;
      }
      // No matches on this page; try the next one.
      this._advanceOffsetPage(previous);
      if (offset.wrapped) {
        offset.matchIdx = null;
        if (this._pagesToSearch < 0) {
          // Every page was visited; there are no matches at all.
          this._updateMatch(false);
          return true;
        }
      }
      return false;
    }

    /**
     * Walks through the pages, starting at the current offset, until a match
     * is selected, the search is exhausted, or a page is reached whose
     * matches have not been calculated yet (the search then pauses and is
     * resumed by `_calculateMatch`).
     */
    _nextPageMatch() {
      if (this._resumePageIdx !== null) {
        console.error("There can only be one pending page.");
      }

      let matches = null;
      do {
        const pageIdx = this._offset.pageIdx;
        matches = this._pageMatches[pageIdx];
        if (!matches) {
          // The matches of this page are not available yet; wait for them.
          this._resumePageIdx = pageIdx;
          break;
        }
      } while (!this._matchesReady(matches));
    }

    /**
     * Moves the search offset to the adjacent page, wrapping around at either
     * end of the document.
     *
     * @param {boolean} previous - Move backwards instead of forwards.
     */
    _advanceOffsetPage(previous) {
      const offset = this._offset;
      const numPages = this._linkService.pagesCount;
      offset.pageIdx = previous ? offset.pageIdx - 1 : offset.pageIdx + 1;
      offset.matchIdx = null;

      this._pagesToSearch--;

      if (offset.pageIdx >= numPages || offset.pageIdx < 0) {
        offset.pageIdx = previous ? numPages - 1 : 0;
        offset.wrapped = true;
      }
    }

    /**
     * Finalizes a search step: updates the selection (when a match was
     * found), reports the resulting state to the UI and refreshes the
     * affected pages.
     *
     * @param {boolean} [found] - Whether a match was found.
     */
    _updateMatch(found = false) {
      let state = FindState.NOT_FOUND;
      const wrapped = this._offset.wrapped;
      this._offset.wrapped = false;

      if (found) {
        const previousPage = this._selected.pageIdx;
        this._selected.pageIdx = this._offset.pageIdx;
        this._selected.matchIdx = this._offset.matchIdx;
        state = wrapped ? FindState.WRAPPED : FindState.FOUND;

        // Refresh the page that held the previous selection.
        if (previousPage !== -1 && previousPage !== this._selected.pageIdx) {
          this._updatePage(previousPage);
        }
      }

      this._updateUIState(state, this._state.findPrevious);
      if (this._selected.pageIdx !== -1) {
        // Make sure the selected match gets scrolled into view.
        this._scrollMatches = true;
        this._updatePage(this._selected.pageIdx);
      }
    }

    /**
     * Handles the find bar being closed: cancels pending work, reports a
     * neutral state to the UI and removes the highlights.
     *
     * @param {Object} evt - The "findbarclose" event (unused).
     */
    _onFindBarClose(evt) {
      const pdfDocument = this._pdfDocument;

      this._firstPageCapability.promise.then(() => {
        // Bail out if the document was closed, or replaced, in the meantime.
        if (!this._pdfDocument || (pdfDocument && this._pdfDocument !== pdfDocument)) {
          return;
        }
        // Cancel a delayed search.
        if (this._findTimeout) {
          clearTimeout(this._findTimeout);
          this._findTimeout = null;
        }
        // Abandon a search waiting for a page; mark the matches dirty so that
        // a later "again" request starts over. Compare against `null`
        // explicitly, since page index 0 is a valid (but falsy) pending page.
        if (this._resumePageIdx !== null) {
          this._resumePageIdx = null;
          this._dirtyMatch = true;
        }
        this._updateUIState(FindState.FOUND);

        this._highlightMatches = false;
        this._updateAllPages(); // Wipe the highlights.
      });
    }

    /**
     * @returns {Object} `{ current, total }`, where `current` is the 1-based
     *   position of the selected match among all matches and `total` is the
     *   number of matches found so far. Both are 0 when no match is selected
     *   or the values are inconsistent.
     */
    _requestMatchesCount() {
      const { pageIdx, matchIdx } = this._selected;
      let current = 0,
        total = this._matchesCountTotal;

      if (matchIdx !== -1) {
        // Count the matches on all pages before the selected one.
        for (let i = 0; i < pageIdx; i++) {
          current += this._pageMatches[i]?.length || 0;
        }
        current += matchIdx + 1;
      }
      // Results of earlier pages may still be missing while the search is in
      // progress; do not report an out-of-range position.
      if (current < 1 || current > total) {
        current = total = 0;
      }
      return {
        current,
        total
      };
    }

    /** Publishes the current match counts to the UI. */
    _updateUIResultsCount() {
      this._eventBus.dispatch("updatefindmatchescount", {
        source: this,
        matchesCount: this._requestMatchesCount()
      });
    }

    /**
     * Publishes the search state to the UI.
     *
     * @param {number} state - One of the `FindState` values.
     * @param {boolean} [previous] - Whether the search ran backwards.
     */
    _updateUIState(state, previous = false) {
      this._eventBus.dispatch("updatefindcontrolstate", {
        source: this,
        state,
        previous,
        matchesCount: this._requestMatchesCount(),
        rawQuery: this._state?.query ?? null
      });
    }
  }
  602. exports.PDFFindController = PDFFindController;