pdf_find_controller.js 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * Javascript code in this page
  4. *
  5. * Copyright 2021 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * Javascript code in this page
  21. */
  22. "use strict";
  23. Object.defineProperty(exports, "__esModule", {
  24. value: true
  25. });
  26. exports.PDFFindController = exports.FindState = void 0;
  27. var _pdf = require("../pdf");
  28. var _pdf_find_utils = require("./pdf_find_utils.js");
  29. var _ui_utils = require("./ui_utils.js");
  /**
   * States reported to the UI through the "updatefindcontrolstate" event.
   *
   * @enum {number}
   */
  const FindState = {
    // A match was selected without wrapping around the document.
    FOUND: 0,
    // The whole document was searched and nothing matched.
    NOT_FOUND: 1,
    // A match was selected after the search wrapped past the first/last page.
    WRAPPED: 2,
    // A find command was received and its result is not known yet.
    PENDING: 3,
  };
  36. exports.FindState = FindState;
  // Delay, in milliseconds, passed to `setTimeout` before a "find" command
  // actually starts searching.
  const FIND_TIMEOUT = 250;

  // Offsets placed in the `spot` object handed to `scrollIntoView` when the
  // selected match is scrolled into view.
  const MATCH_SCROLL_OFFSET_TOP = -50;
  const MATCH_SCROLL_OFFSET_LEFT = -400;
  /**
   * Characters that are replaced by an ASCII equivalent before searching, so
   * that typographic variants in the page text or the query still match.
   */
  const CHARACTERS_TO_NORMALIZE = {
    "\u2010": "-", // Hyphen
    "\u2018": "'", // Left single quotation mark
    "\u2019": "'", // Right single quotation mark
    "\u201A": "'", // Single low-9 quotation mark
    "\u201B": "'", // Single high-reversed-9 quotation mark
    "\u201C": '"', // Left double quotation mark
    "\u201D": '"', // Right double quotation mark
    "\u201E": '"', // Double low-9 quotation mark
    "\u201F": '"', // Double high-reversed-9 quotation mark
    "\u00BC": "1/4", // Vulgar fraction one quarter
    "\u00BD": "1/2", // Vulgar fraction one half
    "\u00BE": "3/4", // Vulgar fraction three quarters
  };

  // Character-class regex over the keys above; created on the first
  // `normalize` call and reused afterwards.
  let normalizationRegex = null;

  /**
   * Replaces every character listed in `CHARACTERS_TO_NORMALIZE` with its
   * ASCII equivalent.
   *
   * @param {string} text - The text to normalize.
   * @returns {Array} A two-element array `[normalizedText, diffs]`. `diffs`
   *   is `null` when no replacement changed the text length; otherwise it is
   *   a list of `[index, diff]` pairs, where `index` is the position of the
   *   replaced character in `text` and `diff` is how many characters longer
   *   the replacement is.
   */
  function normalize(text) {
    if (normalizationRegex === null) {
      const charClass = Object.keys(CHARACTERS_TO_NORMALIZE).join("");
      normalizationRegex = new RegExp(`[${charClass}]`, "g");
    }

    const lengthChanges = [];
    const normalizedText = text.replace(
      normalizationRegex,
      (original, offset) => {
        const replacement = CHARACTERS_TO_NORMALIZE[original];
        const delta = replacement.length - original.length;
        // Only length-changing replacements shift later indices.
        if (delta !== 0) {
          lengthChanges.push([offset, delta]);
        }
        return replacement;
      }
    );

    return [normalizedText, lengthChanges.length > 0 ? lengthChanges : null];
  }
  /**
   * Maps an index in normalized text back to the corresponding index in the
   * text before normalization.
   *
   * @param {number} matchIndex - Index into the normalized text.
   * @param {Array|null} [diffs] - `[index, diff]` pairs as produced by
   *   `normalize`, or `null` when normalization did not change any lengths.
   * @returns {number} The index into the original text. An index that falls
   *   inside an expanded replacement maps to the replaced character.
   */
  function getOriginalIndex(matchIndex, diffs = null) {
    if (!diffs) {
      return matchIndex;
    }

    // Number of extra characters the normalized text has before `matchIndex`.
    let shift = 0;
    for (let i = 0; i < diffs.length; i++) {
      const growth = diffs[i][1];
      // Position of this replacement in the normalized text.
      const normalizedPos = diffs[i][0] + shift;

      if (normalizedPos >= matchIndex) {
        break;
      }
      if (normalizedPos + growth > matchIndex) {
        // `matchIndex` lies inside this replacement: only count the part of
        // the expansion that precedes it.
        shift += matchIndex - normalizedPos;
        break;
      }
      shift += growth;
    }
    return matchIndex - shift;
  }
  /**
   * Searches the text of a PDF document for a query, keeps track of the
   * matches found on every page and of the currently selected match, and
   * reports progress to the UI through events dispatched on the event bus.
   */
  class PDFFindController {
    /**
     * @param {Object} options
     * @param {Object} options.linkService - Read for `page`, `pagesCount` and
     *   `isPageVisible`; `page` is written when scrolling to a match.
     * @param {Object} options.eventBus - Used to dispatch UI updates and to
     *   listen for the "findbarclose" event.
     */
    constructor({ linkService, eventBus }) {
      this._linkService = linkService;
      this._eventBus = eventBus;

      this._reset();
      eventBus._on("findbarclose", this._onFindBarClose.bind(this));
    }

    /** @returns {boolean} Whether matches should currently be highlighted. */
    get highlightMatches() {
      return this._highlightMatches;
    }

    /** @returns {Array} Per-page arrays of match start indices. */
    get pageMatches() {
      return this._pageMatches;
    }

    /** @returns {Array} Per-page arrays of match lengths. */
    get pageMatchesLength() {
      return this._pageMatchesLength;
    }

    /** @returns {Object} The selected match as `{ pageIdx, matchIdx }`. */
    get selected() {
      return this._selected;
    }

    /** @returns {Object|null} The state passed to the last find command. */
    get state() {
      return this._state;
    }

    /**
     * Sets the document to search, resetting all state of a previous one.
     *
     * @param {Object|null} pdfDocument - The document, or a falsy value to
     *   only reset.
     */
    setDocument(pdfDocument) {
      if (this._pdfDocument) {
        this._reset();
      }
      if (!pdfDocument) {
        return;
      }
      this._pdfDocument = pdfDocument;
      this._firstPageCapability.resolve();
    }

    /**
     * Runs a find command once a document is available.
     *
     * @param {string} cmd - The command; "find", "findagain" and
     *   "findhighlightallchange" are handled specially.
     * @param {Object} state - The find state; read for `query`,
     *   `caseSensitive`, `entireWord`, `phraseSearch`, `highlightAll` and
     *   `findPrevious`. Nothing happens when it is falsy.
     */
    executeCommand(cmd, state) {
      if (!state) {
        return;
      }
      const pdfDocument = this._pdfDocument;

      if (this._state === null || this._shouldDirtyMatch(cmd, state)) {
        this._dirtyMatch = true;
      }
      this._state = state;
      if (cmd !== "findhighlightallchange") {
        this._updateUIState(FindState.PENDING);
      }

      this._firstPageCapability.promise.then(() => {
        // Ignore the command if the document was closed or replaced while
        // waiting for it to become available.
        if (
          !this._pdfDocument ||
          (pdfDocument && this._pdfDocument !== pdfDocument)
        ) {
          return;
        }
        this._extractText();

        const findbarClosed = !this._highlightMatches;
        const pendingTimeout = !!this._findTimeout;

        if (this._findTimeout) {
          clearTimeout(this._findTimeout);
          this._findTimeout = null;
        }
        if (cmd === "find") {
          // Delay the search so that a newer command arriving within
          // FIND_TIMEOUT can cancel it (the timeout is cleared above).
          this._findTimeout = setTimeout(() => {
            this._nextMatch();
            this._findTimeout = null;
          }, FIND_TIMEOUT);
        } else if (this._dirtyMatch) {
          // The matches must be recomputed: search right away.
          this._nextMatch();
        } else if (cmd === "findagain") {
          this._nextMatch();

          // Highlights were removed when the findbar closed; restore them.
          if (findbarClosed && this._state.highlightAll) {
            this._updateAllPages();
          }
        } else if (cmd === "findhighlightallchange") {
          // Run the search that the cleared timeout would have started.
          if (pendingTimeout) {
            this._nextMatch();
          } else {
            this._highlightMatches = true;
          }
          this._updateAllPages();
        } else {
          this._nextMatch();
        }
      });
    }

    /**
     * Scrolls the given element into view if it belongs to the currently
     * selected match and a scroll is pending.
     *
     * @param {Object} params
     * @param {Object|null} [params.element] - Element to scroll to.
     * @param {number} [params.selectedLeft] - Added to the left scroll offset.
     * @param {number} [params.pageIndex] - Page index of the match.
     * @param {number} [params.matchIndex] - Index of the match on its page.
     */
    scrollMatchIntoView({
      element = null,
      selectedLeft = 0,
      pageIndex = -1,
      matchIndex = -1,
    }) {
      if (!this._scrollMatches || !element) {
        return;
      } else if (matchIndex === -1 || matchIndex !== this._selected.matchIdx) {
        return;
      } else if (pageIndex === -1 || pageIndex !== this._selected.pageIdx) {
        return;
      }
      // Scroll only once per selected match.
      this._scrollMatches = false;

      const spot = {
        top: MATCH_SCROLL_OFFSET_TOP,
        left: selectedLeft + MATCH_SCROLL_OFFSET_LEFT,
      };
      (0, _ui_utils.scrollIntoView)(element, spot, true);
    }

    /** Restores every field to its initial value and cancels a pending find. */
    _reset() {
      this._highlightMatches = false;
      this._scrollMatches = false;
      this._pdfDocument = null;
      this._pageMatches = [];
      this._pageMatchesLength = [];
      this._state = null;
      // The currently selected match.
      this._selected = {
        pageIdx: -1,
        matchIdx: -1,
      };
      // Where the search for the next match currently is.
      this._offset = {
        pageIdx: null,
        matchIdx: null,
        wrapped: false,
      };
      this._extractTextPromises = [];
      this._pageContents = []; // Normalized text of each page.
      this._pageDiffs = []; // Length changes from normalizing each page.
      this._matchesCountTotal = 0;
      this._pagesToSearch = null;
      this._pendingFindMatches = new Set();
      this._resumePageIdx = null;
      this._dirtyMatch = false;
      clearTimeout(this._findTimeout);
      this._findTimeout = null;

      this._firstPageCapability = (0, _pdf.createPromiseCapability)();
    }

    /**
     * @returns {string} The normalized query; the normalization is cached and
     *   recomputed only when `this._state.query` changes.
     */
    get _query() {
      if (this._state.query !== this._rawQuery) {
        this._rawQuery = this._state.query;
        [this._normalizedQuery] = normalize(this._state.query);
      }
      return this._normalizedQuery;
    }

    /**
     * Decides whether the existing matches must be discarded for a command.
     *
     * @param {string} cmd - The find command.
     * @param {Object} state - The new find state.
     * @returns {boolean} `true` when the matches must be recomputed.
     */
    _shouldDirtyMatch(cmd, state) {
      // A different query always invalidates the matches.
      if (state.query !== this._state.query) {
        return true;
      }
      switch (cmd) {
        case "findagain": {
          const pageNumber = this._selected.pageIdx + 1;
          const linkService = this._linkService;

          // Restart the search when the page holding the selected match is
          // valid but is neither the current page nor visible.
          if (
            pageNumber >= 1 &&
            pageNumber <= linkService.pagesCount &&
            pageNumber !== linkService.page &&
            !linkService.isPageVisible(pageNumber)
          ) {
            return true;
          }
          return false;
        }
        case "findhighlightallchange":
          return false;
      }
      return true;
    }

    /**
     * Sorts the matches of a multi-term search and copies them into `matches`
     * and `matchesLength`, leaving out matches that start at the same index
     * as the following one or that are covered by an earlier kept match.
     *
     * @param {Array<Object>} matchesWithLength - Objects with `match`,
     *   `matchLength` and `skipped`; sorted and flagged in place.
     * @param {Array<number>} matches - Receives the start indices.
     * @param {Array<number>} matchesLength - Receives the lengths.
     */
    _prepareMatches(matchesWithLength, matches, matchesLength) {
      function isSubTerm(currentIndex) {
        const currentElem = matchesWithLength[currentIndex];
        const nextElem = matchesWithLength[currentIndex + 1];

        // Same start as the next element; the next one is at least as long
        // because of the sort order below.
        if (
          currentIndex < matchesWithLength.length - 1 &&
          currentElem.match === nextElem.match
        ) {
          currentElem.skipped = true;
          return true;
        }

        // Look for a kept earlier element that covers the current one.
        for (let i = currentIndex - 1; i >= 0; i--) {
          const prevElem = matchesWithLength[i];
          if (prevElem.skipped) {
            continue;
          }
          if (prevElem.match + prevElem.matchLength < currentElem.match) {
            break;
          }
          if (
            prevElem.match + prevElem.matchLength >=
            currentElem.match + currentElem.matchLength
          ) {
            currentElem.skipped = true;
            return true;
          }
        }
        return false;
      }

      // Sort by start index, then by length.
      matchesWithLength.sort(function (a, b) {
        return a.match === b.match
          ? a.matchLength - b.matchLength
          : a.match - b.match;
      });
      for (let i = 0, len = matchesWithLength.length; i < len; i++) {
        if (isSubTerm(i)) {
          continue;
        }
        matches.push(matchesWithLength[i].match);
        matchesLength.push(matchesWithLength[i].matchLength);
      }
    }

    /**
     * Checks whether a match is delimited on both sides by characters of a
     * different type than the characters at its edges.
     *
     * @param {string} content - The text containing the match.
     * @param {number} startIdx - Start index of the match.
     * @param {number} length - Length of the match.
     * @returns {boolean}
     */
    _isEntireWord(content, startIdx, length) {
      if (startIdx > 0) {
        const first = content.charCodeAt(startIdx);
        const limit = content.charCodeAt(startIdx - 1);
        if (
          (0, _pdf_find_utils.getCharacterType)(first) ===
          (0, _pdf_find_utils.getCharacterType)(limit)
        ) {
          return false;
        }
      }
      const endIdx = startIdx + length - 1;
      if (endIdx < content.length - 1) {
        const last = content.charCodeAt(endIdx);
        const limit = content.charCodeAt(endIdx + 1);
        if (
          (0, _pdf_find_utils.getCharacterType)(last) ===
          (0, _pdf_find_utils.getCharacterType)(limit)
        ) {
          return false;
        }
      }
      return true;
    }

    /**
     * Finds all non-overlapping occurrences of the whole query on a page and
     * stores them in `_pageMatches` / `_pageMatchesLength`.
     *
     * @param {string} query - The normalized query.
     * @param {number} pageIndex - Index of the page.
     * @param {string} pageContent - The normalized page text.
     * @param {Array|null} pageDiffs - Normalization diffs of the page.
     * @param {boolean} entireWord - Keep only whole-word matches.
     */
    _calculatePhraseMatch(query, pageIndex, pageContent, pageDiffs, entireWord) {
      const matches = [],
        matchesLength = [];
      const queryLen = query.length;

      let matchIdx = -queryLen;
      while (true) {
        matchIdx = pageContent.indexOf(query, matchIdx + queryLen);
        if (matchIdx === -1) {
          break;
        }
        if (entireWord && !this._isEntireWord(pageContent, matchIdx, queryLen)) {
          continue;
        }
        // Translate the position and length back to the original text.
        const originalMatchIdx = getOriginalIndex(matchIdx, pageDiffs),
          matchEnd = matchIdx + queryLen - 1,
          originalQueryLen =
            getOriginalIndex(matchEnd, pageDiffs) - originalMatchIdx + 1;

        matches.push(originalMatchIdx);
        matchesLength.push(originalQueryLen);
      }
      this._pageMatches[pageIndex] = matches;
      this._pageMatchesLength[pageIndex] = matchesLength;
    }

    /**
     * Finds the occurrences of every whitespace-separated term of the query
     * on a page and stores the merged result in `_pageMatches` /
     * `_pageMatchesLength`.
     *
     * @param {string} query - The normalized query.
     * @param {number} pageIndex - Index of the page.
     * @param {string} pageContent - The normalized page text.
     * @param {Array|null} pageDiffs - Normalization diffs of the page.
     * @param {boolean} entireWord - Keep only whole-word matches.
     */
    _calculateWordMatch(query, pageIndex, pageContent, pageDiffs, entireWord) {
      const matchesWithLength = [];

      // `match` returns `null` for a whitespace-only query; treat that as a
      // query without terms instead of throwing.
      const queryArray = query.match(/\S+/g) ?? [];
      for (let i = 0, len = queryArray.length; i < len; i++) {
        const subquery = queryArray[i];
        const subqueryLen = subquery.length;

        let matchIdx = -subqueryLen;
        while (true) {
          matchIdx = pageContent.indexOf(subquery, matchIdx + subqueryLen);
          if (matchIdx === -1) {
            break;
          }
          if (
            entireWord &&
            !this._isEntireWord(pageContent, matchIdx, subqueryLen)
          ) {
            continue;
          }
          // Translate the position and length back to the original text.
          const originalMatchIdx = getOriginalIndex(matchIdx, pageDiffs),
            matchEnd = matchIdx + subqueryLen - 1,
            originalQueryLen =
              getOriginalIndex(matchEnd, pageDiffs) - originalMatchIdx + 1;

          matchesWithLength.push({
            match: originalMatchIdx,
            matchLength: originalQueryLen,
            skipped: false,
          });
        }
      }

      this._pageMatchesLength[pageIndex] = [];
      this._pageMatches[pageIndex] = [];

      this._prepareMatches(
        matchesWithLength,
        this._pageMatches[pageIndex],
        this._pageMatchesLength[pageIndex]
      );
    }

    /**
     * Computes the matches of one page for the current state, then updates
     * the page, a waiting search and the total match count as needed.
     *
     * @param {number} pageIndex - Index of the page.
     */
    _calculateMatch(pageIndex) {
      let pageContent = this._pageContents[pageIndex];
      const pageDiffs = this._pageDiffs[pageIndex];
      let query = this._query;
      const { caseSensitive, entireWord, phraseSearch } = this._state;

      if (query.length === 0) {
        return;
      }

      if (!caseSensitive) {
        pageContent = pageContent.toLowerCase();
        query = query.toLowerCase();
      }

      if (phraseSearch) {
        this._calculatePhraseMatch(
          query,
          pageIndex,
          pageContent,
          pageDiffs,
          entireWord
        );
      } else {
        this._calculateWordMatch(
          query,
          pageIndex,
          pageContent,
          pageDiffs,
          entireWord
        );
      }

      if (this._state.highlightAll) {
        this._updatePage(pageIndex);
      }
      // The search was waiting for exactly this page: continue it.
      if (this._resumePageIdx === pageIndex) {
        this._resumePageIdx = null;
        this._nextPageMatch();
      }

      const pageMatchesCount = this._pageMatches[pageIndex].length;
      if (pageMatchesCount > 0) {
        this._matchesCountTotal += pageMatchesCount;
        this._updateUIResultsCount();
      }
    }

    /**
     * Starts extracting the text of all pages, one page after the other.
     * `_extractTextPromises[i]` resolves with `i` once the text of page `i`
     * is stored; a page whose text cannot be read gets an empty string. Does
     * nothing if the extraction was already started.
     */
    _extractText() {
      if (this._extractTextPromises.length > 0) {
        return;
      }

      let promise = Promise.resolve();
      for (let i = 0, ii = this._linkService.pagesCount; i < ii; i++) {
        const extractTextCapability = (0, _pdf.createPromiseCapability)();
        this._extractTextPromises[i] = extractTextCapability.promise;

        // Chaining on `promise` makes the pages load sequentially.
        promise = promise.then(() => {
          return this._pdfDocument
            .getPage(i + 1)
            .then(pdfPage => {
              return pdfPage.getTextContent({
                normalizeWhitespace: true,
              });
            })
            .then(
              textContent => {
                const textItems = textContent.items;
                const strBuf = [];

                for (let j = 0, jj = textItems.length; j < jj; j++) {
                  strBuf.push(textItems[j].str);
                }

                [this._pageContents[i], this._pageDiffs[i]] = normalize(
                  strBuf.join("")
                );
                extractTextCapability.resolve(i);
              },
              reason => {
                console.error(
                  `Unable to get text content for page ${i + 1}`,
                  reason
                );
                // Resolve with empty text so the search is not blocked.
                this._pageContents[i] = "";
                this._pageDiffs[i] = null;
                extractTextCapability.resolve(i);
              }
            );
        });
      }
    }

    /**
     * Requests a redraw of the matches of one page; if a scroll is pending
     * and the page holds the selected match, it is made the current page.
     *
     * @param {number} index - Index of the page.
     */
    _updatePage(index) {
      if (this._scrollMatches && this._selected.pageIdx === index) {
        this._linkService.page = index + 1;
      }

      this._eventBus.dispatch("updatetextlayermatches", {
        source: this,
        pageIndex: index,
      });
    }

    /** Requests a redraw of the matches of all pages (`pageIndex: -1`). */
    _updateAllPages() {
      this._eventBus.dispatch("updatetextlayermatches", {
        source: this,
        pageIndex: -1,
      });
    }

    /**
     * Selects the next (or previous) match, recomputing all page matches
     * first when they were marked dirty.
     */
    _nextMatch() {
      const previous = this._state.findPrevious;
      const currentPageIndex = this._linkService.page - 1;
      const numPages = this._linkService.pagesCount;

      this._highlightMatches = true;

      if (this._dirtyMatch) {
        // Start a new search from the current page.
        this._dirtyMatch = false;
        this._selected.pageIdx = this._selected.matchIdx = -1;
        this._offset.pageIdx = currentPageIndex;
        this._offset.matchIdx = null;
        this._offset.wrapped = false;
        this._resumePageIdx = null;
        this._pageMatches.length = 0;
        this._pageMatchesLength.length = 0;
        this._matchesCountTotal = 0;

        this._updateAllPages(); // Wipe out any previous highlights.

        for (let i = 0; i < numPages; i++) {
          // Skip pages that already have a pending computation.
          if (this._pendingFindMatches.has(i)) {
            continue;
          }
          this._pendingFindMatches.add(i);
          this._extractTextPromises[i].then(pageIdx => {
            this._pendingFindMatches.delete(pageIdx);
            this._calculateMatch(pageIdx);
          });
        }
      }

      // An empty query matches nothing; just report a neutral state.
      if (this._query === "") {
        this._updateUIState(FindState.FOUND);
        return;
      }
      // Still waiting for a page; `_calculateMatch` resumes the search.
      // Compared against `null` because page index 0 is a valid value.
      if (this._resumePageIdx !== null) {
        return;
      }

      const offset = this._offset;
      // Number of pages left to visit before giving up.
      this._pagesToSearch = numPages;
      if (offset.matchIdx !== null) {
        const numPageMatches = this._pageMatches[offset.pageIdx].length;
        if (
          (!previous && offset.matchIdx + 1 < numPageMatches) ||
          (previous && offset.matchIdx > 0)
        ) {
          // The same page has another match in the search direction.
          offset.matchIdx = previous ? offset.matchIdx - 1 : offset.matchIdx + 1;
          this._updateMatch(true);
          return;
        }
        // No more matches on this page; move on to the adjacent page.
        this._advanceOffsetPage(previous);
      }
      this._nextPageMatch();
    }

    /**
     * Handles the computed matches of the page at the current offset.
     *
     * @param {Array<number>} matches - The matches of that page.
     * @returns {boolean} `true` when the search is finished (a match was
     *   selected or every page was searched), `false` to go on with the page
     *   the offset was advanced to.
     */
    _matchesReady(matches) {
      const offset = this._offset;
      const numMatches = matches.length;
      const previous = this._state.findPrevious;

      if (numMatches) {
        // Select the first match, or the last one when searching backwards.
        offset.matchIdx = previous ? numMatches - 1 : 0;
        this._updateMatch(true);
        return true;
      }
      this._advanceOffsetPage(previous);
      if (offset.wrapped) {
        offset.matchIdx = null;
        if (this._pagesToSearch < 0) {
          // Every page was visited without finding a match.
          this._updateMatch(false);
          return true;
        }
      }
      return false;
    }

    /**
     * Walks the pages from the current offset until a page settles the
     * search, or until a page without computed matches is reached; that page
     * is then recorded in `_resumePageIdx`.
     */
    _nextPageMatch() {
      if (this._resumePageIdx !== null) {
        console.error("There can only be one pending page.");
      }

      let matches = null;
      do {
        const pageIdx = this._offset.pageIdx;
        matches = this._pageMatches[pageIdx];
        if (!matches) {
          // The matches of this page are not available yet.
          this._resumePageIdx = pageIdx;
          break;
        }
      } while (!this._matchesReady(matches));
    }

    /**
     * Moves the offset to the adjacent page, wrapping around at the document
     * boundaries, and decrements the number of pages left to search.
     *
     * @param {boolean} previous - Move backwards instead of forwards.
     */
    _advanceOffsetPage(previous) {
      const offset = this._offset;
      const numPages = this._linkService.pagesCount;
      offset.pageIdx = previous ? offset.pageIdx - 1 : offset.pageIdx + 1;
      offset.matchIdx = null;

      this._pagesToSearch--;

      if (offset.pageIdx >= numPages || offset.pageIdx < 0) {
        offset.pageIdx = previous ? numPages - 1 : 0;
        offset.wrapped = true;
      }
    }

    /**
     * Publishes the outcome of a search step: updates the selection when a
     * match was found, reports the state to the UI and requests a redraw of
     * the affected pages.
     *
     * @param {boolean} [found] - Whether the current offset is a match.
     */
    _updateMatch(found = false) {
      let state = FindState.NOT_FOUND;
      const wrapped = this._offset.wrapped;
      this._offset.wrapped = false;

      if (found) {
        const previousPage = this._selected.pageIdx;
        this._selected.pageIdx = this._offset.pageIdx;
        this._selected.matchIdx = this._offset.matchIdx;
        state = wrapped ? FindState.WRAPPED : FindState.FOUND;

        // Redraw the page that held the previous selection.
        if (previousPage !== -1 && previousPage !== this._selected.pageIdx) {
          this._updatePage(previousPage);
        }
      }

      this._updateUIState(state, this._state.findPrevious);
      if (this._selected.pageIdx !== -1) {
        // Ask for the selected match to be scrolled into view.
        this._scrollMatches = true;

        this._updatePage(this._selected.pageIdx);
      }
    }

    /**
     * Handler for the "findbarclose" event: cancels pending work, reports a
     * neutral state and removes the highlights.
     *
     * @param {Object} evt - The event data (unused).
     */
    _onFindBarClose(evt) {
      const pdfDocument = this._pdfDocument;

      this._firstPageCapability.promise.then(() => {
        // Ignore the event if the document was closed or replaced.
        if (
          !this._pdfDocument ||
          (pdfDocument && this._pdfDocument !== pdfDocument)
        ) {
          return;
        }
        if (this._findTimeout) {
          clearTimeout(this._findTimeout);
          this._findTimeout = null;
        }
        // Abort a search that is waiting for a page, and force the matches
        // to be recomputed next time. Compared against `null` because page
        // index 0 is a valid value.
        if (this._resumePageIdx !== null) {
          this._resumePageIdx = null;
          this._dirtyMatch = true;
        }
        this._updateUIState(FindState.FOUND);

        this._highlightMatches = false;
        this._updateAllPages(); // Wipe out any previous highlights.
      });
    }

    /**
     * @returns {Object} `{ current, total }`: the 1-based position of the
     *   selected match among all matches and the total number of matches;
     *   both are 0 when `current` is outside the range 1..total.
     */
    _requestMatchesCount() {
      const { pageIdx, matchIdx } = this._selected;
      let current = 0,
        total = this._matchesCountTotal;

      if (matchIdx !== -1) {
        // Count the matches on all pages before the selected one.
        for (let i = 0; i < pageIdx; i++) {
          current += this._pageMatches[i]?.length || 0;
        }
        current += matchIdx + 1;
      }
      if (current < 1 || current > total) {
        current = total = 0;
      }
      return {
        current,
        total,
      };
    }

    /** Dispatches "updatefindmatchescount" with the current match count. */
    _updateUIResultsCount() {
      this._eventBus.dispatch("updatefindmatchescount", {
        source: this,
        matchesCount: this._requestMatchesCount(),
      });
    }

    /**
     * Dispatches "updatefindcontrolstate" to the UI.
     *
     * @param {number} state - One of the `FindState` values.
     * @param {boolean} [previous] - Whether the search ran backwards.
     */
    _updateUIState(state, previous) {
      this._eventBus.dispatch("updatefindcontrolstate", {
        source: this,
        state,
        previous,
        matchesCount: this._requestMatchesCount(),
        rawQuery: this._state?.query ?? null,
      });
    }
  }
  591. exports.PDFFindController = PDFFindController;