pdf_find_controller.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * Javascript code in this page
  4. *
  5. * Copyright 2020 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * Javascript code in this page
  21. */
  22. "use strict";
  23. Object.defineProperty(exports, "__esModule", {
  24. value: true
  25. });
  26. exports.PDFFindController = exports.FindState = void 0;
  27. var _pdf = require("../pdf");
  28. var _pdf_find_utils = require("./pdf_find_utils.js");
  29. var _ui_utils = require("./ui_utils.js");
  /**
   * State codes reported to the UI through the "updatefindcontrolstate" event.
   *
   * The object is frozen because it is shared (and exported) as an enum; an
   * accidental write by any consumer would otherwise silently change the codes
   * used by every other consumer.
   *
   * @enum {number}
   */
  const FindState = Object.freeze({
    // A match was found without wrapping around the document.
    FOUND: 0,
    // The search finished without finding a match.
    NOT_FOUND: 1,
    // A match was found after the search wrapped around the document.
    WRAPPED: 2,
    // A find command was received and the search has not finished yet.
    PENDING: 3
  });
  36. exports.FindState = FindState;
  // Delay handed to `setTimeout` before a "find" command actually starts
  // searching; a newer command arriving in the meantime cancels the timer.
  const FIND_TIMEOUT = 250;

  // `top` / `left` members of the `spot` object passed to `scrollIntoView`
  // when the selected match is scrolled into view.
  // NOTE(review): presumably pixel offsets relative to the match element -
  // confirm against `scrollIntoView` in ui_utils.js.
  const MATCH_SCROLL_OFFSET_TOP = -50;
  const MATCH_SCROLL_OFFSET_LEFT = -400;
  // Typographic characters and the plain-ASCII text that replaces each of them
  // before searching, so that e.g. a straight quote typed by the user matches a
  // curly quote in the document.
  const CHARACTERS_TO_NORMALIZE = {
    "\u2018": "'", // Left single quotation mark
    "\u2019": "'", // Right single quotation mark
    "\u201A": "'", // Single low-9 quotation mark
    "\u201B": "'", // Single high-reversed-9 quotation mark
    "\u201C": '"', // Left double quotation mark
    "\u201D": '"', // Right double quotation mark
    "\u201E": '"', // Double low-9 quotation mark
    "\u201F": '"', // Double high-reversed-9 quotation mark
    "\u00BC": "1/4", // Vulgar fraction one quarter
    "\u00BD": "1/2", // Vulgar fraction one half
    "\u00BE": "3/4" // Vulgar fraction three quarters
  };

  // Lazily-built character-class regex matching every key of
  // `CHARACTERS_TO_NORMALIZE`; created on the first `normalize` call.
  let normalizationRegex = null;

  /**
   * Replaces every character listed in `CHARACTERS_TO_NORMALIZE` with its
   * ASCII substitute.
   *
   * Note that the fraction replacements are longer than the character they
   * replace, so the result may be longer than the input.
   *
   * @param {string} text - The text to normalize.
   * @returns {string} The text with all listed characters substituted.
   */
  function normalize(text) {
    if (!normalizationRegex) {
      const charClass = Object.keys(CHARACTERS_TO_NORMALIZE).join("");
      normalizationRegex = new RegExp("[" + charClass + "]", "g");
    }
    return text.replace(normalizationRegex, ch => CHARACTERS_TO_NORMALIZE[ch]);
  }
  /**
   * Searches the text of a PDF document.
   *
   * The controller extracts the text of every page, computes the match offsets
   * for the current query on each page, keeps track of the selected match, and
   * reports progress through the event bus ("updatefindcontrolstate",
   * "updatefindmatchescount" and "updatetextlayermatches" events).
   */
  class PDFFindController {
    /**
     * @param {Object} options
     * @param {Object} options.linkService - Object whose `page`, `pagesCount`
     *   and `isPageVisible()` members are used to read and set the current page.
     * @param {Object} options.eventBus - Event bus used to dispatch UI updates
     *   and to listen for the "findbarclose" event.
     */
    constructor({ linkService, eventBus }) {
      this._linkService = linkService;
      this._eventBus = eventBus;
      this._reset();
      eventBus._on("findbarclose", this._onFindBarClose.bind(this));
    }

    /** @returns {boolean} Whether matches should currently be highlighted. */
    get highlightMatches() {
      return this._highlightMatches;
    }

    /** @returns {Array} Per-page arrays of match start offsets. */
    get pageMatches() {
      return this._pageMatches;
    }

    /**
     * @returns {Array} Per-page arrays of match lengths; only filled in by the
     *   word (non-phrase) search.
     */
    get pageMatchesLength() {
      return this._pageMatchesLength;
    }

    /** @returns {Object} The selected match as `{ pageIdx, matchIdx }`. */
    get selected() {
      return this._selected;
    }

    /** @returns {Object|null} The state object of the latest find command. */
    get state() {
      return this._state;
    }

    /**
     * Sets the document to search in, discarding all state that belongs to a
     * previously set document.
     *
     * @param {Object} pdfDocument - The new document; a falsy value only resets.
     */
    setDocument(pdfDocument) {
      if (this._pdfDocument) {
        this._reset();
      }
      if (!pdfDocument) {
        return;
      }
      this._pdfDocument = pdfDocument;
      this._firstPageCapability.resolve();
    }

    /**
     * Handles a find command.
     *
     * @param {string} cmd - The command name; "find", "findagain" and
     *   "findhighlightallchange" get special treatment, anything else simply
     *   moves to the next match.
     * @param {Object} state - The find options (`query`, `caseSensitive`,
     *   `entireWord`, `phraseSearch`, `highlightAll`, `findPrevious`). Nothing
     *   happens when it is falsy.
     */
    executeCommand(cmd, state) {
      if (!state) {
        return;
      }
      const pdfDocument = this._pdfDocument;

      if (this._state === null || this._shouldDirtyMatch(cmd, state)) {
        this._dirtyMatch = true;
      }
      this._state = state;
      if (cmd !== "findhighlightallchange") {
        this._updateUIState(FindState.PENDING);
      }

      this._firstPageCapability.promise.then(() => {
        // Bail out if the document was closed or replaced while waiting.
        if (!this._pdfDocument || (pdfDocument && this._pdfDocument !== pdfDocument)) {
          return;
        }
        this._extractText();

        const findbarClosed = !this._highlightMatches;
        const pendingTimeout = !!this._findTimeout;

        if (this._findTimeout) {
          clearTimeout(this._findTimeout);
          this._findTimeout = null;
        }
        if (cmd === "find") {
          // Delay the search, so that a newer "find" command can replace it
          // before any work has been done.
          this._findTimeout = setTimeout(() => {
            this._nextMatch();
            this._findTimeout = null;
          }, FIND_TIMEOUT);
        } else if (this._dirtyMatch) {
          this._nextMatch();
        } else if (cmd === "findagain") {
          this._nextMatch();
          // Re-apply the highlighting when the find bar had been closed.
          if (findbarClosed && this._state.highlightAll) {
            this._updateAllPages();
          }
        } else if (cmd === "findhighlightallchange") {
          // A delayed "find" was just cancelled above; run it now instead.
          if (pendingTimeout) {
            this._nextMatch();
          } else {
            this._highlightMatches = true;
          }
          this._updateAllPages();
        } else {
          this._nextMatch();
        }
      });
    }

    /**
     * Scrolls the given element into view, provided that it belongs to the
     * currently selected match and that a scroll is still pending.
     *
     * @param {Object} options
     * @param {Object|null} [options.element] - The element of the match.
     * @param {number} [options.pageIndex] - Page index of the match.
     * @param {number} [options.matchIndex] - Index of the match on its page.
     */
    scrollMatchIntoView({ element = null, pageIndex = -1, matchIndex = -1 }) {
      if (!this._scrollMatches || !element) {
        return;
      } else if (matchIndex === -1 || matchIndex !== this._selected.matchIdx) {
        return;
      } else if (pageIndex === -1 || pageIndex !== this._selected.pageIdx) {
        return;
      }
      // Scroll only once per selected match.
      this._scrollMatches = false;

      const spot = {
        top: MATCH_SCROLL_OFFSET_TOP,
        left: MATCH_SCROLL_OFFSET_LEFT
      };
      (0, _ui_utils.scrollIntoView)(element, spot, true);
    }

    /** Restores every piece of per-document state to its initial value. */
    _reset() {
      this._highlightMatches = false;
      this._scrollMatches = false;
      this._pdfDocument = null;
      this._pageMatches = [];
      this._pageMatchesLength = [];
      this._state = null;
      // The match that is currently selected.
      this._selected = {
        pageIdx: -1,
        matchIdx: -1
      };
      // The position the search is currently at.
      this._offset = {
        pageIdx: null,
        matchIdx: null,
        wrapped: false
      };
      this._extractTextPromises = [];
      this._pageContents = [];
      this._matchesCountTotal = 0;
      this._pagesToSearch = null;
      this._pendingFindMatches = Object.create(null);
      // Index of the page whose matches the search is waiting for, or `null`.
      this._resumePageIdx = null;
      this._dirtyMatch = false;
      clearTimeout(this._findTimeout);
      this._findTimeout = null;
      this._firstPageCapability = (0, _pdf.createPromiseCapability)();
    }

    /**
     * @returns {string} The normalized query of the current state; the
     *   normalized form is cached until the raw query changes.
     */
    get _query() {
      if (this._state.query !== this._rawQuery) {
        this._rawQuery = this._state.query;
        this._normalizedQuery = normalize(this._state.query);
      }
      return this._normalizedQuery;
    }

    /**
     * Decides whether a command invalidates the existing matches, i.e. whether
     * the search has to start over.
     *
     * @param {string} cmd - The find command.
     * @param {Object} state - The state object that comes with the command.
     * @returns {boolean}
     */
    _shouldDirtyMatch(cmd, state) {
      // A different query always requires a new search.
      if (state.query !== this._state.query) {
        return true;
      }
      switch (cmd) {
        case "findagain": {
          // Start over when the page of the selected match is a valid page
          // that is neither the current page nor visible.
          const pageNumber = this._selected.pageIdx + 1;
          const linkService = this._linkService;
          if (
            pageNumber >= 1 &&
            pageNumber <= linkService.pagesCount &&
            pageNumber !== linkService.page &&
            !linkService.isPageVisible(pageNumber)
          ) {
            return true;
          }
          return false;
        }
        case "findhighlightallchange":
          return false;
      }
      return true;
    }

    /**
     * Sorts the raw word matches and copies those that are not covered by
     * another match into the two output arrays.
     *
     * @param {Array<Object>} matchesWithLength - Entries of the form
     *   `{ match, matchLength, skipped }`; sorted in place, and `skipped` is set
     *   on every entry that is dropped.
     * @param {Array<number>} matches - Receives the start offsets.
     * @param {Array<number>} matchesLength - Receives the matching lengths.
     */
    _prepareMatches(matchesWithLength, matches, matchesLength) {
      // Returns true (and marks the entry as skipped) when the entry at
      // `currentIndex` should be dropped.
      function isSubTerm(currentIndex) {
        const currentElem = matchesWithLength[currentIndex];
        const nextElem = matchesWithLength[currentIndex + 1];

        // Same start offset as the next entry, which is at least as long
        // because of the sort order.
        if (currentIndex < matchesWithLength.length - 1 && currentElem.match === nextElem.match) {
          currentElem.skipped = true;
          return true;
        }
        // Look backwards for a kept entry that fully covers the current one.
        for (let i = currentIndex - 1; i >= 0; i--) {
          const prevElem = matchesWithLength[i];
          if (prevElem.skipped) {
            continue;
          }
          if (prevElem.match + prevElem.matchLength < currentElem.match) {
            break;
          }
          if (prevElem.match + prevElem.matchLength >= currentElem.match + currentElem.matchLength) {
            currentElem.skipped = true;
            return true;
          }
        }
        return false;
      }

      // Sort by start offset, then by length.
      matchesWithLength.sort(function (a, b) {
        return a.match === b.match ? a.matchLength - b.matchLength : a.match - b.match;
      });
      for (let i = 0, len = matchesWithLength.length; i < len; i++) {
        if (isSubTerm(i)) {
          continue;
        }
        matches.push(matchesWithLength[i].match);
        matchesLength.push(matchesWithLength[i].matchLength);
      }
    }

    /**
     * Checks whether a match is delimited on both sides, by comparing the
     * character type (as reported by `getCharacterType`) of its first and last
     * character with the type of the character just outside the match.
     *
     * @param {string} content - The page text.
     * @param {number} startIdx - Start offset of the match.
     * @param {number} length - Length of the match.
     * @returns {boolean}
     */
    _isEntireWord(content, startIdx, length) {
      if (startIdx > 0) {
        const first = content.charCodeAt(startIdx);
        const limit = content.charCodeAt(startIdx - 1);
        if ((0, _pdf_find_utils.getCharacterType)(first) === (0, _pdf_find_utils.getCharacterType)(limit)) {
          return false;
        }
      }
      const endIdx = startIdx + length - 1;
      if (endIdx < content.length - 1) {
        const last = content.charCodeAt(endIdx);
        const limit = content.charCodeAt(endIdx + 1);
        if ((0, _pdf_find_utils.getCharacterType)(last) === (0, _pdf_find_utils.getCharacterType)(limit)) {
          return false;
        }
      }
      return true;
    }

    /**
     * Finds all non-overlapping occurrences of the whole query on a page and
     * stores their start offsets in `_pageMatches[pageIndex]`.
     *
     * @param {string} query - The (non-empty) query.
     * @param {number} pageIndex - Index of the page.
     * @param {string} pageContent - The text of the page.
     * @param {boolean} entireWord - Only accept matches passing `_isEntireWord`.
     */
    _calculatePhraseMatch(query, pageIndex, pageContent, entireWord) {
      const matches = [];
      const queryLen = query.length;

      // Start at `-queryLen`, so that the first `indexOf` begins at offset 0.
      let matchIdx = -queryLen;
      while (true) {
        matchIdx = pageContent.indexOf(query, matchIdx + queryLen);
        if (matchIdx === -1) {
          break;
        }
        if (entireWord && !this._isEntireWord(pageContent, matchIdx, queryLen)) {
          continue;
        }
        matches.push(matchIdx);
      }
      this._pageMatches[pageIndex] = matches;
    }

    /**
     * Finds the occurrences of every whitespace-separated word of the query on
     * a page and stores the merged result in `_pageMatches[pageIndex]` and
     * `_pageMatchesLength[pageIndex]`.
     *
     * @param {string} query - The query.
     * @param {number} pageIndex - Index of the page.
     * @param {string} pageContent - The text of the page.
     * @param {boolean} entireWord - Only accept matches passing `_isEntireWord`.
     */
    _calculateWordMatch(query, pageIndex, pageContent, entireWord) {
      const matchesWithLength = [];

      // `String.prototype.match` returns `null`, rather than an empty array,
      // when the query consists of whitespace only.
      const queryArray = query.match(/\S+/g) || [];

      for (let i = 0, len = queryArray.length; i < len; i++) {
        const subquery = queryArray[i];
        const subqueryLen = subquery.length;

        let matchIdx = -subqueryLen;
        while (true) {
          matchIdx = pageContent.indexOf(subquery, matchIdx + subqueryLen);
          if (matchIdx === -1) {
            break;
          }
          if (entireWord && !this._isEntireWord(pageContent, matchIdx, subqueryLen)) {
            continue;
          }
          matchesWithLength.push({
            match: matchIdx,
            matchLength: subqueryLen,
            skipped: false
          });
        }
      }

      this._pageMatchesLength[pageIndex] = [];
      this._pageMatches[pageIndex] = [];
      this._prepareMatches(matchesWithLength, this._pageMatches[pageIndex], this._pageMatchesLength[pageIndex]);
    }

    /**
     * Computes the matches of the current query on one page, then updates the
     * page highlighting, resumes a search that was waiting for this page and
     * adds the page's matches to the total count.
     *
     * @param {number} pageIndex - Index of the page.
     */
    _calculateMatch(pageIndex) {
      let pageContent = this._pageContents[pageIndex];
      let query = this._query;
      const { caseSensitive, entireWord, phraseSearch } = this._state;

      if (query.length === 0) {
        return;
      }
      if (!caseSensitive) {
        pageContent = pageContent.toLowerCase();
        query = query.toLowerCase();
      }
      if (phraseSearch) {
        this._calculatePhraseMatch(query, pageIndex, pageContent, entireWord);
      } else {
        this._calculateWordMatch(query, pageIndex, pageContent, entireWord);
      }

      if (this._state.highlightAll) {
        this._updatePage(pageIndex);
      }
      // The search was waiting for the matches of this page; continue it.
      if (this._resumePageIdx === pageIndex) {
        this._resumePageIdx = null;
        this._nextPageMatch();
      }

      const pageMatchesCount = this._pageMatches[pageIndex].length;
      if (pageMatchesCount > 0) {
        this._matchesCountTotal += pageMatchesCount;
        this._updateUIResultsCount();
      }
    }

    /**
     * Starts extracting the text of all pages, unless that has been started
     * already. Pages are fetched one after another; `_extractTextPromises[i]`
     * resolves with `i` once the (normalized) text of page `i` is stored in
     * `_pageContents[i]`.
     */
    _extractText() {
      if (this._extractTextPromises.length > 0) {
        return;
      }

      let promise = Promise.resolve();
      for (let i = 0, ii = this._linkService.pagesCount; i < ii; i++) {
        const extractTextCapability = (0, _pdf.createPromiseCapability)();
        this._extractTextPromises[i] = extractTextCapability.promise;

        promise = promise.then(() => {
          return this._pdfDocument.getPage(i + 1).then(pdfPage => {
            return pdfPage.getTextContent({
              normalizeWhitespace: true
            });
          }).then(textContent => {
            const textItems = textContent.items;
            const strBuf = [];
            for (let j = 0, jj = textItems.length; j < jj; j++) {
              strBuf.push(textItems[j].str);
            }
            this._pageContents[i] = normalize(strBuf.join(""));
            extractTextCapability.resolve(i);
          }, reason => {
            console.error(`Unable to get text content for page ${i + 1}`, reason);
            // Use empty text for the page, so that the search can still finish.
            this._pageContents[i] = "";
            extractTextCapability.resolve(i);
          });
        });
      }
    }

    /**
     * Requests that the matches of one page are re-rendered; when a scroll is
     * pending and the page holds the selected match, it also becomes the
     * current page.
     *
     * @param {number} index - Index of the page.
     */
    _updatePage(index) {
      if (this._scrollMatches && this._selected.pageIdx === index) {
        this._linkService.page = index + 1;
      }
      this._eventBus.dispatch("updatetextlayermatches", {
        source: this,
        pageIndex: index
      });
    }

    /** Requests that the matches of all pages are re-rendered. */
    _updateAllPages() {
      this._eventBus.dispatch("updatetextlayermatches", {
        source: this,
        pageIndex: -1
      });
    }

    /**
     * Moves to the next (or previous) match; when the matches are dirty, the
     * search is restarted from the current page first.
     */
    _nextMatch() {
      const previous = this._state.findPrevious;
      const currentPageIndex = this._linkService.page - 1;
      const numPages = this._linkService.pagesCount;

      this._highlightMatches = true;

      if (this._dirtyMatch) {
        // Start a fresh search from the current page.
        this._dirtyMatch = false;
        this._selected.pageIdx = this._selected.matchIdx = -1;
        this._offset.pageIdx = currentPageIndex;
        this._offset.matchIdx = null;
        this._offset.wrapped = false;
        this._resumePageIdx = null;
        this._pageMatches.length = 0;
        this._pageMatchesLength.length = 0;
        this._matchesCountTotal = 0;

        // Wipe the highlighting of the old matches.
        this._updateAllPages();

        for (let i = 0; i < numPages; i++) {
          // Skip pages whose matches are already being computed.
          if (this._pendingFindMatches[i] === true) {
            continue;
          }
          this._pendingFindMatches[i] = true;
          this._extractTextPromises[i].then(pageIdx => {
            delete this._pendingFindMatches[pageIdx];
            this._calculateMatch(pageIdx);
          });
        }
      }

      // An empty query has no matches to step through.
      if (this._query === "") {
        this._updateUIState(FindState.FOUND);
        return;
      }
      // Nothing more can be done while waiting for the matches of a page.
      // Compare with `null` explicitly, since page index 0 is a valid value.
      if (this._resumePageIdx !== null) {
        return;
      }

      const offset = this._offset;
      this._pagesToSearch = numPages;

      if (offset.matchIdx !== null) {
        // A match is selected; try to step to a neighbour on the same page.
        const numPageMatches = this._pageMatches[offset.pageIdx].length;
        if ((!previous && offset.matchIdx + 1 < numPageMatches) || (previous && offset.matchIdx > 0)) {
          offset.matchIdx = previous ? offset.matchIdx - 1 : offset.matchIdx + 1;
          this._updateMatch(true);
          return;
        }
        // No neighbour left on this page, so continue on the adjacent page.
        this._advanceOffsetPage(previous);
      }
      this._nextPageMatch();
    }

    /**
     * Processes the matches of the page the search is currently at.
     *
     * @param {Array<number>} matches - The matches of that page.
     * @returns {boolean} `true` when the search is finished (a match was
     *   selected, or every page was searched without success), `false` when it
     *   has to continue with the next page.
     */
    _matchesReady(matches) {
      const offset = this._offset;
      const numMatches = matches.length;
      const previous = this._state.findPrevious;

      if (numMatches) {
        // Select the last match when searching backwards, the first otherwise.
        offset.matchIdx = previous ? numMatches - 1 : 0;
        this._updateMatch(true);
        return true;
      }
      // No matches on this page; move on.
      this._advanceOffsetPage(previous);
      if (offset.wrapped) {
        offset.matchIdx = null;
        if (this._pagesToSearch < 0) {
          // All pages have been searched without finding anything.
          this._updateMatch(false);
          return true;
        }
      }
      return false;
    }

    /**
     * Walks over the pages, starting at the current offset, until a page ends
     * the search or a page is reached whose matches are not available yet. In
     * the latter case the page is remembered in `_resumePageIdx`.
     */
    _nextPageMatch() {
      if (this._resumePageIdx !== null) {
        console.error("There can only be one pending page.");
      }

      let matches = null;
      do {
        const pageIdx = this._offset.pageIdx;
        matches = this._pageMatches[pageIdx];
        if (!matches) {
          // The matches of this page have not been computed yet; the search
          // is resumed by `_calculateMatch` once they are.
          this._resumePageIdx = pageIdx;
          break;
        }
      } while (!this._matchesReady(matches));
    }

    /**
     * Moves the search offset to the adjacent page, wrapping around at either
     * end of the document, and decrements the number of pages left to search.
     *
     * @param {boolean} previous - Move backwards instead of forwards.
     */
    _advanceOffsetPage(previous) {
      const offset = this._offset;
      const numPages = this._linkService.pagesCount;

      offset.pageIdx = previous ? offset.pageIdx - 1 : offset.pageIdx + 1;
      offset.matchIdx = null;
      this._pagesToSearch--;

      if (offset.pageIdx >= numPages || offset.pageIdx < 0) {
        offset.pageIdx = previous ? numPages - 1 : 0;
        offset.wrapped = true;
      }
    }

    /**
     * Finishes a search step: selects the match at the current offset (when
     * one was found), reports the resulting state to the UI and refreshes the
     * affected pages.
     *
     * @param {boolean} [found] - Whether a match was found.
     */
    _updateMatch(found = false) {
      let state = FindState.NOT_FOUND;
      const wrapped = this._offset.wrapped;
      this._offset.wrapped = false;

      if (found) {
        const previousPage = this._selected.pageIdx;
        this._selected.pageIdx = this._offset.pageIdx;
        this._selected.matchIdx = this._offset.matchIdx;
        state = wrapped ? FindState.WRAPPED : FindState.FOUND;

        // Refresh the page that held the previously selected match.
        if (previousPage !== -1 && previousPage !== this._selected.pageIdx) {
          this._updatePage(previousPage);
        }
      }

      this._updateUIState(state, this._state.findPrevious);
      if (this._selected.pageIdx !== -1) {
        // Let the selected match be scrolled into view when it is rendered.
        this._scrollMatches = true;
        this._updatePage(this._selected.pageIdx);
      }
    }

    /**
     * Handles the "findbarclose" event: cancels pending work, resets the UI
     * state and turns the highlighting off.
     *
     * @param {Object} evt - The event data (unused).
     */
    _onFindBarClose(evt) {
      const pdfDocument = this._pdfDocument;

      this._firstPageCapability.promise.then(() => {
        // Bail out if the document was closed or replaced while waiting.
        if (!this._pdfDocument || (pdfDocument && this._pdfDocument !== pdfDocument)) {
          return;
        }
        if (this._findTimeout) {
          clearTimeout(this._findTimeout);
          this._findTimeout = null;
        }
        // Abandon a search that is waiting for a page, and make sure that the
        // next command starts over. Compare with `null` explicitly, since page
        // index 0 is a valid value.
        if (this._resumePageIdx !== null) {
          this._resumePageIdx = null;
          this._dirtyMatch = true;
        }
        this._updateUIState(FindState.FOUND);

        this._highlightMatches = false;
        this._updateAllPages();
      });
    }

    /**
     * Computes the 1-based position of the selected match among all matches.
     *
     * @returns {{current: number, total: number}} Both values are 0 when no
     *   match is selected or when the position lies outside of `1..total`.
     */
    _requestMatchesCount() {
      const { pageIdx, matchIdx } = this._selected;
      let current = 0;
      let total = this._matchesCountTotal;

      if (matchIdx !== -1) {
        // Count the matches on all pages before the selected one.
        for (let i = 0; i < pageIdx; i++) {
          current += (this._pageMatches[i] && this._pageMatches[i].length) || 0;
        }
        current += matchIdx + 1;
      }
      // Report nothing rather than an inconsistent position, which can occur
      // while the per-page matches are still being computed.
      if (current < 1 || current > total) {
        current = total = 0;
      }
      return {
        current,
        total
      };
    }

    /** Dispatches "updatefindmatchescount" with the current match counts. */
    _updateUIResultsCount() {
      this._eventBus.dispatch("updatefindmatchescount", {
        source: this,
        matchesCount: this._requestMatchesCount()
      });
    }

    /**
     * Dispatches "updatefindcontrolstate" with the given state and the current
     * match counts.
     *
     * @param {number} state - One of the `FindState` values.
     * @param {boolean} [previous] - Whether the search was going backwards.
     */
    _updateUIState(state, previous) {
      this._eventBus.dispatch("updatefindcontrolstate", {
        source: this,
        state,
        previous,
        matchesCount: this._requestMatchesCount()
      });
    }
  }
  551. exports.PDFFindController = PDFFindController;