2
0

pdf_find_controller.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * Javascript code in this page
  4. *
  5. * Copyright 2017 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * Javascript code in this page
  21. */
  'use strict';

  // Flag this CommonJS module as one produced from ES-module syntax.
  Object.defineProperty(exports, '__esModule', { value: true });

  // Declare both named exports up front; the real values are assigned at the
  // bottom of the file once the definitions exist.
  exports.FindState = undefined;
  exports.PDFFindController = undefined;
  /**
   * Transpiler helper that installs property descriptors on a constructor.
   *
   * Each descriptor is an object with a `key` plus either `value` or
   * accessor fields. Descriptors are normalised in place: non-enumerable
   * unless `enumerable` is already truthy, always configurable, and
   * writable whenever they carry a `value`.
   *
   * @param {Function} Constructor - Class constructor to decorate.
   * @param {Array<Object>} [protoProps] - Descriptors for `Constructor.prototype`.
   * @param {Array<Object>} [staticProps] - Descriptors for `Constructor` itself.
   * @returns {Function} The same `Constructor`.
   */
  var _createClass = function () {
    function defineProperties(target, props) {
      for (var index = 0; index < props.length; index++) {
        var entry = props[index];
        entry.enumerable = entry.enumerable || false;
        entry.configurable = true;
        if ('value' in entry) {
          entry.writable = true;
        }
        Object.defineProperty(target, entry.key, entry);
      }
    }

    return function (Constructor, protoProps, staticProps) {
      if (protoProps) {
        defineProperties(Constructor.prototype, protoProps);
      }
      if (staticProps) {
        defineProperties(Constructor, staticProps);
      }
      return Constructor;
    };
  }();
  28. var _pdf = require('../pdf');
  29. var _ui_utils = require('./ui_utils');
  /**
   * Transpiler helper that rejects calling a class constructor without `new`.
   *
   * @param {*} instance - The `this` value seen by the constructor.
   * @param {Function} Constructor - The class being constructed.
   * @throws {TypeError} When `instance` is not an instance of `Constructor`.
   */
  function _classCallCheck(instance, Constructor) {
    if (instance instanceof Constructor) {
      return;
    }
    throw new TypeError("Cannot call a class as a function");
  }
  /**
   * Status codes the find controller reports through its `onUpdateState`
   * callback.
   */
  var FindState = {
    // A match was selected (also reported when the query is empty).
    FOUND: 0,
    // The search finished without selecting a match.
    NOT_FOUND: 1,
    // A match was selected after the search wrapped around the document.
    WRAPPED: 2,
    // A find command was received and is being processed.
    PENDING: 3
  };
  // Offsets handed to `scrollIntoView` (as `top`/`left`) when the selected
  // match is scrolled to. NOTE(review): presumably pixels - confirm against
  // `ui_utils.scrollIntoView`.
  var FIND_SCROLL_OFFSET_TOP = -50;
  var FIND_SCROLL_OFFSET_LEFT = -400;

  // Delay, in milliseconds, before a plain 'find' command starts searching.
  var FIND_TIMEOUT = 250;
  /**
   * Characters that are replaced by a plain ASCII equivalent in both the
   * page text and the query before matching. The keys are also joined into
   * the character class of the controller's normalization regex.
   */
  var CHARACTERS_TO_NORMALIZE = {
    // Single quotation mark variants -> apostrophe.
    '\u2018': "'",
    '\u2019': "'",
    '\u201A': "'",
    '\u201B': "'",
    // Double quotation mark variants -> straight double quote.
    '\u201C': '"',
    '\u201D': '"',
    '\u201E': '"',
    '\u201F': '"',
    // Vulgar fractions -> spelled-out fractions.
    '\xBC': '1/4',
    '\xBD': '1/2',
    '\xBE': '3/4'
  };
  /**
   * Text-search controller for a PDF viewer.
   *
   * It extracts the text of every page through `pdfViewer`, computes the
   * match offsets of the current query per page, keeps track of the selected
   * match, and reports progress through the optional `onUpdateState` and
   * `onUpdateResultsCount` callbacks.
   */
  var PDFFindController = function () {
    /**
     * @param {Object} _ref
     * @param {Object} _ref.pdfViewer - Viewer object. This class reads
     *   `pagesCount` and `currentPageNumber`, writes `currentPageNumber`, and
     *   calls `getPageTextContent(index)` and `getPageView(index)` on it.
     */
    function PDFFindController(_ref) {
      var pdfViewer = _ref.pdfViewer;
      _classCallCheck(this, PDFFindController);
      this.pdfViewer = pdfViewer;
      this.onUpdateResultsCount = null;
      this.onUpdateState = null;
      this.reset();
      // One character class that matches every key of the normalization table.
      var replace = Object.keys(CHARACTERS_TO_NORMALIZE).join('');
      this.normalizationRegex = new RegExp('[' + replace + ']', 'g');
    }

    _createClass(PDFFindController, [{
      key: 'reset',
      /**
       * Restores the controller to its initial state and creates a fresh
       * first-page promise; searching is deferred until `resolveFirstPage`
       * is called.
       */
      value: function reset() {
        var _this = this;
        this.startedTextExtraction = false;
        this.extractTextPromises = [];
        this.pendingFindMatches = Object.create(null);
        this.active = false;
        this.pageContents = [];
        this.pageMatches = [];
        this.pageMatchesLength = null;
        this.matchCount = 0;
        // Currently highlighted match (-1 means "none").
        this.selected = {
          pageIdx: -1,
          matchIdx: -1
        };
        // Position from which the next search step continues.
        this.offset = {
          pageIdx: null,
          matchIdx: null,
          wrapped: false
        };
        this.pagesToSearch = null;
        this.resumePageIdx = null;
        this.state = null;
        this.dirtyMatch = false;
        this.hadMatch = false;
        this.findTimeout = null;
        this._firstPagePromise = new Promise(function (resolve) {
          _this.resolveFirstPage = resolve;
        });
      }
    }, {
      key: 'executeCommand',
      /**
       * Entry point for find commands.
       *
       * @param {string} cmd - Command name. 'find' is debounced by
       *   `FIND_TIMEOUT`; anything other than 'findagain' restarts the search.
       * @param {Object} state - Search state. This class reads `query`,
       *   `caseSensitive`, `phraseSearch` and `findPrevious` from it.
       */
      value: function executeCommand(cmd, state) {
        var _this2 = this;
        if (this.state === null || cmd !== 'findagain') {
          this.dirtyMatch = true;
        }
        this.state = state;
        this._updateUIState(FindState.PENDING);
        this._firstPagePromise.then(function () {
          _this2._extractText();
          clearTimeout(_this2.findTimeout);
          if (cmd === 'find') {
            // Wait briefly so that fast typing does not trigger a search per key.
            _this2.findTimeout = setTimeout(_this2._nextMatch.bind(_this2), FIND_TIMEOUT);
          } else {
            _this2._nextMatch();
          }
        });
      }
    }, {
      key: 'updateMatchPosition',
      /**
       * Scrolls `elements[beginIdx]` into view when the given match is the
       * currently selected one; otherwise does nothing.
       *
       * @param {number} pageIndex
       * @param {number} matchIndex
       * @param {Array} elements - Elements of the page; the one at `beginIdx`
       *   is passed to `scrollIntoView`.
       * @param {number} beginIdx
       */
      value: function updateMatchPosition(pageIndex, matchIndex, elements, beginIdx) {
        if (this.selected.matchIdx === matchIndex && this.selected.pageIdx === pageIndex) {
          var spot = {
            top: FIND_SCROLL_OFFSET_TOP,
            left: FIND_SCROLL_OFFSET_LEFT
          };
          (0, _ui_utils.scrollIntoView)(elements[beginIdx], spot, true);
        }
      }
    }, {
      key: '_normalize',
      /**
       * Replaces every character listed in `CHARACTERS_TO_NORMALIZE` with its
       * mapped replacement.
       *
       * @param {string} text
       * @returns {string}
       */
      value: function _normalize(text) {
        return text.replace(this.normalizationRegex, function (ch) {
          return CHARACTERS_TO_NORMALIZE[ch];
        });
      }
    }, {
      key: '_prepareMatches',
      /**
       * Sorts the raw word matches and copies the ones worth keeping into the
       * two output arrays, dropping matches that are covered by another one.
       *
       * @param {Array<Object>} matchesWithLength - Entries of the form
       *   `{ match, matchLength, skipped }`; sorted and flagged in place.
       * @param {Array<number>} matches - Output: start offsets.
       * @param {Array<number>} matchesLength - Output: lengths, parallel to
       *   `matches`.
       */
      value: function _prepareMatches(matchesWithLength, matches, matchesLength) {
        function isSubTerm(matchesWithLength, currentIndex) {
          var currentElem = matchesWithLength[currentIndex];
          var nextElem = matchesWithLength[currentIndex + 1];
          // Entries starting at the same offset are sorted shortest first, so
          // the next entry is at least as long as this one: skip this one.
          if (currentIndex < matchesWithLength.length - 1 && currentElem.match === nextElem.match) {
            currentElem.skipped = true;
            return true;
          }
          // Look backwards for a kept entry that fully contains this one.
          for (var i = currentIndex - 1; i >= 0; i--) {
            var prevElem = matchesWithLength[i];
            if (prevElem.skipped) {
              continue;
            }
            if (prevElem.match + prevElem.matchLength < currentElem.match) {
              break;
            }
            if (prevElem.match + prevElem.matchLength >= currentElem.match + currentElem.matchLength) {
              currentElem.skipped = true;
              return true;
            }
          }
          return false;
        }

        matchesWithLength.sort(function (a, b) {
          return a.match === b.match ? a.matchLength - b.matchLength : a.match - b.match;
        });
        for (var i = 0, len = matchesWithLength.length; i < len; i++) {
          if (isSubTerm(matchesWithLength, i)) {
            continue;
          }
          matches.push(matchesWithLength[i].match);
          matchesLength.push(matchesWithLength[i].matchLength);
        }
      }
    }, {
      key: '_calculatePhraseMatch',
      /**
       * Finds every non-overlapping occurrence of the whole query on a page
       * and stores the start offsets in `pageMatches[pageIndex]`.
       *
       * @param {string} query - Non-empty search string.
       * @param {number} pageIndex
       * @param {string} pageContent
       */
      value: function _calculatePhraseMatch(query, pageIndex, pageContent) {
        var matches = [];
        var queryLen = query.length;
        // Start at -queryLen so that the first search begins at offset 0.
        var matchIdx = -queryLen;
        while (true) {
          matchIdx = pageContent.indexOf(query, matchIdx + queryLen);
          if (matchIdx === -1) {
            break;
          }
          matches.push(matchIdx);
        }
        this.pageMatches[pageIndex] = matches;
      }
    }, {
      key: '_calculateWordMatch',
      /**
       * Finds the occurrences of each whitespace-separated word of the query
       * on a page and stores the merged result in `pageMatches[pageIndex]`
       * and `pageMatchesLength[pageIndex]`.
       *
       * @param {string} query
       * @param {number} pageIndex
       * @param {string} pageContent
       */
      value: function _calculateWordMatch(query, pageIndex, pageContent) {
        var matchesWithLength = [];
        // `match` returns null for a query made only of whitespace; treat that
        // as "no words" so the page still gets an (empty) match list.
        var queryArray = query.match(/\S+/g) || [];
        for (var i = 0, len = queryArray.length; i < len; i++) {
          var subquery = queryArray[i];
          var subqueryLen = subquery.length;
          var matchIdx = -subqueryLen;
          while (true) {
            matchIdx = pageContent.indexOf(subquery, matchIdx + subqueryLen);
            if (matchIdx === -1) {
              break;
            }
            matchesWithLength.push({
              match: matchIdx,
              matchLength: subqueryLen,
              skipped: false
            });
          }
        }
        if (!this.pageMatchesLength) {
          this.pageMatchesLength = [];
        }
        this.pageMatchesLength[pageIndex] = [];
        this.pageMatches[pageIndex] = [];
        this._prepareMatches(matchesWithLength, this.pageMatches[pageIndex], this.pageMatchesLength[pageIndex]);
      }
    }, {
      key: '_calculateMatch',
      /**
       * Computes the matches of the current query on one page, refreshes that
       * page, resumes a search that was waiting for this page, and updates
       * the total match count. Does nothing for an empty query.
       *
       * @param {number} pageIndex
       */
      value: function _calculateMatch(pageIndex) {
        var pageContent = this._normalize(this.pageContents[pageIndex]);
        var query = this._normalize(this.state.query);
        var caseSensitive = this.state.caseSensitive;
        var phraseSearch = this.state.phraseSearch;
        var queryLen = query.length;
        if (queryLen === 0) {
          return;
        }
        if (!caseSensitive) {
          pageContent = pageContent.toLowerCase();
          query = query.toLowerCase();
        }
        if (phraseSearch) {
          this._calculatePhraseMatch(query, pageIndex, pageContent);
        } else {
          this._calculateWordMatch(query, pageIndex, pageContent);
        }
        this._updatePage(pageIndex);
        if (this.resumePageIdx === pageIndex) {
          this.resumePageIdx = null;
          this._nextPageMatch();
        }
        if (this.pageMatches[pageIndex].length > 0) {
          this.matchCount += this.pageMatches[pageIndex].length;
          this._updateUIResultsCount();
        }
      }
    }, {
      key: '_extractText',
      /**
       * Starts text extraction for all pages, once. Pages are requested one
       * after another by chaining onto a single promise; each page has its
       * own entry in `extractTextPromises` that resolves with the page index.
       * A page whose text cannot be fetched is logged and stored as ''.
       */
      value: function _extractText() {
        var _this3 = this;
        if (this.startedTextExtraction) {
          return;
        }
        this.startedTextExtraction = true;
        this.pageContents.length = 0;
        var promise = Promise.resolve();
        var _loop = function _loop(i) {
          var extractTextCapability = (0, _pdf.createPromiseCapability)();
          _this3.extractTextPromises[i] = extractTextCapability.promise;
          promise = promise.then(function () {
            return _this3.pdfViewer.getPageTextContent(i).then(function (textContent) {
              var textItems = textContent.items;
              var strBuf = [];
              for (var j = 0, jj = textItems.length; j < jj; j++) {
                strBuf.push(textItems[j].str);
              }
              _this3.pageContents[i] = strBuf.join('');
              extractTextCapability.resolve(i);
            }, function (reason) {
              console.error('Unable to get page ' + (i + 1) + ' text content', reason);
              // Resolve anyway so that searching is not blocked by one page.
              _this3.pageContents[i] = '';
              extractTextCapability.resolve(i);
            });
          });
        };
        for (var i = 0, ii = this.pdfViewer.pagesCount; i < ii; i++) {
          _loop(i);
        }
      }
    }, {
      key: '_updatePage',
      /**
       * Makes the page current in the viewer when it holds the selected
       * match, and asks the page's text layer (if any) to refresh its
       * matches.
       *
       * @param {number} index - Zero-based page index.
       */
      value: function _updatePage(index) {
        if (this.selected.pageIdx === index) {
          this.pdfViewer.currentPageNumber = index + 1;
        }
        var page = this.pdfViewer.getPageView(index);
        if (page.textLayer) {
          page.textLayer.updateMatches();
        }
      }
    }, {
      key: '_nextMatch',
      /**
       * Advances the search by one step: restarts matching when the query is
       * dirty, otherwise moves to the next/previous match on the current
       * offset page or continues with the following pages.
       */
      value: function _nextMatch() {
        var _this4 = this;
        var previous = this.state.findPrevious;
        var currentPageIndex = this.pdfViewer.currentPageNumber - 1;
        var numPages = this.pdfViewer.pagesCount;
        this.active = true;
        if (this.dirtyMatch) {
          // New search: drop all previous results and start at the current page.
          this.dirtyMatch = false;
          this.selected.pageIdx = this.selected.matchIdx = -1;
          this.offset.pageIdx = currentPageIndex;
          this.offset.matchIdx = null;
          this.hadMatch = false;
          this.resumePageIdx = null;
          this.pageMatches = [];
          this.matchCount = 0;
          this.pageMatchesLength = null;
          for (var i = 0; i < numPages; i++) {
            this._updatePage(i);
            // Schedule at most one pending calculation per page.
            if (!(i in this.pendingFindMatches)) {
              this.pendingFindMatches[i] = true;
              this.extractTextPromises[i].then(function (pageIdx) {
                delete _this4.pendingFindMatches[pageIdx];
                _this4._calculateMatch(pageIdx);
              });
            }
          }
        }
        if (this.state.query === '') {
          this._updateUIState(FindState.FOUND);
          return;
        }
        // Still waiting for a page's matches. Compare against null explicitly:
        // page index 0 is a valid pending page but is falsy.
        if (this.resumePageIdx !== null) {
          return;
        }
        var offset = this.offset;
        this.pagesToSearch = numPages;
        if (offset.matchIdx !== null) {
          var numPageMatches = this.pageMatches[offset.pageIdx].length;
          if (!previous && offset.matchIdx + 1 < numPageMatches || previous && offset.matchIdx > 0) {
            // Another match is available on the same page.
            this.hadMatch = true;
            offset.matchIdx = previous ? offset.matchIdx - 1 : offset.matchIdx + 1;
            this._updateMatch(true);
            return;
          }
          this._advanceOffsetPage(previous);
        }
        this._nextPageMatch();
      }
    }, {
      key: '_matchesReady',
      /**
       * Handles the match list of the current offset page.
       *
       * @param {Array<number>} matches - Matches of the offset page.
       * @returns {boolean} `true` when the search step is finished (a match
       *   was selected, or every page was searched without success), `false`
       *   when the caller should continue with the next page.
       */
      value: function _matchesReady(matches) {
        var offset = this.offset;
        var numMatches = matches.length;
        var previous = this.state.findPrevious;
        if (numMatches) {
          this.hadMatch = true;
          offset.matchIdx = previous ? numMatches - 1 : 0;
          this._updateMatch(true);
          return true;
        }
        this._advanceOffsetPage(previous);
        if (offset.wrapped) {
          offset.matchIdx = null;
          if (this.pagesToSearch < 0) {
            // Every page has been visited without finding anything.
            this._updateMatch(false);
            return true;
          }
        }
        return false;
      }
    }, {
      key: '_nextPageMatch',
      /**
       * Walks the pages starting at the current offset until a step is
       * finished, or until a page without computed matches is reached; in
       * that case the page is recorded in `resumePageIdx` so that
       * `_calculateMatch` can resume the search later.
       */
      value: function _nextPageMatch() {
        if (this.resumePageIdx !== null) {
          console.error('There can only be one pending page.');
        }
        var matches = null;
        do {
          var pageIdx = this.offset.pageIdx;
          matches = this.pageMatches[pageIdx];
          if (!matches) {
            this.resumePageIdx = pageIdx;
            break;
          }
        } while (!this._matchesReady(matches));
      }
    }, {
      key: '_advanceOffsetPage',
      /**
       * Moves the search offset to the neighbouring page, wrapping around at
       * either end of the document, and decrements `pagesToSearch`.
       *
       * @param {boolean} previous - Search backwards when truthy.
       */
      value: function _advanceOffsetPage(previous) {
        var offset = this.offset;
        var numPages = this.extractTextPromises.length;
        offset.pageIdx = previous ? offset.pageIdx - 1 : offset.pageIdx + 1;
        offset.matchIdx = null;
        this.pagesToSearch--;
        if (offset.pageIdx >= numPages || offset.pageIdx < 0) {
          offset.pageIdx = previous ? numPages - 1 : 0;
          offset.wrapped = true;
        }
      }
    }, {
      key: '_updateMatch',
      /**
       * Commits the outcome of a search step: selects the offset match when
       * one was found, reports the resulting state, and refreshes the
       * affected pages.
       *
       * @param {boolean} [found=false] - Whether the offset points at a match.
       */
      value: function _updateMatch(found) {
        var state = FindState.NOT_FOUND;
        var wrapped = this.offset.wrapped;
        this.offset.wrapped = false;
        if (found) {
          var previousPage = this.selected.pageIdx;
          this.selected.pageIdx = this.offset.pageIdx;
          this.selected.matchIdx = this.offset.matchIdx;
          state = wrapped ? FindState.WRAPPED : FindState.FOUND;
          // Refresh the page that held the previous selection.
          if (previousPage !== -1 && previousPage !== this.selected.pageIdx) {
            this._updatePage(previousPage);
          }
        }
        this._updateUIState(state, this.state.findPrevious);
        if (this.selected.pageIdx !== -1) {
          this._updatePage(this.selected.pageIdx);
        }
      }
    }, {
      key: '_updateUIResultsCount',
      /**
       * Passes the current total match count to `onUpdateResultsCount`, if set.
       */
      value: function _updateUIResultsCount() {
        if (this.onUpdateResultsCount) {
          this.onUpdateResultsCount(this.matchCount);
        }
      }
    }, {
      key: '_updateUIState',
      /**
       * Passes a state change to `onUpdateState`, if set.
       *
       * @param {number} state - One of the `FindState` values.
       * @param {boolean} [previous] - Whether the search direction is backwards.
       */
      value: function _updateUIState(state, previous) {
        if (this.onUpdateState) {
          this.onUpdateState(state, previous, this.matchCount);
        }
      }
    }]);

    return PDFFindController;
  }();
  // Public API of this module.
  exports.FindState = FindState;
  exports.PDFFindController = PDFFindController;