  // pdf_find_controller.js
  /* Copyright 2017 Mozilla Foundation
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  'use strict';

  // Viewer UI helpers; only `scrollIntoView` is used by this module.
  var uiUtils = require('./ui_utils.js');
  var scrollIntoView = uiUtils.scrollIntoView;
  /**
   * Search states reported to the `onUpdateState` callback of the find
   * controller.
   */
  var FindStates = {
    // A match was selected without wrapping around the document.
    FIND_FOUND: 0,
    // Every page was searched and nothing matched.
    FIND_NOTFOUND: 1,
    // A match was selected after the search wrapped past the first/last page.
    FIND_WRAPPED: 2,
    // A find command was received and its result is not known yet.
    FIND_PENDING: 3
  };
  // Offsets handed to `scrollIntoView` (as `spot.top` / `spot.left`) when the
  // selected match is scrolled into view.
  // NOTE(review): presumably CSS pixels relative to the match element; confirm
  // against `scrollIntoView` in ui_utils.js.
  var FIND_SCROLL_OFFSET_TOP = -50;
  var FIND_SCROLL_OFFSET_LEFT = -400;
  /**
   * Replacement table applied to both the page text and the query before
   * matching: typographic quotation marks become their ASCII counterparts and
   * the vulgar-fraction characters become "n/m" strings.
   *
   * Every key is a single UTF-16 code unit; the find controller joins the keys
   * into one regular-expression character class.
   */
  var CHARACTERS_TO_NORMALIZE = {
    '\u2018': '\'', // Left single quotation mark
    '\u2019': '\'', // Right single quotation mark
    '\u201A': '\'', // Single low-9 quotation mark
    '\u201B': '\'', // Single high-reversed-9 quotation mark
    '\u201C': '"', // Left double quotation mark
    '\u201D': '"', // Right double quotation mark
    '\u201E': '"', // Double low-9 quotation mark
    '\u201F': '"', // Double high-reversed-9 quotation mark
    '\u00BC': '1/4', // Vulgar fraction one quarter
    '\u00BD': '1/2', // Vulgar fraction one half
    '\u00BE': '3/4' // Vulgar fraction three quarters
  };
  /**
   * Find ("search in document") controller for a PDF viewer.
   *
   * The controller extracts the text of every page through
   * `pdfViewer.getPageTextContent`, computes the match offsets of the current
   * query per page, keeps track of the selected match, and reports progress
   * through the optional `onUpdateState` / `onUpdateResultsCount` callbacks.
   */
  var PDFFindController = function PDFFindControllerClosure() {
    /**
     * @param {Object} options
     * @param {Object} [options.pdfViewer] - Viewer object. This controller
     *   reads `pagesCount` and `currentPageNumber`, writes
     *   `currentPageNumber`, and calls `getPageTextContent(pageIndex)` and
     *   `getPageView(pageIndex)` on it.
     */
    function PDFFindController(options) {
      this.pdfViewer = options.pdfViewer || null;
      this.onUpdateResultsCount = null;
      this.onUpdateState = null;
      this.reset();

      // One character class that matches every key of the replacement table.
      var replace = Object.keys(CHARACTERS_TO_NORMALIZE).join('');
      this.normalizationRegex = new RegExp('[' + replace + ']', 'g');
    }

    PDFFindController.prototype = {
      /**
       * Drops all search state (extracted text, matches, selection, pending
       * work) and creates a fresh `firstPagePromise`; `executeCommand` waits on
       * that promise, which is settled by calling `resolveFirstPage()`.
       */
      reset: function PDFFindController_reset() {
        // A debounced 'find' may still be scheduled. Letting it fire after
        // `state` is cleared below would make nextMatch() dereference null.
        clearTimeout(this.findTimeout);

        this.startedTextExtraction = false;
        this.extractTextPromises = [];
        this.pendingFindMatches = Object.create(null);
        this.active = false;
        this.pageContents = [];
        this.pageMatches = [];
        this.pageMatchesLength = null;
        this.matchCount = 0;
        this.hadMatch = false;
        // Currently highlighted match.
        this.selected = {
          pageIdx: -1,
          matchIdx: -1
        };
        // Position from which the next search step continues.
        this.offset = {
          pageIdx: null,
          matchIdx: null,
          wrapped: false
        };
        this.pagesToSearch = null;
        this.resumePageIdx = null;
        this.state = null;
        this.dirtyMatch = false;
        this.findTimeout = null;
        this.firstPagePromise = new Promise(function (resolve) {
          this.resolveFirstPage = resolve;
        }.bind(this));
      },

      /**
       * Replaces every character listed in CHARACTERS_TO_NORMALIZE with its
       * mapped string.
       *
       * @param {string} text
       * @returns {string}
       */
      normalize: function PDFFindController_normalize(text) {
        return text.replace(this.normalizationRegex, function (ch) {
          return CHARACTERS_TO_NORMALIZE[ch];
        });
      },

      /**
       * Sorts the raw word matches of one page and copies the ones that are
       * not covered by another match into the two output arrays.
       *
       * @param {Array<{match: number, matchLength: number, skipped: boolean}>}
       *   matchesWithLength - Raw matches; sorted in place, and `skipped` is
       *   set on the entries that are dropped.
       * @param {Array<number>} matches - Receives the start offsets.
       * @param {Array<number>} matchesLength - Receives the matching lengths.
       */
      _prepareMatches: function PDFFindController_prepareMatches(matchesWithLength, matches, matchesLength) {
        // True (and marks the element as skipped) when the element at
        // `currentIndex` is redundant: either the next element starts at the
        // same offset (after sorting it is at least as long), or an earlier,
        // non-skipped element fully covers it.
        function isSubTerm(matchesWithLength, currentIndex) {
          var currentElem = matchesWithLength[currentIndex];
          if (currentIndex < matchesWithLength.length - 1) {
            var nextElem = matchesWithLength[currentIndex + 1];
            if (currentElem.match === nextElem.match) {
              currentElem.skipped = true;
              return true;
            }
          }
          for (var i = currentIndex - 1; i >= 0; i--) {
            var prevElem = matchesWithLength[i];
            if (prevElem.skipped) {
              continue;
            }
            var prevEnd = prevElem.match + prevElem.matchLength;
            if (prevEnd < currentElem.match) {
              // The closest kept match ends before this one starts.
              break;
            }
            if (prevEnd >= currentElem.match + currentElem.matchLength) {
              currentElem.skipped = true;
              return true;
            }
          }
          return false;
        }

        // Order by start offset, shorter matches first on ties.
        matchesWithLength.sort(function (a, b) {
          return a.match === b.match ? a.matchLength - b.matchLength : a.match - b.match;
        });
        for (var i = 0, len = matchesWithLength.length; i < len; i++) {
          if (isSubTerm(matchesWithLength, i)) {
            continue;
          }
          matches.push(matchesWithLength[i].match);
          matchesLength.push(matchesWithLength[i].matchLength);
        }
      },

      /**
       * Stores the start offsets of all non-overlapping occurrences of
       * `query` in `pageContent` into `pageMatches[pageIndex]`.
       *
       * @param {string} query - Non-empty search phrase.
       * @param {number} pageIndex
       * @param {string} pageContent
       */
      calcFindPhraseMatch: function PDFFindController_calcFindPhraseMatch(query, pageIndex, pageContent) {
        var matches = [];
        var queryLen = query.length;
        // Starting at -queryLen makes the first lookup begin at offset 0.
        var matchIdx = -queryLen;
        while (true) {
          matchIdx = pageContent.indexOf(query, matchIdx + queryLen);
          if (matchIdx === -1) {
            break;
          }
          matches.push(matchIdx);
        }
        this.pageMatches[pageIndex] = matches;
      },

      /**
       * Searches `pageContent` for every whitespace-separated word of `query`
       * and stores the merged result in `pageMatches[pageIndex]` (offsets) and
       * `pageMatchesLength[pageIndex]` (lengths).
       *
       * @param {string} query
       * @param {number} pageIndex
       * @param {string} pageContent
       */
      calcFindWordMatch: function PDFFindController_calcFindWordMatch(query, pageIndex, pageContent) {
        var matchesWithLength = [];
        // `match` returns null for a whitespace-only query; treat that as
        // "no words" so the page simply ends up with zero matches.
        var queryArray = query.match(/\S+/g) || [];
        for (var i = 0, len = queryArray.length; i < len; i++) {
          var subquery = queryArray[i];
          var subqueryLen = subquery.length;
          var matchIdx = -subqueryLen;
          while (true) {
            matchIdx = pageContent.indexOf(subquery, matchIdx + subqueryLen);
            if (matchIdx === -1) {
              break;
            }
            matchesWithLength.push({
              match: matchIdx,
              matchLength: subqueryLen,
              skipped: false
            });
          }
        }
        if (!this.pageMatchesLength) {
          this.pageMatchesLength = [];
        }
        this.pageMatchesLength[pageIndex] = [];
        this.pageMatches[pageIndex] = [];
        this._prepareMatches(matchesWithLength, this.pageMatches[pageIndex], this.pageMatchesLength[pageIndex]);
      },

      /**
       * Computes the matches of the current query on one page, refreshes that
       * page, resumes a search that was waiting for this page, and updates the
       * total match count. Does nothing for an empty query.
       *
       * @param {number} pageIndex
       */
      calcFindMatch: function PDFFindController_calcFindMatch(pageIndex) {
        var pageContent = this.normalize(this.pageContents[pageIndex]);
        var query = this.normalize(this.state.query);
        var caseSensitive = this.state.caseSensitive;
        var phraseSearch = this.state.phraseSearch;

        if (query.length === 0) {
          return;
        }
        if (!caseSensitive) {
          pageContent = pageContent.toLowerCase();
          query = query.toLowerCase();
        }
        if (phraseSearch) {
          this.calcFindPhraseMatch(query, pageIndex, pageContent);
        } else {
          this.calcFindWordMatch(query, pageIndex, pageContent);
        }

        this.updatePage(pageIndex);
        if (this.resumePageIdx === pageIndex) {
          // nextPageMatch() stopped at this page because it had no results
          // yet; continue the search now that they exist.
          this.resumePageIdx = null;
          this.nextPageMatch();
        }

        var pageMatchCount = this.pageMatches[pageIndex].length;
        if (pageMatchCount > 0) {
          this.matchCount += pageMatchCount;
          this.updateUIResultsCount();
        }
      },

      /**
       * Starts extracting the text of all pages, one page after the other.
       * `extractTextPromises[i]` resolves with `i` once the text of page `i`
       * is stored in `pageContents[i]`. Calling this again before `reset()` is
       * a no-op.
       */
      extractText: function PDFFindController_extractText() {
        if (this.startedTextExtraction) {
          return;
        }
        this.startedTextExtraction = true;

        // Capture everything the asynchronous chain needs, so that a chain
        // which outlives a reset() keeps writing to its own (stale) arrays
        // instead of the ones created for the next search.
        var pdfViewer = this.pdfViewer;
        var numPages = pdfViewer.pagesCount;
        var pageContents = this.pageContents = [];
        var extractTextPromises = this.extractTextPromises;
        var extractTextPromisesResolves = [];

        for (var i = 0; i < numPages; i++) {
          extractTextPromises.push(new Promise(function (resolve) {
            extractTextPromisesResolves.push(resolve);
          }));
        }

        // Stores the text of one page, signals it, and moves on to the next.
        function finishPage(pageIndex, text) {
          pageContents[pageIndex] = text;
          extractTextPromisesResolves[pageIndex](pageIndex);
          if (pageIndex + 1 < numPages) {
            extractPageText(pageIndex + 1);
          }
        }

        function extractPageText(pageIndex) {
          pdfViewer.getPageTextContent(pageIndex).then(function textContentResolved(textContent) {
            var textItems = textContent.items;
            var strBuf = [];
            for (var j = 0, len = textItems.length; j < len; j++) {
              strBuf.push(textItems[j].str);
            }
            finishPage(pageIndex, strBuf.join(''));
          }, function textContentRejected(reason) {
            // A page whose text cannot be read is searched as empty text;
            // otherwise this page and all following ones would stay pending
            // forever.
            console.error('Unable to get text content for page ' + (pageIndex + 1), reason);
            finishPage(pageIndex, '');
          });
        }

        if (numPages > 0) {
          extractPageText(0);
        }
      },

      /**
       * Entry point for find commands.
       *
       * @param {string} cmd - Any command other than 'findagain' restarts the
       *   search from scratch; 'find' is additionally debounced by 250 ms.
       * @param {Object} state - Search parameters. This controller reads
       *   `query`, `caseSensitive`, `phraseSearch` and `findPrevious`.
       */
      executeCommand: function PDFFindController_executeCommand(cmd, state) {
        if (this.state === null || cmd !== 'findagain') {
          this.dirtyMatch = true;
        }
        this.state = state;
        this.updateUIState(FindStates.FIND_PENDING);

        this.firstPagePromise.then(function () {
          this.extractText();
          clearTimeout(this.findTimeout);
          if (cmd === 'find') {
            // Only the last 'find' within 250 ms triggers a search.
            this.findTimeout = setTimeout(this.nextMatch.bind(this), 250);
          } else {
            this.nextMatch();
          }
        }.bind(this));
      },

      /**
       * Makes the viewer show page `index` if it holds the selected match and
       * asks the page's text layer (when present) to refresh its matches.
       *
       * @param {number} index - Zero-based page index.
       */
      updatePage: function PDFFindController_updatePage(index) {
        if (this.selected.pageIdx === index) {
          this.pdfViewer.currentPageNumber = index + 1;
        }
        var page = this.pdfViewer.getPageView(index);
        if (page.textLayer) {
          page.textLayer.updateMatches();
        }
      },

      /**
       * Advances the selection to the next (or previous, when
       * `state.findPrevious` is set) match. When the query changed
       * (`dirtyMatch`), all results are discarded and recomputed first.
       */
      nextMatch: function PDFFindController_nextMatch() {
        var previous = this.state.findPrevious;
        var currentPageIndex = this.pdfViewer.currentPageNumber - 1;
        var numPages = this.pdfViewer.pagesCount;

        this.active = true;

        if (this.dirtyMatch) {
          // Start over from the page that is currently displayed.
          this.dirtyMatch = false;
          this.selected.pageIdx = this.selected.matchIdx = -1;
          this.offset.pageIdx = currentPageIndex;
          this.offset.matchIdx = null;
          this.hadMatch = false;
          this.resumePageIdx = null;
          this.pageMatches = [];
          this.matchCount = 0;
          this.pageMatchesLength = null;

          var self = this;
          for (var i = 0; i < numPages; i++) {
            // Clear the old matches from the page.
            this.updatePage(i);
            // Schedule the match computation at most once per page; a
            // computation that is still pending will use the latest state.
            if (!(i in this.pendingFindMatches)) {
              this.pendingFindMatches[i] = true;
              this.extractTextPromises[i].then(function (pageIdx) {
                delete self.pendingFindMatches[pageIdx];
                self.calcFindMatch(pageIdx);
              });
            }
          }
        }

        if (this.state.query === '') {
          this.updateUIState(FindStates.FIND_FOUND);
          return;
        }

        // Still waiting for the results of a page: calcFindMatch() resumes the
        // search. Compare against null because page index 0 is a valid value.
        if (this.resumePageIdx !== null) {
          return;
        }

        var offset = this.offset;
        // Number of pages that may be visited before giving up.
        this.pagesToSearch = numPages;

        if (offset.matchIdx !== null) {
          // A match is already selected; try to stay on the same page.
          var numPageMatches = this.pageMatches[offset.pageIdx].length;
          if (!previous && offset.matchIdx + 1 < numPageMatches || previous && offset.matchIdx > 0) {
            this.hadMatch = true;
            offset.matchIdx = previous ? offset.matchIdx - 1 : offset.matchIdx + 1;
            this.updateMatch(true);
            return;
          }
          // No further match on this page; continue on the adjacent page.
          this.advanceOffsetPage(previous);
        }
        this.nextPageMatch();
      },

      /**
       * Handles the results of the page at `offset.pageIdx`.
       *
       * @param {Array<number>} matches - Match offsets of that page.
       * @returns {boolean} `true` when the search step is finished (a match
       *   was selected, or all pages were visited without a hit), `false`
       *   when the caller should go on with the next page.
       */
      matchesReady: function PDFFindController_matchesReady(matches) {
        var offset = this.offset;
        var numMatches = matches.length;
        var previous = this.state.findPrevious;

        if (numMatches) {
          // Select the first match, or the last one when searching backwards.
          this.hadMatch = true;
          offset.matchIdx = previous ? numMatches - 1 : 0;
          this.updateMatch(true);
          return true;
        }

        this.advanceOffsetPage(previous);
        if (offset.wrapped) {
          offset.matchIdx = null;
          if (this.pagesToSearch < 0) {
            // Wrapped around and exhausted the page budget: nothing found.
            this.updateMatch(false);
            return true;
          }
        }
        return false;
      },

      /**
       * Scrolls `elements[beginIdx]` into view when the given match is the
       * selected one.
       *
       * @param {number} pageIndex
       * @param {number} index - Index of the match on that page.
       * @param {Array} elements - Elements of the page's text layer.
       * @param {number} beginIdx - Index into `elements` to scroll to.
       */
      updateMatchPosition: function PDFFindController_updateMatchPosition(pageIndex, index, elements, beginIdx) {
        if (this.selected.matchIdx === index && this.selected.pageIdx === pageIndex) {
          var spot = {
            top: FIND_SCROLL_OFFSET_TOP,
            left: FIND_SCROLL_OFFSET_LEFT
          };
          scrollIntoView(elements[beginIdx], spot, true);
        }
      },

      /**
       * Walks from `offset.pageIdx` through the pages until `matchesReady`
       * reports completion, or until a page without computed results is
       * reached; that page is then recorded in `resumePageIdx`.
       */
      nextPageMatch: function PDFFindController_nextPageMatch() {
        if (this.resumePageIdx !== null) {
          console.error('There can only be one pending page.');
        }
        do {
          var pageIdx = this.offset.pageIdx;
          var matches = this.pageMatches[pageIdx];
          if (!matches) {
            // Results for this page are not available yet.
            this.resumePageIdx = pageIdx;
            break;
          }
        } while (!this.matchesReady(matches));
      },

      /**
       * Moves `offset` to the adjacent page, wrapping around at either end of
       * the document, and decrements the remaining page budget.
       *
       * @param {boolean} previous - Move backwards instead of forwards.
       */
      advanceOffsetPage: function PDFFindController_advanceOffsetPage(previous) {
        var offset = this.offset;
        var numPages = this.extractTextPromises.length;

        offset.pageIdx = previous ? offset.pageIdx - 1 : offset.pageIdx + 1;
        offset.matchIdx = null;
        this.pagesToSearch--;

        if (offset.pageIdx >= numPages || offset.pageIdx < 0) {
          offset.pageIdx = previous ? numPages - 1 : 0;
          offset.wrapped = true;
        }
      },

      /**
       * Commits the outcome of a search step: updates the selection, reports
       * the new state, and refreshes the affected pages.
       *
       * @param {boolean} found - Whether `offset` points at a match.
       */
      updateMatch: function PDFFindController_updateMatch(found) {
        var state = FindStates.FIND_NOTFOUND;
        var wrapped = this.offset.wrapped;
        this.offset.wrapped = false;

        if (found) {
          var previousPage = this.selected.pageIdx;
          this.selected.pageIdx = this.offset.pageIdx;
          this.selected.matchIdx = this.offset.matchIdx;
          state = wrapped ? FindStates.FIND_WRAPPED : FindStates.FIND_FOUND;
          // Refresh the page that held the previously selected match.
          if (previousPage !== -1 && previousPage !== this.selected.pageIdx) {
            this.updatePage(previousPage);
          }
        }

        this.updateUIState(state, this.state.findPrevious);
        if (this.selected.pageIdx !== -1) {
          this.updatePage(this.selected.pageIdx);
        }
      },

      /**
       * Reports the total match count through `onUpdateResultsCount`, if set.
       */
      updateUIResultsCount: function PDFFindController_updateUIResultsCount() {
        if (this.onUpdateResultsCount) {
          this.onUpdateResultsCount(this.matchCount);
        }
      },

      /**
       * Reports `(state, previous, matchCount)` through `onUpdateState`, if
       * set.
       *
       * @param {number} state - One of the FindStates values.
       * @param {boolean} [previous] - Whether the search ran backwards.
       */
      updateUIState: function PDFFindController_updateUIState(state, previous) {
        if (this.onUpdateState) {
          this.onUpdateState(state, previous, this.matchCount);
        }
      }
    };

    return PDFFindController;
  }();
  // Public API of this module.
  exports.FindStates = FindStates;
  exports.PDFFindController = PDFFindController;