struct_tree.js 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * Javascript code in this page
  4. *
  5. * Copyright 2021 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * Javascript code in this page
  21. */
  22. "use strict";
  23. Object.defineProperty(exports, "__esModule", {
  24. value: true
  25. });
  26. exports.StructTreeRoot = exports.StructTreePage = void 0;
  27. var _primitives = require("./primitives.js");
  28. var _util = require("../shared/util.js");
  29. var _name_number_tree = require("./name_number_tree.js");
  /**
   * Upper bound on nesting depth. It is checked while following parent links
   * in `StructTreePage.addNode` and while serializing nodes, so that a cyclic
   * or extremely deep tree cannot recurse without limit.
   */
  const MAX_DEPTH = 40;

  /**
   * Kinds of kid a structure element can hold. The object is frozen so that
   * the shared constants cannot be overwritten by accident at runtime.
   */
  const StructElementType = Object.freeze({
    PAGE_CONTENT: "PAGE_CONTENT",
    STREAM_CONTENT: "STREAM_CONTENT",
    OBJECT: "OBJECT",
    ELEMENT: "ELEMENT"
  });
  /**
   * Holds the structure tree root dictionary together with the role map that
   * is read from its "RoleMap" entry.
   */
  class StructTreeRoot {
    /**
     * @param {Object} rootDict - Dictionary kept as `this.dict`; it must
     *   provide `get(key)`.
     */
    constructor(rootDict) {
      this.dict = rootDict;
      this.roleMap = new Map();
    }

    /** Performs the one-time setup, which currently only fills `roleMap`. */
    init() {
      this.readRoleMap();
    }

    /**
     * Copies every name-valued entry of the "RoleMap" dictionary into
     * `this.roleMap` (key -> name string). Does nothing when "RoleMap" is not
     * a dictionary; entries whose value is not a name are skipped.
     */
    readRoleMap() {
      const roleMapDict = this.dict.get("RoleMap");
      const { isDict, isName } = _primitives;
      if (isDict(roleMapDict)) {
        roleMapDict.forEach((key, value) => {
          if (isName(value)) {
            this.roleMap.set(key, value.name);
          }
        });
      }
    }
  }
  58. exports.StructTreeRoot = StructTreeRoot;
  /**
   * A structure element dictionary together with its parsed kids.
   *
   * The kids are parsed eagerly in the constructor. Content kids that belong
   * to a page other than `tree.pageDict` are dropped.
   */
  class StructElementNode {
    /**
     * @param {Object} tree - Owner of this node; `tree.pageDict.objId` and
     *   `tree.root.roleMap` are read from it.
     * @param {Object} dict - The structure element dictionary; it must provide
     *   `get`, `getRaw` and `xref.fetch`.
     */
    constructor(tree, dict) {
      this.tree = tree;
      this.dict = dict;
      this.kids = [];
      this.parseKids();
    }

    /**
     * The structure type taken from the "S" entry, translated through the
     * root's role map when it has a mapping. An empty string is used when "S"
     * is not a name.
     *
     * @returns {string}
     */
    get role() {
      const nameObj = this.dict.get("S");
      const name = (0, _primitives.isName)(nameObj) ? nameObj.name : "";
      const { roleMap } = this.tree.root;
      return roleMap.has(name) ? roleMap.get(name) : name;
    }

    /**
     * Fills `this.kids` from the "K" entry, which may hold either a single
     * kid or an array of kids.
     */
    parseKids() {
      // The element's own "Pg" reference is the default page for its kids.
      let pageObjId = null;
      const objRef = this.dict.getRaw("Pg");
      if ((0, _primitives.isRef)(objRef)) {
        pageObjId = objRef.toString();
      }
      const kids = this.dict.get("K");
      const kidList = Array.isArray(kids) ? kids : [kids];
      for (const kid of kidList) {
        const element = this.parseKid(pageObjId, kid);
        if (element) {
          this.kids.push(element);
        }
      }
    }

    /**
     * Converts one raw kid into a `StructElement`.
     *
     * @param {string|null} pageObjId - Page id inherited from this element; a
     *   "Pg" reference on the kid's own dictionary overrides it.
     * @param {*} kid - An integer (marked-content id), a reference, or a
     *   dictionary. Anything else is ignored.
     * @returns {StructElement|null} `null` when the kid is not usable or when
     *   it is content that belongs to another page.
     */
    parseKid(pageObjId, kid) {
      const { isDict, isName, isRef } = _primitives;
      const currentPageId = this.tree.pageDict.objId;

      // A bare integer is a marked-content id on the inherited page.
      if (Number.isInteger(kid)) {
        if (currentPageId !== pageObjId) {
          return null;
        }
        return new StructElement({
          type: StructElementType.PAGE_CONTENT,
          mcid: kid,
          pageObjId
        });
      }

      let kidDict = null;
      if (isRef(kid)) {
        kidDict = this.dict.xref.fetch(kid);
      } else if (isDict(kid)) {
        kidDict = kid;
      }
      // A reference may resolve to something that is not a dictionary in a
      // malformed document; skip it rather than failing on `getRaw` below.
      if (!isDict(kidDict)) {
        return null;
      }

      const pageRef = kidDict.getRaw("Pg");
      if (isRef(pageRef)) {
        pageObjId = pageRef.toString();
      }

      const typeObj = kidDict.get("Type");
      const type = isName(typeObj) ? typeObj.name : null;

      if (type === "MCR") {
        if (currentPageId !== pageObjId) {
          return null;
        }
        const streamRef = kidDict.getRaw("Stm");
        return new StructElement({
          type: StructElementType.STREAM_CONTENT,
          refObjId: isRef(streamRef) ? streamRef.toString() : null,
          pageObjId,
          mcid: kidDict.get("MCID")
        });
      }

      if (type === "OBJR") {
        if (currentPageId !== pageObjId) {
          return null;
        }
        const objectRef = kidDict.getRaw("Obj");
        return new StructElement({
          type: StructElementType.OBJECT,
          refObjId: isRef(objectRef) ? objectRef.toString() : null,
          pageObjId
        });
      }

      // Any other dictionary is treated as a nested structure element.
      return new StructElement({
        type: StructElementType.ELEMENT,
        dict: kidDict
      });
    }
  }
  /**
   * Plain record describing one kid of a structure element.
   *
   * `parentNode` always starts as `null`; `StructTreePage.addNode` later
   * assigns the matching node to it for kids of type ELEMENT.
   */
  class StructElement {
    /**
     * @param {Object} params
     * @param {string} params.type - One of the `StructElementType` values.
     * @param {Object|null} [params.dict] - Dictionary of a nested element.
     * @param {number|null} [params.mcid] - Marked-content id.
     * @param {string|null} [params.pageObjId] - Id of the owning page.
     * @param {string|null} [params.refObjId] - Id of the referenced object.
     */
    constructor({
      type,
      dict = null,
      mcid = null,
      pageObjId = null,
      refObjId = null
    }) {
      Object.assign(this, {
        type,
        dict,
        mcid,
        pageObjId,
        refObjId,
        parentNode: null
      });
    }
  }
  /**
   * Builds the part of the structure tree that is relevant for one page and
   * exposes it in a serializable form.
   */
  class StructTreePage {
    /**
     * @param {StructTreeRoot|null} structTreeRoot - Tree root; when it is
     *   falsy, `parse` does nothing.
     * @param {Object} pageDict - Page dictionary; its "StructParents" entry
     *   and `objId` are read.
     */
    constructor(structTreeRoot, pageDict) {
      this.root = structTreeRoot;
      this.rootDict = structTreeRoot ? structTreeRoot.dict : null;
      this.pageDict = pageDict;
      this.nodes = [];
    }

    /**
     * Looks up this page's "StructParents" entry in the root's "ParentTree"
     * number tree and adds a node for every reference found there. Returns
     * without doing anything when any of the required entries is missing or
     * has an unexpected type.
     */
    parse() {
      if (!this.root || !this.rootDict) {
        return;
      }
      const parentTree = this.rootDict.get("ParentTree");
      if (!parentTree) {
        return;
      }
      const id = this.pageDict.get("StructParents");
      if (!Number.isInteger(id)) {
        return;
      }
      const numberTree = new _name_number_tree.NumberTree(parentTree, this.rootDict.xref);
      const parentArray = numberTree.get(id);
      if (!Array.isArray(parentArray)) {
        return;
      }
      const map = new Map();
      for (const ref of parentArray) {
        if ((0, _primitives.isRef)(ref)) {
          this.addNode(this.rootDict.xref.fetch(ref), map);
        }
      }
    }

    /**
     * Creates the node for `dict` and, recursively, for its ancestors, linking
     * each node to the matching kid of its parent.
     *
     * @param {Object} dict - Structure element dictionary.
     * @param {Map} map - Cache from dictionary to node, shared by one `parse`.
     * @param {number} [level=0] - Current recursion depth.
     * @returns {StructElementNode|null} `null` when the depth limit is
     *   exceeded or when `dict` is not a dictionary.
     */
    addNode(dict, map, level = 0) {
      if (level > MAX_DEPTH) {
        (0, _util.warn)("StructTree MAX_DEPTH reached.");
        return null;
      }
      // Entries of a malformed parent tree may resolve to non-dictionaries;
      // ignore them instead of failing on the `get` calls below.
      if (!(0, _primitives.isDict)(dict)) {
        return null;
      }
      if (map.has(dict)) {
        return map.get(dict);
      }
      const element = new StructElementNode(this, dict);
      map.set(dict, element);

      const parent = dict.get("P");
      // A missing or non-dictionary parent is handled like a direct child of
      // the tree root.
      const isTopLevel =
        !(0, _primitives.isDict)(parent) ||
        (0, _primitives.isName)(parent.get("Type"), "StructTreeRoot");
      if (isTopLevel) {
        if (!this.addTopLevelNode(dict, element)) {
          map.delete(dict);
        }
        return element;
      }

      const parentNode = this.addNode(parent, map, level + 1);
      if (!parentNode) {
        return element;
      }
      let save = false;
      for (const kid of parentNode.kids) {
        if (kid.type === StructElementType.ELEMENT && kid.dict === dict) {
          kid.parentNode = element;
          save = true;
        }
      }
      // Forget nodes that their parent does not list as a kid.
      if (!save) {
        map.delete(dict);
      }
      return element;
    }

    /**
     * Stores `element` in `this.nodes` at the position(s) where the root's "K"
     * entry refers to `dict`.
     *
     * @param {Object} dict - Dictionary of the candidate top-level element.
     * @param {StructElementNode} element - Node created for `dict`.
     * @returns {boolean} `false` when "K" is missing or does not refer to
     *   `dict`; `true` otherwise, including when "K" is neither a dictionary
     *   nor an array.
     */
    addTopLevelNode(dict, element) {
      const obj = this.rootDict.get("K");
      if (!obj) {
        return false;
      }
      if ((0, _primitives.isDict)(obj)) {
        if (obj.objId !== dict.objId) {
          return false;
        }
        this.nodes[0] = element;
        return true;
      }
      if (!Array.isArray(obj)) {
        return true;
      }
      let save = false;
      obj.forEach((kidRef, index) => {
        if (kidRef && kidRef.toString() === dict.objId) {
          this.nodes[index] = element;
          save = true;
        }
      });
      return save;
    }

    /**
     * Plain-object representation of the parsed tree: a "Root" object whose
     * `children` hold element objects (`role`, `children`, optional `alt`)
     * and leaf objects (`type` "content" or "object" with an `id`).
     *
     * @returns {Object}
     */
    get serializable() {
      function nodeToSerializable(node, parent, level = 0) {
        if (level > MAX_DEPTH) {
          (0, _util.warn)("StructTree too deep to be fully serialized.");
          return;
        }
        const obj = Object.create(null);
        obj.role = node.role;
        obj.children = [];
        parent.children.push(obj);
        const alt = node.dict.get("Alt");
        if ((0, _util.isString)(alt)) {
          obj.alt = (0, _util.stringToPDFString)(alt);
        }
        for (const kid of node.kids) {
          const kidElement = kid.type === StructElementType.ELEMENT ? kid.parentNode : null;
          if (kidElement) {
            nodeToSerializable(kidElement, obj, level + 1);
            continue;
          }
          switch (kid.type) {
            case StructElementType.PAGE_CONTENT:
            case StructElementType.STREAM_CONTENT:
              obj.children.push({
                type: "content",
                id: `page${kid.pageObjId}_mcid${kid.mcid}`
              });
              break;
            case StructElementType.OBJECT:
              obj.children.push({
                type: "object",
                id: kid.refObjId
              });
              break;
            default:
              // ELEMENT kids without a linked node are left out.
              break;
          }
        }
      }

      const root = Object.create(null);
      root.children = [];
      root.role = "Root";
      for (const child of this.nodes) {
        if (!child) {
          continue;
        }
        nodeToSerializable(child, root);
      }
      return root;
    }
  }
  299. exports.StructTreePage = StructTreePage;