xml_parser.js 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * JavaScript code in this page
  4. *
  5. * Copyright 2022 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * JavaScript code in this page
  21. */
  22. "use strict";
  23. Object.defineProperty(exports, "__esModule", {
  24. value: true
  25. });
  26. exports.XMLParserErrorCode = exports.XMLParserBase = exports.SimpleXMLParser = exports.SimpleDOMNode = void 0;
  27. var _core_utils = require("./core_utils.js");
  /**
   * Status codes reported through `onError` by the XML parser.
   *
   * `NoError` is 0; every failure code is a distinct negative integer.
   * The table is frozen so that a consumer of the exported object cannot
   * accidentally reassign or add codes.
   */
  const XMLParserErrorCode = Object.freeze({
    NoError: 0,
    EndOfDocument: -1,
    UnterminatedCdat: -2,
    UnterminatedXmlDeclaration: -3,
    UnterminatedDoctypeDeclaration: -4,
    UnterminatedComment: -5,
    MalformedElement: -6,
    OutOfMemory: -7,
    UnterminatedAttributeValue: -8,
    UnterminatedElement: -9,
    ElementNeverBegun: -10
  });
  41. exports.XMLParserErrorCode = XMLParserErrorCode;
  /**
   * Tells whether the character of `s` at position `index` is one of the
   * four whitespace characters recognised here: space, line feed,
   * carriage return or tab.
   *
   * @param {string} s - Text to inspect.
   * @param {number} index - Position of the character to test.
   * @returns {boolean} `true` for whitespace, `false` otherwise (including
   *   when `index` is outside the string).
   */
  function isWhitespace(s, index) {
    switch (s[index]) {
      case " ":
      case "\n":
      case "\r":
      case "\t":
        return true;
      default:
        return false;
    }
  }
  /**
   * Tells whether `s` is made up solely of whitespace characters, as
   * decided by `isWhitespace`. The empty string counts as whitespace.
   *
   * @param {string} s - Text to inspect.
   * @returns {boolean} `false` as soon as one non-whitespace character is
   *   found, `true` otherwise.
   */
  function isWhitespaceString(s) {
    let remaining = s.length;
    // The check is position-independent, so walk backwards from the end.
    while (remaining > 0) {
      remaining -= 1;
      if (!isWhitespace(s, remaining)) {
        return false;
      }
    }
    return true;
  }
  /**
   * Callback-driven XML tokenizer.
   *
   * `parseXml` walks a string once and reports what it finds through the
   * `on*` hook methods, which are no-ops here and are meant to be
   * overridden by subclasses. Problems are reported through `onError` with
   * a value from `XMLParserErrorCode`; parsing stops after the first error.
   */
  class XMLParserBase {
    /**
     * Replaces `&...;` references in `s`.
     *
     * Numeric references (`&#NN;` decimal, `&#xHH;` hexadecimal) become the
     * matching character, the five predefined names (`lt`, `gt`, `amp`,
     * `quot`, `apos`) become their characters, and any other name is
     * delegated to `onResolveEntity`.
     *
     * A numeric reference that does not denote a valid Unicode code point
     * (not a number, negative, or above 0x10FFFF) is left in the text
     * untouched; `String.fromCodePoint` would otherwise throw a RangeError
     * and abort the whole parse on malformed input.
     *
     * @param {string} s - Raw text or attribute value.
     * @returns {string} The text with references resolved.
     */
    _resolveEntities(s) {
      return s.replace(/&([^;]+);/g, (all, entity) => {
        if (entity.startsWith("#")) {
          const codePoint = entity.startsWith("#x")
            ? parseInt(entity.substring(2), 16)
            : parseInt(entity.substring(1), 10);
          if (
            Number.isInteger(codePoint) &&
            codePoint >= 0 &&
            codePoint <= 0x10ffff
          ) {
            return String.fromCodePoint(codePoint);
          }
          // Invalid numeric reference: keep the original text.
          return all;
        }
        switch (entity) {
          case "lt":
            return "<";
          case "gt":
            return ">";
          case "amp":
            return "&";
          case "quot":
            return '"';
          case "apos":
            return "'";
        }
        return this.onResolveEntity(entity);
      });
    }

    /**
     * Parses an element name and its attributes, starting right after the
     * opening `<`.
     *
     * @param {string} s - The whole document.
     * @param {number} start - Index of the first character of the name.
     * @returns {{name: string, attributes: Array<{name: string, value: string}>, parsed: number} | null}
     *   The name, the attributes (values with entities resolved) and the
     *   number of characters consumed, or `null` when an attribute lacks
     *   `=`, lacks an opening quote, or its closing quote is missing.
     */
    _parseContent(s, start) {
      const attributes = [];
      let pos = start;
      const skipWs = () => {
        while (pos < s.length && isWhitespace(s, pos)) {
          ++pos;
        }
      };

      // The element name runs up to whitespace, ">" or "/".
      while (
        pos < s.length &&
        !isWhitespace(s, pos) &&
        s[pos] !== ">" &&
        s[pos] !== "/"
      ) {
        ++pos;
      }
      const name = s.substring(start, pos);
      skipWs();

      while (
        pos < s.length &&
        s[pos] !== ">" &&
        s[pos] !== "/" &&
        s[pos] !== "?"
      ) {
        skipWs();
        // The attribute name runs up to whitespace or "=".
        const attrNameStart = pos;
        while (pos < s.length && !isWhitespace(s, pos) && s[pos] !== "=") {
          ++pos;
        }
        const attrName = s.substring(attrNameStart, pos);
        skipWs();
        if (s[pos] !== "=") {
          return null;
        }
        ++pos;
        skipWs();
        // The value must be quoted; it ends at the next identical quote.
        const attrEndChar = s[pos];
        if (attrEndChar !== '"' && attrEndChar !== "'") {
          return null;
        }
        const attrEndIndex = s.indexOf(attrEndChar, ++pos);
        if (attrEndIndex < 0) {
          return null;
        }
        const attrValue = s.substring(pos, attrEndIndex);
        attributes.push({
          name: attrName,
          value: this._resolveEntities(attrValue)
        });
        pos = attrEndIndex + 1;
        skipWs();
      }
      return {
        name,
        attributes,
        parsed: pos - start
      };
    }

    /**
     * Parses a processing instruction, starting right after `<?`.
     *
     * @param {string} s - The whole document.
     * @param {number} start - Index of the first character of the target.
     * @returns {{name: string, value: string, parsed: number}} The target
     *   name, the raw text up to (not including) the closing `?>`, and the
     *   number of characters consumed. If `?>` never occurs, the value runs
     *   to the end of `s`; the caller detects that case.
     */
    _parseProcessingInstruction(s, start) {
      let pos = start;
      const skipWs = () => {
        while (pos < s.length && isWhitespace(s, pos)) {
          ++pos;
        }
      };

      while (
        pos < s.length &&
        !isWhitespace(s, pos) &&
        s[pos] !== ">" &&
        s[pos] !== "?" &&
        s[pos] !== "/"
      ) {
        ++pos;
      }
      const name = s.substring(start, pos);
      skipWs();
      const attrStart = pos;
      while (pos < s.length && (s[pos] !== "?" || s[pos + 1] !== ">")) {
        ++pos;
      }
      const value = s.substring(attrStart, pos);
      return {
        name,
        value,
        parsed: pos - start
      };
    }

    /**
     * Tokenizes `s` and invokes the matching hook for each construct: end
     * tags, processing instructions, comments, CDATA sections, DOCTYPE
     * declarations, start/empty tags and text runs (text has its entities
     * resolved first).
     *
     * On the first malformed construct `onError` is called with the
     * relevant `XMLParserErrorCode` value and parsing stops.
     *
     * @param {string} s - The XML document.
     */
    parseXml(s) {
      let i = 0;
      while (i < s.length) {
        const ch = s[i];
        let j = i;
        if (ch === "<") {
          ++j;
          const ch2 = s[j];
          let q;
          switch (ch2) {
            case "/": {
              // End tag: everything up to the next ">" is the name.
              ++j;
              q = s.indexOf(">", j);
              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedElement);
                return;
              }
              this.onEndElement(s.substring(j, q));
              j = q + 1;
              break;
            }
            case "?": {
              ++j;
              const pi = this._parseProcessingInstruction(s, j);
              if (s.substring(j + pi.parsed, j + pi.parsed + 2) !== "?>") {
                this.onError(XMLParserErrorCode.UnterminatedXmlDeclaration);
                return;
              }
              this.onPi(pi.name, pi.value);
              j += pi.parsed + 2;
              break;
            }
            case "!": {
              // Here `j` still points at "!".
              if (s.substring(j + 1, j + 3) === "--") {
                q = s.indexOf("-->", j + 3);
                if (q < 0) {
                  this.onError(XMLParserErrorCode.UnterminatedComment);
                  return;
                }
                this.onComment(s.substring(j + 3, q));
                j = q + 3;
              } else if (s.substring(j + 1, j + 8) === "[CDATA[") {
                q = s.indexOf("]]>", j + 8);
                if (q < 0) {
                  this.onError(XMLParserErrorCode.UnterminatedCdat);
                  return;
                }
                this.onCdata(s.substring(j + 8, q));
                j = q + 3;
              } else if (s.substring(j + 1, j + 8) === "DOCTYPE") {
                const q2 = s.indexOf("[", j + 8);
                let complexDoctype = false;
                q = s.indexOf(">", j + 8);
                if (q < 0) {
                  this.onError(
                    XMLParserErrorCode.UnterminatedDoctypeDeclaration
                  );
                  return;
                }
                // A "[" before the first ">" means the declaration has a
                // bracketed part, so it ends at "]>" instead.
                if (q2 > 0 && q > q2) {
                  q = s.indexOf("]>", j + 8);
                  if (q < 0) {
                    this.onError(
                      XMLParserErrorCode.UnterminatedDoctypeDeclaration
                    );
                    return;
                  }
                  complexDoctype = true;
                }
                const doctypeContent = s.substring(
                  j + 8,
                  q + (complexDoctype ? 1 : 0)
                );
                this.onDoctype(doctypeContent);
                j = q + (complexDoctype ? 2 : 1);
              } else {
                this.onError(XMLParserErrorCode.MalformedElement);
                return;
              }
              break;
            }
            default: {
              // Start tag or empty-element tag.
              const content = this._parseContent(s, j);
              if (content === null) {
                this.onError(XMLParserErrorCode.MalformedElement);
                return;
              }
              let isClosed = false;
              if (
                s.substring(j + content.parsed, j + content.parsed + 2) === "/>"
              ) {
                isClosed = true;
              } else if (
                s.substring(j + content.parsed, j + content.parsed + 1) !== ">"
              ) {
                this.onError(XMLParserErrorCode.UnterminatedElement);
                return;
              }
              this.onBeginElement(content.name, content.attributes, isClosed);
              j += content.parsed + (isClosed ? 2 : 1);
              break;
            }
          }
        } else {
          // Text run: everything up to the next "<" (or the end of input).
          while (j < s.length && s[j] !== "<") {
            j++;
          }
          const text = s.substring(i, j);
          this.onText(this._resolveEntities(text));
        }
        i = j;
      }
    }

    /**
     * Hook for entity names that are neither numeric nor predefined. The
     * default puts the reference back unchanged.
     *
     * @param {string} name - Entity name without `&` and `;`.
     * @returns {string} Replacement text.
     */
    onResolveEntity(name) {
      return `&${name};`;
    }

    /** Hook: processing instruction with its target name and raw value. */
    onPi(name, value) {}

    /** Hook: comment body (text between `<!--` and `-->`). */
    onComment(text) {}

    /** Hook: CDATA body (text between `<![CDATA[` and `]]>`). */
    onCdata(text) {}

    /** Hook: text following the `DOCTYPE` keyword in a declaration. */
    onDoctype(doctypeContent) {}

    /** Hook: text run, with entities already resolved. */
    onText(text) {}

    /** Hook: start tag; `isEmpty` is true for the `<name/>` form. */
    onBeginElement(name, attributes, isEmpty) {}

    /** Hook: end tag; `name` is the text between `</` and `>`. */
    onEndElement(name) {}

    /** Hook: parse failure; `code` is an `XMLParserErrorCode` value. */
    onError(code) {}
  }
  259. exports.XMLParserBase = XMLParserBase;
  /**
   * Minimal DOM-like node.
   *
   * A node has a `nodeName`, an optional `nodeValue`, and a `parentNode`
   * that starts as `null`. `childNodes` and `attributes` are not created
   * here; they exist only if whoever builds the tree assigns them.
   */
  class SimpleDOMNode {
    /**
     * @param {string} nodeName - Element name, or "#text" for text nodes.
     * @param {string} [nodeValue] - Text carried by the node, if any.
     */
    constructor(nodeName, nodeValue) {
      this.nodeName = nodeName;
      this.nodeValue = nodeValue;
      // Defined without `enumerable`, so the back-reference is writable
      // but does not show up when the node's own keys are enumerated.
      Object.defineProperty(this, "parentNode", {
        value: null,
        writable: true
      });
    }

    /** First child, or a falsy value when there are no children. */
    get firstChild() {
      return this.childNodes && this.childNodes[0];
    }

    /**
     * The node following this one in its parent's `childNodes`.
     *
     * @returns {SimpleDOMNode|undefined} `undefined` when the node has no
     *   parent (previously this case threw a TypeError because `parentNode`
     *   is `null` by default), when the parent has no `childNodes`, when
     *   this node is not in the parent's list, or when it is the last
     *   child.
     */
    get nextSibling() {
      const childNodes = this.parentNode?.childNodes;
      if (!childNodes) {
        return undefined;
      }
      const index = childNodes.indexOf(this);
      if (index === -1) {
        return undefined;
      }
      return childNodes[index + 1];
    }

    /**
     * Concatenated text of the subtree: the node's own `nodeValue` (or "")
     * when it has no `childNodes`, otherwise the children's `textContent`
     * joined together.
     */
    get textContent() {
      if (!this.childNodes) {
        return this.nodeValue || "";
      }
      return this.childNodes.map(child => child.textContent).join("");
    }

    /** The `childNodes` array, or a new empty array when there is none. */
    get children() {
      return this.childNodes || [];
    }

    /**
     * @returns {boolean} `true` only when `childNodes` exists and is not
     *   empty. Always a real boolean, also for nodes without `childNodes`.
     */
    hasChildNodes() {
      return this.childNodes?.length > 0;
    }

    /**
     * Looks up a node by walking `paths` from index `pos` onwards.
     *
     * Each path component is an object with a `name` (compared against
     * `nodeName`) and a `pos` (index among same-named siblings). The
     * subtree rooted at this node is traversed depth-first using an
     * explicit stack of `[parent, childIndex]` pairs.
     *
     * @param {Array<{name: string, pos: number}>} paths - Path components.
     * @param {number} pos - Index of the component to match next.
     * @returns {SimpleDOMNode|null} The matching node, `this` when `pos` is
     *   already past the end of `paths`, or `null` when nothing matches.
     */
    searchNode(paths, pos) {
      if (pos >= paths.length) {
        return this;
      }
      const component = paths[pos];
      const stack = [];
      let node = this;
      while (true) {
        if (component.name === node.nodeName) {
          if (component.pos === 0) {
            const res = node.searchNode(paths, pos + 1);
            if (res !== null) {
              return res;
            }
          } else if (stack.length === 0) {
            return null;
          } else {
            // Pick the `component.pos`-th sibling carrying the same name.
            const [parent] = stack.pop();
            let siblingPos = 0;
            for (const child of parent.childNodes) {
              if (component.name === child.nodeName) {
                if (siblingPos === component.pos) {
                  return child.searchNode(paths, pos + 1);
                }
                siblingPos++;
              }
            }
            return node.searchNode(paths, pos + 1);
          }
        }
        if (node.childNodes && node.childNodes.length !== 0) {
          // Descend into the first child.
          stack.push([node, 0]);
          node = node.childNodes[0];
        } else if (stack.length === 0) {
          return null;
        } else {
          // Climb until an ancestor still has an unvisited child.
          while (stack.length !== 0) {
            const [parent, currentPos] = stack.pop();
            const newPos = currentPos + 1;
            if (newPos < parent.childNodes.length) {
              stack.push([parent, newPos]);
              node = parent.childNodes[newPos];
              break;
            }
          }
          if (stack.length === 0) {
            return null;
          }
        }
      }
    }

    /**
     * Serializes the subtree as XML text by appending string pieces to
     * `buffer`. Text and attribute values go through
     * `_core_utils.encodeToXmlString`.
     *
     * @param {Array<string>} buffer - Output pieces; appended to in place.
     */
    dump(buffer) {
      if (this.nodeName === "#text") {
        buffer.push((0, _core_utils.encodeToXmlString)(this.nodeValue));
        return;
      }
      buffer.push(`<${this.nodeName}`);
      if (this.attributes) {
        for (const attribute of this.attributes) {
          buffer.push(
            ` ${attribute.name}="${(0, _core_utils.encodeToXmlString)(attribute.value)}"`
          );
        }
      }
      if (this.hasChildNodes()) {
        buffer.push(">");
        for (const child of this.childNodes) {
          child.dump(buffer);
        }
        buffer.push(`</${this.nodeName}>`);
      } else if (this.nodeValue) {
        buffer.push(
          `>${(0, _core_utils.encodeToXmlString)(this.nodeValue)}</${this.nodeName}>`
        );
      } else {
        buffer.push("/>");
      }
    }
  }
  372. exports.SimpleDOMNode = SimpleDOMNode;
  /**
   * XML parser that builds a tree of `SimpleDOMNode` objects from the
   * events emitted by `XMLParserBase.parseXml`.
   */
  class SimpleXMLParser extends XMLParserBase {
    /**
     * @param {Object} [options] - May be omitted entirely; every option has
     *   a default.
     * @param {boolean} [options.hasAttributes=false] - Store the parsed
     *   attributes on each element node.
     * @param {boolean} [options.lowerCaseName=false] - Lower-case element
     *   names before creating nodes.
     */
    constructor({ hasAttributes = false, lowerCaseName = false } = {}) {
      super();
      this._currentFragment = null;
      this._stack = null;
      this._errorCode = XMLParserErrorCode.NoError;
      this._hasAttributes = hasAttributes;
      this._lowerCaseName = lowerCaseName;
    }

    /**
     * Parses `data` and returns the resulting document.
     *
     * @param {string} data - XML text.
     * @returns {{documentElement: SimpleDOMNode}|undefined} `undefined`
     *   when the parser reported an error or when no node was produced.
     */
    parseFromString(data) {
      // Reset per-parse state so the instance can be reused.
      this._currentFragment = [];
      this._stack = [];
      this._errorCode = XMLParserErrorCode.NoError;
      this.parseXml(data);
      if (this._errorCode !== XMLParserErrorCode.NoError) {
        return undefined;
      }
      // The first node of the fragment that is current when parsing ends.
      const [documentElement] = this._currentFragment;
      if (!documentElement) {
        return undefined;
      }
      return {
        documentElement
      };
    }

    /** Adds a "#text" node, unless the text is whitespace only. */
    onText(text) {
      if (isWhitespaceString(text)) {
        return;
      }
      const node = new SimpleDOMNode("#text", text);
      this._currentFragment.push(node);
    }

    /** Adds a "#text" node holding the CDATA body as-is. */
    onCdata(text) {
      const node = new SimpleDOMNode("#text", text);
      this._currentFragment.push(node);
    }

    /**
     * Adds an element node to the current fragment and, unless the element
     * is empty (`<name/>`), makes its `childNodes` the current fragment.
     */
    onBeginElement(name, attributes, isEmpty) {
      if (this._lowerCaseName) {
        name = name.toLowerCase();
      }
      const node = new SimpleDOMNode(name);
      node.childNodes = [];
      if (this._hasAttributes) {
        node.attributes = attributes;
      }
      this._currentFragment.push(node);
      if (isEmpty) {
        return;
      }
      this._stack.push(this._currentFragment);
      this._currentFragment = node.childNodes;
    }

    /**
     * Closes the element that is currently open: restores the enclosing
     * fragment and sets `parentNode` on each of the element's children.
     * The `name` argument is not checked against the open element.
     *
     * @returns {SimpleDOMNode|null} The closed element, or `null` when the
     *   restored fragment is empty (for instance an end tag with nothing
     *   left on the stack).
     */
    onEndElement(name) {
      this._currentFragment = this._stack.pop() || [];
      const lastElement = this._currentFragment.at(-1);
      if (!lastElement) {
        return null;
      }
      for (const childNode of lastElement.childNodes) {
        childNode.parentNode = lastElement;
      }
      return lastElement;
    }

    /** Records the error code so `parseFromString` can report failure. */
    onError(code) {
      this._errorCode = code;
    }
  }
  443. exports.SimpleXMLParser = SimpleXMLParser;