2
0

xml_parser.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * Javascript code in this page
  4. *
  5. * Copyright 2020 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * Javascript code in this page
  21. */
  22. "use strict";
  23. Object.defineProperty(exports, "__esModule", {
  24. value: true
  25. });
  26. exports.SimpleXMLParser = void 0;
  27. const XMLParserErrorCode = {
  28. NoError: 0,
  29. EndOfDocument: -1,
  30. UnterminatedCdat: -2,
  31. UnterminatedXmlDeclaration: -3,
  32. UnterminatedDoctypeDeclaration: -4,
  33. UnterminatedComment: -5,
  34. MalformedElement: -6,
  35. OutOfMemory: -7,
  36. UnterminatedAttributeValue: -8,
  37. UnterminatedElement: -9,
  38. ElementNeverBegun: -10
  39. };
  40. function isWhitespace(s, index) {
  41. const ch = s[index];
  42. return ch === " " || ch === "\n" || ch === "\r" || ch === "\t";
  43. }
  44. function isWhitespaceString(s) {
  45. for (let i = 0, ii = s.length; i < ii; i++) {
  46. if (!isWhitespace(s, i)) {
  47. return false;
  48. }
  49. }
  50. return true;
  51. }
  52. class XMLParserBase {
  53. _resolveEntities(s) {
  54. return s.replace(/&([^;]+);/g, (all, entity) => {
  55. if (entity.substring(0, 2) === "#x") {
  56. return String.fromCharCode(parseInt(entity.substring(2), 16));
  57. } else if (entity.substring(0, 1) === "#") {
  58. return String.fromCharCode(parseInt(entity.substring(1), 10));
  59. }
  60. switch (entity) {
  61. case "lt":
  62. return "<";
  63. case "gt":
  64. return ">";
  65. case "amp":
  66. return "&";
  67. case "quot":
  68. return '"';
  69. }
  70. return this.onResolveEntity(entity);
  71. });
  72. }
  73. _parseContent(s, start) {
  74. const attributes = [];
  75. let pos = start;
  76. function skipWs() {
  77. while (pos < s.length && isWhitespace(s, pos)) {
  78. ++pos;
  79. }
  80. }
  81. while (pos < s.length && !isWhitespace(s, pos) && s[pos] !== ">" && s[pos] !== "/") {
  82. ++pos;
  83. }
  84. const name = s.substring(start, pos);
  85. skipWs();
  86. while (pos < s.length && s[pos] !== ">" && s[pos] !== "/" && s[pos] !== "?") {
  87. skipWs();
  88. let attrName = "",
  89. attrValue = "";
  90. while (pos < s.length && !isWhitespace(s, pos) && s[pos] !== "=") {
  91. attrName += s[pos];
  92. ++pos;
  93. }
  94. skipWs();
  95. if (s[pos] !== "=") {
  96. return null;
  97. }
  98. ++pos;
  99. skipWs();
  100. const attrEndChar = s[pos];
  101. if (attrEndChar !== '"' && attrEndChar !== "'") {
  102. return null;
  103. }
  104. const attrEndIndex = s.indexOf(attrEndChar, ++pos);
  105. if (attrEndIndex < 0) {
  106. return null;
  107. }
  108. attrValue = s.substring(pos, attrEndIndex);
  109. attributes.push({
  110. name: attrName,
  111. value: this._resolveEntities(attrValue)
  112. });
  113. pos = attrEndIndex + 1;
  114. skipWs();
  115. }
  116. return {
  117. name,
  118. attributes,
  119. parsed: pos - start
  120. };
  121. }
  122. _parseProcessingInstruction(s, start) {
  123. let pos = start;
  124. function skipWs() {
  125. while (pos < s.length && isWhitespace(s, pos)) {
  126. ++pos;
  127. }
  128. }
  129. while (pos < s.length && !isWhitespace(s, pos) && s[pos] !== ">" && s[pos] !== "/") {
  130. ++pos;
  131. }
  132. const name = s.substring(start, pos);
  133. skipWs();
  134. const attrStart = pos;
  135. while (pos < s.length && (s[pos] !== "?" || s[pos + 1] !== ">")) {
  136. ++pos;
  137. }
  138. const value = s.substring(attrStart, pos);
  139. return {
  140. name,
  141. value,
  142. parsed: pos - start
  143. };
  144. }
  145. parseXml(s) {
  146. let i = 0;
  147. while (i < s.length) {
  148. const ch = s[i];
  149. let j = i;
  150. if (ch === "<") {
  151. ++j;
  152. const ch2 = s[j];
  153. let q;
  154. switch (ch2) {
  155. case "/":
  156. ++j;
  157. q = s.indexOf(">", j);
  158. if (q < 0) {
  159. this.onError(XMLParserErrorCode.UnterminatedElement);
  160. return;
  161. }
  162. this.onEndElement(s.substring(j, q));
  163. j = q + 1;
  164. break;
  165. case "?":
  166. ++j;
  167. const pi = this._parseProcessingInstruction(s, j);
  168. if (s.substring(j + pi.parsed, j + pi.parsed + 2) !== "?>") {
  169. this.onError(XMLParserErrorCode.UnterminatedXmlDeclaration);
  170. return;
  171. }
  172. this.onPi(pi.name, pi.value);
  173. j += pi.parsed + 2;
  174. break;
  175. case "!":
  176. if (s.substring(j + 1, j + 3) === "--") {
  177. q = s.indexOf("-->", j + 3);
  178. if (q < 0) {
  179. this.onError(XMLParserErrorCode.UnterminatedComment);
  180. return;
  181. }
  182. this.onComment(s.substring(j + 3, q));
  183. j = q + 3;
  184. } else if (s.substring(j + 1, j + 8) === "[CDATA[") {
  185. q = s.indexOf("]]>", j + 8);
  186. if (q < 0) {
  187. this.onError(XMLParserErrorCode.UnterminatedCdat);
  188. return;
  189. }
  190. this.onCdata(s.substring(j + 8, q));
  191. j = q + 3;
  192. } else if (s.substring(j + 1, j + 8) === "DOCTYPE") {
  193. const q2 = s.indexOf("[", j + 8);
  194. let complexDoctype = false;
  195. q = s.indexOf(">", j + 8);
  196. if (q < 0) {
  197. this.onError(XMLParserErrorCode.UnterminatedDoctypeDeclaration);
  198. return;
  199. }
  200. if (q2 > 0 && q > q2) {
  201. q = s.indexOf("]>", j + 8);
  202. if (q < 0) {
  203. this.onError(XMLParserErrorCode.UnterminatedDoctypeDeclaration);
  204. return;
  205. }
  206. complexDoctype = true;
  207. }
  208. const doctypeContent = s.substring(j + 8, q + (complexDoctype ? 1 : 0));
  209. this.onDoctype(doctypeContent);
  210. j = q + (complexDoctype ? 2 : 1);
  211. } else {
  212. this.onError(XMLParserErrorCode.MalformedElement);
  213. return;
  214. }
  215. break;
  216. default:
  217. const content = this._parseContent(s, j);
  218. if (content === null) {
  219. this.onError(XMLParserErrorCode.MalformedElement);
  220. return;
  221. }
  222. let isClosed = false;
  223. if (s.substring(j + content.parsed, j + content.parsed + 2) === "/>") {
  224. isClosed = true;
  225. } else if (s.substring(j + content.parsed, j + content.parsed + 1) !== ">") {
  226. this.onError(XMLParserErrorCode.UnterminatedElement);
  227. return;
  228. }
  229. this.onBeginElement(content.name, content.attributes, isClosed);
  230. j += content.parsed + (isClosed ? 2 : 1);
  231. break;
  232. }
  233. } else {
  234. while (j < s.length && s[j] !== "<") {
  235. j++;
  236. }
  237. const text = s.substring(i, j);
  238. this.onText(this._resolveEntities(text));
  239. }
  240. i = j;
  241. }
  242. }
  243. onResolveEntity(name) {
  244. return `&${name};`;
  245. }
  246. onPi(name, value) {}
  247. onComment(text) {}
  248. onCdata(text) {}
  249. onDoctype(doctypeContent) {}
  250. onText(text) {}
  251. onBeginElement(name, attributes, isEmpty) {}
  252. onEndElement(name) {}
  253. onError(code) {}
  254. }
  255. class SimpleDOMNode {
  256. constructor(nodeName, nodeValue) {
  257. this.nodeName = nodeName;
  258. this.nodeValue = nodeValue;
  259. Object.defineProperty(this, "parentNode", {
  260. value: null,
  261. writable: true
  262. });
  263. }
  264. get firstChild() {
  265. return this.childNodes && this.childNodes[0];
  266. }
  267. get nextSibling() {
  268. const childNodes = this.parentNode.childNodes;
  269. if (!childNodes) {
  270. return undefined;
  271. }
  272. const index = childNodes.indexOf(this);
  273. if (index === -1) {
  274. return undefined;
  275. }
  276. return childNodes[index + 1];
  277. }
  278. get textContent() {
  279. if (!this.childNodes) {
  280. return this.nodeValue || "";
  281. }
  282. return this.childNodes.map(function (child) {
  283. return child.textContent;
  284. }).join("");
  285. }
  286. hasChildNodes() {
  287. return this.childNodes && this.childNodes.length > 0;
  288. }
  289. }
  290. class SimpleXMLParser extends XMLParserBase {
  291. constructor() {
  292. super();
  293. this._currentFragment = null;
  294. this._stack = null;
  295. this._errorCode = XMLParserErrorCode.NoError;
  296. }
  297. parseFromString(data) {
  298. this._currentFragment = [];
  299. this._stack = [];
  300. this._errorCode = XMLParserErrorCode.NoError;
  301. this.parseXml(data);
  302. if (this._errorCode !== XMLParserErrorCode.NoError) {
  303. return undefined;
  304. }
  305. const [documentElement] = this._currentFragment;
  306. if (!documentElement) {
  307. return undefined;
  308. }
  309. return {
  310. documentElement
  311. };
  312. }
  313. onResolveEntity(name) {
  314. switch (name) {
  315. case "apos":
  316. return "'";
  317. }
  318. return super.onResolveEntity(name);
  319. }
  320. onText(text) {
  321. if (isWhitespaceString(text)) {
  322. return;
  323. }
  324. const node = new SimpleDOMNode("#text", text);
  325. this._currentFragment.push(node);
  326. }
  327. onCdata(text) {
  328. const node = new SimpleDOMNode("#text", text);
  329. this._currentFragment.push(node);
  330. }
  331. onBeginElement(name, attributes, isEmpty) {
  332. const node = new SimpleDOMNode(name);
  333. node.childNodes = [];
  334. this._currentFragment.push(node);
  335. if (isEmpty) {
  336. return;
  337. }
  338. this._stack.push(this._currentFragment);
  339. this._currentFragment = node.childNodes;
  340. }
  341. onEndElement(name) {
  342. this._currentFragment = this._stack.pop() || [];
  343. const lastElement = this._currentFragment[this._currentFragment.length - 1];
  344. if (!lastElement) {
  345. return;
  346. }
  347. for (let i = 0, ii = lastElement.childNodes.length; i < ii; i++) {
  348. lastElement.childNodes[i].parentNode = lastElement;
  349. }
  350. }
  351. onError(code) {
  352. this._errorCode = code;
  353. }
  354. }
  355. exports.SimpleXMLParser = SimpleXMLParser;