metadata_parser.js 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * JavaScript code in this page
  4. *
  5. * Copyright 2022 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * JavaScript code in this page
  21. */
  22. "use strict";
  23. Object.defineProperty(exports, "__esModule", {
  24. value: true
  25. });
  26. exports.MetadataParser = void 0;
  27. var _xml_parser = require("./xml_parser.js");
  28. class MetadataParser {
  29. constructor(data) {
  30. data = this._repair(data);
  31. const parser = new _xml_parser.SimpleXMLParser({
  32. lowerCaseName: true
  33. });
  34. const xmlDocument = parser.parseFromString(data);
  35. this._metadataMap = new Map();
  36. this._data = data;
  37. if (xmlDocument) {
  38. this._parse(xmlDocument);
  39. }
  40. }
  41. _repair(data) {
  42. return data.replace(/^[^<]+/, "").replace(/>\\376\\377([^<]+)/g, function (all, codes) {
  43. const bytes = codes.replace(/\\([0-3])([0-7])([0-7])/g, function (code, d1, d2, d3) {
  44. return String.fromCharCode(d1 * 64 + d2 * 8 + d3 * 1);
  45. }).replace(/&(amp|apos|gt|lt|quot);/g, function (str, name) {
  46. switch (name) {
  47. case "amp":
  48. return "&";
  49. case "apos":
  50. return "'";
  51. case "gt":
  52. return ">";
  53. case "lt":
  54. return "<";
  55. case "quot":
  56. return '"';
  57. }
  58. throw new Error(`_repair: ${name} isn't defined.`);
  59. });
  60. const charBuf = [];
  61. for (let i = 0, ii = bytes.length; i < ii; i += 2) {
  62. const code = bytes.charCodeAt(i) * 256 + bytes.charCodeAt(i + 1);
  63. if (code >= 32 && code < 127 && code !== 60 && code !== 62 && code !== 38) {
  64. charBuf.push(String.fromCharCode(code));
  65. } else {
  66. charBuf.push("&#x" + (0x10000 + code).toString(16).substring(1) + ";");
  67. }
  68. }
  69. return ">" + charBuf.join("");
  70. });
  71. }
  72. _getSequence(entry) {
  73. const name = entry.nodeName;
  74. if (name !== "rdf:bag" && name !== "rdf:seq" && name !== "rdf:alt") {
  75. return null;
  76. }
  77. return entry.childNodes.filter(node => node.nodeName === "rdf:li");
  78. }
  79. _parseArray(entry) {
  80. if (!entry.hasChildNodes()) {
  81. return;
  82. }
  83. const [seqNode] = entry.childNodes;
  84. const sequence = this._getSequence(seqNode) || [];
  85. this._metadataMap.set(entry.nodeName, sequence.map(node => node.textContent.trim()));
  86. }
  87. _parse(xmlDocument) {
  88. let rdf = xmlDocument.documentElement;
  89. if (rdf.nodeName !== "rdf:rdf") {
  90. rdf = rdf.firstChild;
  91. while (rdf && rdf.nodeName !== "rdf:rdf") {
  92. rdf = rdf.nextSibling;
  93. }
  94. }
  95. if (!rdf || rdf.nodeName !== "rdf:rdf" || !rdf.hasChildNodes()) {
  96. return;
  97. }
  98. for (const desc of rdf.childNodes) {
  99. if (desc.nodeName !== "rdf:description") {
  100. continue;
  101. }
  102. for (const entry of desc.childNodes) {
  103. const name = entry.nodeName;
  104. switch (name) {
  105. case "#text":
  106. continue;
  107. case "dc:creator":
  108. case "dc:subject":
  109. this._parseArray(entry);
  110. continue;
  111. }
  112. this._metadataMap.set(name, entry.textContent.trim());
  113. }
  114. }
  115. }
  116. get serializable() {
  117. return {
  118. parsedData: this._metadataMap,
  119. rawData: this._data
  120. };
  121. }
  122. }
  123. exports.MetadataParser = MetadataParser;