metadata_spec.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. /**
  2. * @licstart The following is the entire license notice for the
  3. * JavaScript code in this page
  4. *
  5. * Copyright 2022 Mozilla Foundation
  6. *
  7. * Licensed under the Apache License, Version 2.0 (the "License");
  8. * you may not use this file except in compliance with the License.
  9. * You may obtain a copy of the License at
  10. *
  11. * http://www.apache.org/licenses/LICENSE-2.0
  12. *
  13. * Unless required by applicable law or agreed to in writing, software
  14. * distributed under the License is distributed on an "AS IS" BASIS,
  15. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. * See the License for the specific language governing permissions and
  17. * limitations under the License.
  18. *
  19. * @licend The above is the entire license notice for the
  20. * JavaScript code in this page
  21. */
  22. "use strict";
  23. var _test_utils = require("./test_utils.js");
  24. var _metadata = require("../../display/metadata.js");
  25. var _metadata_parser = require("../../core/metadata_parser.js");
  /**
   * Test helper: feeds `data` to `MetadataParser` and wraps the parser's
   * `serializable` value in a display-layer `Metadata` instance.
   *
   * @param {string} data - Raw metadata text (every caller in this file
   *   passes an XMP/XML string).
   * @returns {Metadata} The `Metadata` object the specs query via
   *   `has`, `get` and `getAll`.
   */
  function createMetadata(data) {
    const parser = new _metadata_parser.MetadataParser(data);
    const MetadataCtor = _metadata.Metadata;
    const serialized = parser.serializable;
    return new MetadataCtor(serialized);
  }
  30. describe("metadata", function () {
  // Well-formed packet with a single dc:title entry; the title must be
  // reported as-is and unknown keys must be reported as absent.
  it("should handle valid metadata", () => {
    const xmp = [
      "<x:xmpmeta xmlns:x='adobe:ns:meta/'>",
      "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>",
      "<rdf:Description xmlns:dc='http://purl.org/dc/elements/1.1/'>",
      '<dc:title><rdf:Alt><rdf:li xml:lang="x-default">Foo bar baz</rdf:li>',
      "</rdf:Alt></dc:title></rdf:Description></rdf:RDF></x:xmpmeta>",
    ].join("");
    const expectedTitle = "Foo bar baz";

    const parsed = createMetadata(xmp);

    expect(parsed.has("dc:title")).toBeTruthy();
    expect(parsed.has("dc:qux")).toBeFalsy();
    expect(parsed.get("dc:title")).toEqual(expectedTitle);
    expect(parsed.get("dc:qux")).toEqual(null);
    expect(parsed.getAll()).toEqual({ "dc:title": expectedTitle });
  });
  // The title is written as literal backslash-escaped octal byte sequences
  // (starting with \376\377, i.e. bytes 0xFE 0xFF) followed by a bare "&";
  // the expected result is the readable text "PDF&".
  it("should repair and handle invalid metadata", () => {
    const xmp = [
      "<x:xmpmeta xmlns:x='adobe:ns:meta/'>",
      "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>",
      "<rdf:Description xmlns:dc='http://purl.org/dc/elements/1.1/'>",
      "<dc:title>\\376\\377\\000P\\000D\\000F\\000&</dc:title>",
      "</rdf:Description></rdf:RDF></x:xmpmeta>",
    ].join("");
    const expectedTitle = "PDF&";

    const parsed = createMetadata(xmp);

    expect(parsed.has("dc:title")).toBeTruthy();
    expect(parsed.has("dc:qux")).toBeFalsy();
    expect(parsed.get("dc:title")).toEqual(expectedTitle);
    expect(parsed.get("dc:qux")).toEqual(null);
    expect(parsed.getAll()).toEqual({ "dc:title": expectedTitle });
  });
  // Larger fixture in which xap:CreatorTool, dc:title and dc:creator all
  // carry backslash-escaped octal byte sequences (including \351 and an
  // embedded &apos; entity). The expectations pin the decoded text and the
  // lower-cased "xap:creatortool" key returned by getAll().
  it("should repair and handle invalid metadata (bug 1424938)", () => {
    const xmp = [
      "<x:xmpmeta xmlns:x='adobe:ns:meta/' ",
      "x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>",
      "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' ",
      "xmlns:iX='http://ns.adobe.com/iX/1.0/'>",
      "<rdf:Description rdf:about='61652fa7-fc1f-11dd-0000-ce81d41f9ecf' ",
      "xmlns:pdf='http://ns.adobe.com/pdf/1.3/' ",
      "pdf:Producer='GPL Ghostscript 8.63'/>",
      "<rdf:Description rdf:about='61652fa7-fc1f-11dd-0000-ce81d41f9ecf' ",
      "xmlns:xap='http://ns.adobe.com/xap/1.0/' ",
      "xap:ModifyDate='2009-02-13T12:42:54+01:00' ",
      "xap:CreateDate='2009-02-13T12:42:54+01:00'>",
      "<xap:CreatorTool>\\376\\377\\000P\\000D\\000F\\000C\\000r\\000e\\000a",
      "\\000t\\000o\\000r\\000 \\000V\\000e\\000r\\000s\\000i\\000o\\000n",
      "\\000 \\0000\\000.\\0009\\000.\\0006</xap:CreatorTool>",
      "</rdf:Description><rdf:Description ",
      "rdf:about='61652fa7-fc1f-11dd-0000-ce81d41f9ecf' ",
      "xmlns:xapMM='http://ns.adobe.com/xap/1.0/mm/' ",
      "xapMM:DocumentID='61652fa7-fc1f-11dd-0000-ce81d41f9ecf'/>",
      "<rdf:Description rdf:about='61652fa7-fc1f-11dd-0000-ce81d41f9ecf' ",
      "xmlns:dc='http://purl.org/dc/elements/1.1/' ",
      "dc:format='application/pdf'><dc:title><rdf:Alt>",
      "<rdf:li xml:lang='x-default'>\\376\\377\\000L\\000&apos;\\000O\\000d",
      "\\000i\\000s\\000s\\000e\\000e\\000 \\000t\\000h\\000\\351\\000m\\000a",
      "\\000t\\000i\\000q\\000u\\000e\\000 \\000l\\000o\\000g\\000o\\000 ",
      "\\000O\\000d\\000i\\000s\\000s\\000\\351\\000\\351\\000 \\000-\\000 ",
      "\\000d\\000\\351\\000c\\000e\\000m\\000b\\000r\\000e\\000 \\0002\\0000",
      "\\0000\\0008\\000.\\000p\\000u\\000b</rdf:li></rdf:Alt></dc:title>",
      "<dc:creator><rdf:Seq><rdf:li>\\376\\377\\000O\\000D\\000I\\000S",
      "</rdf:li></rdf:Seq></dc:creator></rdf:Description></rdf:RDF>",
      "</x:xmpmeta>",
    ].join("");
    const expectedTitle =
      "L'Odissee thématique logo Odisséé - décembre 2008.pub";

    const parsed = createMetadata(xmp);

    expect(parsed.has("dc:title")).toBeTruthy();
    expect(parsed.has("dc:qux")).toBeFalsy();
    expect(parsed.get("dc:title")).toEqual(expectedTitle);
    expect(parsed.get("dc:qux")).toEqual(null);
    expect(parsed.getAll()).toEqual({
      "dc:creator": ["ODIS"],
      "dc:title": expectedTitle,
      "xap:creatortool": "PDFCreator Version 0.9.6",
    });
  });
  // The leading <?xpacket ... instruction is never terminated (its id
  // attribute value and the closing ?> are cut off), and several
  // rdf:Description tags lack whitespace between attributes. The only
  // requirement is that parsing yields an empty metadata map.
  it("should gracefully handle incomplete tags (issue 8884)", () => {
    const xmp = [
      '<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d',
      '<x:xmpmeta xmlns:x="adobe:ns:meta/">',
      '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">',
      '<rdf:Description rdf:about=""',
      'xmlns:pdfx="http://ns.adobe.com/pdfx/1.3/">',
      "</rdf:Description>",
      '<rdf:Description rdf:about=""',
      'xmlns:xap="http://ns.adobe.com/xap/1.0/">',
      "<xap:ModifyDate>2010-03-25T11:20:09-04:00</xap:ModifyDate>",
      "<xap:CreateDate>2010-03-25T11:20:09-04:00</xap:CreateDate>",
      "<xap:MetadataDate>2010-03-25T11:20:09-04:00</xap:MetadataDate>",
      "</rdf:Description>",
      '<rdf:Description rdf:about=""',
      'xmlns:dc="http://purl.org/dc/elements/1.1/">',
      "<dc:format>application/pdf</dc:format>",
      "</rdf:Description>",
      '<rdf:Description rdf:about=""',
      'xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/">',
      "<pdfaid:part>1</pdfaid:part>",
      "<pdfaid:conformance>A</pdfaid:conformance>",
      "</rdf:Description>",
      "</rdf:RDF>",
      "</x:xmpmeta>",
      '<?xpacket end="w"?>',
    ].join("");
    const { isEmptyObj } = _test_utils;

    const parsed = createMetadata(xmp);

    expect(isEmptyObj(parsed.getAll())).toEqual(true);
  });
  // Packet wrapped in <?xpacket ...?> instructions with many empty elements;
  // empty elements must surface as "" (or [] / [""] for Bag / Seq) and all
  // keys returned by getAll() are lower-cased.
  // NOTE(review): the "junk" named in the title is not apparent in the
  // visible literal; confirm whether begin="" was meant to contain an
  // invisible character that was lost when this file was copied.
  it('should gracefully handle "junk" before the actual metadata (issue 10395)', () => {
    const xmp = [
      '<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>',
      '<x:xmpmeta x:xmptk="TallComponents PDFObjects 1.0" ',
      'xmlns:x="adobe:ns:meta/">',
      '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">',
      '<rdf:Description rdf:about="" ',
      'xmlns:pdf="http://ns.adobe.com/pdf/1.3/">',
      "<pdf:Producer>PDFKit.NET 4.0.102.0</pdf:Producer>",
      "<pdf:Keywords></pdf:Keywords>",
      "<pdf:PDFVersion>1.7</pdf:PDFVersion></rdf:Description>",
      '<rdf:Description rdf:about="" ',
      'xmlns:xap="http://ns.adobe.com/xap/1.0/">',
      "<xap:CreateDate>2018-12-27T13:50:36-08:00</xap:CreateDate>",
      "<xap:ModifyDate>2018-12-27T13:50:38-08:00</xap:ModifyDate>",
      "<xap:CreatorTool></xap:CreatorTool>",
      "<xap:MetadataDate>2018-12-27T13:50:38-08:00</xap:MetadataDate>",
      '</rdf:Description><rdf:Description rdf:about="" ',
      'xmlns:dc="http://purl.org/dc/elements/1.1/">',
      "<dc:creator><rdf:Seq><rdf:li></rdf:li></rdf:Seq></dc:creator>",
      "<dc:subject><rdf:Bag /></dc:subject>",
      '<dc:description><rdf:Alt><rdf:li xml:lang="x-default">',
      "</rdf:li></rdf:Alt></dc:description>",
      '<dc:title><rdf:Alt><rdf:li xml:lang="x-default"></rdf:li>',
      "</rdf:Alt></dc:title><dc:format>application/pdf</dc:format>",
      '</rdf:Description></rdf:RDF></x:xmpmeta><?xpacket end="w"?>',
    ].join("");
    const expectedEntries = {
      "dc:creator": [""],
      "dc:description": "",
      "dc:format": "application/pdf",
      "dc:subject": [],
      "dc:title": "",
      "pdf:keywords": "",
      "pdf:pdfversion": "1.7",
      "pdf:producer": "PDFKit.NET 4.0.102.0",
      "xap:createdate": "2018-12-27T13:50:36-08:00",
      "xap:creatortool": "",
      "xap:metadatadate": "2018-12-27T13:50:38-08:00",
      "xap:modifydate": "2018-12-27T13:50:38-08:00",
    };

    const parsed = createMetadata(xmp);

    expect(parsed.has("dc:title")).toBeTruthy();
    expect(parsed.has("dc:qux")).toBeFalsy();
    expect(parsed.get("dc:title")).toEqual("");
    expect(parsed.get("dc:qux")).toEqual(null);
    expect(parsed.getAll()).toEqual(expectedEntries);
  });
  // The title is wrapped in &apos; entities; the expected value has them
  // resolved to literal single quotes.
  it('should correctly handle metadata containing "&apos" (issue 10407)', () => {
    const xmp = [
      "<x:xmpmeta xmlns:x='adobe:ns:meta/'>",
      "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>",
      "<rdf:Description xmlns:dc='http://purl.org/dc/elements/1.1/'>",
      "<dc:title><rdf:Alt>",
      '<rdf:li xml:lang="x-default">&apos;Foo bar baz&apos;</rdf:li>',
      "</rdf:Alt></dc:title></rdf:Description></rdf:RDF></x:xmpmeta>",
    ].join("");
    const expectedTitle = "'Foo bar baz'";

    const parsed = createMetadata(xmp);

    expect(parsed.has("dc:title")).toBeTruthy();
    expect(parsed.has("dc:qux")).toBeFalsy();
    expect(parsed.get("dc:title")).toEqual(expectedTitle);
    expect(parsed.get("dc:qux")).toEqual(null);
    expect(parsed.getAll()).toEqual({ "dc:title": expectedTitle });
  });
  // The fixture closes </x:xmpmeta> although no <x:xmpmeta> element was
  // ever opened. The only requirement is that parsing yields an empty
  // metadata map.
  it("should gracefully handle unbalanced end tags (issue 10410)", () => {
    const xmp = [
      '<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>',
      '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">',
      '<rdf:Description rdf:about="" ',
      'xmlns:pdf="http://ns.adobe.com/pdf/1.3/">',
      "<pdf:Producer>Soda PDF 5</pdf:Producer></rdf:Description>",
      '<rdf:Description rdf:about="" ',
      'xmlns:xap="http://ns.adobe.com/xap/1.0/">',
      "<xap:CreateDate>2018-10-02T08:14:49-05:00</xap:CreateDate>",
      "<xap:CreatorTool>Soda PDF 5</xap:CreatorTool>",
      "<xap:MetadataDate>2018-10-02T08:14:49-05:00</xap:MetadataDate> ",
      "<xap:ModifyDate>2018-10-02T08:14:49-05:00</xap:ModifyDate>",
      '</rdf:Description><rdf:Description rdf:about="" ',
      'xmlns:xmpMM="http://ns.adobe.com/xap/1.0/mm/">',
      "<xmpMM:DocumentID>uuid:00000000-1c84-3cf9-89ba-bef0e729c831",
      "</xmpMM:DocumentID></rdf:Description>",
      '</rdf:RDF></x:xmpmeta><?xpacket end="w"?>',
    ].join("");
    const { isEmptyObj } = _test_utils;

    const parsed = createMetadata(xmp);

    expect(isEmptyObj(parsed.getAll())).toEqual(true);
  });
  // The DOCTYPE declares nested entities lol..lol9, each referencing the
  // previous one several times. The title references &lol9; and the
  // expectation is that the reference is returned verbatim ("a&lol9;b"),
  // i.e. it is not expanded.
  it("should not be vulnerable to the billion laughs attack", () => {
    const xmp = [
      '<?xml version="1.0"?>',
      "<!DOCTYPE lolz [",
      ' <!ENTITY lol "lol">',
      ' <!ENTITY lol1 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">',
      ' <!ENTITY lol2 "&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;">',
      ' <!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">',
      ' <!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">',
      ' <!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">',
      ' <!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;">',
      ' <!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;">',
      ' <!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;">',
      ' <!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">',
      "]>",
      '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">',
      ' <rdf:Description xmlns:dc="http://purl.org/dc/elements/1.1/">',
      " <dc:title>",
      " <rdf:Alt>",
      ' <rdf:li xml:lang="x-default">a&lol9;b</rdf:li>',
      " </rdf:Alt>",
      " </dc:title>",
      " </rdf:Description>",
      "</rdf:RDF>",
    ].join("");
    const expectedTitle = "a&lol9;b";

    const parsed = createMetadata(xmp);

    expect(parsed.has("dc:title")).toBeTruthy();
    expect(parsed.has("dc:qux")).toBeFalsy();
    expect(parsed.get("dc:title")).toEqual(expectedTitle);
    expect(parsed.get("dc:qux")).toEqual(null);
    expect(parsed.getAll()).toEqual({ "dc:title": expectedTitle });
  });
  120. });