ods.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. /* ods.js (C) 2014 SheetJS -- http://sheetjs.com */
  2. /* vim: set ts=2: */
  3. /*jshint -W041 */
  4. var ODS = {};
  5. (function make_ods(ODS) {
  6. /* Open Document Format for Office Applications (OpenDocument) Version 1.2 */
  /*
   * Locates the XLSX `utils` object.
   * Prefers a global `XLSX`; otherwise, when both `module` and `require` exist,
   * tries the parent-directory build, then the sibling build, then the
   * installed package. Throws if none of these is available.
   */
  var get_utils = function() {
    if(typeof XLSX !== 'undefined') return XLSX.utils;
    var can_require = typeof module !== "undefined" && typeof require !== 'undefined';
    if(!can_require) throw new Error("Cannot find XLSX utils");
    /* paths are assembled with `+`, exactly as the original call sites did */
    var local_builds = ['../' + 'xlsx', './' + 'xlsx'];
    for(var k = 0; k < local_builds.length; ++k) {
      try { return require(local_builds[k]).utils; } catch(e) { /* try the next location */ }
    }
    /* last resort: a failure here propagates to the caller */
    return require('xl' + 'sx').utils;
  };
  /* true when a global `Buffer` is defined */
  var has_buf = typeof Buffer !== 'undefined';
  /* Builds a string from an array-like of character codes, one code unit per element. */
  function cc2str(arr) {
    var pieces = [];
    for(var idx = 0; idx != arr.length; ++idx) {
      pieces.push(String.fromCharCode(arr[idx]));
    }
    return pieces.join("");
  }
  /*
   * Extracts the content of a zip entry object as a string.
   * Checks, in order: a truthy `data` property, `asNodeBuffer()` (only when
   * Buffer is available), `asBinary()`, and `_data.getContent()`.
   * Returns null for a falsy entry or when none of the accessors is present.
   */
  function getdata(data) {
    var content = null;
    if(data) {
      if(data.data) content = data.data;
      else if(data.asNodeBuffer && has_buf) content = data.asNodeBuffer().toString('binary');
      else if(data.asBinary) content = data.asBinary();
      else if(data._data && data._data.getContent) content = cc2str([].slice.call(data._data.getContent(), 0));
    }
    return content;
  }
  /*
   * Looks up `file` in `zip.files`, trying the name as given, then lowercased,
   * then lowercased with "/" replaced by "\".
   * Returns the entry, or null when no variant is present.
   */
  function safegetzipfile(zip, file) {
    var lowered;
    var entry = zip.files[file];
    if(!entry) {
      lowered = file.toLowerCase();
      entry = zip.files[lowered];
    }
    if(!entry) entry = zip.files[lowered.replace(/\//g,'\\')];
    return entry ? entry : null;
  }
  /* Strict variant of safegetzipfile: throws an Error when the entry is missing. */
  function getzipfile(zip, file) {
    var entry = safegetzipfile(zip, file);
    if(entry != null) return entry;
    throw new Error("Cannot find file " + file + " in zip");
  }
  /*
   * Reads the content of `file` from `zip`.
   * With a falsy `safe`, errors from the lookup propagate to the caller.
   * With a truthy `safe`, a falsy file name or any error yields null instead.
   */
  function getzipdata(zip, file, safe) {
    if(safe) {
      if(!file) return null;
      try { return getzipdata(zip, file); } catch(e) { return null; }
    }
    return getdata(getzipfile(zip, file));
  }
  /* zip implementation and filesystem handles, resolved once at load time */
  var _fs, jszip;
  if(typeof JSZip !== 'undefined') jszip = JSZip;
  /* only in a CommonJS context (both `exports` and `module.exports` present) */
  if(typeof exports !== 'undefined' && typeof module !== 'undefined' && module.exports) {
    if(typeof jszip === 'undefined') {
      /* package build first when Buffer exists, bundled copy otherwise or as fallback */
      if(has_buf) jszip = require('js'+'zip');
      if(typeof jszip === 'undefined') jszip = require('./js'+'zip').JSZip;
    }
    _fs = require('f'+'s');
  }
  /* one attribute: name="value" or name='value'; the closing quote must match the opening quote */
  var attregexg=/\b[\w:-]+=(?:"[^"]*"|'[^']*')/g;
  var tagregex=/<[^>]*>/g;
  var nsregex=/<\w*:/, nsregex2 = /<(\/?)\w+:/;
  /*
   * Parses a single XML tag string into an array that doubles as an attribute map.
   *   tag       - raw tag text, e.g. '<table:table-cell office:value="1">'
   *   skip_root - when truthy, index 0 is left unset
   * Returns an array `z` where z[0] is the tag text up to the first whitespace
   * (unless skip_root) and each attribute is stored as a string-keyed property.
   * Namespace prefixes are dropped from attribute names ("office:value" -> "value"),
   * except that "xmlns:foo" is stored as "xmlnsfoo".
   * Attribute values are returned verbatim (entities are not decoded).
   */
  function parsexmltag(tag, skip_root) {
    var z = [];
    var eq = 0, c = 0;
    /* the tag name ends at the first space, tab, LF or CR */
    for(; eq !== tag.length; ++eq) if((c = tag.charCodeAt(eq)) === 32 || c === 9 || c === 10 || c === 13) break;
    if(!skip_root) z[0] = tag.substr(0, eq);
    if(eq === tag.length) return z;
    var m = tag.match(attregexg), j=0, v="", i=0, q="", cc="";
    if(m) for(i = 0; i != m.length; ++i) {
      cc = m[i];
      /* split at the first "=" (char code 61) */
      for(c=0; c != cc.length; ++c) if(cc.charCodeAt(c) === 61) break;
      /* c+2 skips "=" and the opening quote; length-1 drops the closing quote */
      q = cc.substr(0,c); v = cc.substring(c+2, cc.length-1);
      /* locate the namespace separator ":" (char code 58) */
      for(j=0;j!=q.length;++j) if(q.charCodeAt(j) === 58) break;
      if(j===q.length) z[q] = v;
      else z[(j===5 && q.substr(0,5)==="xmlns"?"xmlns":"")+q.substr(j+1)] = v;
    }
    return z;
  }
  /* Removes the namespace prefix from the first prefixed tag in `x`, keeping a leading "/" on a close tag. */
  function strip_ns(x) {
    var unprefixed = x.replace(nsregex2, "<$1");
    return unprefixed;
  }
  /* predefined XML entities -> literal characters */
  var encodings = {
    '&quot;': '"',
    '&apos;': "'",
    '&gt;': '>',
    '&lt;': '<',
    '&amp;': '&'
  };
  /* literal characters -> predefined XML entities */
  var rencoding = {
    '"': '&quot;',
    "'": '&apos;',
    '>': '&gt;',
    '<': '&lt;',
    '&': '&amp;'
  };
  var rencstr = "&<>'\"".split("");
  // TODO: CP remap (need to read file version to determine OS)
  var encregex = /&[a-z]*;/g, coderegex = /_x([\da-fA-F]+)_/g;
  /*
   * Decodes the five predefined XML entities and `_xHHHH_` escapes in `text`.
   *   text - any value; it is coerced to a string first
   * Returns the decoded string. Entity-shaped sequences that are not in the
   * `encodings` table (e.g. "&nbsp;") are left untouched rather than being
   * replaced by the text "undefined".
   */
  function unescapexml(text){
    var s = text + '';
    return s.replace(encregex, function($$) {
      return Object.prototype.hasOwnProperty.call(encodings, $$) ? encodings[$$] : $$;
    }).replace(coderegex,function(m,c) {return String.fromCharCode(parseInt(c,16));});
  }
  var decregex=/[&<>'"]/g, charegex = /[\u0000-\u0008\u000b-\u001f]/g;
  /*
   * Escapes `text` (coerced to a string) for XML output: the characters
   * & < > ' " become their entity form via `rencoding`, and the control
   * characters matched by `charegex` become `_xHHHH_` with four hex digits.
   */
  function escapexml(text){
    var entity_safe = ("" + text).replace(decregex, function(ch) { return rencoding[ch]; });
    return entity_safe.replace(charegex, function(ctl) {
      var padded = "000" + ctl.charCodeAt(0).toString(16);
      return "_x" + padded.slice(-4) + "_";
    });
  }
  /*
   * Interprets an XML boolean attribute value.
   * Only the exact strings '1', 'true' and 'TRUE' yield true; every other
   * value yields false. `tag` is accepted but not used.
   */
  function parsexmlbool(value, tag) {
    return value === '1' || value === 'true' || value === 'TRUE';
  }
  /*
   * Converts a date string understood by Date.parse into a fractional day count.
   * The constant offset equals 25569 days in milliseconds, so the Unix epoch
   * maps to 25569 and 1899-12-30T00:00:00Z maps to 0.
   */
  function datenum(v) {
    var MS_PER_DAY = 24 * 60 * 60 * 1000;
    var OFFSET_MS = 2209161600000;
    var parsed = Date.parse(v);
    return (parsed + OFFSET_MS) / MS_PER_DAY;
  }
  /* ISO 8601 Duration */
  /*
   * Converts an ISO 8601 duration string into a number of seconds.
   *   s - duration text such as "PT1H30M15S", "P1DT2H" or "P2D"
   * Returns the total seconds; fractional components are honoured.
   * Throws an Error when
   *   - no duration can be found in `s`, or
   *   - a year (Y) or month (date-part M) field is present, since neither
   *     has a fixed length in seconds.
   * Days are counted as 24 hours. The "T" separator is only required when a
   * time component follows.
   */
  function parse_isodur(s) {
    var m = s.match(/P([0-9\.]+Y)?([0-9\.]+M)?([0-9\.]+D)?(?:T([0-9\.]+H)?([0-9\.]+M)?([0-9\.]+S)?)?/);
    /* a bare "P" with neither a date field nor a "T" section is not a duration */
    if(!m || (m[0].indexOf("T") === -1 && !m[1] && !m[2] && !m[3])) throw new Error("|" + s + "| is not an ISO8601 Duration");
    var sec = 0, mt = 1, unit = "";
    for(var i = 1; i != m.length; ++i) {
      if(!m[i]) continue;
      unit = m[i].charAt(m[i].length-1);
      mt = 1;
      switch(unit) {
        case 'Y':
          throw new Error("Unsupported ISO Duration Field: " + unit);
        case 'D': mt = 24 * 60 * 60; break;
        case 'H': mt = 60 * 60; break;
        case 'M':
          /* groups 1-3 are the date part, where M means months */
          if(i <= 3) throw new Error("Unsupported ISO Duration Field: M");
          mt = 60; break;
        case 'S': break;
      }
      /* parseFloat stops at the trailing unit letter */
      sec += mt * parseFloat(m[i]);
    }
    return sec;
  }
  /* copied from js-xls (C) SheetJS Apache2 license */
  /*
   * Normalizes parser input to a string: Buffers are decoded as UTF-8 and
   * strings are returned unchanged. Any other input throws the string "badf".
   */
  function xlml_normalize(d) {
    var is_buffer = has_buf && Buffer.isBuffer(d);
    if(is_buffer) return d.toString('utf8');
    if(typeof d !== 'string') throw "badf";
    return d;
  }
  /* matches any open/close tag; groups: 1 = "/" for close tags, 2 = namespace prefix, 3 = local name */
  var xlmlregex = /<(\/?)([a-z0-9]*:|)([\w-]+)[^>]*>/mg;
  /* Part 3 Section 4 Manifest File */
  var CT_ODS = "application/vnd.oasis.opendocument.spreadsheet";
  /*
   * Validates the text of META-INF/manifest.xml.
   *   d    - manifest content (string, or Buffer where available)
   *   opts - accepted but not used
   * Returns nothing. Throws an Error when the root entry ("/") declares a
   * media type other than CT_ODS or when an encryption element is present.
   * Any unrecognized element throws the raw regex match array (unchanged
   * legacy behavior).
   */
  var parse_manifest = function(d, opts) {
    var str = xlml_normalize(d);
    var Rn;
    var FEtag, fepath, fetype;
    /* xlmlregex is shared and global: a previous scan that exited via throw leaves lastIndex mid-string */
    xlmlregex.lastIndex = 0;
    while((Rn = xlmlregex.exec(str))) switch(Rn[3]) {
      case 'manifest': break; // 4.2 <manifest:manifest>
      case 'file-entry': // 4.3 <manifest:file-entry>
        FEtag = parsexmltag(Rn[0]);
        /* parsexmltag strips the "manifest:" prefix, so the attributes arrive as
           "full-path" and "media-type"; the older "path"/"type" keys are kept as a fallback */
        fepath = FEtag['full-path'] != null ? FEtag['full-path'] : FEtag.path;
        fetype = FEtag['media-type'] != null ? FEtag['media-type'] : FEtag.type;
        if(fepath == '/' && fetype !== CT_ODS) throw new Error("This OpenDocument is not a spreadsheet");
        break;
      case 'encryption-data': // 4.4 <manifest:encryption-data>
      case 'algorithm': // 4.5 <manifest:algorithm>
      case 'start-key-generation': // 4.6 <manifest:start-key-generation>
      case 'key-derivation': // 4.7 <manifest:key-derivation>
        throw new Error("Unsupported ODS Encryption");
      default: throw Rn;
    }
  };
  /*
   * Extracts the plain text of a <text:p> body.
   *   text - raw markup found between the opening and closing <text:p> tags
   *   tag  - parsed opening tag; accepted but not used
   * <text:s/> becomes one space and <text:s text:c="N"/> becomes N spaces.
   * All remaining markup is removed, the result is passed through utf8read,
   * and the predefined XML entities are then decoded. Decoding happens after
   * tag stripping so that escaped "<" and ">" are not mistaken for markup.
   */
  var parse_text_p = function(text, tag) {
    var stripped = text
      .replace(/<text:s(?:\s+text:c=["'](\d+)["'])?\s*\/>/g, function(m, count) {
        var n = count ? parseInt(count, 10) : 1;
        return new Array(n + 1).join(" ");
      })
      .replace(/<[^>]*>/g,"");
    return utf8read(stripped).replace(encregex, function(ent) {
      return Object.prototype.hasOwnProperty.call(encodings, ent) ? encodings[ent] : ent;
    });
  };
  /*
   * Decodes a "binary" string (one char code per UTF-8 byte) into a regular
   * UTF-16 string. Lead values below 128 pass through, 192-223 start a
   * two-byte sequence, other values below 240 are treated as three-byte
   * sequences, and the rest as four-byte sequences emitted as a surrogate
   * pair. No validation of continuation bytes is performed.
   */
  var utf8read = function utf8reada(orig) {
    var units = [], pos = 0;
    var lead = 0, b1 = 0, b2 = 0, b3 = 0, w = 0;
    while(pos < orig.length) {
      lead = orig.charCodeAt(pos++);
      if(lead < 128) {
        units.push(lead);
      } else if(lead > 191 && lead < 224) {
        b1 = orig.charCodeAt(pos++);
        units.push(((lead & 31) << 6) | (b1 & 63));
      } else if(lead < 240) {
        b1 = orig.charCodeAt(pos++);
        b2 = orig.charCodeAt(pos++);
        units.push(((lead & 15) << 12) | ((b1 & 63) << 6) | (b2 & 63));
      } else {
        b1 = orig.charCodeAt(pos++);
        b2 = orig.charCodeAt(pos++);
        b3 = orig.charCodeAt(pos++);
        w = (((lead & 7) << 18) | ((b1 & 63) << 12) | ((b2 & 63) << 6) | (b3 & 63)) - 65536;
        units.push(0xD800 + ((w>>>10)&1023));
        units.push(0xDC00 + (w&1023));
      }
    }
    var out = "";
    for(var k = 0; k < units.length; ++k) out += String.fromCharCode(units[k]);
    return out;
  };
  /*
   * Parser for the text of content.xml.
   *
   * parse_content_xml(d, opts)
   *   d    - content.xml text (string, or Buffer where available)
   *   opts - optional settings object; the fields read here are
   *            sheetRows: when > 0, only cells in the first `sheetRows` rows are stored
   *            WTF:       when truthy, an unrecognized element throws its regex match
   * Returns { Sheets, SheetNames }. Each sheet maps encoded cell addresses to
   * cell objects {t, v, w?, z?} and may carry '!ref' and '!merges'.
   * Throws an Error for an unsupported office:value-type, and string messages
   * ("Bad state: ...") when tracked open/close tags do not pair up.
   */
  var parse_content_xml = (function() {
    var number_formats = {
      /* ods name: [short ssf fmt, long ssf fmt] */
      day: ["d", "dd"],
      month: ["m", "mm"],
      year: ["y", "yy"],
      hours: ["h", "hh"],
      minutes: ["m", "mm"],
      seconds: ["s", "ss"],
      "am-pm": ["A/P", "AM/PM"],
      "day-of-week": ["ddd", "dddd"]
    };
    return function pcx(d, opts) {
      var str = xlml_normalize(d);
      /* tolerate a missing options object without touching the caller's argument */
      var o = opts || {};
      var state = [], tmp;
      var tag;
      var NFtag, NF, NFparts, pidx;
      var sheetag;
      var Sheets = {}, SheetNames = [], ws = {};
      var Rn, q;
      var ctag;
      var textp = "", textpidx, textptag;
      var R, C, range = {s: {r:10000000,c:10000000}, e: {r:0, c:0}};
      var number_format_map = {};
      var merges = [], mrange = {}, mR = 0, mC = 0;
      /* xlmlregex is shared and global: a previous scan that exited via throw leaves lastIndex mid-string */
      xlmlregex.lastIndex = 0;
      while((Rn = xlmlregex.exec(str))) switch(Rn[3]) {
        case 'table': // 9.1.2 <table:table>
          if(Rn[1]==='/') {
            if(range.e.c >= range.s.c && range.e.r >= range.s.r) ws['!ref'] = get_utils().encode_range(range);
            if(merges.length) ws['!merges'] = merges;
            SheetNames.push(sheetag.name);
            Sheets[sheetag.name] = ws;
          }
          else if(Rn[0].charAt(Rn[0].length-2) !== '/') {
            sheetag = parsexmltag(Rn[0]);
            R = C = -1;
            range.s.r = range.s.c = 10000000; range.e.r = range.e.c = 0;
            ws = {}; merges = [];
          }
          break;
        case 'table-row': // 9.1.3 <table:table-row>
          if(Rn[1] === '/') break;
          ++R; C = -1; break;
        case 'covered-table-cell': // 9.1.5 table:covered-table-cell
          ++C; break; /* stub */
        case 'table-cell':
          if(Rn[0].charAt(Rn[0].length-2) === '/') {
            /* self-closing cell: empty, possibly repeated */
            ctag = parsexmltag(Rn[0]);
            if(ctag['number-columns-repeated']) C+= parseInt(ctag['number-columns-repeated'], 10);
            else ++C;
          }
          else if(Rn[1]!=='/') {
            /* opening tag of a cell with content */
            ++C;
            if(C > range.e.c) range.e.c = C;
            if(R > range.e.r) range.e.r = R;
            if(C < range.s.c) range.s.c = C;
            if(R < range.s.r) range.s.r = R;
            ctag = parsexmltag(Rn[0]);
            q = {t:ctag['value-type'], v:null};
            /* forget the previous cell's paragraph so it cannot leak into this cell */
            textp = "";
            if(ctag['number-columns-spanned'] || ctag['number-rows-spanned']) {
              /* an absent span attribute means a span of one row/column */
              mR = parseInt(ctag['number-rows-spanned'],10) || 1;
              mC = parseInt(ctag['number-columns-spanned'],10) || 1;
              mrange = {s: {r:R,c:C}, e:{r:R + mR-1,c:C + mC-1}};
              merges.push(mrange);
            }
            /* 19.385 office:value-type */
            switch(q.t) {
              case 'boolean': q.t = 'b'; q.v = parsexmlbool(ctag['boolean-value']); break;
              case 'float': q.t = 'n'; q.v = parseFloat(ctag.value); break;
              case 'percentage': q.t = 'n'; q.v = parseFloat(ctag.value); break;
              case 'currency': q.t = 'n'; q.v = parseFloat(ctag.value); break;
              case 'date': q.t = 'n'; q.v = datenum(ctag['date-value']); q.z = 'm/d/yy'; break;
              case 'time': q.t = 'n'; q.v = parse_isodur(ctag['time-value'])/86400; break;
              case 'string': q.t = 's'; break;
              default: throw new Error('Unsupported value type ' + q.t);
            }
          } else {
            /* closing tag: attach the collected text and store the cell */
            if(q.t === 's') q.v = textp;
            if(textp) q.w = textp;
            /* R is zero-based, so rows 0 .. sheetRows-1 are the first `sheetRows` rows */
            if(!(o.sheetRows && o.sheetRows <= R)) ws[get_utils().encode_cell({r:R,c:C})] = q;
            q = null;
          }
          break; // 9.1.4 <table:table-cell>

        /* pure state */
        case 'document-content': // 3.1.3.2 <office:document-content>
        case 'spreadsheet': // 3.7 <office:spreadsheet>
        case 'scripts': // 3.12 <office:scripts>
        case 'font-face-decls': // 3.14 <office:font-face-decls>
          if(Rn[1]==='/'){if((tmp=state.pop())[0]!==Rn[3]) throw "Bad state: "+tmp;}
          else if(Rn[0].charAt(Rn[0].length-2) !== '/') state.push([Rn[3], true]);
          break;

        /* ignore state */
        case 'shapes': // 9.2.8 <table:shapes>
        case 'frame': // 10.4.2 <draw:frame>
          if(Rn[1]==='/'){if((tmp=state.pop())[0]!==Rn[3]) throw "Bad state: "+tmp;}
          else if(Rn[0].charAt(Rn[0].length-2) !== '/') state.push([Rn[3], false]);
          break;

        case 'number-style': // 16.27.2 <number:number-style>
        case 'percentage-style': // 16.27.9 <number:percentage-style>
        case 'date-style': // 16.27.10 <number:date-style>
        case 'time-style': // 16.27.18 <number:time-style>
          if(Rn[1]==='/'){
            number_format_map[NFtag.name] = NF;
            if((tmp=state.pop())[0]!==Rn[3]) throw "Bad state: "+tmp;
          } else if(Rn[0].charAt(Rn[0].length-2) !== '/') {
            NF = "";
            NFtag = parsexmltag(Rn[0]);
            state.push([Rn[3], true]);
          } break;

        case 'script': break; // 3.13 <office:script>
        case 'automatic-styles': break; // 3.15.3 <office:automatic-styles>

        case 'style': break; // 16.2 <style:style>
        case 'font-face': break; // 16.21 <style:font-face>

        case 'paragraph-properties': break; // 17.6 <style:paragraph-properties>
        case 'table-properties': break; // 17.15 <style:table-properties>
        case 'table-column-properties': break; // 17.16 <style:table-column-properties>
        case 'table-row-properties': break; // 17.17 <style:table-row-properties>
        case 'table-cell-properties': break; // 17.18 <style:table-cell-properties>

        case 'number': // 16.27.3 <number:number>
        case 'day': // 16.27.11 <number:day>
        case 'month': // 16.27.12 <number:month>
        case 'year': // 16.27.13 <number:year>
        case 'era': // 16.27.14 <number:era>
        case 'day-of-week': // 16.27.15 <number:day-of-week>
        case 'week-of-year': // 16.27.16 <number:week-of-year>
        case 'quarter': // 16.27.17 <number:quarter>
        case 'hours': // 16.27.19 <number:hours>
        case 'minutes': // 16.27.20 <number:minutes>
        case 'seconds': // 16.27.21 <number:seconds>
        case 'am-pm': // 16.27.22 <number:am-pm>
          switch(state[state.length-1][0]) {
            case 'time-style':
            case 'date-style':
              tag = parsexmltag(Rn[0]);
              /* number, era, week-of-year and quarter have no entry in number_formats: skip them */
              NFparts = number_formats[Rn[3]];
              if(NFparts) NF += NFparts[tag.style==='long'?1:0];
              break;
          } break;

        case 'boolean-style': break; // 16.27.23 <number:boolean-style>
        case 'boolean': break; // 16.27.24 <number:boolean>
        case 'text-style': break; // 16.27.25 <number:text-style>
        case 'text': // 16.27.26 <number:text>
          if(Rn[0].substr(-2) === "/>") break;
          else if(Rn[1]==="/") switch(state[state.length-1][0]) {
            case 'number-style':
            case 'date-style':
            case 'time-style':
              /* literal text between <number:text> and </number:text> */
              NF += str.slice(pidx, Rn.index);
              break;
          }
          else pidx = Rn.index + Rn[0].length;
          break;
        case 'text-content': break; // 16.27.27 <number:text-content>
        case 'text-properties': break; // 16.27.27 <style:text-properties>

        case 'body': break; // 3.3 16.9.6 19.726.3

        case 'forms': break; // 12.25.2 13.2
        case 'table-column': break; // 9.1.6 <table:table-column>

        case 'graphic-properties': break;
        case 'calculation-settings': break; // 9.4.1 <table:calculation-settings>
        case 'named-expressions': break; // 9.4.11 <table:named-expressions>
        case 'named-range': break; // 9.4.11 <table:named-range>
        case 'span': break; // <text:span>
        case 'p':
          /* remember where the paragraph body starts; extract its text at the closing tag */
          if(Rn[1]==='/') textp = parse_text_p(str.slice(textpidx,Rn.index), textptag);
          else { textptag = parsexmltag(Rn[0]); textpidx = Rn.index + Rn[0].length; }
          break; // <text:p>
        case 's': break; // <text:s>
        case 'date': break; // <*:date>
        case 'annotation': break;

        case 'object': break; // 10.4.6.2 <draw:object>
        case 'title': break; // <*:title>
        case 'desc': break; // <*:desc>

        case 'database-ranges': break; // 9.4.14 <table:database-ranges>
        case 'database-range': break; // 9.4.15 <table:database-range>
        case 'filter': break; // 9.5.2 <table:filter>
        case 'filter-and': break; // 9.5.3 <table:filter-and>
        case 'filter-or': break; // 9.5.4 <table:filter-or>
        case 'filter-condition': break; // 9.5.5 <table:filter-condition>
        default: if(o.WTF) throw Rn;
      }
      var out = {
        Sheets: Sheets,
        SheetNames: SheetNames
      };
      return out;
    };
  })();
  /* Part 3: Packages */
  /*
   * Entry point: reads content.xml from the zip object and returns the
   * result of parse_content_xml for it. `opts` is forwarded untouched.
   * The manifest check below is currently disabled.
   */
  var parse_ods = function(zip, opts) {
    //var manifest = parse_manifest(getzipdata(zip, 'META-INF/manifest.xml'));
    var content = getzipdata(zip, 'content.xml');
    return parse_content_xml(content, opts);
  };
  ODS.parse_ods = parse_ods;
  383. })(typeof exports !== 'undefined' ? exports : ODS);