Skip to content

Commit

Permalink
version bump 0.8.8: HTML
Browse files Browse the repository at this point in the history
- read MSO HTML (fixes SheetJS#419 h/t @vineetl fixes SheetJS#458 h/t @tienne)
- roll out xml namespace fix (closes SheetJS#362 h/t @PierreOCXP)
- cellDates clarifications
  • Loading branch information
SheetJSDev committed Mar 9, 2017
1 parent d2b5506 commit 7b6fb7b
Show file tree
Hide file tree
Showing 38 changed files with 286 additions and 114 deletions.
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ File format support for known spreadsheet data formats:
| OpenDocument Spreadsheet (ODS) | :o: | :o: |
| Flat XML ODF Spreadsheet (FODS) | :o: | :o: |
| Uniform Office Format Spreadsheet (标文通 UOS1/UOS2) | :o: | |
| **Other Common Spreadsheet Output Formats** |:-----:|:-----:|
| HTML Tables | :o: | |

Demo: <http://oss.sheetjs.com/js-xlsx>

Expand Down Expand Up @@ -565,7 +567,7 @@ file but Excel will know how to handle it. This library applies similar logic:
|:-------|:--------------|:----------------------------------------------------|
| `0xD0` | CFB Container | BIFF 5/8 or password-protected XLSX/XLSB |
| `0x09` | BIFF Stream | BIFF 2/3/4/5 |
| `0x3C` | XML | SpreadsheetML or Flat ODS or UOS1 |
| `0x3C` | XML/HTML | SpreadsheetML or Flat ODS or UOS1 or HTML |
| `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 |
| `0xFE` | UTF8 Text | SpreadsheetML or Flat ODS or UOS1 |

Expand Down Expand Up @@ -679,6 +681,11 @@ Excel CSV deviates from RFC4180 in a number of important ways. The generated
CSV files should generally work in Excel although they may not work in RFC4180
compatible readers.

### HTML

Excel HTML worksheets include special metadata encoded in styles. For example,
`mso-number-format` is a localized string containing the number format. Despite
the metadata, the output is valid HTML, although it does accept bare `&` symbols.

## Tested Environments

Expand Down
2 changes: 1 addition & 1 deletion bits/01_version.js
Original file line number Diff line number Diff line change
@@ -1 +1 @@
XLSX.version = '0.8.7';
XLSX.version = '0.8.8';
7 changes: 3 additions & 4 deletions bits/20_jsutils.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,9 @@ function evert_arr(obj/*:any*/)/*:EvertArrType*/ {
return o;
}

/* TODO: date1904 logic */
function datenum(v/*:number*/, date1904/*:?boolean*/)/*:number*/ {
if(date1904) v+=1462;
var epoch = Date.parse(v);
function datenum(v/*:Date*/, date1904/*:?boolean*/)/*:number*/ {
var epoch = v.getTime();
if(date1904) epoch += 1462*24*60*60*1000;
return (epoch + 2209161600000) / (24 * 60 * 60 * 1000);
}

Expand Down
1 change: 1 addition & 0 deletions bits/66_wscommon.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ function get_cell_style(styles, cell, opts) {
}

function safe_format(p, fmtid, fillid, opts) {
if(p.t === 'd' && typeof p.v === 'string') p.v = new Date(p.v);
try {
if(p.t === 'e') p.w = p.w || BErr[p.v];
else if(fmtid === 0) {
Expand Down
4 changes: 2 additions & 2 deletions bits/67_wsxml.js
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ function write_ws_xml_cell(cell, ref, ws, opts, idx, wb) {
if(opts.cellDates) vv = new Date(cell.v).toISOString();
else {
cell.t = 'n';
vv = ''+(cell.v = datenum(cell.v));
vv = ''+(cell.v = datenum(new Date(cell.v)));
if(typeof cell.z === 'undefined') cell.z = SSF._table[14];
}
break;
Expand Down Expand Up @@ -265,7 +265,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess) {
break; // inline string
case 'b': p.v = parsexmlbool(p.v); break;
case 'd':
if(!opts.cellDates) { p.v = datenum(p.v); p.t = 'n'; }
if(!opts.cellDates) { p.v = datenum(new Date(p.v)); p.t = 'n'; }
break;
/* error string in .v, number in .v */
case 'e': p.w = p.v; p.v = RBErr[p.v]; break;
Expand Down
3 changes: 2 additions & 1 deletion bits/75_xlml.js
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,9 @@ function xlml_normalize(d)/*:string*/ {

/* TODO: Everything */
var xlmlregex = /<(\/?)([a-z0-9]*:|)(\w+)[^>]*>/mg;
function parse_xlml_xml(d, opts) {
function parse_xlml_xml(d, opts)/*:Workbook*/ {
var str = debom(xlml_normalize(d));
if(str.substr(0,1000).indexOf("<html") >= 0) return parse_html(str, opts);
var Rn;
var state = [], tmp;
var sheets = {}, sheetnames = [], cursheet = {}, sheetname = "";
Expand Down
35 changes: 35 additions & 0 deletions bits/79_html.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/* TODO: in browser attach to DOM; in node use an html parser */
/*
 * Parse the first <table> ... </table> span of an HTML string into a workbook
 * holding a single worksheet named "Sheet1".
 *
 * str:  HTML text; the span between the first "<table" and the first
 *       "</table" is split into rows on <tr ...> tags and into cells on "</td>".
 * opts: accepted for signature parity with the other parsers; not read here.
 *
 * Returns { SheetNames: ["Sheet1"], Sheets: {Sheet1: ws} }.  Cell text that
 * compares loosely equal to its numeric conversion is stored as {t:'n'},
 * everything else as {t:'s'}.  Empty cells produce no cell object but still
 * occupy a column.  Rows that do not begin with a <td> (for example <th>
 * header rows) are skipped and do not consume a row index.
 *
 * Throws Error when either "<table" or "</table" is missing.
 */
function parse_html(str/*:string*/, opts)/*:Workbook*/ {
	var ws/*:Worksheet*/ = ({}/*:any*/);
	var o/*:Workbook*/ = { SheetNames: ["Sheet1"], Sheets: {Sheet1:ws} };
	var i = str.indexOf("<table"), j = str.indexOf("</table");
	if(i == -1 || j == -1) throw new Error("Invalid HTML: missing <table> / </table> pair");
	var rows = str.slice(i, j).split(/<tr[^>]*>/);
	var R = 0, C = 0;
	var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}};
	for(i = 0; i < rows.length; ++i) {
		/* pretty-printed HTML puts whitespace between <tr> and the first <td> */
		var row = rows[i].replace(/^\s+/, "");
		if(row.substr(0,3) != "<td") continue;
		var cells = row.split("</td>");
		for(j = 0, C = 0; j < cells.length; ++j) {
			/* likewise whitespace between </td> and the next <td> */
			var m = cells[j].replace(/^\s+/, ""), cc = 0;
			if(m.substr(0,3) != "<td") continue;
			/* TODO: parse styles etc */
			/* strip the opening <td ...> and any further leading tags */
			while(m.charAt(0) == "<" && (cc = m.indexOf(">")) > -1) m = m.slice(cc+1);
			/* strip trailing tags (everything from the last "<" onwards) */
			while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<"));
			/* TODO: generate stub cells */
			if(m.length) {
				if(range.s.r > R) range.s.r = R;
				if(range.e.r < R) range.e.r = R;
				if(range.s.c > C) range.s.c = C;
				if(range.e.c < C) range.e.c = C;
				var coord/*:string*/ = encode_cell({r:R, c:C});
				/* TODO: value parsing */
				if(m == +m) ws[coord] = {t:'n', v:+m};
				else ws[coord] = {t:'s', v:m};
			}
			/* advance only after the cell is placed so the first <td> is column 0 (A) */
			++C;
		}
		++R;
	}
	ws['!ref'] = encode_range(range);
	return o;
}
4 changes: 2 additions & 2 deletions bits/88_write.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ function write_string_type(out/*:string*/, opts/*:WriteOpts*/) {
switch(opts.type) {
case "base64": return Base64.encode(out);
case "binary": return out;
case "file": return _fs.writeFileSync(opts.file, out, {encoding:'utf8'});
case "file": return _fs.writeFileSync(opts.file, out, 'utf8');
case "buffer": {
if(has_buf) return new Buffer(out, 'utf8');
else return out.split("").map(function(c) { return c.charCodeAt(0); });
} break;
default: return out;
default: throw new Error("Unrecognized type " + opts.type);
}
}

Expand Down
14 changes: 9 additions & 5 deletions dist/ods.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ var rencoding = {
var rencstr = "&<>'\"".split("");

// TODO: CP remap (need to read file version to determine OS)
var encregex = /&[a-z]*;/g, coderegex = /_x([\da-fA-F]+)_/g;
/* 22.4.2.4 bstr (Basic String) */
var encregex = /&[a-z]*;/g, coderegex = /_x([\da-fA-F]{4})_/g;
function unescapexml(text){
var s = text + '';
return s.replace(encregex, function($$) { return encodings[$$]; }).replace(coderegex,function(m,c) {return String.fromCharCode(parseInt(c,16));});
Expand All @@ -133,8 +134,9 @@ function parsexmlbool(value) {
}
}

function datenum(v) {
var epoch = Date.parse(v);
function datenum(v, date1904) {
var epoch = v.getTime();
if(date1904) epoch += 1462*24*60*60*1000;
return (epoch + 2209161600000) / (24 * 60 * 60 * 1000);
}

Expand Down Expand Up @@ -275,7 +277,8 @@ var parse_content_xml = (function() {
"day-of-week": ["ddd", "dddd"]
};

return function pcx(d, opts) {
return function pcx(d, _opts) {
var opts = _opts || {};
var str = xlml_normalize(d);
var state = [], tmp;
var tag;
Expand Down Expand Up @@ -363,7 +366,7 @@ var parse_content_xml = (function() {
case 'float': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'percentage': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'currency': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'date': q.t = 'n'; q.v = datenum(ctag['date-value']); q.z = 'm/d/yy'; break;
case 'date': q.t = 'n'; q.v = datenum(new Date(ctag['date-value'])); q.z = 'm/d/yy'; break;
case 'time': q.t = 'n'; q.v = parse_isodur(ctag['time-value'])/86400; break;
case 'number': q.t = 'n'; q.v = parseFloat(ctag['数据数值']); break;
default:
Expand Down Expand Up @@ -699,6 +702,7 @@ function parse_ods(zip, opts) {
var ods = !!safegetzipfile(zip, 'objectdata');
if(ods) var manifest = parse_manifest(getzipdata(zip, 'META-INF/manifest.xml'), opts);
var content = getzipdata(zip, 'content.xml');
if(!content) throw new Error("Missing content.xml in " + (ods ? "ODS" : "UOF")+ " file");
return parse_content_xml(ods ? content : utf8read(content), opts);
}

Expand Down
2 changes: 1 addition & 1 deletion dist/ods.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/ods.min.map

Large diffs are not rendered by default.

22 changes: 11 additions & 11 deletions dist/xlsx.core.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/xlsx.core.min.map

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions dist/xlsx.full.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/xlsx.full.min.map

Large diffs are not rendered by default.

61 changes: 52 additions & 9 deletions dist/xlsx.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
/*jshint funcscope:true, eqnull:true */
var XLSX = {};
(function make_xlsx(XLSX){
XLSX.version = '0.8.7';
XLSX.version = '0.8.8';
var current_codepage = 1200, current_cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('./dist/cpexcel.js');
Expand Down Expand Up @@ -1293,10 +1293,9 @@ function evert_arr(obj) {
return o;
}

/* TODO: date1904 logic */
function datenum(v, date1904) {
if(date1904) v+=1462;
var epoch = Date.parse(v);
var epoch = v.getTime();
if(date1904) epoch += 1462*24*60*60*1000;
return (epoch + 2209161600000) / (24 * 60 * 60 * 1000);
}

Expand Down Expand Up @@ -1415,7 +1414,8 @@ var rencstr = "&<>'\"".split("");

// TODO: CP remap (need to read file version to determine OS)
var unescapexml = (function() {
var encregex = /&[a-z]*;/g, coderegex = /_x([\da-fA-F]+)_/g;
/* 22.4.2.4 bstr (Basic String) */
var encregex = /&[a-z]*;/g, coderegex = /_x([\da-fA-F]{4})_/g;
return function unescapexml(text) {
var s = text + '';
return s.replace(encregex, function($$) { return encodings[$$]; }).replace(coderegex,function(m,c) {return String.fromCharCode(parseInt(c,16));});
Expand Down Expand Up @@ -4320,6 +4320,7 @@ var parse_rs = (function parse_rs_factory() {
/* ** not required . */
case '<shadow':
/* falls through */
case '<shadow>':
case '<shadow/>': break;

/* 18.4.1 charset CT_IntProperty TODO */
Expand All @@ -4331,6 +4332,7 @@ var parse_rs = (function parse_rs_factory() {
/* 18.4.2 outline CT_BooleanProperty TODO */
case '<outline':
/* falls through */
case '<outline>':
case '<outline/>': break;

/* 18.4.5 rFont CT_FontName */
Expand All @@ -4343,27 +4345,31 @@ var parse_rs = (function parse_rs_factory() {
case '<strike':
if(!y.val) break;
/* falls through */
case '<strike>':
case '<strike/>': font.strike = 1; break;
case '</strike>': break;

/* 18.4.13 u CT_UnderlineProperty */
case '<u':
if(y.val == '0') break;
/* falls through */
case '<u>':
case '<u/>': font.u = 1; break;
case '</u>': break;

/* 18.8.2 b */
case '<b':
if(y.val == '0') break;
/* falls through */
case '<b>':
case '<b/>': font.b = 1; break;
case '</b>': break;

/* 18.8.26 i */
case '<i':
if(y.val == '0') break;
/* falls through */
case '<i>':
case '<i/>': font.i = 1; break;
case '</i>': break;

Expand Down Expand Up @@ -7852,6 +7858,7 @@ function get_cell_style(styles, cell, opts) {
}

function safe_format(p, fmtid, fillid, opts) {
if(p.t === 'd' && typeof p.v === 'string') p.v = new Date(p.v);
try {
if(p.t === 'e') p.w = p.w || BErr[p.v];
else if(fmtid === 0) {
Expand Down Expand Up @@ -8022,7 +8029,7 @@ function write_ws_xml_cell(cell, ref, ws, opts, idx, wb) {
if(opts.cellDates) vv = new Date(cell.v).toISOString();
else {
cell.t = 'n';
vv = ''+(cell.v = datenum(cell.v));
vv = ''+(cell.v = datenum(new Date(cell.v)));
if(typeof cell.z === 'undefined') cell.z = SSF._table[14];
}
break;
Expand Down Expand Up @@ -8150,7 +8157,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess) {
break; // inline string
case 'b': p.v = parsexmlbool(p.v); break;
case 'd':
if(!opts.cellDates) { p.v = datenum(p.v); p.t = 'n'; }
if(!opts.cellDates) { p.v = datenum(new Date(p.v)); p.t = 'n'; }
break;
/* error string in .v, number in .v */
case 'e': p.w = p.v; p.v = RBErr[p.v]; break;
Expand Down Expand Up @@ -9479,6 +9486,7 @@ function xlml_normalize(d) {
var xlmlregex = /<(\/?)([a-z0-9]*:|)(\w+)[^>]*>/mg;
function parse_xlml_xml(d, opts) {
var str = debom(xlml_normalize(d));
if(str.substr(0,1000).indexOf("<html") >= 0) return parse_html(str, opts);
var Rn;
var state = [], tmp;
var sheets = {}, sheetnames = [], cursheet = {}, sheetname = "";
Expand Down Expand Up @@ -12102,6 +12110,41 @@ function write_biff_buf(wb, o) {
// TODO
return ba.end();
}
/* TODO: in browser attach to DOM; in node use an html parser */
/*
 * Parse the first <table> ... </table> span of an HTML string into a workbook
 * holding a single worksheet named "Sheet1".
 *
 * str:  HTML text; the span between the first "<table" and the first
 *       "</table" is split into rows on <tr ...> tags and into cells on "</td>".
 * opts: accepted for signature parity with the other parsers; not read here.
 *
 * Returns { SheetNames: ["Sheet1"], Sheets: {Sheet1: ws} }.  Cell text that
 * compares loosely equal to its numeric conversion is stored as {t:'n'},
 * everything else as {t:'s'}.  Empty cells produce no cell object but still
 * occupy a column.  Rows that do not begin with a <td> (for example <th>
 * header rows) are skipped and do not consume a row index.
 *
 * Throws Error when either "<table" or "</table" is missing.
 */
function parse_html(str, opts) {
	var ws = ({});
	var o = { SheetNames: ["Sheet1"], Sheets: {Sheet1:ws} };
	var i = str.indexOf("<table"), j = str.indexOf("</table");
	if(i == -1 || j == -1) throw new Error("Invalid HTML: missing <table> / </table> pair");
	var rows = str.slice(i, j).split(/<tr[^>]*>/);
	var R = 0, C = 0;
	var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}};
	for(i = 0; i < rows.length; ++i) {
		/* pretty-printed HTML puts whitespace between <tr> and the first <td> */
		var row = rows[i].replace(/^\s+/, "");
		if(row.substr(0,3) != "<td") continue;
		var cells = row.split("</td>");
		for(j = 0, C = 0; j < cells.length; ++j) {
			/* likewise whitespace between </td> and the next <td> */
			var m = cells[j].replace(/^\s+/, ""), cc = 0;
			if(m.substr(0,3) != "<td") continue;
			/* TODO: parse styles etc */
			/* strip the opening <td ...> and any further leading tags */
			while(m.charAt(0) == "<" && (cc = m.indexOf(">")) > -1) m = m.slice(cc+1);
			/* strip trailing tags (everything from the last "<" onwards) */
			while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<"));
			/* TODO: generate stub cells */
			if(m.length) {
				if(range.s.r > R) range.s.r = R;
				if(range.e.r < R) range.e.r = R;
				if(range.s.c > C) range.s.c = C;
				if(range.e.c < C) range.e.c = C;
				var coord = encode_cell({r:R, c:C});
				/* TODO: value parsing */
				if(m == +m) ws[coord] = {t:'n', v:+m};
				else ws[coord] = {t:'s', v:m};
			}
			/* advance only after the cell is placed so the first <td> is column 0 (A) */
			++C;
		}
		++R;
	}
	ws['!ref'] = encode_range(range);
	return o;
}
/* actual implementation in utils, wrappers are for read/write */
function write_csv_str(wb, o) {
var idx = 0;
Expand Down Expand Up @@ -12469,12 +12512,12 @@ function write_string_type(out, opts) {
switch(opts.type) {
case "base64": return Base64.encode(out);
case "binary": return out;
case "file": return _fs.writeFileSync(opts.file, out, {encoding:'utf8'});
case "file": return _fs.writeFileSync(opts.file, out, 'utf8');
case "buffer": {
if(has_buf) return new Buffer(out, 'utf8');
else return out.split("").map(function(c) { return c.charCodeAt(0); });
} break;
default: return out;
default: throw new Error("Unrecognized type " + opts.type);
}
}

Expand Down
18 changes: 9 additions & 9 deletions dist/xlsx.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/xlsx.min.map

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion index.html
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
<pre id="out"></pre>
<br />
<!-- uncomment the next line here and in xlsxworker.js for encoding support -->
<!--<script src="dist/cpexcel.js"></script>-->
<script src="dist/cpexcel.js"></script>
<script src="shim.js"></script>
<script src="jszip.js"></script>
<script src="xlsx.js"></script>
Expand Down
1 change: 1 addition & 0 deletions misc/flowdeps.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ declare module 'xlsx' { declare var exports:XLSXModule; };
declare module '../' { declare var exports:XLSXModule; };
declare module 'commander' { declare var exports:any; };
declare module './jszip.js' { declare var exports:any; };
type ZIP = any;
*/
Loading

0 comments on commit 7b6fb7b

Please sign in to comment.