Skip to content

Commit 5ba4694

Browse files
committed
utf-8 without BOM now by default
1 parent ea47cdc commit 5ba4694

File tree

1 file changed

+42
-26
lines changed

1 file changed

+42
-26
lines changed

lib/utils/files.js

+42-26
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,6 @@ function writeWorkbook(workbook, baseName, ext, opt = {}) {
151151
* Download matrix to file.
152152
* Note that BOM and UTF-8 can create problems on some systems when importing
153153
* file. See "Supported Output Formats" and "UTF-16 Unicode Text" sections of
154-
* https://reactian.com/sheetjs-community-edition-spreadsheet-data-toolkit/
155154
* and https://github.com/SheetJS/sheetjs
156155
* Solution at bottom of: https://github.com/SheetJS/sheetjs/issues/943
157156
* The "Comma Separated Values" format is actually UTF-8 with BOM prefix.
@@ -167,7 +166,6 @@ export function exportWorkbook(workbook, baseName, ext) {
167166
const worksheet = workbook.Sheets[sheetName];
168167
const fileName = `${baseName}${sheets.length > 1 ? `_${sheetName}` : ''}.${ext.split(' ')[0]}`;
169168
var data = '';
170-
171169
switch (ext) {
172170
case 'xlsx':
173171
case 'xls':
@@ -179,52 +177,74 @@ export function exportWorkbook(workbook, baseName, ext) {
179177
See
180178
- https://docs.sheetjs.com/docs/api/write-options/
181179
- https://docs.sheetjs.com/docs/api/utilities/csv#csv-output
182-
* We want more accurate mimeTypes, so saveBlob() allows this.
180+
saveBlob() enables more accurate mimeTypes?
183181
* writeFile(bookType: 'csv'...) output includes the UTF-8 byte order
184182
* mark ("BOM").
185-
* writeFile(bookType: 'tsv'...) output will NOT include the BOM ???
186183
* sheet_to_csv() will return JavaScript strings without the UTF-8 BOM.
187-
* sheet_to_txt(): If encoding support is available, the output will be
188-
* encoded in CP1200 and the UTF-16 BOM will be added. If encoding
189-
* support is not available, the output will be encoded as a standard
190-
* string.
191-
* So is encoding support available?
184+
192185
*/
193-
case 'csv':
186+
187+
/* Phasing this out. UTF-8 doesn't need a BOM
188+
case 'csv': // UTF-8
194189
// writeFile(workbook, fileName, {bookType: 'csv', FS: ','});
195190
data = XlsxUtils.sheet_to_csv(worksheet, {FS: ','});
196191
data = '\uFEFF' + data; //BOM
197192
saveBlob(data, fileName, 'text/plain;charset=UTF-8');
198193
break;
194+
*/
199195

196+
/* This case won't work until we convert data to UTF-16
200197
case 'csv (UTF-16)':
201198
//writeFile(workbook, fileName, {bookType: 'txt', FS: ','});
202199
data = XlsxUtils.sheet_to_csv(worksheet, {FS: ','});
203200
data = '\uFEFF' + data; //BOM
204201
saveBlob(data, fileName, 'text/plain;charset=UTF-16LE');
205202
break;
203+
*/
204+
205+
case 'csv':
206+
case 'csv (UTF-8, no BOM)':
207+
data = XlsxUtils.sheet_to_csv(worksheet, {FS: ','});
208+
saveBlob(data, fileName, 'text/plain;charset=UTF-8');
209+
break;
210+
211+
/* This case won't work until we convert data to ASCII
212+
case 'csv (ASCII)': // no BOM
213+
data = XlsxUtils.sheet_to_csv(worksheet, {FS: ','});
214+
saveBlob(data, fileName, 'text/plain;charset=us-ascii');
215+
break;
216+
*/
206217

207-
case 'tsv': // BOM version
218+
/*
219+
* https://stackoverflow.com/questions/8336355/what-exactly-is-unicode-codepage-1200
220+
* sheet_to_txt(): sheetjs notes: "If encoding support is available, the
221+
* output will be encoded in CP1200 and the UTF-16 BOM will be added. If
222+
* encoding support is not available, the output will be encoded as a
223+
* standard string." In DH tests it seems "encoding support" is not
224+
* available, and the resulting file is UTF-8 with BOM anyway.
225+
*/
226+
case 'tsv': // UTF-8 BOM version
227+
// SheetJS note: For compatibility with Excel, csv output will always
228+
// include the UTF-8 byte order mark ("BOM").
208229
//writeFile(workbook, fileName, {bookType: 'csv', FS: '\t'});
209230
data = XlsxUtils.sheet_to_csv(worksheet, {FS: '\t'});
210231
data = '\uFEFF' + data; //BOM
211232
saveBlob(data, fileName, 'text/plain;charset=UTF-8');
212233
break;
213234

214-
case 'tsv (UTF-16)':
235+
/* Not working, produces hexadecimal file - is charset="UTF-16LE" recognized?
236+
* See Table 2-4: unicode.org/versions/Unicode6.0.0/ch02.pdf
237+
* UTF-16 little endian, aka code page 1200, is not permitted to have a BOM,
238+
* according to the Unicode standard.
239+
* DATA NEEDS TO BE CONVERTED TO UTF-16
240+
*
241+
case 'tsv (UTF-16)': // no BOM
242+
// See: https://localizely.com/character-encodings/utf16le/
243+
//writeFile(workbook, fileName, {bookType: 'tsv', FS: '\t'});
215244
data = XlsxUtils.sheet_to_txt(worksheet, {FS: '\t'});
216245
saveBlob(data, fileName, 'text/plain;charset=UTF-16LE');
217246
break;
218-
219-
case 'csv (UTF-8, no BOM)':
220-
data = XlsxUtils.sheet_to_csv(worksheet, {FS: ','});
221-
saveBlob(data, fileName, 'text/plain;charset=UTF-8');
222-
break;
223-
224-
case 'csv (ASCII)': // no BOM
225-
data = XlsxUtils.sheet_to_csv(worksheet, {FS: ','});
226-
saveBlob(data, fileName, 'text/plain;charset=us-ascii');
227-
break;
247+
*/
228248
}
229249
})
230250
};
@@ -253,17 +273,13 @@ function saveBlob(
253273
}
254274
*/
255275

256-
// https://docs.sheetjs.com/docs/api/utilities/csv#csv-output
257-
// "If encoding support is available, the output will be encoded in CP1200 and the UTF-16 BOM will be added. If encoding support is not available, the output will be encoded as a standard string.""
258-
259276
// Enhancing with mimeType
260277
const blob = new Blob([data], { type: mimeType });
261278
saveAs(blob, fileName);
262279
};
263280

264281
// TODO: refactor to export matrix
265282
export function exportFile(matrix, baseName, ext) {
266-
console.log("running exportFile", matrix, baseName, ext)
267283
const worksheet = XlsxUtils.aoa_to_sheet(matrix);
268284
const workbook = XlsxUtils.book_new();
269285
XlsxUtils.book_append_sheet(workbook, worksheet, DEFAULT_SHEETNAME);

0 commit comments

Comments
 (0)