Skip to content

Commit

Permalink
fix(corpus): properly specify inputFormat
Browse files Browse the repository at this point in the history
  • Loading branch information
ajmacdonald committed Aug 16, 2024
1 parent e624d5e commit 624860b
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 113 deletions.
6 changes: 4 additions & 2 deletions src/corpus.js
Original file line number Diff line number Diff line change
Expand Up @@ -1629,15 +1629,17 @@ class Corpus {
file = new Blob([nodeString], {type: 'text/xml'});
}
formData.append('input', file);
formData.append('inputFormat', Util.getFileExtensionFromMimeType(file.type));
const fileExt = Util.getFileExtensionFromMimeType(file.type);
formData.append('inputFormat', Util.getVoyantDocumentFormatFromFileExtension(fileExt));
});
} else {
if (Util.isNode(config)) {
const nodeString = new XMLSerializer().serializeToString(config);
config = new Blob([nodeString], {type: 'text/xml'});
}
formData.set('input', config);
formData.set('inputFormat', Util.getFileExtensionFromMimeType(config.type));
const fileExt = Util.getFileExtensionFromMimeType(config.type);
formData.set('inputFormat', Util.getVoyantDocumentFormatFromFileExtension(fileExt));
}

// append any other form options that may have been included
Expand Down
28 changes: 28 additions & 0 deletions src/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,34 @@ class Util {
}
}
}

/**
* Takes a file extension and returns the corresponding Voyant Document Format name.
* @param {String} fileExtension
* @returns {String}
*/
static getVoyantDocumentFormatFromFileExtension(fileExtension) {
fileExtension = fileExtension.trim().toLowerCase();
switch(fileExtension) {
case 'txt':
return 'text';
case 'xhtml':
return 'html';
case 'doc':
return 'msword';
case 'docx':
return 'mswordx';
case 'xls':
return 'xlsx';
case 'zip':
return 'archive';
case 'gzip':
case 'bzip2':
return 'compressed';
default:
return fileExtension;
}
}
}

export default Util;
233 changes: 122 additions & 111 deletions test/util.js
Original file line number Diff line number Diff line change
@@ -1,111 +1,122 @@
/**
* @jest-environment jsdom
*/

import Util from '../src/util';

import * as Mocks from './mocks/corpus';

// Suite-level setup hook; currently a no-op.
beforeAll(() => {});

// Per-test setup hook; currently a no-op.
beforeEach(() => {});

// Util.id(16) should yield a value whose length is 16.
test('id', () => {
  expect(Util.id(16).length).toBe(16);
});

// A short array is rendered as its JSON text.
test('toString short', () => {
  const rendered = Util.toString(['foo', 'bar']);
  expect(rendered).toBe('["foo","bar"]');
});

// For the mock documents metadata the rendered output contains an <svg element.
test('toString long', () => {
  expect(Util.toString(Mocks.DocumentsMetadata)).toMatch('<svg');
});

// Util.more output starts the svg markup directly after the 'foo' prefix.
test('more', () => {
  const body = Util.toString(Mocks.DocumentsMetadata);
  expect(Util.more('foo', body, 'bar')).toMatch('foo<svg');
});

// A data URL converts into a Blob instance.
test('dataUrlToBlob', () => {
  const isBlobInstance = Util.dataUrlToBlob('data:text/plain;base64,Zm9v') instanceof Blob;
  expect(isBlobInstance).toBe(true);
});

// Round trip: data URL -> Blob -> data URL.
test('blobToDataUrl', () => {
  const source = Util.dataUrlToBlob('data:text/plain;base64,Zm9v');
  return expect(Util.blobToDataUrl(source)).resolves.toMatch(/data:text\/plain;base64,Zm9v/);
});

// The base64 payload 'Zm9v' decodes to text matching /foo/.
test('blobToString', () => {
  const source = Util.dataUrlToBlob('data:text/plain;base64,Zm9v');
  return expect(Util.blobToString(source)).resolves.toMatch(/foo/);
});

// test('transformXml', () => {
// const xmlDoc = '<foo><bar>baz</bar></foo>';
// const xslDoc = `<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
// <xsl:template match="/foo/bar"><xsl:value-of select="child::text()" /></xsl:template></xsl:stylesheet>`;
// const result = Util.transformXml(xmlDoc, xslDoc, false);
// expect(result.firstChild.textContent).toMatch('baz');
// })

// Type-predicate helpers: one representative input per predicate.
test('isString', () => {
  const result = Util.isString('foo');
  expect(result).toBe(true);
});

test('isNumber', () => {
  const result = Util.isNumber(5);
  expect(result).toBe(true);
});

test('isBoolean', () => {
  const result = Util.isBoolean(false);
  expect(result).toBe(true);
});

test('isUndefined', () => {
  const result = Util.isUndefined(undefined);
  expect(result).toBe(true);
});

test('isArray', () => {
  const result = Util.isArray(['foo']);
  expect(result).toBe(true);
});

test('isObject', () => {
  const result = Util.isObject({'foo': 'bar'});
  expect(result).toBe(true);
});

// undefined is expected NOT to count as null.
test('isNull', () => {
  const result = Util.isNull(undefined);
  expect(result).toBe(false);
});

// A plain string is expected NOT to count as a node.
test('isNode', () => {
  const result = Util.isNode('foo');
  expect(result).toBe(false);
});

test('isFunction', () => {
  const result = Util.isFunction(()=>{});
  expect(result).toBe(true);
});

test('isPromise', () => {
  const result = Util.isPromise(new Promise(() => {}));
  expect(result).toBe(true);
});

test('isBlob', () => {
  const result = Util.isBlob(new Blob());
  expect(result).toBe(true);
});

// MIME type -> file extension lookups; a MIME type with no mapping gives undefined.
test('getFileExtensionFromMimeType', () => {
  const expectations = [
    ['application/vnd.oasis.opendocument.text', 'odt'],
    ['application/json', 'json'],
    ['application/x-apple-diskimage', undefined],
    ['text/csv', 'txt'],
  ];
  expectations.forEach((pair) => {
    const mimeType = pair[0];
    const extension = pair[1];
    expect(Util.getFileExtensionFromMimeType(mimeType)).toBe(extension);
  });
});
/**
* @jest-environment jsdom
*/

import Util from '../src/util';

import * as Mocks from './mocks/corpus';

// Suite-level setup hook; currently a no-op.
beforeAll(() => {});

// Per-test setup hook; currently a no-op.
beforeEach(() => {});

// Util.id(16) should yield a value whose length is 16.
test('id', () => {
  expect(Util.id(16).length).toBe(16);
});

// A short array is rendered as its JSON text.
test('toString short', () => {
  const rendered = Util.toString(['foo', 'bar']);
  expect(rendered).toBe('["foo","bar"]');
});

// For the mock documents metadata the rendered output contains an <svg element.
test('toString long', () => {
  expect(Util.toString(Mocks.DocumentsMetadata)).toMatch('<svg');
});

// Util.more output starts the svg markup directly after the 'foo' prefix.
test('more', () => {
  const body = Util.toString(Mocks.DocumentsMetadata);
  expect(Util.more('foo', body, 'bar')).toMatch('foo<svg');
});

// A data URL converts into a Blob instance.
test('dataUrlToBlob', () => {
  const isBlobInstance = Util.dataUrlToBlob('data:text/plain;base64,Zm9v') instanceof Blob;
  expect(isBlobInstance).toBe(true);
});

// Round trip: data URL -> Blob -> data URL.
test('blobToDataUrl', () => {
  const source = Util.dataUrlToBlob('data:text/plain;base64,Zm9v');
  return expect(Util.blobToDataUrl(source)).resolves.toMatch(/data:text\/plain;base64,Zm9v/);
});

// The base64 payload 'Zm9v' decodes to text matching /foo/.
test('blobToString', () => {
  const source = Util.dataUrlToBlob('data:text/plain;base64,Zm9v');
  return expect(Util.blobToString(source)).resolves.toMatch(/foo/);
});

// test('transformXml', () => {
// const xmlDoc = '<foo><bar>baz</bar></foo>';
// const xslDoc = `<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
// <xsl:template match="/foo/bar"><xsl:value-of select="child::text()" /></xsl:template></xsl:stylesheet>`;
// const result = Util.transformXml(xmlDoc, xslDoc, false);
// expect(result.firstChild.textContent).toMatch('baz');
// })

// Type-predicate helpers: one representative input per predicate.
test('isString', () => {
  const result = Util.isString('foo');
  expect(result).toBe(true);
});

test('isNumber', () => {
  const result = Util.isNumber(5);
  expect(result).toBe(true);
});

test('isBoolean', () => {
  const result = Util.isBoolean(false);
  expect(result).toBe(true);
});

test('isUndefined', () => {
  const result = Util.isUndefined(undefined);
  expect(result).toBe(true);
});

test('isArray', () => {
  const result = Util.isArray(['foo']);
  expect(result).toBe(true);
});

test('isObject', () => {
  const result = Util.isObject({'foo': 'bar'});
  expect(result).toBe(true);
});

// undefined is expected NOT to count as null.
test('isNull', () => {
  const result = Util.isNull(undefined);
  expect(result).toBe(false);
});

// A plain string is expected NOT to count as a node.
test('isNode', () => {
  const result = Util.isNode('foo');
  expect(result).toBe(false);
});

test('isFunction', () => {
  const result = Util.isFunction(()=>{});
  expect(result).toBe(true);
});

test('isPromise', () => {
  const result = Util.isPromise(new Promise(() => {}));
  expect(result).toBe(true);
});

test('isBlob', () => {
  const result = Util.isBlob(new Blob());
  expect(result).toBe(true);
});

// MIME type -> file extension lookups; a MIME type with no mapping gives undefined.
test('getFileExtensionFromMimeType', () => {
  const expectations = [
    ['application/vnd.oasis.opendocument.text', 'odt'],
    ['application/json', 'json'],
    ['application/x-apple-diskimage', undefined],
    ['text/csv', 'txt'],
  ];
  expectations.forEach((pair) => {
    const mimeType = pair[0];
    const extension = pair[1];
    expect(Util.getFileExtensionFromMimeType(mimeType)).toBe(extension);
  });
});

// Exercises every explicit extension mapping, the pass-through default, and the
// trim / lower-case normalization applied to the input.
test('getVoyantDocumentFormatFromFileExtension', () => {
  // explicit mappings
  expect(Util.getVoyantDocumentFormatFromFileExtension('txt')).toBe('text');
  expect(Util.getVoyantDocumentFormatFromFileExtension('xhtml')).toBe('html');
  expect(Util.getVoyantDocumentFormatFromFileExtension('doc')).toBe('msword');
  expect(Util.getVoyantDocumentFormatFromFileExtension('docx')).toBe('mswordx');
  expect(Util.getVoyantDocumentFormatFromFileExtension('xls')).toBe('xlsx');
  expect(Util.getVoyantDocumentFormatFromFileExtension('zip')).toBe('archive');
  expect(Util.getVoyantDocumentFormatFromFileExtension('gzip')).toBe('compressed');
  expect(Util.getVoyantDocumentFormatFromFileExtension('bzip2')).toBe('compressed');

  // unmapped extensions pass through
  expect(Util.getVoyantDocumentFormatFromFileExtension('pdf')).toBe('pdf');

  // input is trimmed and lower-cased before lookup
  expect(Util.getVoyantDocumentFormatFromFileExtension(' TXT ')).toBe('text');
  expect(Util.getVoyantDocumentFormatFromFileExtension('PDF')).toBe('pdf');
});

0 comments on commit 624860b

Please sign in to comment.