From 71c8b5f81ba938fb6be08ef378607f9325b2ac73 Mon Sep 17 00:00:00 2001 From: Giacomo Citi Date: Thu, 31 Oct 2024 10:43:08 +0100 Subject: [PATCH] cleanup --- packages/model/CsvColumn.ts | 6 +- .../datatypeInference.ts} | 40 +++++++------- packages/model/test/DatatypeChecker.test.ts | 55 ------------------- .../model/test/lib/datatypeInference.test.ts | 53 ++++++++++++++++++ 4 files changed, 74 insertions(+), 80 deletions(-) rename packages/model/{DatatypeChecker.ts => lib/datatypeInference.ts} (53%) delete mode 100644 packages/model/test/DatatypeChecker.test.ts create mode 100644 packages/model/test/lib/datatypeInference.test.ts diff --git a/packages/model/CsvColumn.ts b/packages/model/CsvColumn.ts index 7aadf60b1..dc8082fc5 100644 --- a/packages/model/CsvColumn.ts +++ b/packages/model/CsvColumn.ts @@ -6,9 +6,7 @@ import { csvw, dtype, schema } from '@tpluscode/rdf-ns-builders' import { cc } from '@cube-creator/core/namespace' import { NamedNode } from '@rdfjs/types' import { initializer } from './lib/initializer' -import { DatatypeChecker } from './DatatypeChecker' - -const datatypeChecker = new DatatypeChecker() +import { inferDatatype } from './lib/datatypeInference' export interface CsvColumn extends RdfResource { name: string @@ -29,7 +27,7 @@ export function CsvColumnMixin(Resource: Base): Mixin samples!: string[] get defaultDatatype(): NamedNode { - return datatypeChecker.determineDatatype(this.samples) + return inferDatatype(this.samples) } } diff --git a/packages/model/DatatypeChecker.ts b/packages/model/lib/datatypeInference.ts similarity index 53% rename from packages/model/DatatypeChecker.ts rename to packages/model/lib/datatypeInference.ts index 6b0e6bbc2..392391858 100644 --- a/packages/model/DatatypeChecker.ts +++ b/packages/model/lib/datatypeInference.ts @@ -38,30 +38,28 @@ const nextUntil = (iterator: Iterator, predicate: (value: T) => boolean) = } } -export class DatatypeChecker { - public determineDatatype(values: Iterable): NamedNode { - // get 
the first datatype that matches the first (non-empty) value - const valueIterator = values[Symbol.iterator]() - let currentValue = nextUntil(valueIterator, value => value !== '') +export function inferDatatype(values: Iterable): NamedNode { + // get the first datatype that matches the first (non-empty) value + const valueIterator = values[Symbol.iterator]() + let currentValue = nextUntil(valueIterator, value => value !== '') + if (currentValue.done) { + return xsd.string // no values to check + } + const datatypeIterator = getDatatypes()[Symbol.iterator]() + let currentDatatype = nextUntil(datatypeIterator, type => type.check(currentValue.value)) + if (currentDatatype.done) { + return xsd.string // no datatype found that matches the first value + } + // iterate over the rest of the values, moving to broader types if needed + while (true) { + currentValue = nextUntil(valueIterator, value => value !== '' && !currentDatatype.value.check(value)) if (currentValue.done) { - return xsd.string // no values to check + return currentDatatype.value.name // all values successfully checked } - const datatypeIterator = getDatatypes()[Symbol.iterator]() - let currentDatatype = nextUntil(datatypeIterator, type => type.check(currentValue.value)) + // look for broader types + currentDatatype = nextUntil(currentDatatype.value.broader[Symbol.iterator](), type => type.check(currentValue.value)) if (currentDatatype.done) { - return xsd.string // no datatype found that matches the first value - } - // iterate over the rest of the values, moving to broader types if needed - while (true) { - currentValue = nextUntil(valueIterator, value => value !== '' && !currentDatatype.value.check(value)) - if (currentValue.done) { - return currentDatatype.value.name // all values successfuly checked - } - // look for broader types - currentDatatype = nextUntil(currentDatatype.value.broader[Symbol.iterator](), type => type.check(currentValue.value)) - if (currentDatatype.done) { - return xsd.string // no 
broader type found that matches the value - } + return xsd.string // no broader type found that matches the value } } } diff --git a/packages/model/test/DatatypeChecker.test.ts b/packages/model/test/DatatypeChecker.test.ts deleted file mode 100644 index 950fe5197..000000000 --- a/packages/model/test/DatatypeChecker.test.ts +++ /dev/null @@ -1,55 +0,0 @@ -import { describe, it } from 'mocha' -import { expect } from 'chai' -import { xsd } from '@tpluscode/rdf-ns-builders' -import { DatatypeChecker } from '../DatatypeChecker' - -const datatypeChecker = new DatatypeChecker() - -describe('@cube-creator/model/DatatypeChecker', () => { - it('recognize xsd:integer', () => { - expect(datatypeChecker.determineDatatype(['42'])).to.eq(xsd.integer) - }) - it('recognize xsd:decimal', () => { - expect(datatypeChecker.determineDatatype(['42.1'])).to.eq(xsd.decimal) - }) - it('recognize xsd:boolean', () => { - // if the first value was 0 or 1, it would be considered as xsd:integer - expect(datatypeChecker.determineDatatype(['true', 'false', '0', '1'])).to.eq(xsd.boolean) - }) - it('recognize xsd:date', () => { - expect(datatypeChecker.determineDatatype(['2021-01-01'])).to.eq(xsd.date) - }) - it('recognize xsd:time', () => { - expect(datatypeChecker.determineDatatype(['23:57:05'])).to.eq(xsd.time) - }) - it('recognize xsd:dateTime', () => { - expect(datatypeChecker.determineDatatype(['2021-01-01T23:57:05'])).to.eq(xsd.dateTime) - }) - it('recognize xsd:gYearMonth', () => { - expect(datatypeChecker.determineDatatype(['2021-12'])).to.eq(xsd.gYearMonth) - }) - it('recognize two xsd:integer values', () => { - expect(datatypeChecker.determineDatatype(['42', '42'])).to.eq(xsd.integer) - }) - it('recognize xsd:string with empty array', () => { - expect(datatypeChecker.determineDatatype([])).to.eq(xsd.string) - }) - it('recognize xsd:string with empty string', () => { - expect(datatypeChecker.determineDatatype([''])).to.eq(xsd.string) - }) - it('recognize xd:integer ignoring empty strings', 
() => { - expect(datatypeChecker.determineDatatype(['', '42', ''])).to.eq(xsd.integer) - }) - it('recognize xsd:string after xsd:date', () => { - expect(datatypeChecker.determineDatatype(['2021-01-01', 'foo'])).to.eq(xsd.string) - }) - it('recognize xsd:decimal after xsd:integer', () => { - expect(datatypeChecker.determineDatatype(['42', '42.1'])).to.eq(xsd.decimal) - }) - it('recognize xsd:string after xsd:integer', () => { - expect(datatypeChecker.determineDatatype(['42', 'foo'])).to.eq(xsd.string) - }) - it('recognize xd:string when mixed types', () => { - expect(datatypeChecker.determineDatatype(['', '42', '2021-01-01'])).to.eq(xsd.string) - }) -}) diff --git a/packages/model/test/lib/datatypeInference.test.ts b/packages/model/test/lib/datatypeInference.test.ts new file mode 100644 index 000000000..3a700a19d --- /dev/null +++ b/packages/model/test/lib/datatypeInference.test.ts @@ -0,0 +1,53 @@ +import { describe, it } from 'mocha' +import { expect } from 'chai' +import { xsd } from '@tpluscode/rdf-ns-builders' +import { inferDatatype } from '../../lib/datatypeInference' + +describe('@cube-creator/model/DatatypeChecker', () => { + it('recognize xsd:integer', () => { + expect(inferDatatype(['42'])).to.eq(xsd.integer) + }) + it('recognize xsd:decimal', () => { + expect(inferDatatype(['42.1'])).to.eq(xsd.decimal) + }) + it('recognize xsd:boolean', () => { + // if the first value was 0 or 1, it would be considered as xsd:integer + expect(inferDatatype(['true', 'false', '0', '1'])).to.eq(xsd.boolean) + }) + it('recognize xsd:date', () => { + expect(inferDatatype(['2021-01-01'])).to.eq(xsd.date) + }) + it('recognize xsd:time', () => { + expect(inferDatatype(['23:57:05'])).to.eq(xsd.time) + }) + it('recognize xsd:dateTime', () => { + expect(inferDatatype(['2021-01-01T23:57:05'])).to.eq(xsd.dateTime) + }) + it('recognize xsd:gYearMonth', () => { + expect(inferDatatype(['2021-12'])).to.eq(xsd.gYearMonth) + }) + it('recognize two xsd:integer values', () => { + 
expect(inferDatatype(['42', '42'])).to.eq(xsd.integer) + }) + it('recognize xsd:string with empty array', () => { + expect(inferDatatype([])).to.eq(xsd.string) + }) + it('recognize xsd:string with empty string', () => { + expect(inferDatatype([''])).to.eq(xsd.string) + }) + it('recognize xd:integer ignoring empty strings', () => { + expect(inferDatatype(['', '42', ''])).to.eq(xsd.integer) + }) + it('recognize xsd:string after xsd:date', () => { + expect(inferDatatype(['2021-01-01', 'foo'])).to.eq(xsd.string) + }) + it('recognize xsd:decimal after xsd:integer', () => { + expect(inferDatatype(['42', '42.1'])).to.eq(xsd.decimal) + }) + it('recognize xsd:string after xsd:integer', () => { + expect(inferDatatype(['42', 'foo'])).to.eq(xsd.string) + }) + it('recognize xd:string when mixed types', () => { + expect(inferDatatype(['', '42', '2021-01-01'])).to.eq(xsd.string) + }) +})