Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
giacomociti committed Oct 31, 2024
1 parent 2c91720 commit 71c8b5f
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 80 deletions.
6 changes: 2 additions & 4 deletions packages/model/CsvColumn.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@ import { csvw, dtype, schema } from '@tpluscode/rdf-ns-builders'
import { cc } from '@cube-creator/core/namespace'
import { NamedNode } from '@rdfjs/types'
import { initializer } from './lib/initializer'
import { DatatypeChecker } from './DatatypeChecker'

const datatypeChecker = new DatatypeChecker()
import { inferDatatype } from './lib/datatypeInference'

export interface CsvColumn extends RdfResource {
name: string
Expand All @@ -29,7 +27,7 @@ export function CsvColumnMixin<Base extends Constructor>(Resource: Base): Mixin
samples!: string[]

get defaultDatatype(): NamedNode {
return datatypeChecker.determineDatatype(this.samples)
return inferDatatype(this.samples)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,30 +38,28 @@ const nextUntil = <T>(iterator: Iterator<T>, predicate: (value: T) => boolean) =
}
}

export class DatatypeChecker {
public determineDatatype(values: Iterable<string>): NamedNode {
// get the first datatype that matches the first (non-empty) value
const valueIterator = values[Symbol.iterator]()
let currentValue = nextUntil(valueIterator, value => value !== '')
export function inferDatatype(values: Iterable<string>): NamedNode {
// get the first datatype that matches the first (non-empty) value
const valueIterator = values[Symbol.iterator]()
let currentValue = nextUntil(valueIterator, value => value !== '')
if (currentValue.done) {
return xsd.string // no values to check
}
const datatypeIterator = getDatatypes()[Symbol.iterator]()
let currentDatatype = nextUntil(datatypeIterator, type => type.check(currentValue.value))
if (currentDatatype.done) {
return xsd.string // no datatype found that matches the first value
}
// iterate over the rest of the values, moving to broader types if needed
while (true) {
currentValue = nextUntil(valueIterator, value => value !== '' && !currentDatatype.value.check(value))
if (currentValue.done) {
return xsd.string // no values to check
return currentDatatype.value.name // all values successfully checked
}
const datatypeIterator = getDatatypes()[Symbol.iterator]()
let currentDatatype = nextUntil(datatypeIterator, type => type.check(currentValue.value))
// look for broader types
currentDatatype = nextUntil(currentDatatype.value.broader[Symbol.iterator](), type => type.check(currentValue.value))
if (currentDatatype.done) {
return xsd.string // no datatype found that matches the first value
}
// iterate over the rest of the values, moving to broader types if needed
while (true) {
currentValue = nextUntil(valueIterator, value => value !== '' && !currentDatatype.value.check(value))
if (currentValue.done) {
return currentDatatype.value.name // all values successfully checked
}
// look for broader types
currentDatatype = nextUntil(currentDatatype.value.broader[Symbol.iterator](), type => type.check(currentValue.value))
if (currentDatatype.done) {
return xsd.string // no broader type found that matches the value
}
return xsd.string // no broader type found that matches the value
}
}
}
55 changes: 0 additions & 55 deletions packages/model/test/DatatypeChecker.test.ts

This file was deleted.

53 changes: 53 additions & 0 deletions packages/model/test/lib/datatypeInference.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import { describe, it } from 'mocha'
import { expect } from 'chai'
import { xsd } from '@tpluscode/rdf-ns-builders'
import { inferDatatype } from '../../lib/datatypeInference'

/**
 * Specification for `inferDatatype`.
 *
 * Each case passes a list of sample strings and asserts which XSD datatype
 * is returned. The cases are grouped as: single-value recognition, handling
 * of empty input, and columns whose later samples do not fit the datatype
 * matched by the first sample.
 */
describe('@cube-creator/model/lib/datatypeInference', () => {
  // --- recognition of a datatype from homogeneous samples ---
  it('recognize xsd:integer', () => {
    expect(inferDatatype(['42'])).to.eq(xsd.integer)
  })
  it('recognize xsd:decimal', () => {
    expect(inferDatatype(['42.1'])).to.eq(xsd.decimal)
  })
  it('recognize xsd:boolean', () => {
    // if the first value was 0 or 1, it would be considered as xsd:integer
    expect(inferDatatype(['true', 'false', '0', '1'])).to.eq(xsd.boolean)
  })
  it('recognize xsd:date', () => {
    expect(inferDatatype(['2021-01-01'])).to.eq(xsd.date)
  })
  it('recognize xsd:time', () => {
    expect(inferDatatype(['23:57:05'])).to.eq(xsd.time)
  })
  it('recognize xsd:dateTime', () => {
    expect(inferDatatype(['2021-01-01T23:57:05'])).to.eq(xsd.dateTime)
  })
  it('recognize xsd:gYearMonth', () => {
    expect(inferDatatype(['2021-12'])).to.eq(xsd.gYearMonth)
  })
  it('recognize two xsd:integer values', () => {
    expect(inferDatatype(['42', '42'])).to.eq(xsd.integer)
  })

  // --- empty input: expected to fall back to xsd:string or be ignored ---
  it('recognize xsd:string with empty array', () => {
    expect(inferDatatype([])).to.eq(xsd.string)
  })
  it('recognize xsd:string with empty string', () => {
    expect(inferDatatype([''])).to.eq(xsd.string)
  })
  it('recognize xsd:integer ignoring empty strings', () => {
    expect(inferDatatype(['', '42', ''])).to.eq(xsd.integer)
  })

  // --- later samples that do not fit the first matched datatype ---
  it('recognize xsd:string after xsd:date', () => {
    expect(inferDatatype(['2021-01-01', 'foo'])).to.eq(xsd.string)
  })
  it('recognize xsd:decimal after xsd:integer', () => {
    expect(inferDatatype(['42', '42.1'])).to.eq(xsd.decimal)
  })
  it('recognize xsd:string after xsd:integer', () => {
    expect(inferDatatype(['42', 'foo'])).to.eq(xsd.string)
  })
  it('recognize xsd:string when mixed types', () => {
    expect(inferDatatype(['', '42', '2021-01-01'])).to.eq(xsd.string)
  })
})

0 comments on commit 71c8b5f

Please sign in to comment.