diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index acb6336c..d97dfae4 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -11,12 +11,13 @@ jobs:
           - '16'
         package:
           - barnard59
+          - barnard59-base
           - barnard59-core
+          - barnard59-csvw
+          - barnard59-ftp
+          - barnard59-graph-store
           - barnard59-rdf
           - barnard59-sparql
-          - barnard59-base
-          - barnard59-graph-store
-          - barnard59-ftp
     steps:
       - uses: actions/checkout@v3
       - uses: actions/setup-node@v3
diff --git a/README.md b/README.md
index 995e4ba7..190dba97 100644
--- a/README.md
+++ b/README.md
@@ -9,11 +9,12 @@ In this monorepo you will find the various `barnard59-*` packages:
 | Package | Latest version | |
 |---|---|---|
 | [`barnard59`](packages/cli) | [![](https://badge.fury.io/js/barnard59.svg)](https://npm.im/barnard59) | CLI to run pipelines |
-| [`barnard59-core`](packages/core) | [![](https://badge.fury.io/js/barnard59-core.svg)](https://npm.im/barnard59-core) | Core package |
 | [`barnard59-base`](packages/base) | [![](https://badge.fury.io/js/barnard59-base.svg)](https://npm.im/barnard59-base) | Provides the basic pipeline steps |
+| [`barnard59-core`](packages/core) | [![](https://badge.fury.io/js/barnard59-core.svg)](https://npm.im/barnard59-core) | Core package |
+| [`barnard59-csvw`](packages/csvw) | [![](https://badge.fury.io/js/barnard59-csvw.svg)](https://npm.im/barnard59-csvw) | Simplifies handling CSVW mapping documents in pipelines |
 | [`barnard59-ftp`](packages/ftp) | [![](https://badge.fury.io/js/barnard59-ftp.svg)](https://npm.im/barnard59-ftp) | FTP support for Linked Data pipelines |
 | [`barnard59-graph-store`](packages/graph-store) | [![](https://badge.fury.io/js/barnard59-graph-store.svg)](https://npm.im/barnard59-graph-store) | [SPARQL Graph Store Protocol](https://www.w3.org/TR/sparql11-http-rdf-update/) support |
-| [`barnard59-sparql`](packages/sparql) | [![](https://badge.fury.io/js/barnard59-sparql.svg)](https://npm.im/barnard59-sparql) | Query SPARQL endpoint from pipeline |
 | [`barnard59-rdf`](packages/rdf) | [![](https://badge.fury.io/js/barnard59-rdf.svg)](https://npm.im/barnard59-rdf) | Operations for RDF/JS quads and datasets |
+| [`barnard59-sparql`](packages/sparql) | [![](https://badge.fury.io/js/barnard59-sparql.svg)](https://npm.im/barnard59-sparql) | Query SPARQL endpoint from pipeline |
 
 More to come as we gradually consolidate other, initially separate repositories.
diff --git a/codecov.yml b/codecov.yml
index 1e3e1e48..810eb3c2 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -3,8 +3,10 @@ flag_management:
     carryforward: true
   individual_flags:
     - name: barnard59
+    - name: barnard59-base
    - name: barnard59-core
+    - name: barnard59-csvw
+    - name: barnard59-ftp
+    - name: barnard59-graph-store
     - name: barnard59-rdf
     - name: barnard59-sparql
-    - name: barnard59-base
-    - name: barnard59-graph-store
diff --git a/package-lock.json b/package-lock.json
index d554dc3b..e4d728d3 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -5110,6 +5110,10 @@
       "resolved": "packages/core",
       "link": true
     },
+    "node_modules/barnard59-csvw": {
+      "resolved": "packages/csvw",
+      "link": true
+    },
     "node_modules/barnard59-formats": {
       "version": "1.4.0",
       "license": "MIT",
@@ -9621,7 +9625,6 @@
     },
     "node_modules/isstream": {
       "version": "0.1.2",
-      "dev": true,
       "license": "MIT"
     },
     "node_modules/istanbul-lib-coverage": {
@@ -20500,7 +20503,7 @@
     },
     "packages/cli": {
       "name": "barnard59",
-      "version": "1.1.7",
+      "version": "2.0.0",
       "license": "MIT",
       "dependencies": {
         "@opentelemetry/api": "^1.0.0",
@@ -20528,7 +20531,7 @@
       "devDependencies": {
         "barnard59-base": "^1.2.1",
         "barnard59-formats": "^1.4.0",
-        "barnard59-graph-store": "^1.0.2",
+        "barnard59-graph-store": "^1.1.0",
         "barnard59-http": "^1.1.0",
         "barnard59-shell": "^0.1.0",
         "barnard59-test-support": "^0.0.1",
@@ -20604,6 +20607,111 @@
         "node": ">=16"
       }
     },
+    "packages/csvw": {
+      "name": "barnard59-csvw",
+      "version": "1.0.0",
+      "license": "MIT",
+      "dependencies": {
+        "@rdfjs/fetch": "^2.1.0",
+        "@zazuko/env": "^1.0.1",
+        "duplex-to": "^1.0.1",
+        "file-fetch": "^1.7.0",
+        "node-fetch": "^3.0.0",
+        "proto-fetch": "^1.0.0",
+        "readable-stream": "^3.6.0"
+      },
+      "devDependencies": {
+        "express-as-promise": "^1.2.0",
+        "get-stream": "^7.0.1",
+        "is-stream": "^3.0.0"
+      },
+      "engines": {
+        "node": ">= 14.0.0"
+      }
+    },
+    "packages/csvw/node_modules/@rdfjs/data-model": {
+      "version": "1.3.4",
+      "resolved": "https://registry.npmjs.org/@rdfjs/data-model/-/data-model-1.3.4.tgz",
+      "integrity": "sha512-iKzNcKvJotgbFDdti7GTQDCYmL7GsGldkYStiP0K8EYtN7deJu5t7U11rKTz+nR7RtesUggT+lriZ7BakFv8QQ==",
+      "dependencies": {
+        "@rdfjs/types": ">=1.0.1"
+      },
+      "bin": {
+        "rdfjs-data-model-test": "bin/test.js"
+      }
+    },
+    "packages/csvw/node_modules/@rdfjs/dataset": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/@rdfjs/dataset/-/dataset-1.1.1.tgz",
+      "integrity": "sha512-BNwCSvG0cz0srsG5esq6CQKJc1m8g/M0DZpLuiEp0MMpfwguXX7VeS8TCg4UUG3DV/DqEvhy83ZKSEjdsYseeA==",
+      "dependencies": {
+        "@rdfjs/data-model": "^1.2.0"
+      },
+      "bin": {
+        "rdfjs-dataset-test": "bin/test.js"
+      }
+    },
+    "packages/csvw/node_modules/@rdfjs/fetch": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/@rdfjs/fetch/-/fetch-2.1.0.tgz",
+      "integrity": "sha512-1bhXqGfbQQKHrmuZOmUUQmCpDNQC25fksYoGXUvlQ80kWuk8r/PdcdUmzApCp7HSyHFUjmgH89Pkym/9WXyDkQ==",
+      "dependencies": {
+        "@rdfjs/dataset": "^1.0.1",
+        "@rdfjs/fetch-lite": "^2.1.0",
+        "@rdfjs/formats-common": "^2.0.1"
+      }
+    },
+    "packages/csvw/node_modules/@rdfjs/fetch-lite": {
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/@rdfjs/fetch-lite/-/fetch-lite-2.1.2.tgz",
+      "integrity": "sha512-M66ShQbQH94/XdavOuCWLu5TFbx4pHxNLolC7oUvmIZI03mQOYyapEinGaaUozpdQoY8iOrekree4OORdRKFCA==",
+      "dependencies": {
+        "isstream": "^0.1.2",
+        "nodeify-fetch": "^2.2.1",
+        "readable-stream": "^3.3.0"
+      }
+    },
+    "packages/csvw/node_modules/get-stream": {
+      "version": "7.0.1",
+      "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-7.0.1.tgz",
+      "integrity": "sha512-3M8C1EOFN6r8AMUhwUAACIoXZJEOufDU5+0gFFN5uNs6XYOralD2Pqkl7m046va6x77FwposWXbAhPPIOus7mQ==",
+      "dev": true,
+      "engines": {
+        "node": ">=16"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
+    "packages/csvw/node_modules/node-fetch": {
+      "version": "3.3.2",
+      "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz",
+      "integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==",
+      "dependencies": {
+        "data-uri-to-buffer": "^4.0.0",
+        "fetch-blob": "^3.1.4",
+        "formdata-polyfill": "^4.0.10"
+      },
+      "engines": {
+        "node": "^12.20.0 || ^14.13.1 || >=16.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/node-fetch"
+      }
+    },
+    "packages/csvw/node_modules/nodeify-fetch": {
+      "version": "2.2.2",
+      "resolved": "https://registry.npmjs.org/nodeify-fetch/-/nodeify-fetch-2.2.2.tgz",
+      "integrity": "sha512-4b1Jysy9RGyya0wJpseTQyxUgSbx6kw9ocHTY0OFRXWlxa2Uy5PrSo/P/nwoUn59rBR9YKty2kd7g4LKXmsZVA==",
+      "dependencies": {
+        "@zazuko/node-fetch": "^2.6.6",
+        "concat-stream": "^1.6.0",
+        "cross-fetch": "^3.0.4",
+        "readable-error": "^1.0.0",
+        "readable-stream": "^3.5.0"
+      }
+    },
     "packages/ftp": {
       "name": "barnard59-ftp",
       "version": "1.0.2",
@@ -20759,7 +20867,7 @@
     },
     "packages/graph-store": {
       "name": "barnard59-graph-store",
-      "version": "1.0.2",
+      "version": "1.1.0",
       "license": "MIT",
       "dependencies": {
         "@rdfjs/data-model": "^2.0.1",
diff --git a/packages/csvw/README.md b/packages/csvw/README.md
new file mode 100644
index 00000000..66622e47
--- /dev/null
+++ b/packages/csvw/README.md
@@ -0,0 +1,35 @@
+# barnard59-csvw
+
+Simplifies handling CSVW mapping documents in barnard59 pipelines
+
+## Install
+
+```
+npm install barnard59-csvw --save
+```
+
+## Exported steps
+
+### `fetch`
+
+A step that automates loading a CSVW mapping document and its source CSV by following the `csvw:url` property. The URL can be a local filesystem path or a remote URL.
+
+| Argument | Type | Description |
+| -- | -- | -- |
+| `csvw` | `string` | Local path or URL of the mapping to load |
+
+### Example: Loading CSVW from the filesystem
+
+```turtle
+@prefix p: <https://pipeline.described.at/>.
+@prefix code: <https://code.described.at/>.
+
+<#CsvwStep> a p:Step;
+  code:implementedBy [ a code:EcmaScriptModule;
+    code:link <node:barnard59-csvw#fetch>
+  ];
+  code:arguments [
+    code:name "csvw";
+    code:value "file:/test/mappings/remote.csvw.json"
+  ].
+```
diff --git a/packages/csvw/fetch.js b/packages/csvw/fetch.js
new file mode 100644
index 00000000..7fc2378b
--- /dev/null
+++ b/packages/csvw/fetch.js
@@ -0,0 +1,25 @@
+import rdf from '@zazuko/env'
+import toReadable from 'duplex-to/readable.js'
+import { PassThrough } from 'readable-stream'
+import fetchData from './lib/fetchData.js'
+import fetchMetadata from './lib/fetchMetadata.js'
+
+function fetch({ csvw }) {
+  const output = new PassThrough()
+
+  Promise.resolve().then(async () => {
+    try {
+      const metadata = await fetchMetadata(csvw)
+      const url = metadata.any().has(rdf.ns.csvw.url).out(rdf.ns.csvw.url)
+      const dataStream = await fetchData(url.value)
+
+      dataStream.pipe(output)
+    } catch (err) {
+      output.destroy(err)
+    }
+  })
+
+  return toReadable(output)
+}
+
+export default fetch
diff --git a/packages/csvw/index.js b/packages/csvw/index.js
new file mode 100644
index 00000000..2f46c9b4
--- /dev/null
+++ b/packages/csvw/index.js
@@ -0,0 +1,5 @@
+import fetch from './fetch.js'
+
+export {
+  fetch,
+}
diff --git a/packages/csvw/lib/checkResponse.js b/packages/csvw/lib/checkResponse.js
new file mode 100644
index 00000000..c18c9a6e
--- /dev/null
+++ b/packages/csvw/lib/checkResponse.js
@@ -0,0 +1,7 @@
+async function checkResponse(res) {
+  if (!res.ok) {
+    throw new Error(`${res.statusText}(${res.status}): ${await res.text()}`)
+  }
+}
+
+export default checkResponse
diff --git a/packages/csvw/lib/commonFetch.js b/packages/csvw/lib/commonFetch.js
new file mode 100644
index 00000000..4bf7913e
--- /dev/null
+++ b/packages/csvw/lib/commonFetch.js
@@ -0,0 +1,11 @@
+import fileFetch from 'file-fetch'
+import httpFetch from 'node-fetch'
+import protoFetch from 'proto-fetch'
+
+const commonFetch = protoFetch({
+  file: fileFetch,
+  http: httpFetch,
+  https: httpFetch,
+})
+
+export default commonFetch
diff --git a/packages/csvw/lib/fetchData.js b/packages/csvw/lib/fetchData.js
new file mode 100644
index 00000000..efbf95ee
--- /dev/null
+++ b/packages/csvw/lib/fetchData.js
@@ -0,0 +1,12 @@
+import checkResponse from './checkResponse.js'
+import commonFetch from './commonFetch.js'
+
+async function fetchData(url) {
+  const res = await commonFetch(url)
+
+  await checkResponse(res)
+
+  return res.body
+}
+
+export default fetchData
diff --git a/packages/csvw/lib/fetchMetadata.js b/packages/csvw/lib/fetchMetadata.js
new file mode 100644
index 00000000..300377a5
--- /dev/null
+++ b/packages/csvw/lib/fetchMetadata.js
@@ -0,0 +1,22 @@
+import rdfFetch from '@rdfjs/fetch'
+import rdf from '@zazuko/env'
+import checkResponse from './checkResponse.js'
+import commonFetch from './commonFetch.js'
+
+async function fetchMetadata(url) {
+  const res = await rdfFetch(url.toString(), {
+    contentTypeLookup: extension => extension === '.json' ? 'application/ld+json' : undefined,
+    factory: rdf,
+    fetch: commonFetch,
+  })
+
+  await checkResponse(res)
+
+  if (!res.dataset) {
+    throw new Error('response is empty')
+  }
+
+  return rdf.clownface({ dataset: await res.dataset() })
+}
+
+export default fetchMetadata
diff --git a/packages/csvw/manifest.ttl b/packages/csvw/manifest.ttl
new file mode 100644
index 00000000..1a618149
--- /dev/null
+++ b/packages/csvw/manifest.ttl
@@ -0,0 +1,11 @@
+@base <https://barnard59.zazuko.com/operations/csvw/>.
+@prefix code: <https://code.described.at/>.
+@prefix p: <https://pipeline.described.at/>.
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
+
+<fetch> a p:Operation, p:Readable;
+  rdfs:label "Fetch CSVW";
+  rdfs:comment "Loads a CSVW file from the local filesystem or the web depending on the given argument";
+  code:implementedBy [ a code:EcmaScriptModule;
+    code:link <node:barnard59-csvw/fetch.js#default>
+  ].
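For reference, a minimal sketch of calling the exported `fetch` step directly from Node.js, mirroring the tests added below. The metadata path is illustrative (not shipped with the package), and `get-stream` is only used here to collect the output into a string:

```js
// Sketch: standalone use of the fetch step outside a pipeline runner.
// Assumes barnard59-csvw and get-stream are installed and that
// ./mapping.metadata.json is a local CSVW metadata document whose
// csvw:url points at the CSV file to load.
import getStream from 'get-stream'
import { fetch } from 'barnard59-csvw'

// The step reads the metadata, resolves csvw:url and returns a readable
// stream of the raw CSV content it points to.
const stream = fetch({ csvw: 'file:./mapping.metadata.json' })
const csv = await getStream(stream)

console.log(csv)
```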
diff --git a/packages/csvw/package.json b/packages/csvw/package.json
new file mode 100644
index 00000000..f1f5cdd5
--- /dev/null
+++ b/packages/csvw/package.json
@@ -0,0 +1,39 @@
+{
+  "name": "barnard59-csvw",
+  "version": "1.0.0",
+  "description": "Simplifies handling CSVW mapping documents in barnard59 pipelines",
+  "type": "module",
+  "main": "index.js",
+  "scripts": {
+    "test": "mocha"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git://github.com/zazuko/barnard59.git",
+    "directory": "packages/csvw"
+  },
+  "keywords": [],
+  "author": "Zazuko GmbH",
+  "license": "MIT",
+  "bugs": {
+    "url": "https://github.com/zazuko/barnard59/issues"
+  },
+  "homepage": "https://github.com/zazuko/barnard59",
+  "dependencies": {
+    "@rdfjs/fetch": "^2.1.0",
+    "@zazuko/env": "^1.0.1",
+    "duplex-to": "^1.0.1",
+    "file-fetch": "^1.7.0",
+    "node-fetch": "^3.0.0",
+    "proto-fetch": "^1.0.0",
+    "readable-stream": "^3.6.0"
+  },
+  "devDependencies": {
+    "express-as-promise": "^1.2.0",
+    "get-stream": "^7.0.1",
+    "is-stream": "^3.0.0"
+  },
+  "engines": {
+    "node": ">= 14.0.0"
+  }
+}
diff --git a/packages/csvw/test/fetch.test.js b/packages/csvw/test/fetch.test.js
new file mode 100644
index 00000000..b65c97b8
--- /dev/null
+++ b/packages/csvw/test/fetch.test.js
@@ -0,0 +1,135 @@
+import { rejects, strictEqual } from 'assert'
+import withServer from 'express-as-promise/withServer.js'
+import getStream from 'get-stream'
+import { isReadableStream } from 'is-stream'
+import fetch from '../fetch.js'
+
+const csvContent = 'id,text\n1,abc\n'
+const fileMetadataUrl = 'file:./test/support/test.metadata.json'
+const fileMetadataTtlUrl = 'file:./test/support/test.metadata.ttl'
+
+describe('fetch', () => {
+  it('should be a function', () => {
+    strictEqual(typeof fetch, 'function')
+  })
+
+  it('should return a readable stream', () => {
+    const result = fetch({ csvw: fileMetadataUrl })
+
+    strictEqual(isReadableStream(result), true)
+  })
+
+  it('should process file: url', async () => {
+    const stream = fetch({ csvw: fileMetadataUrl })
+    const content = await getStream(stream)
+
+    strictEqual(content, csvContent)
+  })
+
+  it('should process turtle file: url', async () => {
+    const stream = fetch({ csvw: fileMetadataTtlUrl })
+    const content = await getStream(stream)
+
+    strictEqual(content, csvContent)
+  })
+
+  it('should throw an error for non-existing file: metadata url', async () => {
+    const stream = fetch({ csvw: 'file:./test/support/non-existing.metadata.json' })
+
+    await rejects(getStream(stream))
+  })
+
+  it('should throw an error for non-existing file: data url', async () => {
+    const stream = fetch({ csvw: 'file:./test/support/no-data.metadata.json' })
+
+    await rejects(getStream(stream))
+  })
+
+  it('should process http: url', async () => {
+    await withServer(async server => {
+      server.app.get('/metadata', (req, res) => {
+        res.set('content-type', 'application/ld+json').json({
+          '@context': 'http://www.w3.org/ns/csvw',
+          url: new URL('/data', baseUrl),
+        })
+      })
+
+      server.app.get('/data', (req, res) => {
+        res.set('content-type', 'text/csv').end(csvContent)
+      })
+
+      const baseUrl = await server.listen()
+      const stream = fetch({ csvw: new URL('/metadata', baseUrl) })
+      const content = await getStream(stream)
+
+      strictEqual(content, csvContent)
+    })
+  })
+
+  it('should process turtle http: url', async () => {
+    await withServer(async server => {
+      server.app.get('/metadata', (req, res) => {
+        res.set('content-type', 'text/turtle').end(`[
+          <http://www.w3.org/ns/csvw#url> <${new URL('data', baseUrl)}>
+        ].`)
+      })
+
+      server.app.get('/data', (req, res) => {
+        res.set('content-type', 'text/csv').end(csvContent)
+      })
+
+      const baseUrl = await server.listen()
+      const stream = fetch({ csvw: new URL('/metadata', baseUrl) })
+      const content = await getStream(stream)
+
+      strictEqual(content, csvContent)
+    })
+  })
+
+  it('should throw an error for non-existing http: metadata url', async () => {
+    await withServer(async server => {
+      const baseUrl = await server.listen()
+      const stream = fetch({ csvw: new URL('/metadata', baseUrl) })
+
+      await rejects(getStream(stream))
+    })
+  })
+
+  it('should throw an error for non-existing http: data url', async () => {
+    await withServer(async server => {
+      server.app.get('/metadata', (req, res) => {
+        res.set('content-type', 'application/ld+json').json({
+          '@context': 'http://www.w3.org/ns/csvw',
+          url: new URL('/data', baseUrl),
+        })
+      })
+
+      const baseUrl = await server.listen()
+      const stream = fetch({ csvw: new URL('/metadata', baseUrl) })
+
+      await rejects(getStream(stream))
+    })
+  })
+
+  it('should throw an error if http connection is killed', async () => {
+    await withServer(async server => {
+      server.app.get('/metadata', (req, res) => {
+        res.set('content-type', 'application/ld+json').json({
+          '@context': 'http://www.w3.org/ns/csvw',
+          url: new URL('/data', baseUrl),
+        })
+      })
+
+      server.app.get('/data', async (req, res) => {
+        res.set('content-type', 'text/csv')
+        res.write('abc')
+        res.socket.destroy()
+      })
+
+      const baseUrl = await server.listen()
+      const stream = fetch({ csvw: new URL('/metadata', baseUrl) })
+
+      await rejects(getStream(stream))
+    })
+  })
+})
diff --git a/packages/csvw/test/support/no-data.metadata.json b/packages/csvw/test/support/no-data.metadata.json
new file mode 100644
index 00000000..da1330c8
--- /dev/null
+++ b/packages/csvw/test/support/no-data.metadata.json
@@ -0,0 +1,4 @@
+{
+  "@context": "http://www.w3.org/ns/csvw",
+  "url": "file:./test/support/non-existing.csv"
+}
diff --git a/packages/csvw/test/support/test.csv b/packages/csvw/test/support/test.csv
new file mode 100644
index 00000000..0c074d4e
--- /dev/null
+++ b/packages/csvw/test/support/test.csv
@@ -0,0 +1,2 @@
+id,text
+1,abc
diff --git a/packages/csvw/test/support/test.metadata.json b/packages/csvw/test/support/test.metadata.json
new file mode 100644
index 00000000..f8a10f31
--- /dev/null
+++ b/packages/csvw/test/support/test.metadata.json
@@ -0,0 +1,4 @@
+{
+  "@context": "http://www.w3.org/ns/csvw",
+  "url": "file:./test/support/test.csv"
+}
diff --git a/packages/csvw/test/support/test.metadata.ttl b/packages/csvw/test/support/test.metadata.ttl
new file mode 100644
index 00000000..74ba0ddc
--- /dev/null
+++ b/packages/csvw/test/support/test.metadata.ttl
@@ -0,0 +1,3 @@
+[
+  <http://www.w3.org/ns/csvw#url> <file:./test/support/test.csv>
+].
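The step deliberately emits the raw CSV referenced by `csvw:url` rather than parsed rows. A hedged sketch of what a downstream consumer might do with that output, assuming `csv-parse` as an extra dependency (it is not part of this package) and reusing the `test.metadata.json` fixture above:

```js
// Illustrative only: barnard59-csvw does not parse CSV itself. This pipes
// the raw CSV stream returned by fetch() into csv-parse to get one object
// per row, keyed by the header columns.
import { parse } from 'csv-parse'
import { fetch } from 'barnard59-csvw'

const csvStream = fetch({ csvw: 'file:./test/support/test.metadata.json' })
const records = csvStream.pipe(parse({ columns: true }))

for await (const record of records) {
  // e.g. { id: '1', text: 'abc' } for the test fixture
  console.log(record)
}
```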