diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index acb6336c..d97dfae4 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -11,12 +11,13 @@ jobs:
- '16'
package:
- barnard59
+ - barnard59-base
- barnard59-core
+ - barnard59-csvw
+ - barnard59-ftp
+ - barnard59-graph-store
- barnard59-rdf
- barnard59-sparql
- - barnard59-base
- - barnard59-graph-store
- - barnard59-ftp
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
diff --git a/README.md b/README.md
index 995e4ba7..190dba97 100644
--- a/README.md
+++ b/README.md
@@ -9,11 +9,12 @@ In this monorepo you will find the various `barnard59-*` packages:
| Package | Latest version | |
|-------------------------------------------------|-------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------|
| [`barnard59`](packages/cli) | [![](https://badge.fury.io/js/barnard59.svg)](https://npm.im/barnard59) | CLI to run pipelines |
-| [`barnard59-core`](packages/core) | [![](https://badge.fury.io/js/barnard59-core.svg)](https://npm.im/barnard59-core) | Core package |
| [`barnard59-base`](packages/base) | [![](https://badge.fury.io/js/barnard59-base.svg)](https://npm.im/barnard59-base) | Provides the basic pipeline steps |
+| [`barnard59-core`](packages/core) | [![](https://badge.fury.io/js/barnard59-core.svg)](https://npm.im/barnard59-core) | Core package |
+| [`barnard59-csvw`](packages/csvw) | [![](https://badge.fury.io/js/barnard59-csvw.svg)](https://npm.im/barnard59-csvw) | Simplifies handling CSVW mapping documents in pipelines |
| [`barnard59-ftp`](packages/ftp) | [![](https://badge.fury.io/js/barnard59-ftp.svg)](https://npm.im/barnard59-ftp) | FTP support for Linked Data pipelines |
| [`barnard59-graph-store`](packages/graph-store) | [![](https://badge.fury.io/js/barnard59-graph-store.svg)](https://npm.im/barnard59-graph-store) | [SPARQL Graph Store Protocol](https://www.w3.org/TR/sparql11-http-rdf-update/) support |
-| [`barnard59-sparql`](packages/sparql) | [![](https://badge.fury.io/js/barnard59-sparql.svg)](https://npm.im/barnard59-sparql) | Query SPARQL endpoint from pipeline |
| [`barnard59-rdf`](packages/rdf) | [![](https://badge.fury.io/js/barnard59-rdf.svg)](https://npm.im/barnard59-rdf) | Operations for RDF/JS quads and datasets |
+| [`barnard59-sparql`](packages/sparql) | [![](https://badge.fury.io/js/barnard59-sparql.svg)](https://npm.im/barnard59-sparql) | Query SPARQL endpoint from pipeline |
More to come as we gradually consolidate other, initially separate repositories.
diff --git a/codecov.yml b/codecov.yml
index 1e3e1e48..810eb3c2 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -3,8 +3,10 @@ flag_management:
carryforward: true
individual_flags:
- name: barnard59
+ - name: barnard59-base
- name: barnard59-core
+ - name: barnard59-csvw
+ - name: barnard59-ftp
+ - name: barnard59-graph-store
- name: barnard59-rdf
- name: barnard59-sparql
- - name: barnard59-base
- - name: barnard59-graph-store
diff --git a/package-lock.json b/package-lock.json
index d554dc3b..e4d728d3 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -5110,6 +5110,10 @@
"resolved": "packages/core",
"link": true
},
+ "node_modules/barnard59-csvw": {
+ "resolved": "packages/csvw",
+ "link": true
+ },
"node_modules/barnard59-formats": {
"version": "1.4.0",
"license": "MIT",
@@ -9621,7 +9625,6 @@
},
"node_modules/isstream": {
"version": "0.1.2",
- "dev": true,
"license": "MIT"
},
"node_modules/istanbul-lib-coverage": {
@@ -20500,7 +20503,7 @@
},
"packages/cli": {
"name": "barnard59",
- "version": "1.1.7",
+ "version": "2.0.0",
"license": "MIT",
"dependencies": {
"@opentelemetry/api": "^1.0.0",
@@ -20528,7 +20531,7 @@
"devDependencies": {
"barnard59-base": "^1.2.1",
"barnard59-formats": "^1.4.0",
- "barnard59-graph-store": "^1.0.2",
+ "barnard59-graph-store": "^1.1.0",
"barnard59-http": "^1.1.0",
"barnard59-shell": "^0.1.0",
"barnard59-test-support": "^0.0.1",
@@ -20604,6 +20607,111 @@
"node": ">=16"
}
},
+ "packages/csvw": {
+ "name": "barnard59-csvw",
+ "version": "1.0.0",
+ "license": "MIT",
+ "dependencies": {
+ "@rdfjs/fetch": "^2.1.0",
+ "@zazuko/env": "^1.0.1",
+ "duplex-to": "^1.0.1",
+ "file-fetch": "^1.7.0",
+ "node-fetch": "^3.0.0",
+ "proto-fetch": "^1.0.0",
+ "readable-stream": "^3.6.0"
+ },
+ "devDependencies": {
+ "express-as-promise": "^1.2.0",
+ "get-stream": "^7.0.1",
+ "is-stream": "^3.0.0"
+ },
+ "engines": {
+ "node": ">= 14.0.0"
+ }
+ },
+ "packages/csvw/node_modules/@rdfjs/data-model": {
+ "version": "1.3.4",
+ "resolved": "https://registry.npmjs.org/@rdfjs/data-model/-/data-model-1.3.4.tgz",
+ "integrity": "sha512-iKzNcKvJotgbFDdti7GTQDCYmL7GsGldkYStiP0K8EYtN7deJu5t7U11rKTz+nR7RtesUggT+lriZ7BakFv8QQ==",
+ "dependencies": {
+ "@rdfjs/types": ">=1.0.1"
+ },
+ "bin": {
+ "rdfjs-data-model-test": "bin/test.js"
+ }
+ },
+ "packages/csvw/node_modules/@rdfjs/dataset": {
+ "version": "1.1.1",
+ "resolved": "https://registry.npmjs.org/@rdfjs/dataset/-/dataset-1.1.1.tgz",
+ "integrity": "sha512-BNwCSvG0cz0srsG5esq6CQKJc1m8g/M0DZpLuiEp0MMpfwguXX7VeS8TCg4UUG3DV/DqEvhy83ZKSEjdsYseeA==",
+ "dependencies": {
+ "@rdfjs/data-model": "^1.2.0"
+ },
+ "bin": {
+ "rdfjs-dataset-test": "bin/test.js"
+ }
+ },
+ "packages/csvw/node_modules/@rdfjs/fetch": {
+ "version": "2.1.0",
+ "resolved": "https://registry.npmjs.org/@rdfjs/fetch/-/fetch-2.1.0.tgz",
+ "integrity": "sha512-1bhXqGfbQQKHrmuZOmUUQmCpDNQC25fksYoGXUvlQ80kWuk8r/PdcdUmzApCp7HSyHFUjmgH89Pkym/9WXyDkQ==",
+ "dependencies": {
+ "@rdfjs/dataset": "^1.0.1",
+ "@rdfjs/fetch-lite": "^2.1.0",
+ "@rdfjs/formats-common": "^2.0.1"
+ }
+ },
+ "packages/csvw/node_modules/@rdfjs/fetch-lite": {
+ "version": "2.1.2",
+ "resolved": "https://registry.npmjs.org/@rdfjs/fetch-lite/-/fetch-lite-2.1.2.tgz",
+ "integrity": "sha512-M66ShQbQH94/XdavOuCWLu5TFbx4pHxNLolC7oUvmIZI03mQOYyapEinGaaUozpdQoY8iOrekree4OORdRKFCA==",
+ "dependencies": {
+ "isstream": "^0.1.2",
+ "nodeify-fetch": "^2.2.1",
+ "readable-stream": "^3.3.0"
+ }
+ },
+ "packages/csvw/node_modules/get-stream": {
+ "version": "7.0.1",
+ "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-7.0.1.tgz",
+ "integrity": "sha512-3M8C1EOFN6r8AMUhwUAACIoXZJEOufDU5+0gFFN5uNs6XYOralD2Pqkl7m046va6x77FwposWXbAhPPIOus7mQ==",
+ "dev": true,
+ "engines": {
+ "node": ">=16"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/sindresorhus"
+ }
+ },
+ "packages/csvw/node_modules/node-fetch": {
+ "version": "3.3.2",
+ "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz",
+ "integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==",
+ "dependencies": {
+ "data-uri-to-buffer": "^4.0.0",
+ "fetch-blob": "^3.1.4",
+ "formdata-polyfill": "^4.0.10"
+ },
+ "engines": {
+ "node": "^12.20.0 || ^14.13.1 || >=16.0.0"
+ },
+ "funding": {
+ "type": "opencollective",
+ "url": "https://opencollective.com/node-fetch"
+ }
+ },
+ "packages/csvw/node_modules/nodeify-fetch": {
+ "version": "2.2.2",
+ "resolved": "https://registry.npmjs.org/nodeify-fetch/-/nodeify-fetch-2.2.2.tgz",
+ "integrity": "sha512-4b1Jysy9RGyya0wJpseTQyxUgSbx6kw9ocHTY0OFRXWlxa2Uy5PrSo/P/nwoUn59rBR9YKty2kd7g4LKXmsZVA==",
+ "dependencies": {
+ "@zazuko/node-fetch": "^2.6.6",
+ "concat-stream": "^1.6.0",
+ "cross-fetch": "^3.0.4",
+ "readable-error": "^1.0.0",
+ "readable-stream": "^3.5.0"
+ }
+ },
"packages/ftp": {
"name": "barnard59-ftp",
"version": "1.0.2",
@@ -20759,7 +20867,7 @@
},
"packages/graph-store": {
"name": "barnard59-graph-store",
- "version": "1.0.2",
+ "version": "1.1.0",
"license": "MIT",
"dependencies": {
"@rdfjs/data-model": "^2.0.1",
diff --git a/packages/csvw/README.md b/packages/csvw/README.md
new file mode 100644
index 00000000..66622e47
--- /dev/null
+++ b/packages/csvw/README.md
@@ -0,0 +1,48 @@
+# barnard59-csvw
+
+Simplifies handling CSVW mapping documents in barnard59 pipelines
+
+## Install
+
+```sh
+npm install barnard59-csvw --save
+```
+
+## Exported steps
+
+### `fetch`
+
+A step that loads a CSVW mapping document and then streams the source CSV it references via the `csvw:url` property. The mapping location can be a local filesystem path or a remote URL.
+
+| Argument | Type | Description |
+| -- | -- | -- |
+| `csvw` | `string` | Local path or URL of the mapping to load |
+
+### Example: Loading CSVW from filesystem
+
+```turtle
+@prefix p: <https://pipeline.described.at/> .
+@prefix code: <https://code.described.at/> .
+
+<#CsvwStep> a p:Step;
+  code:implementedBy [ a code:EcmaScriptModule;
+    code:link <node:barnard59-csvw/fetch.js#default>
+  ];
+ code:arguments [
+ code:name "csvw";
+ code:value "file:/test/mappings/remote.csvw.json"
+ ].
+```
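+
+### Example: Consuming the stream from JavaScript
+
+Outside of a pipeline definition, the exported `fetch` function can also be called directly, which is handy for quick checks. A minimal sketch (the mapping path below is a placeholder):
+
+```js
+import { fetch } from 'barnard59-csvw'
+
+// returns a readable stream with the raw CSV referenced by csvw:url
+const stream = fetch({ csvw: 'file:./mappings/example.metadata.json' })
+
+stream.pipe(process.stdout)
+```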
diff --git a/packages/csvw/fetch.js b/packages/csvw/fetch.js
new file mode 100644
index 00000000..7fc2378b
--- /dev/null
+++ b/packages/csvw/fetch.js
@@ -0,0 +1,29 @@
+import rdf from '@zazuko/env'
+import toReadable from 'duplex-to/readable.js'
+import { PassThrough } from 'readable-stream'
+import fetchData from './lib/fetchData.js'
+import fetchMetadata from './lib/fetchMetadata.js'
+
+function fetch({ csvw }) {
+  const output = new PassThrough()
+
+  // fetch asynchronously so the step can hand back a stream right away
+  Promise.resolve().then(async () => {
+    try {
+      // load the mapping document and follow its csvw:url to the actual CSV source
+      const metadata = await fetchMetadata(csvw)
+      const url = metadata.any().has(rdf.ns.csvw.url).out(rdf.ns.csvw.url)
+      const dataStream = await fetchData(url.value)
+
+      dataStream.pipe(output)
+    } catch (err) {
+      // propagate fetch errors to the consumer of the stream
+      output.destroy(err)
+    }
+  })
+
+  // expose only the readable side to the pipeline
+  return toReadable(output)
+}
+
+export default fetch
diff --git a/packages/csvw/index.js b/packages/csvw/index.js
new file mode 100644
index 00000000..2f46c9b4
--- /dev/null
+++ b/packages/csvw/index.js
@@ -0,0 +1,5 @@
+import fetch from './fetch.js'
+
+export {
+ fetch,
+}
diff --git a/packages/csvw/lib/checkResponse.js b/packages/csvw/lib/checkResponse.js
new file mode 100644
index 00000000..c18c9a6e
--- /dev/null
+++ b/packages/csvw/lib/checkResponse.js
@@ -0,0 +1,7 @@
+async function checkResponse(res) {
+ if (!res.ok) {
+    throw new Error(`${res.statusText} (${res.status}): ${await res.text()}`)
+ }
+}
+
+export default checkResponse
diff --git a/packages/csvw/lib/commonFetch.js b/packages/csvw/lib/commonFetch.js
new file mode 100644
index 00000000..4bf7913e
--- /dev/null
+++ b/packages/csvw/lib/commonFetch.js
@@ -0,0 +1,12 @@
+import fileFetch from 'file-fetch'
+import httpFetch from 'node-fetch'
+import protoFetch from 'proto-fetch'
+
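+// pick a fetch implementation based on the URL scheme (file:, http:, https:)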
+const commonFetch = protoFetch({
+ file: fileFetch,
+ http: httpFetch,
+ https: httpFetch,
+})
+
+export default commonFetch
diff --git a/packages/csvw/lib/fetchData.js b/packages/csvw/lib/fetchData.js
new file mode 100644
index 00000000..efbf95ee
--- /dev/null
+++ b/packages/csvw/lib/fetchData.js
@@ -0,0 +1,12 @@
+import checkResponse from './checkResponse.js'
+import commonFetch from './commonFetch.js'
+
+async function fetchData(url) {
+ const res = await commonFetch(url)
+
+ await checkResponse(res)
+
+ return res.body
+}
+
+export default fetchData
diff --git a/packages/csvw/lib/fetchMetadata.js b/packages/csvw/lib/fetchMetadata.js
new file mode 100644
index 00000000..300377a5
--- /dev/null
+++ b/packages/csvw/lib/fetchMetadata.js
@@ -0,0 +1,23 @@
+import rdfFetch from '@rdfjs/fetch'
+import rdf from '@zazuko/env'
+import checkResponse from './checkResponse.js'
+import commonFetch from './commonFetch.js'
+
+async function fetchMetadata(url) {
+  const res = await rdfFetch(url.toString(), {
+    // treat plain .json documents as JSON-LD so the CSVW metadata can be parsed
+    contentTypeLookup: extension => extension === '.json' ? 'application/ld+json' : undefined,
+ factory: rdf,
+ fetch: commonFetch,
+ })
+
+ await checkResponse(res)
+
+ if (!res.dataset) {
+ throw new Error('response is empty')
+ }
+
+ return rdf.clownface({ dataset: await res.dataset() })
+}
+
+export default fetchMetadata
diff --git a/packages/csvw/manifest.ttl b/packages/csvw/manifest.ttl
new file mode 100644
index 00000000..1a618149
--- /dev/null
+++ b/packages/csvw/manifest.ttl
@@ -0,0 +1,11 @@
+@base <https://barnard59.zazuko.com/operations/csvw/> .
+@prefix code: <https://code.described.at/> .
+@prefix p: <https://pipeline.described.at/> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+
+<fetch> a p:Operation, p:Readable;
+  rdfs:label "Fetch CSVW";
+  rdfs:comment "Loads a CSVW file from the local filesystem or the web depending on the given argument";
+  code:implementedBy [ a code:EcmaScriptModule;
+    code:link <node:barnard59-csvw/fetch.js#default>
+  ].
diff --git a/packages/csvw/package.json b/packages/csvw/package.json
new file mode 100644
index 00000000..f1f5cdd5
--- /dev/null
+++ b/packages/csvw/package.json
@@ -0,0 +1,39 @@
+{
+ "name": "barnard59-csvw",
+ "version": "1.0.0",
+ "description": "Simplifies handling CSVW mapping documents in barnard59 pipelines",
+ "type": "module",
+ "main": "index.js",
+ "scripts": {
+ "test": "mocha"
+ },
+ "repository": {
+ "type": "git",
+ "url": "git://github.com/zazuko/barnard59.git",
+ "directory": "packages/csvw"
+ },
+ "keywords": [],
+ "author": "Zazuko GmbH",
+ "license": "MIT",
+ "bugs": {
+ "url": "https://github.com/zazuko/barnard59/issues"
+ },
+ "homepage": "https://github.com/zazuko/barnard59",
+ "dependencies": {
+ "@rdfjs/fetch": "^2.1.0",
+ "@zazuko/env": "^1.0.1",
+ "duplex-to": "^1.0.1",
+ "file-fetch": "^1.7.0",
+ "node-fetch": "^3.0.0",
+ "proto-fetch": "^1.0.0",
+ "readable-stream": "^3.6.0"
+ },
+ "devDependencies": {
+ "express-as-promise": "^1.2.0",
+ "get-stream": "^7.0.1",
+ "is-stream": "^3.0.0"
+ },
+ "engines": {
+ "node": ">= 14.0.0"
+ }
+}
diff --git a/packages/csvw/test/fetch.test.js b/packages/csvw/test/fetch.test.js
new file mode 100644
index 00000000..b65c97b8
--- /dev/null
+++ b/packages/csvw/test/fetch.test.js
@@ -0,0 +1,136 @@
+import { rejects, strictEqual } from 'assert'
+import withServer from 'express-as-promise/withServer.js'
+import getStream from 'get-stream'
+import { isReadableStream } from 'is-stream'
+import fetch from '../fetch.js'
+
+const csvContent = 'id,text\n1,abc\n'
+const fileMetadataUrl = 'file:./test/support/test.metadata.json'
+const fileMetadataTtlUrl = 'file:./test/support/test.metadata.ttl'
+
+describe('fetch', () => {
+ it('should be a function', () => {
+ strictEqual(typeof fetch, 'function')
+ })
+
+ it('should return a readable stream', () => {
+    const result = fetch({ csvw: fileMetadataUrl })
+
+ strictEqual(isReadableStream(result), true)
+ })
+
+ it('should process file: url', async () => {
+    const stream = fetch({ csvw: fileMetadataUrl })
+ const content = await getStream(stream)
+
+ strictEqual(content, csvContent)
+ })
+
+ it('should process turtle file: url', async () => {
+    const stream = fetch({ csvw: fileMetadataTtlUrl })
+ const content = await getStream(stream)
+
+ strictEqual(content, csvContent)
+ })
+
+ it('should throw an error for non-existing file: metadata url', async () => {
+ const stream = fetch({ csvw: 'file:./test/support/non-existing.metadata.json' })
+
+ await rejects(getStream(stream))
+ })
+
+ it('should throw an error for non-existing file: data url', async () => {
+ const stream = fetch({ csvw: 'file:./test/support/no-data.metadata.json' })
+
+ await rejects(getStream(stream))
+ })
+
+ it('should process http: url', async () => {
+ await withServer(async server => {
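+      // baseUrl is assigned below, after server.listen(); the route handlers only read it once a request arrives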
+ server.app.get('/metadata', (req, res) => {
+ res.set('content-type', 'application/ld+json').json({
+ '@context': 'http://www.w3.org/ns/csvw',
+ url: new URL('/data', baseUrl),
+ })
+ })
+
+ server.app.get('/data', (req, res) => {
+ res.set('content-type', 'text/csv').end(csvContent)
+ })
+
+ const baseUrl = await server.listen()
+ const stream = fetch({ csvw: new URL('/metadata', baseUrl) })
+ const content = await getStream(stream)
+
+ strictEqual(content, csvContent)
+ })
+ })
+
+ it('should process turtle http: url', async () => {
+ await withServer(async server => {
+ server.app.get('/metadata', (req, res) => {
+        res.set('content-type', 'text/turtle').end(`[
+          <http://www.w3.org/ns/csvw#url> <${new URL('data', baseUrl)}>
+        ].`)
+ })
+
+ server.app.get('/data', (req, res) => {
+ res.set('content-type', 'text/csv').end(csvContent)
+ })
+
+ const baseUrl = await server.listen()
+ const stream = fetch({ csvw: new URL('/metadata', baseUrl) })
+ const content = await getStream(stream)
+
+ strictEqual(content, csvContent)
+ })
+ })
+
+ it('should throw an error for non-existing http: metadata url', async () => {
+ await withServer(async server => {
+ const baseUrl = await server.listen()
+ const stream = fetch({ csvw: new URL('/metadata', baseUrl) })
+
+ await rejects(getStream(stream))
+ })
+ })
+
+ it('should throw an error for non-existing http: data url', async () => {
+ await withServer(async server => {
+ server.app.get('/metadata', (req, res) => {
+ res.set('content-type', 'application/ld+json').json({
+ '@context': 'http://www.w3.org/ns/csvw',
+ url: new URL('/data', baseUrl),
+ })
+ })
+
+ const baseUrl = await server.listen()
+ const stream = fetch({ csvw: new URL('/metadata', baseUrl) })
+
+ await rejects(getStream(stream))
+ })
+ })
+
+ it('should throw an error if http connection is killed', async () => {
+ await withServer(async server => {
+ server.app.get('/metadata', (req, res) => {
+ res.set('content-type', 'application/ld+json').json({
+ '@context': 'http://www.w3.org/ns/csvw',
+ url: new URL('/data', baseUrl),
+ })
+ })
+
+ server.app.get('/data', async (req, res) => {
+ res.set('content-type', 'text/csv')
+ res.write('abc')
+ res.socket.destroy()
+ })
+
+ const baseUrl = await server.listen()
+ const stream = fetch({ csvw: new URL('/metadata', baseUrl) })
+
+ await rejects(getStream(stream))
+ })
+ })
+})
diff --git a/packages/csvw/test/support/no-data.metadata.json b/packages/csvw/test/support/no-data.metadata.json
new file mode 100644
index 00000000..da1330c8
--- /dev/null
+++ b/packages/csvw/test/support/no-data.metadata.json
@@ -0,0 +1,4 @@
+{
+ "@context": "http://www.w3.org/ns/csvw",
+ "url": "file:./test/support/non-existing.csv"
+}
diff --git a/packages/csvw/test/support/test.csv b/packages/csvw/test/support/test.csv
new file mode 100644
index 00000000..0c074d4e
--- /dev/null
+++ b/packages/csvw/test/support/test.csv
@@ -0,0 +1,2 @@
+id,text
+1,abc
diff --git a/packages/csvw/test/support/test.metadata.json b/packages/csvw/test/support/test.metadata.json
new file mode 100644
index 00000000..f8a10f31
--- /dev/null
+++ b/packages/csvw/test/support/test.metadata.json
@@ -0,0 +1,4 @@
+{
+ "@context": "http://www.w3.org/ns/csvw",
+ "url": "file:./test/support/test.csv"
+}
diff --git a/packages/csvw/test/support/test.metadata.ttl b/packages/csvw/test/support/test.metadata.ttl
new file mode 100644
index 00000000..74ba0ddc
--- /dev/null
+++ b/packages/csvw/test/support/test.metadata.ttl
@@ -0,0 +1,3 @@
+[
+  <http://www.w3.org/ns/csvw#url> <file:./test/support/test.csv>
+].