From 582865a614c57cc2c034b78ee90380b2aeb40271 Mon Sep 17 00:00:00 2001
From: Giacomo Citi
Date: Thu, 2 Nov 2023 18:07:40 +0100
Subject: [PATCH 01/34] feat: cube package with validation pipeline

---
 packages/cli/lib/discoverManifests.js      |   1 +
 packages/cli/lib/pipeline.js               |   2 +-
 packages/cube/README.md                    |  68 +++++++
 packages/cube/lib/batch.js                 |  30 +++
 packages/cube/lib/quads.js                 |  11 ++
 packages/cube/lib/target.js                |  12 ++
 packages/cube/package.json                 |  37 ++++
 packages/cube/pipeline/cube-validation.ttl | 216 +++++++++++++++++++++
 packages/sparql/manifest.ttl               |   4 +-
 9 files changed, 378 insertions(+), 3 deletions(-)
 create mode 100644 packages/cube/README.md
 create mode 100644 packages/cube/lib/batch.js
 create mode 100644 packages/cube/lib/quads.js
 create mode 100644 packages/cube/lib/target.js
 create mode 100644 packages/cube/package.json
 create mode 100644 packages/cube/pipeline/cube-validation.ttl

diff --git a/packages/cli/lib/discoverManifests.js b/packages/cli/lib/discoverManifests.js
index 01351f6f..1e816673 100644
--- a/packages/cli/lib/discoverManifests.js
+++ b/packages/cli/lib/discoverManifests.js
@@ -9,6 +9,7 @@ const require = module.createRequire(import.meta.url)
 export default async function * () {
   const packages = findPlugins({
     includeDev: true,
+    includePeer: true,
     filter({ pkg }) {
       return packagePattern.test(pkg.name) && hasManifest(pkg.name)
     },
diff --git a/packages/cli/lib/pipeline.js b/packages/cli/lib/pipeline.js
index 543e9e23..94e9165c 100644
--- a/packages/cli/lib/pipeline.js
+++ b/packages/cli/lib/pipeline.js
@@ -34,7 +34,7 @@ export const desugar = async (dataset, { logger, knownOperations } = {}) => {
     const [quad] = step.dataset.match(step.term)
     const knownStep = knownOperations.get(quad?.predicate)
     if (!knownStep) {
-      logger?.warn(`Operation <${quad?.predicate.value}> not found in known manifests. Have you added the right \`branard59-*\` package as dependency?`)
+      logger?.warn(`Operation <${quad?.predicate.value}> not found in known manifests. Have you added the right \`barnard59-*\` package as a dependency?`)
       continue
     }
diff --git a/packages/cube/README.md b/packages/cube/README.md
new file mode 100644
index 00000000..f7c87ce9
--- /dev/null
+++ b/packages/cube/README.md
@@ -0,0 +1,68 @@
+# barnard59-cube
+
+## Cube validation
+
+`cube-validation.ttl` contains pipelines to retrieve and validate cube observations and their constraints.
+
+### fetch cube constraint
+
+Pipeline `fetch-cube-constraint` queries a given SPARQL endpoint (the default is https://lindas.admin.ch/query) to retrieve
+a [concise bounded description](https://docs.stardog.com/query-stardog/#describe-queries) of the `cube:Constraint` part of a given cube.
+
+```bash
+npx barnard59 run ./pipeline/cube-validation.ttl \
+  --pipeline http://barnard59.zazuko.com/pipeline/cube-validation/fetch-cube-constraint \
+  --variable cube=https://agriculture.ld.admin.ch/agroscope/PRIFm8t15/2 \
+  --variable endpoint=https://int.lindas.admin.ch/query
+```
+
+This pipeline is mainly useful for cubes published with [cube creator](https://github.com/zazuko/cube-creator); if the cube definition is manually crafted, it is likely already available as a local file.
+
+### check cube constraint
+
+Pipeline `check-cube-constraint` validates the input constraint against the shapes provided with the `profile` variable (the default profile is https://cube.link/latest/shape/standalone-constraint-constraint, but [cube link](https://cube.link/) defines additional ones).
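+
+For illustration, a minimal constraint against which such a profile is checked could look as follows (a hypothetical sketch; the cube and dimension IRIs are invented):
+
+```turtle
+@prefix cube: <https://cube.link/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+
+<https://example.org/cube/constraint> a cube:Constraint ;
+  sh:property [
+    sh:path <https://example.org/cube/dimension/year> ;
+    sh:minCount 1 ;
+    sh:maxCount 1 ;
+  ] .
+```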
+
+The pipeline reads the constraint from `stdin`, so the input can come from a local file (as in the following example) as well as from the output of the `fetch-cube-constraint` pipeline (in most cases it is useful to keep the constraint in a local file, because the `check-cube-observations` pipeline needs it as well).
+
+```bash
+cat myConstraint.ttl \
+| npx barnard59 run ./pipeline/cube-validation.ttl \
+  --pipeline http://barnard59.zazuko.com/pipeline/cube-validation/check-cube-constraint \
+  --variable profile=https://cube.link/v0.0.5/shape/standalone-constraint-constraint
+```
+
+TODO: explain how validation errors are reported
+
+### fetch cube observations
+
+Pipeline `fetch-cube-observations` queries a given SPARQL endpoint (the default is https://lindas.admin.ch/query) to retrieve the observations of a given cube.
+
+```bash
+npx barnard59 run ./pipeline/cube-validation.ttl \
+  --pipeline http://barnard59.zazuko.com/pipeline/cube-validation/fetch-cube-observations \
+  --variable cube=https://agriculture.ld.admin.ch/agroscope/PRIFm8t15/2 \
+  --variable endpoint=https://int.lindas.admin.ch/query
+```
+
+Results are sorted by observation so that the potentially large output stream can be split (by the `check-cube-observations` pipeline) and each observation can be validated separately.
+
+### check cube observations
+
+Pipeline `check-cube-observations` validates the input observations against the shapes provided with the `constraint` variable.
+
+The pipeline reads the observations from `stdin`, so the input can come from a local file (as in the following example) as well as from the output of the `fetch-cube-observations` pipeline.
+
+```bash
+cat myObservations.ttl \
+| npx barnard59 run ./pipeline/cube-validation.ttl \
+  --pipeline http://barnard59.zazuko.com/pipeline/cube-validation/check-cube-observations \
+  --variable constraint=myConstraint.ttl
+```
+
+To enable validation, the pipeline adds a `sh:targetClass` property with value `cube:Observation` to the constraint (this assumes that each observation has an explicit `rdf:type` of `cube:Observation`).
+
+To leverage streaming, the pipeline also assumes that triples belonging to the same observation are adjacent in the input (`fetch-cube-observations` guarantees this by sorting the results by observation), as in the combined example below.
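+
+Since the fetch pipelines write N-Triples to `stdout` and the check pipelines parse their `stdin`, the two can also be piped together directly (a sketch, reusing the variables from the examples above):
+
+```bash
+npx barnard59 run ./pipeline/cube-validation.ttl \
+  --pipeline http://barnard59.zazuko.com/pipeline/cube-validation/fetch-cube-observations \
+  --variable cube=https://agriculture.ld.admin.ch/agroscope/PRIFm8t15/2 \
+  --variable endpoint=https://int.lindas.admin.ch/query \
+| npx barnard59 run ./pipeline/cube-validation.ttl \
+  --pipeline http://barnard59.zazuko.com/pipeline/cube-validation/check-cube-observations \
+  --variable constraint=myConstraint.ttl
+```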
+
+TODO: explain how validation errors are reported
diff --git a/packages/cube/lib/batch.js b/packages/cube/lib/batch.js
new file mode 100644
index 00000000..3cf81833
--- /dev/null
+++ b/packages/cube/lib/batch.js
@@ -0,0 +1,30 @@
+import { Duplex } from 'node:stream'
+import rdf from '@zazuko/env-node'
+
+// Iterable<Item> => Iterable<Item[]>
+// Groups the items of the input iterable into arrays of the given size.
+export async function * chunkObjectsBySize(size, iterable) {
+  let chunk = []
+  for await (const item of iterable) {
+    chunk.push(item)
+    if (chunk.length === size) {
+      yield chunk
+      chunk = []
+    }
+  }
+  if (chunk.length > 0) {
+    yield chunk
+  }
+}
+
+// Iterable<Dataset> => Iterable<Dataset>
+// Merges every `size` consecutive input datasets into a single dataset.
+export async function * chunkBySize(size, iterable) {
+  for await (const array of chunkObjectsBySize(size, iterable)) {
+    const batch = rdf.dataset()
+    for (const dataset of array) {
+      batch.addAll(dataset)
+    }
+    yield batch
+  }
+}
+
+export const batch = size => Duplex.from(iterable => chunkBySize(Number(size), iterable))
diff --git a/packages/cube/lib/quads.js b/packages/cube/lib/quads.js
new file mode 100644
index 00000000..670a9d8f
--- /dev/null
+++ b/packages/cube/lib/quads.js
@@ -0,0 +1,11 @@
+import { Transform } from 'node:stream'
+import rdf from '@zazuko/env-node'
+
+// Turns SPARQL SELECT result rows with ?s ?p ?o bindings into RDF/JS quads.
+export const toQuad = () => new Transform({
+  readableObjectMode: true,
+  writableObjectMode: true,
+  transform(row, _encoding, callback) {
+    this.push(rdf.quad(row.s, row.p, row.o))
+    callback()
+  },
+})
diff --git a/packages/cube/lib/target.js b/packages/cube/lib/target.js
new file mode 100644
index 00000000..c8d0df92
--- /dev/null
+++ b/packages/cube/lib/target.js
@@ -0,0 +1,12 @@
+import rdf from '@zazuko/env-node'
+
+const cube = rdf.namespace('https://cube.link/')
+
+// Adds `sh:targetClass cube:Observation` to the unique cube:Constraint node of
+// the given shape dataset so that the shape can validate observations.
+export const addTarget = shape => {
+  const constraint = rdf.clownface({ dataset: shape, term: cube.Constraint }).in(rdf.ns.rdf.type)
+  if (!constraint.term) {
+    throw new Error('could not find a unique constraint')
+  }
+  constraint.addOut(rdf.ns.sh.targetClass, cube.Observation)
+  return shape
+}
diff --git a/packages/cube/package.json b/packages/cube/package.json
new file mode 100644
index 00000000..bafab833
--- /dev/null
+++ b/packages/cube/package.json
@@ -0,0 +1,37 @@
+{
+  "name": "barnard59-cube",
+  "version": "2.0.0",
+  "description": "RDF cubes for Linked Data pipelines",
+  "type": "module",
+  "main": "index.js",
+  "scripts": {
+    "test": "mocha"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git://github.com/zazuko/barnard59.git",
+    "directory": "packages/cube"
+  },
+  "keywords": [],
+  "author": "Thomas Bergwinkl (https://www.bergnet.org/people/bergi/card#me)",
+  "license": "MIT",
+  "bugs": {
+    "url": "https://github.com/zazuko/barnard59/issues"
+  },
+  "homepage": "https://github.com/zazuko/barnard59",
+  "peerDependencies": {
+    "barnard59-base": "^2.0.0",
+    "barnard59-rdf": "^2.0.0",
+    "barnard59-http": "^2.0.0",
+    "barnard59-sparql": "^2.0.0",
+    "barnard59-formats": "^2.0.0",
+    "barnard59-validate-shacl": "^0.3.8"
+  },
+  "engines": {
+    "node": ">= 14.0.0"
+  },
+  "dependencies": {
+    "@zazuko/env-node": "^1.0.0",
+    "rdf-stream-to-dataset-stream": "^1.0.0"
+  }
+}
diff --git a/packages/cube/pipeline/cube-validation.ttl b/packages/cube/pipeline/cube-validation.ttl
new file mode 100644
index 00000000..1406211b
--- /dev/null
+++ b/packages/cube/pipeline/cube-validation.ttl
@@ -0,0 +1,216 @@
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix code: <https://code.described.at/> .
+@prefix p: <https://pipeline.described.at/> .
+@prefix sparql: <https://barnard59.zazuko.com/operations/sparql/> .
+@prefix http: <https://barnard59.zazuko.com/operations/http/> .
+@prefix shacl: <https://barnard59.zazuko.com/operations/shacl/> .
+@prefix base: <https://barnard59.zazuko.com/operations/base/> .
+@prefix n3: <https://barnard59.zazuko.com/operations/formats/n3/> .
+@prefix ntriples: <https://barnard59.zazuko.com/operations/formats/ntriples/> .
+
+@base <http://barnard59.zazuko.com/pipeline/cube-validation/> .
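+
+# Pipeline IRIs below resolve against @base, so e.g. <fetch-cube-constraint>
+# is http://barnard59.zazuko.com/pipeline/cube-validation/fetch-cube-constraint,
+# the value passed to `--pipeline` in the README examples.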
+
+_:endpoint a p:Variable ;
+  p:name "endpoint" ;
+  rdfs:label "SPARQL endpoint" ;
+  p:value "https://lindas.admin.ch/query"
+.
+
+_:cube a p:Variable ;
+  p:name "cube" ;
+  rdfs:label "cube URI" ;
+.
+
+_:constraint a p:Variable ;
+  p:name "constraint" ;
+  rdfs:label "cube constraint file" ;
+.
+
+_:profile a p:Variable ;
+  p:name "profile" ;
+  rdfs:label "cube constraint profile URL" ;
+  p:value "https://cube.link/latest/shape/standalone-constraint-constraint" ;
+.
+
+<fetch-cube-constraint> a p:Pipeline ;
+  p:variables [ p:variable _:endpoint, _:cube ] ;
+  p:steps
+    [
+      p:stepList
+        (
+          _:queryConstraint
+          [ ntriples:serialize () ]
+          [ base:stdout () ]
+        )
+    ]
+.
+
+<fetch-cube-observations> a p:Pipeline ;
+  p:variables [ p:variable _:endpoint, _:cube ] ;
+  p:steps
+    [
+      p:stepList
+        (
+          _:queryObservations
+          _:toQuads
+          [ ntriples:serialize () ]
+          [ base:stdout () ]
+        )
+    ]
+.
+
+<check-cube-constraint> a p:Pipeline ;
+  p:variables [ p:variable _:profile ] ;
+  p:steps
+    [
+      p:stepList (
+        _:stdin
+        [ n3:parse () ]
+        _:toDataset
+        _:validateWithProfile
+      )
+    ]
+.
+
+<check-cube-observations> a p:Pipeline ;
+  p:variables [ p:variable _:constraint ] ;
+  p:steps
+    [
+      p:stepList (
+        _:stdin
+        [ n3:parse () ]
+        _:toDatasetBySubject
+        _:batch
+        _:validateWithConstraint
+      )
+    ]
+.
+
+# relying on CBD is not ideal (vendor-specific)
+_:queryConstraint sparql:construct
+  [ code:name "endpoint" ; code:value "endpoint"^^p:VariableName ] ,
+  [
+    code:name "query" ;
+    code:value """
+      #pragma describe.strategy cbd
+
+      PREFIX cube: <https://cube.link/>
+
+      DESCRIBE ?s
+      WHERE {
+        <${cube}> cube:observationConstraint ?s .
+      }
+    """^^code:EcmaScriptTemplateLiteral
+  ]
+.
+
+# use SELECT instead of CONSTRUCT to ensure the result is ordered by subject
+_:queryObservations sparql:select
+  [
+    code:name "endpoint" ;
+    code:value "endpoint"^^p:VariableName
+  ] ,
+  [
+    code:name "query" ;
+    code:value """
+      PREFIX cube: <https://cube.link/>
+
+      SELECT ?s ?p ?o
+      WHERE {
+        <${cube}> cube:observationSet/cube:observation ?s .
+        ?s ?p ?o
+      }
+      ORDER BY ?s
+    """^^code:EcmaScriptTemplateLiteral
+  ]
+.
+
+_:toQuads a p:Step ;
+  code:implementedBy
+    [
+      a code:EcmaScriptModule ;
+      code:link <node:barnard59-cube/lib/quads.js#toQuad>
+    ]
+.
+
+_:stdin a p:Step ;
+  code:implementedBy "() => process.stdin"^^code:EcmaScript
+.
+
+_:toDataset a p:Step ;
+  code:implementedBy
+    [
+      rdf:type code:EcmaScript ;
+      code:link <node:rdf-stream-to-dataset-stream/byGraph.js> ;
+    ]
+.
+
+_:toDatasetBySubject a p:Step ;
+  code:implementedBy
+    [
+      rdf:type code:EcmaScript ;
+      code:link <node:rdf-stream-to-dataset-stream/bySubject.js> ;
+    ]
+.
+
+_:batch a p:Step ;
+  code:implementedBy
+    [
+      a code:EcmaScriptModule ;
+      code:link <node:barnard59-cube/lib/batch.js#batch> ;
+    ] ;
+  code:arguments (50)
+.
+
+_:validateWithProfile shacl:validate
+  [ code:name "shape" ; code:value _:getProfile ]
+  # beware of maxErrors = 1, see https://github.com/zazuko/rdf-validate-shacl/issues/88
+.
+
+_:validateWithConstraint shacl:validate
+  [ code:name "shape" ; code:value _:getConstraint ]
+  # beware of maxErrors = 1, see https://github.com/zazuko/rdf-validate-shacl/issues/88
+.
+
+_:getProfile a p:Pipeline , p:ReadableObjectMode ;
+  p:steps
+    [
+      p:stepList
+        (
+          [ http:get [ code:name "url" ; code:value "profile"^^p:VariableName ] ]
+          [ n3:parse () ]
+        )
+    ]
+.
+
+_:getConstraint a p:Pipeline , p:ReadableObjectMode ;
+  p:variables [ p:variable _:constraint ] ;
+  p:steps
+    [
+      p:stepList
+        (
+          _:readConstraint
+          _:toDataset
+          _:addTarget
+          [ base:flatten () ]
+        )
+    ]
+.
+
+_:readConstraint a p:Step ;
+  code:implementedBy
+    [
+      a code:EcmaScriptModule ;
+      code:link ;
+    ] ;
+  code:arguments ("constraint"^^p:VariableName)
+.
+
+_:addTarget base:map (
+  [
+    a code:EcmaScriptModule ;
+    code:link <node:barnard59-cube/lib/target.js#addTarget>
+  ]
+) .
diff --git a/packages/sparql/manifest.ttl b/packages/sparql/manifest.ttl
index 7a83ad19..2e927d1a 100644
--- a/packages/sparql/manifest.ttl
+++ b/packages/sparql/manifest.ttl
@@ -7,12 +7,12 @@
   rdfs:label "SPARQL Select";
   rdfs:comment "Runs the given CONSTRUCT query against the given endpoint parses the result.";
   code:implementedBy [ a code:EcmaScriptModule;
-    code:link
+    code:link
   ].