diff --git a/.changeset/cyan-falcons-share.md b/.changeset/cyan-falcons-share.md new file mode 100644 index 00000000..b92b3017 --- /dev/null +++ b/.changeset/cyan-falcons-share.md @@ -0,0 +1,5 @@ +--- +"barnard59-core": minor +--- + +Include the current graph in pipeline context diff --git a/packages/core/index.ts b/packages/core/index.ts index 6b19e465..8a026488 100644 --- a/packages/core/index.ts +++ b/packages/core/index.ts @@ -1,6 +1,6 @@ import type { Logger } from 'winston' import type { Environment } from 'barnard59-env' -import type { GraphPointer } from 'clownface' +import type { GraphPointer, AnyPointer } from 'clownface' import defaultLoaderRegistry from './lib/defaultLoaderRegistry.js' import defaultLogger from './lib/defaultLogger.js' import createPipeline from './lib/factory/pipeline.js' @@ -24,6 +24,7 @@ interface TypedMap extends Map { export type VariableMap = (keyof Variables extends never ? Map : TypedMap) export interface Context { + graph: AnyPointer env: Environment logger: Logger variables: VariableMap diff --git a/packages/core/lib/factory/pipeline.ts b/packages/core/lib/factory/pipeline.ts index 23d878bf..490b33bc 100644 --- a/packages/core/lib/factory/pipeline.ts +++ b/packages/core/lib/factory/pipeline.ts @@ -13,14 +13,15 @@ import createStep from './step.js' import createVariables from './variables.js' function createPipelineContext( - { basePath, context, logger, variables, error }: { + { ptr, basePath, context, logger, variables, error }: { + ptr: GraphPointer basePath: string context: Pick logger: Logger variables: VariableMap error: (err: Error) => void }) { - return { error, ...context, basePath, logger, variables } as unknown as Context + return { error, ...context, graph: ptr.any(), basePath, logger, variables } as unknown as Context } async function createPipelineVariables( @@ -75,7 +76,7 @@ function createPipeline(maybePtr: { term?: Term; dataset?: DatasetCore }, init: } variables = await createPipelineVariables(ptr, { basePath, context, loaderRegistry, logger, variables }) - context = await createPipelineContext({ basePath, context, logger, variables, error }) + context = await createPipelineContext({ ptr, basePath, context, logger, variables, error }) logVariables(ptr, context, variables) diff --git a/packages/shacl/manifest.ttl b/packages/shacl/manifest.ttl index 32b16ed1..d7d5ae01 100644 --- a/packages/shacl/manifest.ttl +++ b/packages/shacl/manifest.ttl @@ -23,6 +23,7 @@ b59:command "validate" ; rdfs:label "Validates the RDF in standard input against a SHACL document" ; b59:source "barnard59-shacl/pipeline/validate.ttl" ; + b59:pipeline ; . diff --git a/packages/shacl/package.json b/packages/shacl/package.json index 030254d7..f47da1f7 100644 --- a/packages/shacl/package.json +++ b/packages/shacl/package.json @@ -48,6 +48,7 @@ "string-to-stream": "^3.0.1" }, "mocha": { + "recursive": true, "require": "../../test/mocha-setup.cjs", "loader": "ts-node/esm/transpile-only" } diff --git a/packages/shacl/pipeline/report-summary.ttl b/packages/shacl/pipeline/report-summary.ttl index 8694f723..42ab5b7e 100644 --- a/packages/shacl/pipeline/report-summary.ttl +++ b/packages/shacl/pipeline/report-summary.ttl @@ -4,7 +4,7 @@ @prefix n3: . @prefix rdf: . -@base . +@base . a p:Pipeline , p:Readable ; p:steps diff --git a/packages/shacl/pipeline/validate.js b/packages/shacl/pipeline/validate.js new file mode 100644 index 00000000..4b6276b9 --- /dev/null +++ b/packages/shacl/pipeline/validate.js @@ -0,0 +1,15 @@ +import { shacl } from '../report.js' + +/** + * @this {import('barnard59-core').Context} + */ +export default function () { + const { variables } = this + + if (variables.has('shapes')) { + const shapesPipeline = this.createPipeline(this.graph.namedNode('https://barnard59.zazuko.com/pipeline/shacl/_getShapes'), { variables }) + return shacl.call(this, shapesPipeline.stream) + } + + return shacl.call(this) +} diff --git a/packages/shacl/pipeline/validate.ttl b/packages/shacl/pipeline/validate.ttl index 398365dc..14e4db8c 100644 --- a/packages/shacl/pipeline/validate.ttl +++ b/packages/shacl/pipeline/validate.ttl @@ -3,27 +3,31 @@ @prefix base: . @prefix n3: . @prefix ntriples: . -@prefix getDataset: . @prefix shacl: . @prefix code: . @prefix rdf: . -@base . +@base . -_:shapes a p:Variable ; - p:name "shapes" ; - rdfs:label "URL or path of the shapes graph" ; -. - -_:shapesMediaType a p:Variable ; - p:name "shapesMediaType" ; - rdfs:label "Override the shapes graph format" ; - p:required false ; +_:variables + p:variable + [ + a p:Variable ; + p:name "shapes" ; + rdfs:label "URL or path of the shapes graph" ; + p:required false ; + ], + [ + a p:Variable ; + p:name "shapesMediaType" ; + rdfs:label "Override the shapes graph format" ; + p:required false ; + ] ; . a p:Pipeline, p:Readable ; - p:variables [ p:variable _:shapes, _:shapesMediaType ] ; + p:variables _:variables ; p:steps [ p:stepList @@ -41,15 +45,23 @@ _:shapesMediaType a p:Variable ; p:stepList ( [ n3:parse () ] - [ getDataset: () ] - [ shacl:report ( _:getShapes ) ] + [ rdf:getDataset () ] + [ + a p:Step ; + code:implementedBy + [ + a code:EcmaScriptModule ; + code:link ; + ] ; + ] [ base:flatten () ] [ ntriples:serialize () ] ) ] ; . -_:getShapes a p:Pipeline, p:ReadableObjectMode ; +<_getShapes> a p:Pipeline, p:ReadableObjectMode ; + p:variables _:variables ; p:steps [ p:stepList diff --git a/packages/shacl/report.js b/packages/shacl/report.js index 08f37414..c9417447 100644 --- a/packages/shacl/report.js +++ b/packages/shacl/report.js @@ -5,11 +5,12 @@ import TermCounter from './lib/TermCounter.js' /** * @this {import('barnard59-core').Context} - * @param {import('@rdfjs/types').DatasetCore} ds - * @param {number | undefined} maxViolations + * @param {object} options + * @param {import('@rdfjs/types').DatasetCore | undefined} options.shapes + * @param {number | undefined} options.maxViolations * @param {AsyncIterable} iterable */ -async function * validate(ds, maxViolations, iterable) { +async function * validate({ shapes, maxViolations }, iterable) { let totalViolations = 0 const counter = new TermCounter(this.env) @@ -19,8 +20,7 @@ async function * validate(ds, maxViolations, iterable) { break } - // create a new validator instance at each iteration to avoid memory leaks - const validator = new SHACLValidator(ds, { maxErrors: 0, factory: this.env }) + const validator = new SHACLValidator(shapes || chunk, { maxErrors: 0, factory: this.env }) const report = validator.validate(chunk) if (!report.conforms) { for (const result of report.results) { @@ -63,14 +63,18 @@ export async function shacl(arg) { maxViolations = options.maxErrors < 1 ? 0 : Number(options.maxErrors) } + let ds if (!shape) { - throw new Error('Needs a SHACL shape as parameter') - } - if (!isReadableStream(shape)) { - throw new Error(`${shape} is not a readable stream`) + this.logger.info('No shapes found. Will validate each chunk against shapes found in the chunk itself') + } else { + if (!isReadableStream(shape)) { + throw new Error(`${shape} is not a readable stream`) + } + ds = await this.env.dataset().import(shape) } - const ds = await this.env.dataset().import(shape) - - return Duplex.from(validate.bind(this, ds, maxViolations)) + return Duplex.from(validate.bind(this, { + shapes: ds, + maxViolations, + })) } diff --git a/packages/shacl/test/pipeline/validate.test.js b/packages/shacl/test/pipeline/validate.test.js index e2b86771..c1564f48 100644 --- a/packages/shacl/test/pipeline/validate.test.js +++ b/packages/shacl/test/pipeline/validate.test.js @@ -10,9 +10,8 @@ import { pipelineDefinitionLoader } from 'barnard59-test-support/loadPipelineDef import getStream from 'get-stream' const ex = env.namespace('http://example.org/') -const ns = env.namespace('http://barnard59.zazuko.com/pipeline/shacl/') +const ns = env.namespace('https://barnard59.zazuko.com/pipeline/shacl/') -const basePath = url.fileURLToPath(new URL('../../pipeline', import.meta.url)) const loadPipeline = pipelineDefinitionLoader(import.meta.url, '../../pipeline') describe('pipeline/validate', function () { @@ -24,7 +23,7 @@ describe('pipeline/validate', function () { ${env.ns.schema.name} "John Doe" ; . `.toString() - const ptr = await loadPipeline('validate', { + const { ptr, basePath } = await loadPipeline('validate', { term: ns._validate, }) const variables = new Map([ @@ -36,7 +35,37 @@ describe('pipeline/validate', function () { const output = await getStream(toStream(data).pipe(pipeline.stream)) // then - expect(output).to.be.empty + expect(output).to.match(/conforms> "true"/) + }) + + it('validates against shapes from input', async () => { + // given + const data = turtle` + ${ex.Person} + a ${env.ns.schema.Person} ; + ${env.ns.schema.name} "John Doe" ; + . + + [] + a ${env.ns.sh.NodeShape} ; + ${env.ns.sh.targetClass} ${env.ns.schema.Person} ; + ${env.ns.sh.property} [ + a ${env.ns.sh.PropertyShape} ; + ${env.ns.sh.path} ${env.ns.schema.name} ; + ${env.ns.sh.hasValue} "Jane Doe" ; + ] ; + . + `.toString() + const { ptr, basePath } = await loadPipeline('validate', { + term: ns._validate, + }) + const pipeline = createPipeline(ptr, { basePath, env }) + + // when + const output = await getStream(toStream(data).pipe(pipeline.stream)) + + // then + expect(output).to.match(/conforms> "false"/) }) it('validates against remote shapes', () => { @@ -51,7 +80,7 @@ describe('pipeline/validate', function () { ${env.ns.schema.name} "John Doe" ; . `.toString() - const ptr = await loadPipeline('validate', { + const { ptr, basePath } = await loadPipeline('validate', { term: ns._validate, }) const variables = new Map([ @@ -63,7 +92,7 @@ describe('pipeline/validate', function () { const output = await getStream(toStream(data).pipe(pipeline.stream)) // then - expect(output).to.be.empty + expect(output).to.match(/conforms> "true"/) }) }) @@ -75,7 +104,7 @@ describe('pipeline/validate', function () { ${env.ns.schema.name} "" ; . `.toString() - const ptr = await loadPipeline('validate', { + const { ptr, basePath } = await loadPipeline('validate', { term: ns._validate, }) const variables = new Map([