Skip to content

Commit

Permalink
towards making elton log machine-readable provenance logs like Preston does #52
Browse files Browse the repository at this point in the history
  • Loading branch information
Jorrit Poelen committed Nov 28, 2023
1 parent e469cc0 commit 33a44ba
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 4 deletions.
16 changes: 14 additions & 2 deletions src/main/java/org/globalbioticinteractions/elton/cmd/CmdUtil.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.globalbioticinteractions.elton.cmd;

import bio.guoda.preston.process.StatementListener;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.rdf.api.Quad;
import org.eol.globi.data.ImportLogger;
import org.eol.globi.data.NodeFactory;
import org.eol.globi.data.StudyImporterException;
Expand Down Expand Up @@ -54,11 +56,21 @@ static void handleNamespaces(DatasetRegistry registry,
}
}

static DatasetRegistry createDataFinderLoggingCaching(DatasetRegistry registry, String namespace, String cacheDir, InputStreamFactory factory) {
static DatasetRegistry createDataFinderLoggingCaching(
DatasetRegistry registry,
String namespace,
String cacheDir,
InputStreamFactory factory) {
return new DatasetRegistryWithCache(new DatasetRegistryLogger(registry, cacheDir), dataset -> {
ResourceService remote = new ResourceServiceLocalAndRemote(factory);
ResourceService local = new ResourceServiceLocal(factory);
Cache pullThroughCache = new CachePullThroughPrestonStore(namespace, cacheDir, remote);
Cache pullThroughCache = new CachePullThroughPrestonStore(namespace, cacheDir, remote, new StatementListener() {

@Override
public void on(Quad quad) {
// Intentionally empty: quads handed to this listener are discarded rather than printed, for now.
}
});
CacheLocalReadonly readOnlyCache = new CacheLocalReadonly(namespace, cacheDir, local);
return new CacheProxy(Arrays.asList(pullThroughCache, readOnlyCache));
});
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package org.globalbioticinteractions.elton.store;

import bio.guoda.preston.HashType;
import bio.guoda.preston.RefNodeConstants;
import bio.guoda.preston.RefNodeFactory;
import bio.guoda.preston.process.StatementListener;
import bio.guoda.preston.store.BlobStoreAppendOnly;
import bio.guoda.preston.store.Dereferencer;
import bio.guoda.preston.store.DereferencerContentAddressed;
Expand All @@ -10,6 +12,7 @@
import bio.guoda.preston.store.ValidatingKeyValueStreamContentAddressedFactory;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.rdf.api.IRI;
import org.apache.commons.rdf.api.Quad;
import org.eol.globi.service.ResourceService;
import org.eol.globi.util.DateUtil;
import org.globalbioticinteractions.cache.CachePullThrough;
Expand All @@ -27,17 +30,33 @@ public class CachePullThroughPrestonStore extends CachePullThrough {
private final String namespace;
private final String cachePath;
private final ResourceService remote;
private final StatementListener listener;

/**
 * Creates a pull-through cache that does not report provenance statements to anyone.
 *
 * Delegates to the four-argument constructor with a listener that ignores every quad.
 *
 * @param namespace       namespace handed on to the four-argument constructor
 * @param cachePath       cache location handed on to the four-argument constructor
 * @param resourceService resource service handed on to the four-argument constructor
 */
public CachePullThroughPrestonStore(
        String namespace,
        String cachePath,
        ResourceService resourceService
) {
    this(namespace, cachePath, resourceService, ignoredQuad -> {
        // do nothing
    });
}

/**
 * Creates a pull-through cache that reports a statement to {@code listener}
 * for each resource it retrieves.
 *
 * @param namespace       namespace passed to the superclass and used when recording provenance
 * @param cachePath       cache directory path passed to the superclass and used when recording provenance
 * @param resourceService resource service passed to the superclass and kept as the remote service
 * @param listener        receives the statements emitted during retrieval; must not be {@code null}
 * @throws NullPointerException if {@code listener} is {@code null}
 */
public CachePullThroughPrestonStore(
        String namespace,
        String cachePath,
        ResourceService resourceService,
        StatementListener listener
) {
    super(namespace, cachePath, resourceService);
    this.namespace = namespace;
    this.cachePath = cachePath;
    this.remote = resourceService;
    // Fail fast: a null listener would otherwise only blow up in the middle of a
    // retrieval, after the content has been dereferenced but before its provenance
    // has been recorded.
    this.listener = java.util.Objects.requireNonNull(listener, "listener");
}

public InputStream retrieve(URI resourceURI) throws IOException {
Expand All @@ -64,6 +83,21 @@ public InputStream retrieve(URI resourceURI) throws IOException {

IRI dereferenced = derefCas.get(RefNodeFactory.toIRI(resourceURI));

streamProvenance(resourceURI, dereferenced, listener);
recordProvenance(resourceURI, keyToPath, dereferenced);
return blobStore.get(dereferenced);
}

/**
 * Emits a single "has version" statement to the given listener.
 *
 * The statement's subject is the IRI of {@code resourceURI}, its predicate is
 * {@link RefNodeConstants#HAS_VERSION}, and its object is {@code dereferenced}.
 *
 * @param resourceURI       URI of the resource that was retrieved
 * @param dereferenced      IRI that the resource was dereferenced to
 * @param statementListener listener that is handed the resulting statement
 */
private void streamProvenance(URI resourceURI, IRI dereferenced, StatementListener statementListener) {
    statementListener.on(
            RefNodeFactory.toStatement(
                    RefNodeFactory.toIRI(resourceURI),
                    RefNodeConstants.HAS_VERSION,
                    dereferenced));
}

private void recordProvenance(URI resourceURI, KeyTo1LevelPath keyToPath, IRI dereferenced) throws IOException {
URI localPathURI = keyToPath.toPath(dereferenced);
ContentProvenance contentProvenanceWithNamespace
= new ContentProvenance(namespace,
Expand All @@ -76,7 +110,6 @@ public InputStream retrieve(URI resourceURI) throws IOException {
new File(cachePath),
contentProvenanceWithNamespace
);
return blobStore.get(dereferenced);
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import bio.guoda.preston.store.ValidatingKeyValueStreamContentAddressedFactory;
import org.apache.commons.io.IOUtils;
import org.apache.commons.rdf.api.IRI;
import org.apache.commons.rdf.api.Quad;
import org.eol.globi.util.ResourceServiceLocal;
import org.globalbioticinteractions.cache.Cache;
import org.hamcrest.core.Is;
Expand All @@ -21,8 +22,10 @@
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;

import static junit.framework.TestCase.assertTrue;
import static org.hamcrest.CoreMatchers.endsWith;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertFalse;

Expand All @@ -33,21 +36,28 @@ public class CachePullThroughPrestonStoreTest {

@Test
public void testPrestonStore() throws IOException, URISyntaxException {
ArrayList<Quad> quads = new ArrayList<>();

Cache cache = new CachePullThroughPrestonStore(
"some/namespace"
, folder.getRoot().getAbsolutePath()
, new ResourceServiceLocal(in -> in)
, quads::add
);

assertThat(quads.size(), Is.is(0));

File namespaceDir = new File(folder.getRoot(), "some/namespace");
assertFalse(new File(namespaceDir, "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824").exists());

InputStream is = cache.retrieve(getClass().getResource("hello.txt").toURI());
assertTrue(new File(namespaceDir, "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824").exists());

assertThat(IOUtils.toString(is, StandardCharsets.UTF_8.name()), Is.is("hello"));

assertThat(quads.size(), Is.is(1));
assertThat(quads.get(0).getSubject().toString(), endsWith("org/globalbioticinteractions/elton/store/hello.txt>"));
assertThat(quads.get(0).getPredicate().toString(), Is.is("<http://purl.org/pav/hasVersion>"));
assertThat(quads.get(0).getObject().toString(), Is.is("<hash://sha256/2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824>"));

assertFalse(new File(folder.getRoot(), "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824").exists());

Expand All @@ -71,6 +81,7 @@ public void testPrestonStore() throws IOException, URISyntaxException {
assertThat(IOUtils.toString(inputStream, StandardCharsets.UTF_8.name()), Is.is("hello"));

assertTrue(new File(folder.getRoot(), "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824").exists());
assertThat(quads.size(), Is.is(1));
}


Expand Down

0 comments on commit 33a44ba

Please sign in to comment.