-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
introducing [elton stream] command to enable streaming of interactions from a preston archive; related to Big-Bee-Network/bif#1; fyi @zedomel @seltmann
- Loading branch information
Jorrit Poelen
committed
Jul 26, 2024
1 parent
7f9999a
commit 9534e4b
Showing
6 changed files
with
257 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
73 changes: 73 additions & 0 deletions
73
src/main/java/org/globalbioticinteractions/elton/cmd/CmdStream.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package org.globalbioticinteractions.elton.cmd; | ||
|
||
import com.fasterxml.jackson.core.JsonProcessingException; | ||
import com.fasterxml.jackson.databind.JsonNode; | ||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import org.apache.commons.io.FileUtils; | ||
import org.apache.commons.io.IOUtils; | ||
import org.apache.commons.lang.StringUtils; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import picocli.CommandLine; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.File; | ||
import java.io.IOException; | ||
import java.io.InputStreamReader; | ||
import java.nio.charset.StandardCharsets; | ||
import java.util.concurrent.atomic.AtomicBoolean; | ||
|
||
@CommandLine.Command( | ||
name = "stream", | ||
description = "stream interactions associated with dataset configuration provided by globi.json line-json as input.\n" + | ||
"example input:" + | ||
"{ \"namespace\": \"hash://sha256/9cd053d40ef148e16389982ea16d724063b82567f7ba1799962670fc97876fbf\", \"citation\": \"hash://sha256/9cd053d40ef148e16389982ea16d724063b82567f7ba1799962670fc97876fbf\", \"format\": \"dwca\", \"url\": \"https://linker.bio/hash://sha256/9cd053d40ef148e16389982ea16d724063b82567f7ba1799962670fc97876fbf\" }\n" | ||
) | ||
|
||
public class CmdStream extends CmdDefaultParams { | ||
|
||
private final static Logger LOG = LoggerFactory.getLogger(CmdStream.class); | ||
|
||
|
||
@Override | ||
public void run() { | ||
|
||
BufferedReader reader = IOUtils.buffer(new InputStreamReader(getStdin(), StandardCharsets.UTF_8)); | ||
AtomicBoolean isFirst = new AtomicBoolean(true); | ||
try { | ||
String line; | ||
while ((line = reader.readLine()) != null) { | ||
try { | ||
JsonNode jsonNode = new ObjectMapper().readTree(line); | ||
if (jsonNode.has("namespace")) { | ||
String namespace = jsonNode.get("namespace").asText(); | ||
if (StringUtils.isNotBlank(namespace)) { | ||
try { | ||
StreamingNamespaceConfigHandler namespaceHandler = new StreamingNamespaceConfigHandler( | ||
jsonNode, | ||
this.createInputStreamFactory(), | ||
this.getCacheDir(), | ||
this.getStderr(), | ||
this.getStdout() | ||
); | ||
namespaceHandler.setShouldWriteHeader(isFirst.get()); | ||
namespaceHandler.onNamespace(namespace); | ||
isFirst.set(false); | ||
} catch (Exception e) { | ||
LOG.error("failed to add dataset associated with namespace [" + namespace + "]", e); | ||
} finally { | ||
FileUtils.forceDelete(new File(this.getCacheDir())); | ||
} | ||
} | ||
} | ||
} catch (JsonProcessingException e) { | ||
// ignore non-json lines | ||
} | ||
} | ||
} catch (IOException ex) { | ||
LOG.error("failed to read from stdin", ex); | ||
} | ||
|
||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
91 changes: 91 additions & 0 deletions
91
src/main/java/org/globalbioticinteractions/elton/cmd/StreamingNamespaceConfigHandler.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
package org.globalbioticinteractions.elton.cmd; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import org.apache.commons.io.IOUtils; | ||
import org.eol.globi.data.NodeFactory; | ||
import org.eol.globi.data.StudyImporterException; | ||
import org.eol.globi.util.DatasetImportUtil; | ||
import org.eol.globi.util.InputStreamFactory; | ||
import org.globalbioticinteractions.cache.Cache; | ||
import org.globalbioticinteractions.cache.CacheFactory; | ||
import org.globalbioticinteractions.dataset.Dataset; | ||
import org.globalbioticinteractions.dataset.DatasetWithCache; | ||
import org.globalbioticinteractions.dataset.DatasetWithResourceMapping; | ||
import org.globalbioticinteractions.elton.util.DatasetProcessorForTSV; | ||
import org.globalbioticinteractions.elton.util.NamespaceHandler; | ||
import org.globalbioticinteractions.elton.util.NodeFactoryForDataset; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.io.PrintStream; | ||
import java.net.URI; | ||
import java.nio.charset.StandardCharsets; | ||
|
||
class StreamingNamespaceConfigHandler implements NamespaceHandler { | ||
private final static Logger LOG = LoggerFactory.getLogger(StreamingNamespaceConfigHandler.class); | ||
private final String cacheDir; | ||
private final PrintStream stderr; | ||
private final PrintStream stdout; | ||
|
||
private InputStreamFactory factory; | ||
private final JsonNode config; | ||
private boolean shouldWriteHeader; | ||
|
||
public StreamingNamespaceConfigHandler(JsonNode jsonNode, | ||
InputStreamFactoryLogging inputStreamFactory, | ||
String cacheDir, | ||
PrintStream stderr, | ||
PrintStream stdout) { | ||
this.factory = inputStreamFactory; | ||
this.cacheDir = cacheDir; | ||
this.stderr = stderr; | ||
this.stdout = stdout; | ||
this.config = jsonNode; | ||
} | ||
|
||
@Override | ||
public void onNamespace(String namespace) throws Exception { | ||
stderr.println("tracking [" + namespace + "]..."); | ||
CacheFactory cacheFactory = CmdUtil.createCacheFactory( | ||
namespace, | ||
cacheDir, | ||
factory | ||
); | ||
|
||
Dataset dataset = new DatasetWithResourceMapping( | ||
namespace, | ||
URI.create(config.get("url").asText()), | ||
cacheFactory.cacheFor(null) | ||
); | ||
dataset.setConfig(config); | ||
Cache cache = cacheFactory.cacheFor(dataset); | ||
DatasetWithCache datasetWithCache = new DatasetWithCache(dataset, cache); | ||
|
||
CmdInteractions.TsvWriter writer = new CmdInteractions.TsvWriter(stdout); | ||
if (shouldWriteHeader) { | ||
writer.writeHeader(); | ||
} | ||
|
||
NodeFactory factory = new NodeFactoryForDataset(writer, new DatasetProcessorForTSV()); | ||
|
||
factory.getOrCreateDataset(dataset); | ||
try { | ||
DatasetImportUtil.importDataset( | ||
null, | ||
datasetWithCache, | ||
factory, | ||
null); | ||
stderr.println("done."); | ||
} catch (StudyImporterException ex) { | ||
LOG.error("tracking of [" + namespace + "] failed.", ex); | ||
stderr.println("failed with [ " + ex.getMessage() + "]."); | ||
ex.printStackTrace(stderr); | ||
} | ||
|
||
IOUtils.write("wrote [" + namespace + "]\n", stderr, StandardCharsets.UTF_8); | ||
} | ||
|
||
public void setShouldWriteHeader(boolean shouldWriteHeader) { | ||
this.shouldWriteHeader = shouldWriteHeader; | ||
} | ||
} |
Oops, something went wrong.