diff --git a/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java new file mode 100644 index 000000000..245274230 --- /dev/null +++ b/SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java @@ -0,0 +1,110 @@ +package org.labkey.api.sequenceanalysis.run; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.Logger; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.writer.PrintWriters; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.Arrays; +import java.util.List; + +public class DockerWrapper extends AbstractCommandWrapper +{ + private final String _containerName; + private File _tmpDir = null; + + public DockerWrapper(String containerName, Logger log) + { + super(log); + _containerName = containerName; + } + + public void setTmpDir(File tmpDir) + { + _tmpDir = tmpDir; + } + + public void executeWithDocker(List containerArgs, File workDir, PipelineOutputTracker tracker) throws PipelineJobException + { + File localBashScript = new File(workDir, "docker.sh"); + File dockerBashScript = new File(workDir, "dockerRun.sh"); + tracker.addIntermediateFile(localBashScript); + tracker.addIntermediateFile(dockerBashScript); + + setWorkingDir(workDir); + try (PrintWriter writer = PrintWriters.getPrintWriter(localBashScript); PrintWriter dockerWriter = PrintWriters.getPrintWriter(dockerBashScript)) + { + writer.println("#!/bin/bash"); + writer.println("set -x"); + writer.println("WD=`pwd`"); + writer.println("HOME=`echo ~/`"); + writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'"); + writer.println("sudo $DOCKER pull " + _containerName); + writer.println("sudo $DOCKER run --rm=true \\"); + writer.println("\t-v \"${WD}:/work\" \\"); + writer.println("\t-v \"${HOME}:/homeDir\" \\"); + if (_tmpDir != null) + { + writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\"); + } + writer.println("\t--entrypoint /bin/bash \\"); + writer.println("\t-w /work \\"); + Integer maxRam = SequencePipelineService.get().getMaxRam(); + if (maxRam != null) + { + writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\"); + writer.println("\t--memory='" + maxRam + "g' \\"); + } + writer.println("\t" + _containerName + " \\"); + writer.println("\t/work/" + dockerBashScript.getName()); + writer.println("EXIT_CODE=$?"); + writer.println("echo 'Docker run exit code: '$EXIT_CODE"); + writer.println("exit $EXIT_CODE"); + + dockerWriter.println("#!/bin/bash"); + dockerWriter.println("set -x"); + dockerWriter.println(StringUtils.join(containerArgs, " ")); + dockerWriter.println("EXIT_CODE=$?"); + dockerWriter.println("echo 'Exit code: '$?"); + dockerWriter.println("exit $EXIT_CODE"); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + execute(Arrays.asList("/bin/bash", localBashScript.getPath())); + } + + public File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException + { + try + { + if (workingDirectory.equals(input.getParentFile())) + { + return input; + } + + File local = new File(workingDirectory, input.getName()); + if (!local.exists()) + { + getLogger().debug("Copying file locally: " + input.getPath()); + FileUtils.copyFile(input, local); + } + + output.addIntermediateFile(local); + + return local; + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } +} diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java index 20819c231..c1759fcd2 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -16,7 +16,7 @@ import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; -import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; +import org.labkey.api.sequenceanalysis.run.DockerWrapper; import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; import org.labkey.api.util.FileUtil; import org.labkey.api.writer.PrintWriters; @@ -127,6 +127,7 @@ else if (!svVcf.exists()) // id path depth read length // TNPRC-IB18 ../IB18.cram 29.77 150 File coverageFile = new File(ctx.getWorkingDirectory(), "coverage.txt"); + String rgId = null; try (PrintWriter writer = PrintWriters.getPrintWriter(coverageFile); SamReader reader = SamReaderFactory.makeDefault().open(so.getFile())) { SAMFileHeader header = reader.getFileHeader(); @@ -139,13 +140,13 @@ else if (header.getReadGroups().size() > 1) throw new PipelineJobException("More than one read group found in BAM"); } - String rgId = header.getReadGroups().get(0).getSample(); + rgId = header.getReadGroups().get(0).getSample(); JSONObject json = new JSONObject(FileUtils.readFileToString(coverageJson, Charset.defaultCharset())); writer.println("id\tpath\tdepth\tread length"); double depth = json.getJSONObject("autosome").getDouble("depth"); double readLength = json.getInt("read_length"); - writer.println(rgId + "\t" + so.getFile().getPath() + "\t" + depth + "\t" + readLength); + writer.println(rgId + "\t" + "/work/" + so.getFile().getName() + "\t" + depth + "\t" + readLength); } catch (IOException e) { @@ -153,25 +154,34 @@ else if (header.getReadGroups().size() > 1) } ctx.getFileManager().addIntermediateFile(coverageFile); + DockerWrapper dockerWrapper = new DockerWrapper("ghcr.io/bimberlabinternal/paragraph:latest", ctx.getLogger()); List paragraphArgs = new ArrayList<>(); - paragraphArgs.add(AbstractCommandWrapper.resolveFileInPath("multigrmpy.py", null, true).getPath()); - paragraphArgs.add("--verbose"); + paragraphArgs.add("/opt/paragraph/bin/multigrmpy.py"); - File paragraphOut = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".paragraph.txt"); + dockerWrapper.ensureLocalCopy(so.getFile(), ctx.getWorkingDirectory(), ctx.getFileManager()); + dockerWrapper.ensureLocalCopy(SequenceAnalysisService.get().getExpectedBamOrCramIndex(so.getFile()), ctx.getWorkingDirectory(), ctx.getFileManager()); + + File paragraphOutDir = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile())); paragraphArgs.add("-o"); - paragraphArgs.add(paragraphOut.getPath()); + paragraphArgs.add("/work/" + paragraphOutDir.getName()); paragraphArgs.add("-i"); - paragraphArgs.add(svVcf.getPath()); + dockerWrapper.ensureLocalCopy(svVcf, ctx.getWorkingDirectory(), ctx.getFileManager()); + dockerWrapper.ensureLocalCopy(new File(svVcf.getPath() + ".tbi"), ctx.getWorkingDirectory(), ctx.getFileManager()); + paragraphArgs.add("/work/" + svVcf.getName()); paragraphArgs.add("-m"); - paragraphArgs.add(coverageFile.getPath()); + paragraphArgs.add("/work/" + coverageFile.getName()); paragraphArgs.add("-r"); - paragraphArgs.add(ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile().getPath()); + File genomeFasta = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile(); + dockerWrapper.ensureLocalCopy(genomeFasta, ctx.getWorkingDirectory(), ctx.getFileManager()); + dockerWrapper.ensureLocalCopy(new File(genomeFasta.getPath() + ".fai"), ctx.getWorkingDirectory(), ctx.getFileManager()); + paragraphArgs.add("/work/" + genomeFasta.getName()); paragraphArgs.add("--scratch-dir"); - paragraphArgs.add(SequencePipelineService.get().getJavaTempDir()); + paragraphArgs.add("/tmp"); + dockerWrapper.setTmpDir(new File(SequencePipelineService.get().getJavaTempDir())); if (threads != null) { @@ -179,12 +189,9 @@ else if (header.getReadGroups().size() > 1) paragraphArgs.add(threads.toString()); } - paragraphArgs.add("--logfile"); - paragraphArgs.add(new File(ctx.getWorkingDirectory(), "paragraph.log").getPath()); - - new SimpleScriptWrapper(ctx.getLogger()).execute(paragraphArgs); + dockerWrapper.executeWithDocker(paragraphArgs, ctx.getWorkingDirectory(), ctx.getFileManager()); - File genotypes = new File(ctx.getWorkingDirectory(), "genotypes.vcf.gz"); + File genotypes = new File(paragraphOutDir, "genotypes.vcf.gz"); if (!genotypes.exists()) { throw new PipelineJobException("Missing file: " + genotypes.getPath()); @@ -200,6 +207,11 @@ else if (header.getReadGroups().size() > 1) } ctx.getFileManager().addSequenceOutput(genotypes, "paraGRAPH Genotypes: " + so.getName(), "paraGRAPH Genoypes", so.getReadset(), null, so.getLibrary_id(), "Input VCF: " + svVcf.getName() + " (" + svVcfId + ")"); + + ctx.getFileManager().addIntermediateFile(new File(paragraphOutDir, "variants.json.gz")); + ctx.getFileManager().addIntermediateFile(new File(paragraphOutDir, "variants.vcf.gz")); + ctx.getFileManager().addIntermediateFile(new File(paragraphOutDir, "genotypes.json.gz")); + ctx.getFileManager().addIntermediateFile(new File(paragraphOutDir, "grmpy.log")); } } } diff --git a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java index 1b2780f01..a3b1a1a16 100644 --- a/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java +++ b/jbrowse/src/org/labkey/jbrowse/JBrowseLuceneSearch.java @@ -194,7 +194,7 @@ public JSONObject doSearch(User u, String searchString, final int pageSize, fina pointsConfigMap.put(field, doublePointsConfig); } case Integer -> { - numericQueryParserFields.put(field, SortField.Type.INT); + numericQueryParserFields.put(field, SortField.Type.LONG); pointsConfigMap.put(field, intPointsConfig); } }