Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge discvr-24.7 to discvr-24.11 #305

Merged
merged 10 commits into from
Nov 20, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ public ParagraphStep()
put("allowBlank", false);
}}, null),
ToolParameterDescriptor.create("doBndSubset", "Remove BNDs", "If the reference VCF contains BNDs, selecting this option will cause the job to remove them prior to paragraph", "checkbox", new JSONObject(){{
put("checked", true);
put("checked", false);
}}, false),
ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{
put("checked", true);
put("checked", false);
}}, false)
));
}
Expand Down Expand Up @@ -113,20 +113,21 @@ else if (!svVcf.exists())
boolean doBndSubset = ctx.getParams().optBoolean("doBndSubset", false);
if (doBndSubset)
{
File vcfNoBnd = new File(ctx.getOutputDir(), SequenceAnalysisService.get().getUnzippedBaseName(svVcf.getName()) + "nobnd.vcf.gz");
File vcfNoBnd = new File(ctx.getOutputDir(), SequenceAnalysisService.get().getUnzippedBaseName(svVcf.getName()) + "pgSubset.vcf.gz");
File vcfNoBndIdx = new File(vcfNoBnd.getPath() + ".tbi");
if (vcfNoBndIdx.exists())
{
ctx.getLogger().debug("Index exists, will no repeat BND subset");
ctx.getLogger().debug("Index exists, will no repeat VCF subset");
}
else
{
SelectVariantsWrapper svw = new SelectVariantsWrapper(ctx.getLogger());
List<String> selectArgs = new ArrayList<>();
selectArgs.add("-select");
selectArgs.add("SVTYPE != 'BND'");
selectArgs.add("SVTYPE != 'BND' && POS > 150 && !(vc.hasAttribute('SVTYPE') && vc.getAttribute('SVTYPE') == 'INS' && vc.hasSymbolicAlleles() && !vc.hasAttribute('SEQ'))");
selectArgs.add("--exclude-filtered");
selectArgs.add("--exclude-non-variants");
selectArgs.add("--exclude-filtered");
selectArgs.add("--sites-only-vcf-output");

svw.execute(ctx.getSequenceSupport().getCachedGenome(inputFiles.get(0).getLibrary_id()).getWorkingFastaFile(), svVcf, vcfNoBnd, selectArgs);

Expand Down Expand Up @@ -173,7 +174,7 @@ else if (!svVcf.exists())
try (PrintWriter writer = PrintWriters.getPrintWriter(coverageFile); SamReader reader = SamReaderFactory.makeDefault().open(so.getFile()))
{
SAMFileHeader header = reader.getFileHeader();
if (header.getReadGroups().size() == 0)
if (header.getReadGroups().isEmpty())
{
throw new PipelineJobException("No read groups found in input BAM");
}
Expand Down
2 changes: 2 additions & 0 deletions singlecell/src/org/labkey/singlecell/SingleCellModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@
import org.labkey.singlecell.run.CellRangerVDJWrapper;
import org.labkey.singlecell.run.NimbleAlignmentStep;
import org.labkey.singlecell.run.NimbleAnalysis;
import org.labkey.singlecell.run.RepeatNimbleReportHandler;
import org.labkey.singlecell.run.VelocytoAlignmentStep;
import org.labkey.singlecell.run.VelocytoAnalysisStep;

Expand Down Expand Up @@ -221,6 +222,7 @@ public static void registerPipelineSteps()
SequenceAnalysisService.get().registerFileHandler(new CellRangerRawDataHandler());
SequenceAnalysisService.get().registerFileHandler(new ProcessSingleCellHandler());
SequenceAnalysisService.get().registerFileHandler(new ProcessSeuratObjectHandler());
SequenceAnalysisService.get().registerFileHandler(new RepeatNimbleReportHandler());

//Single-cell:
SequencePipelineService.get().registerPipelineStep(new AppendCiteSeq.Provider());
Expand Down
140 changes: 79 additions & 61 deletions singlecell/src/org/labkey/singlecell/run/NimbleHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ public class NimbleHelper
private final PipelineStepProvider<?> _provider;
private final int _stepIdx;

public static final String NIMBLE_REPORT_CATEGORY = "Nimble Report";

public NimbleHelper(PipelineContext ctx, PipelineStepProvider<?> provider, int stepIdx)
{
_ctx = ctx;
Expand Down Expand Up @@ -300,7 +302,7 @@ public void doNimbleAlign(File bam, PipelineStepOutput output, Readset rs, Strin
}
else
{
output.addSequenceOutput(reportHtml, basename + ": nimble report", "Nimble Report", rs.getRowId(), null, genome.getGenomeId(), description);
output.addSequenceOutput(reportHtml, basename + ": nimble report", NIMBLE_REPORT_CATEGORY, rs.getRowId(), null, genome.getGenomeId(), description);
}
}
}
Expand Down Expand Up @@ -474,76 +476,82 @@ private Map<NimbleGenome, File> doAlignment(List<NimbleGenome> genomes, List<Fil
}

// Now run nimble report. Always re-run since this is fast:
List<String> reportArgs = new ArrayList<>();
reportArgs.add("python3");
reportArgs.add("-m");
reportArgs.add("nimble");
File reportResultsGz = runNimbleReport(alignResultsGz, genome.genomeId, output, getPipelineCtx());
resultMap.put(genome, reportResultsGz);
}

reportArgs.add("report");
reportArgs.add("-i");
reportArgs.add("/work/" + alignResultsGz.getName());
return resultMap;
}

File reportResultsGz = new File(getPipelineCtx().getWorkingDirectory(), "reportResults." + genome.genomeId + ".txt");
if (reportResultsGz.exists())
{
reportResultsGz.delete();
}
public static File runNimbleReport(File alignResultsGz, int genomeId, PipelineStepOutput output, PipelineContext ctx) throws PipelineJobException
{
List<String> reportArgs = new ArrayList<>();
reportArgs.add("python3");
reportArgs.add("-m");
reportArgs.add("nimble");

reportArgs.add("-o");
reportArgs.add("/work/" + reportResultsGz.getName());
reportArgs.add("report");
reportArgs.add("-i");
reportArgs.add("/work/" + alignResultsGz.getName());

runUsingDocker(reportArgs, output, null);
File reportResultsGz = new File(ctx.getWorkingDirectory(), "reportResults." + genomeId + ".txt");
if (reportResultsGz.exists())
{
reportResultsGz.delete();
}

if (!reportResultsGz.exists())
{
throw new PipelineJobException("Missing file: " + reportResultsGz.getPath());
}
reportArgs.add("-o");
reportArgs.add("/work/" + reportResultsGz.getName());

resultMap.put(genome, reportResultsGz);
runUsingDocker(reportArgs, output, null, ctx);

if (SequencePipelineService.get().hasMinLineCount(alignResultsGz, 2))
{
// Also run nimble plot. Always re-run since this is fast:
List<String> plotArgs = new ArrayList<>();
plotArgs.add("python3");
plotArgs.add("-m");
plotArgs.add("nimble");
if (!reportResultsGz.exists())
{
throw new PipelineJobException("Missing file: " + reportResultsGz.getPath());
}

plotArgs.add("plot");
plotArgs.add("--input_file");
plotArgs.add("/work/" + alignResultsGz.getName());
if (SequencePipelineService.get().hasMinLineCount(alignResultsGz, 2))
{
// Also run nimble plot. Always re-run since this is fast:
List<String> plotArgs = new ArrayList<>();
plotArgs.add("python3");
plotArgs.add("-m");
plotArgs.add("nimble");

File plotResultsHtml = getReportHtmlFileFromResults(reportResultsGz);
if (reportResultsGz.exists())
{
plotResultsHtml.delete();
}
plotArgs.add("plot");
plotArgs.add("--input_file");
plotArgs.add("/work/" + alignResultsGz.getName());

plotArgs.add("--output_file");
plotArgs.add("/work/" + plotResultsHtml.getName());
File plotResultsHtml = getReportHtmlFileFromResults(reportResultsGz);
if (plotResultsHtml.exists())
{
plotResultsHtml.delete();
}

runUsingDocker(plotArgs, output, null);
plotArgs.add("--output_file");
plotArgs.add("/work/" + plotResultsHtml.getName());

if (!plotResultsHtml.exists())
{
throw new PipelineJobException("Missing file: " + plotResultsHtml.getPath());
}
}
else
runUsingDocker(plotArgs, output, null, ctx);

if (!plotResultsHtml.exists())
{
getPipelineCtx().getLogger().info("Only single line found in results, skipping nimble plot");
throw new PipelineJobException("Missing file: " + plotResultsHtml.getPath());
}
}
else
{
ctx.getLogger().info("Only single line found in results, skipping nimble plot");
}

return resultMap;
return reportResultsGz;
}

private File getReportHtmlFileFromResults(File reportResults)
/**
 * Derives the path of the nimble HTML plot/report file from the path of the report results file.
 * A trailing {@code txt} extension, optionally followed by one or more literal {@code .gz} suffixes,
 * is replaced with {@code html}. Paths that do not end this way are returned unchanged.
 *
 * @param reportResults the nimble report results file (e.g. {@code reportResults.<genomeId>.txt})
 * @return a File in the same location whose name ends in {@code html} instead of {@code txt}/{@code txt.gz}
 */
public static File getReportHtmlFileFromResults(File reportResults)
{
    // The dot must be escaped: an unescaped '.' matches any character, so a name such as
    // "fooXtxtYgz" would otherwise be treated as if it ended in ".gz".
    return new File(reportResults.getPath().replaceAll("txt(\\.gz)*$", "html"));
}

private File getNimbleDoneFile(File parentDir, String resumeString)
/**
 * Builds the location of the marker file used to record that a nimble step has completed.
 *
 * @param parentDir    directory that holds the marker file
 * @param resumeString token identifying the step; embedded in the marker file name
 * @return {@code <parentDir>/nimble.<resumeString>.done}
 */
private static File getNimbleDoneFile(File parentDir, String resumeString)
{
    final String markerName = "nimble." + resumeString + ".done";
    return new File(parentDir, markerName);
}
Expand All @@ -552,13 +560,18 @@ private File getNimbleDoneFile(File parentDir, String resumeString)

/**
 * Instance-level convenience overload: runs nimble in docker using this helper's own pipeline context.
 * All work is delegated to the static four-argument overload.
 *
 * @param nimbleArgs   the nimble command line to run inside the container
 * @param output       step output on which intermediate files are registered by the static overload
 * @param resumeString optional resume token; when non-null the static overload checks for a matching
 *                     done-file and returns {@code false} without re-running if it exists
 * @return the value returned by the static overload
 * @throws PipelineJobException propagated from the static overload
 */
private boolean runUsingDocker(List<String> nimbleArgs, PipelineStepOutput output, @Nullable String resumeString) throws PipelineJobException
{
    // The docker.sh / dockerRun.sh script files are created and registered by the static overload,
    // so no File locals are needed here.
    return runUsingDocker(nimbleArgs, output, resumeString, getPipelineCtx());
}

private static boolean runUsingDocker(List<String> nimbleArgs, PipelineStepOutput output, @Nullable String resumeString, PipelineContext ctx) throws PipelineJobException
{
File localBashScript = new File(ctx.getWorkingDirectory(), "docker.sh");
File dockerBashScript = new File(ctx.getWorkingDirectory(), "dockerRun.sh");
output.addIntermediateFile(localBashScript);
output.addIntermediateFile(dockerBashScript);

// Create temp folder:
File tmpDir = new File(getPipelineCtx().getWorkingDirectory(), "tmpDir");
File tmpDir = new File(ctx.getWorkingDirectory(), "tmpDir");
if (tmpDir.exists())
{
try
Expand Down Expand Up @@ -592,7 +605,7 @@ private boolean runUsingDocker(List<String> nimbleArgs, PipelineStepOutput outpu
writer.println("\t--memory='" + maxRam + "g' \\");
}

getPipelineCtx().getDockerVolumes().forEach(ln -> writer.println(ln + " \\"));
ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\"));
writer.println("\t-v \"${WD}:/work\" \\");
writer.println("\t-v \"${HOME}:/homeDir\" \\");
writer.println("\t-u $UID \\");
Expand Down Expand Up @@ -623,22 +636,22 @@ private boolean runUsingDocker(List<String> nimbleArgs, PipelineStepOutput outpu
File doneFile = null;
if (resumeString != null)
{
doneFile = getNimbleDoneFile(getPipelineCtx().getWorkingDirectory(), resumeString);
doneFile = getNimbleDoneFile(ctx.getWorkingDirectory(), resumeString);
output.addIntermediateFile(doneFile);

if (doneFile.exists())
{
getPipelineCtx().getLogger().info("Nimble already completed, resuming: " + resumeString);
ctx.getLogger().info("Nimble already completed, resuming: " + resumeString);
return false;
}
else
{
getPipelineCtx().getLogger().debug("done file not found: " + doneFile.getPath());
ctx.getLogger().debug("done file not found: " + doneFile.getPath());
}
}

SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(getPipelineCtx().getLogger());
rWrapper.setWorkingDir(getPipelineCtx().getWorkingDirectory());
SimpleScriptWrapper rWrapper = new SimpleScriptWrapper(ctx.getLogger());
rWrapper.setWorkingDir(ctx.getWorkingDirectory());
rWrapper.execute(Arrays.asList("/bin/bash", localBashScript.getName()));

if (doneFile != null)
Expand All @@ -657,18 +670,23 @@ private boolean runUsingDocker(List<String> nimbleArgs, PipelineStepOutput outpu
}

/**
 * Instance-level convenience overload of ensureLocalCopy: forwards to the static three-argument
 * overload, supplying this helper's pipeline context (getPipelineCtx()).
 *
 * @param input  the file to make available locally
 * @param output step output passed through to the static overload
 * @return the File returned by the static overload
 * @throws PipelineJobException propagated from the static overload
 */
private File ensureLocalCopy(File input, PipelineStepOutput output) throws PipelineJobException
{
return ensureLocalCopy(input, output, getPipelineCtx());
}

public static File ensureLocalCopy(File input, PipelineStepOutput output, PipelineContext ctx) throws PipelineJobException
{
try
{
if (getPipelineCtx().getWorkingDirectory().equals(input.getParentFile()))
if (ctx.getWorkingDirectory().equals(input.getParentFile()))
{
return input;
}

File local = new File(getPipelineCtx().getWorkingDirectory(), input.getName());
File local = new File(ctx.getWorkingDirectory(), input.getName());
if (!local.exists())
{
getPipelineCtx().getLogger().debug("Copying file locally: " + input.getPath());
ctx.getLogger().debug("Copying file locally: " + input.getPath());
FileUtils.copyFile(input, local);
}

Expand Down
Loading