diff --git a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh index feef079c7..558d131c7 100755 --- a/SequenceAnalysis/pipeline_code/sequence_tools_install.sh +++ b/SequenceAnalysis/pipeline_code/sequence_tools_install.sh @@ -335,30 +335,6 @@ else fi -# -# BisSNP -# -echo "" -echo "" -echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" -echo "Install BisSNP" -echo "" -cd $LKSRC_DIR - -if [[ ! -e ${LKTOOLS_DIR}/BisSNP.jar || ! -z $FORCE_REINSTALL ]]; -then - echo "Cleaning up previous installs" - rm -Rf BisSNP* - rm -Rf $LKTOOLS_DIR/BisSNP.jar - - wget $WGET_OPTS https://downloads.sourceforge.net/project/bissnp/BisSNP-0.82.2/BisSNP-0.82.2.jar - - install ./BisSNP-0.82.2.jar $LKTOOLS_DIR/BisSNP.jar -else - echo "Already installed" -fi - - # #mosaik # diff --git a/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js b/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js index 290b58322..fba9f476e 100644 --- a/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js +++ b/SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js @@ -104,7 +104,7 @@ Ext4.define('SequenceAnalysis.window.LiftoverWindow', { maxValue: 1.0, value: 0.95, fieldLabel: 'Min Percent Match', - helpPopup: 'In order to lift to the target genome, the feature must have at least this percent match. Lower this value to be more permissive; however, this risks incorrect liftovers', + helpPopup: 'In order to lift to the target genome, the feature must have at least this percent match. Lower this value to be more permissive; however, this risks incorrect liftovers. This is ignored if using bcftools.', itemId: 'pctField' },{ xtype: 'checkbox', @@ -112,6 +112,11 @@ Ext4.define('SequenceAnalysis.window.LiftoverWindow', { checked: false, helpPopup: 'If checked, no genotypes will be written to the output file (applies to VCFs only). 
This can be useful (and necessary) when lifting VCFs with extremely high sample number.', fieldLabel: 'Drop Genotypes' + },{ + xtype: 'checkbox', + itemId: 'useBcfTools', + checked: false, + fieldLabel: 'Use bcftools' }].concat(SequenceAnalysis.window.OutputHandlerWindow.getCfgForToolParameters(this.toolParameters)), buttons: [{ text: 'Submit', diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java index 1718c9433..1830fbf8b 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java @@ -77,6 +77,7 @@ import org.labkey.sequenceanalysis.run.alignment.BowtieWrapper; import org.labkey.sequenceanalysis.run.alignment.GSnapWrapper; import org.labkey.sequenceanalysis.run.alignment.MosaikWrapper; +import org.labkey.sequenceanalysis.run.alignment.ParagraphStep; import org.labkey.sequenceanalysis.run.alignment.Pbmm2Wrapper; import org.labkey.sequenceanalysis.run.alignment.StarWrapper; import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper; @@ -113,6 +114,7 @@ import org.labkey.sequenceanalysis.run.util.FastqcRunner; import org.labkey.sequenceanalysis.run.util.GenomicsDBAppendHandler; import org.labkey.sequenceanalysis.run.util.GenomicsDBImportHandler; +import org.labkey.sequenceanalysis.run.util.SVAnnotateStep; import org.labkey.sequenceanalysis.run.variant.*; import org.labkey.sequenceanalysis.util.Barcoder; import org.labkey.sequenceanalysis.util.ChainFileValidator; @@ -300,6 +302,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new MendelianViolationReportStep.Provider()); SequencePipelineService.get().registerPipelineStep(new SummarizeGenotypeQualityStep.Provider()); SequencePipelineService.get().registerPipelineStep(new BcftoolsFillTagsStep.Provider()); + SequencePipelineService.get().registerPipelineStep(new SVAnnotateStep.Provider()); //handlers SequenceAnalysisService.get().registerFileHandler(new LiftoverHandler()); @@ -334,6 +337,7 @@ public static void registerPipelineSteps() SequenceAnalysisService.get().registerFileHandler(new PbsvJointCallingHandler()); SequenceAnalysisService.get().registerFileHandler(new DeepVariantHandler()); SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler()); + SequenceAnalysisService.get().registerFileHandler(new ParagraphStep()); SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler()); SequenceAnalysisService.get().registerReadsetHandler(new RestoreSraDataHandler()); diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java index f7af4b587..600780cb7 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java @@ -28,6 +28,7 @@ import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep; import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper; import org.labkey.api.util.FileType; import org.labkey.api.util.FileUtil; @@ -35,6 +36,7 @@ import org.labkey.api.writer.PrintWriters; import 
org.labkey.sequenceanalysis.SequenceAnalysisModule; import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler; +import org.labkey.sequenceanalysis.run.util.LiftoverBcfToolsWrapper; import org.labkey.sequenceanalysis.run.util.LiftoverVcfWrapper; import org.labkey.sequenceanalysis.util.SequenceUtil; @@ -49,7 +51,7 @@ /** * Created by bimber on 8/26/2014. */ -public class LiftoverHandler implements SequenceOutputHandler +public class LiftoverHandler implements SequenceOutputHandler, VariantProcessingStep.SupportsScatterGather { private final FileType _bedFileType = new FileType(".bed", false); //private FileType _gffFileType = new FileType("gff", false); @@ -60,6 +62,12 @@ public LiftoverHandler() } + @Override + public boolean doSortAfterMerge() + { + return true; + } + @Override public String getName() { @@ -167,8 +175,9 @@ public void processFilesRemote(List inputFiles, JobContext c JSONObject params = ctx.getParams(); boolean dropGenotypes = params.optBoolean("dropGenotypes", false); + boolean useBcfTools = params.optBoolean("useBcfTools", false); - Integer chainFileId = params.getInt("chainFileId"); + int chainFileId = params.getInt("chainFileId"); File chainFile = ctx.getSequenceSupport().getCachedData(chainFileId); int targetGenomeId = params.getInt("targetGenomeId"); @@ -217,7 +226,7 @@ else if (_vcfFileType.isType(f.getFile())) { ReferenceGenome targetGenome = ctx.getSequenceSupport().getCachedGenome(targetGenomeId); ReferenceGenome sourceGenome = ctx.getSequenceSupport().getCachedGenome(f.getLibrary_id()); - liftOverVcf(ctx, targetGenome, sourceGenome, chainFile, f.getFile(), lifted, unmappedOutput, job, pct, dropGenotypes); + liftOverVcf(ctx, targetGenome, sourceGenome, chainFile, f.getFile(), lifted, unmappedOutput, job, pct, dropGenotypes, useBcfTools); } } catch (Exception e) @@ -293,7 +302,7 @@ else if (!SequenceUtil.hasLineCount(unmappedOutput)) } } - public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceGenome sourceGenome, File chain, File input, File output, @Nullable File unmappedOutput, PipelineJob job, double pct, boolean dropGenotypes) throws IOException, PipelineJobException + public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceGenome sourceGenome, File chain, File input, File output, @Nullable File unmappedOutput, PipelineJob job, double pct, boolean dropGenotypes, boolean useBcfTools) throws IOException, PipelineJobException { File currentVCF = input; if (dropGenotypes) @@ -315,8 +324,16 @@ public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceG ctx.getFileManager().addIntermediateFile(new File(outputFile.getPath() + ".tbi")); } - LiftoverVcfWrapper wrapper = new LiftoverVcfWrapper(job.getLogger()); - wrapper.doLiftover(currentVCF, chain, targetGenome.getWorkingFastaFile(), unmappedOutput, output, pct); + if (useBcfTools) + { + LiftoverBcfToolsWrapper wrapper = new LiftoverBcfToolsWrapper(job.getLogger()); + wrapper.doLiftover(currentVCF, chain, sourceGenome.getWorkingFastaFile(), targetGenome.getWorkingFastaFile(), unmappedOutput, output); + } + else + { + LiftoverVcfWrapper wrapper = new LiftoverVcfWrapper(job.getLogger()); + wrapper.doLiftover(currentVCF, chain, targetGenome.getWorkingFastaFile(), unmappedOutput, output, pct); + } Long mapped = null; if (output.exists()) diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java new file mode 100644 index 
000000000..76780d215 --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java @@ -0,0 +1,172 @@ +package org.labkey.sequenceanalysis.run.alignment; + +import org.json.JSONObject; +import org.labkey.api.module.ModuleLoader; +import org.labkey.api.pipeline.PipelineJob; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.pipeline.RecordedAction; +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; +import org.labkey.api.util.FileUtil; +import org.labkey.sequenceanalysis.SequenceAnalysisModule; +import org.labkey.sequenceanalysis.run.variant.DepthOfCoverageHandler; +import org.labkey.sequenceanalysis.util.SequenceUtil; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class ParagraphStep extends AbstractParameterizedOutputHandler +{ + public ParagraphStep() + { + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Paragraph SV Genotyping", "This will run paraGRAPH on one or more BAM files to genotype SVs", null, Arrays.asList( + ToolParameterDescriptor.createExpDataParam("svVCF", "Input VCF", "This is the DataId of the VCF containing the SVs to genotype", "ldk-expdatafield", new JSONObject() + {{ + put("allowBlank", false); + }}, null) + )); + } + + @Override + public boolean canProcess(SequenceOutputFile o) + { + return o.getFile() != null && o.getFile().exists() && SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(o.getFile()); + } + + @Override + public boolean doRunRemote() + { + return true; + } + + @Override + public boolean doRunLocal() + { + return false; + } + + @Override + public SequenceOutputProcessor getProcessor() + { + return new DepthOfCoverageHandler.Processor(); + } + + public static class Processor implements SequenceOutputProcessor + { + @Override + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List inputFiles, JSONObject params, File outputDir, List actions, List outputsToCreate) throws UnsupportedOperationException, PipelineJobException + { + + } + + @Override + public void processFilesRemote(List inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException + { + File inputVCF = ctx.getSequenceSupport().getCachedData(ctx.getParams().getInt("svVCF")); + if (!inputVCF.exists()) + { + throw new PipelineJobException("Unable to find file: " + inputVCF.getPath()); + } + + for (SequenceOutputFile so : inputFiles) + { + List depthArgs = new ArrayList<>(); + depthArgs.add("idxdepth"); + depthArgs.add("-d"); + depthArgs.add(so.getFile().getPath()); + + File coverageFile = new File(ctx.getWorkingDirectory(), "coverage.txt"); + depthArgs.add("-o"); + depthArgs.add(coverageFile.getPath()); + + depthArgs.add("-r"); + depthArgs.add(ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile().getPath()); + + new SimpleScriptWrapper(ctx.getLogger()).execute(depthArgs); + + if (!coverageFile.exists()) + { + 
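// Reaching this branch means idxdepth did not produce the expected coverage manifest; the invocation assembled above is roughly the following command (paths are illustrative, not verified defaults): + //    idxdepth -d /path/to/sample.cram -o coverage.txt -r genome.fasta + // The multigrmpy.py step below then consumes coverage.txt as its per-sample manifest (-m). + 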
throw new PipelineJobException("Missing file: " + coverageFile.getPath()); + } + + // Should produce a simple text file: + // id path depth read length + // TNPRC-IB18 ../IB18.cram 29.77 150 + + List paragraphArgs = new ArrayList<>(); + paragraphArgs.add("multigrmpy.py"); + paragraphArgs.add("--verbose"); + + File paragraphOut = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".paragraph.txt"); + paragraphArgs.add("-o"); + paragraphArgs.add(paragraphOut.getPath()); + + int svVcfId = ctx.getParams().optInt("svVCF"); + if (svVcfId == 0) + { + throw new PipelineJobException("Missing svVCF ID"); + } + + File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId); + if (svVcf == null) + { + throw new PipelineJobException("File not found for ID: " + svVcfId); + } + else if (!svVcf.exists()) + { + throw new PipelineJobException("Missing file: " + svVcf.getPath()); + } + + paragraphArgs.add("-i"); + paragraphArgs.add(svVcf.getPath()); + + paragraphArgs.add("-m"); + paragraphArgs.add(coverageFile.getPath()); + + paragraphArgs.add("-r"); + paragraphArgs.add(ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile().getPath()); + + paragraphArgs.add("--scratch-dir"); + paragraphArgs.add(SequencePipelineService.get().getJavaTempDir()); + + Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); + if (threads != null) + { + paragraphArgs.add("--threads"); + paragraphArgs.add(threads.toString()); + } + + paragraphArgs.add("--logfile"); + paragraphArgs.add(new File(ctx.getWorkingDirectory(), "paragraph.log").getPath()); + + new SimpleScriptWrapper(ctx.getLogger()).execute(paragraphArgs); + + File genotypes = new File(ctx.getWorkingDirectory(), "genotypes.vcf.gz"); + if (!genotypes.exists()) + { + throw new PipelineJobException("Missing file: " + genotypes.getPath()); + } + + try + { + SequenceAnalysisService.get().ensureVcfIndex(genotypes, ctx.getLogger()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + ctx.getFileManager().addSequenceOutput(genotypes, "paraGRAPH Genotypes: " + so.getName(), "paraGRAPH Genoypes", so.getReadset(), null, so.getLibrary_id(), "Input VCF: " + svVcf.getName() + " (" + svVcfId + ")"); + } + } + } +} \ No newline at end of file diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/StarWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/StarWrapper.java index d5a0cd1f2..38c635908 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/StarWrapper.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/StarWrapper.java @@ -53,7 +53,7 @@ public StarWrapper(@Nullable Logger logger) public static class StarAlignmentStep extends AbstractAlignmentPipelineStep implements AlignmentStep { - public StarAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) + public StarAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx) { super(provider, ctx, new StarWrapper(ctx.getLogger())); } diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/LiftoverBcfToolsWrapper.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/LiftoverBcfToolsWrapper.java new file mode 100644 index 000000000..1a7c575ca --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/LiftoverBcfToolsWrapper.java @@ -0,0 +1,95 @@ +package org.labkey.sequenceanalysis.run.util; + +import org.apache.logging.log4j.Logger; +import org.jetbrains.annotations.Nullable; +import 
org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; +import org.labkey.api.sequenceanalysis.run.PicardWrapper; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Created by bimber on 3/24/2016. + */ +public class LiftoverBcfToolsWrapper extends PicardWrapper +{ + public LiftoverBcfToolsWrapper(@Nullable Logger logger) + { + super(logger); + } + + public void doLiftover(File inputVcf, File chainFile, File sourceGenomeFasta, File targetGenomeFasta, @Nullable File rejectVcf, File outputVcf) throws PipelineJobException + { + getLogger().info("Liftover VCF (bcftools): " + inputVcf.getPath()); + + List params = new ArrayList<>(); + params.add(SequencePipelineService.get().getExeForPackage("BCFTOOLS", "bcftools").getPath()); + params.add("+liftover"); + + params.add("--no-version"); + params.add("-Oz"); + + Integer threads = SequencePipelineService.get().getMaxThreads(getLogger()); + if (threads != null) + { + params.add("--threads"); + params.add(threads.toString()); + } + + params.add("-o"); + params.add(outputVcf.getPath()); + + params.add(inputVcf.getPath()); + params.add("--"); + + params.add("-s"); + params.add(sourceGenomeFasta.getPath()); + + params.add("-f"); + params.add(targetGenomeFasta.getPath()); + + params.add("-c"); + params.add(chainFile.getPath()); + + params.add("--write-src"); + params.add("--fix-tags"); + + if (rejectVcf != null) + { + params.add("--reject"); + params.add(rejectVcf.getPath()); + + params.add("--reject-type"); + params.add("z"); + } + + execute(params); + + if (!outputVcf.exists()) + { + throw new PipelineJobException("Output file could not be found: " + outputVcf.getPath()); + } + + if (rejectVcf != null && rejectVcf.exists()) + { + try + { + SequenceAnalysisService.get().ensureVcfIndex(rejectVcf, getLogger()); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + } + } + + @Override + protected String getToolName() + { + return "LiftoverVcf"; + } +} diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/SVAnnotateStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/SVAnnotateStep.java new file mode 100644 index 000000000..cac32155a --- /dev/null +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/SVAnnotateStep.java @@ -0,0 +1,104 @@ +package org.labkey.sequenceanalysis.run.util; + +import htsjdk.samtools.util.Interval; +import org.apache.logging.log4j.Logger; +import org.json.JSONObject; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; +import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep; +import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl; +import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep; +import org.labkey.api.sequenceanalysis.run.AbstractGatk4Wrapper; +import org.labkey.api.util.PageFlowUtil; +import org.labkey.sequenceanalysis.pipeline.SequenceTaskHelper; + +import javax.annotation.Nullable; +import java.io.File; +import java.util.ArrayList; +import 
java.util.Arrays; +import java.util.List; + +public class SVAnnotateStep extends AbstractCommandPipelineStep implements VariantProcessingStep +{ + public static final String GENE_PARAM = "gene_file"; + + public SVAnnotateStep(PipelineStepProvider provider, PipelineContext ctx) + { + super(provider, ctx, new SNAnnotateWrapper(ctx.getLogger())); + } + + public static class Provider extends AbstractVariantProcessingStepProvider + { + public Provider() + { + super("SVAnnotateStep", "GATK SVAnnotate", "GATK", "This will run GATK's SVAnnotate to classify SVs by impact", Arrays.asList( + ToolParameterDescriptor.createExpDataParam(GENE_PARAM, "Gene File", "This is the ID of a GTF or GFF3 file containing genes from this genome.", "sequenceanalysis-genomefileselectorfield", new JSONObject() + {{ + put("extensions", Arrays.asList("gtf")); + put("width", 400); + put("allowBlank", false); + }}, null) + ), PageFlowUtil.set("sequenceanalysis/field/GenomeFileSelectorField.js"), ""); + + } + + @Override + public SVAnnotateStep create(PipelineContext ctx) + { + return new SVAnnotateStep(this, ctx); + } + } + + @Override + public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List intervals) throws PipelineJobException + { + VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl(); + + output.addInput(inputVCF, "Input VCF"); + output.addInput(genome.getWorkingFastaFile(), "Reference Genome"); + + List args = new ArrayList<>(getWrapper().getBaseArgs("SVAnnotate")); + args.add("-V"); + args.add(inputVCF.getPath()); + + if (intervals != null) + { + intervals.forEach(interval -> { + args.add("-L"); + args.add(interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd()); + }); + } + + Integer geneFileId = getProvider().getParameterByName(GENE_PARAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class); + File geneFile = getPipelineCtx().getSequenceSupport().getCachedData(geneFileId); + if (!geneFile.exists()) + { + throw new PipelineJobException("Unable to find file: " + geneFile.getPath()); + } + args.add("--protein-coding-gtf"); + args.add(geneFile.getPath()); + + File outputVcf = new File(outputDirectory, SequenceTaskHelper.getUnzippedBaseName(inputVCF) + ".svannotate.vcf.gz"); + getWrapper().execute(args); + if (!outputVcf.exists()) + { + throw new PipelineJobException("output not found: " + outputVcf); + } + + output.setVcf(outputVcf); + + return output; + } + + public static class SNAnnotateWrapper extends AbstractGatk4Wrapper + { + public SNAnnotateWrapper(@Nullable Logger logger) + { + super(logger); + } + } +} diff --git a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SelectSNVsStep.java b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SelectSNVsStep.java index ea057d26d..a94a218b7 100644 --- a/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SelectSNVsStep.java +++ b/SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/SelectSNVsStep.java @@ -33,7 +33,7 @@ public class SelectSNVsStep extends AbstractCommandPipelineStep provider, PipelineContext ctx) { super(provider, ctx, new SelectVariantsWrapper(ctx.getLogger())); } diff --git a/cluster/resources/web/cluster/htcondor/pipelineConfig.xml b/cluster/resources/web/cluster/htcondor/pipelineConfig.xml index 36f8e6c7a..e32a28080 100644 --- a/cluster/resources/web/cluster/htcondor/pipelineConfig.xml +++ b/cluster/resources/web/cluster/htcondor/pipelineConfig.xml @@ -89,27 +89,6 @@ - - - - - - - - - 
- - - - - - - - - - - - diff --git a/cluster/src/org/labkey/cluster/pipeline/AbstractClusterEngineConfig.java b/cluster/src/org/labkey/cluster/pipeline/AbstractClusterEngineConfig.java index 2ca6bd501..7d6b38fd0 100644 --- a/cluster/src/org/labkey/cluster/pipeline/AbstractClusterEngineConfig.java +++ b/cluster/src/org/labkey/cluster/pipeline/AbstractClusterEngineConfig.java @@ -109,7 +109,7 @@ protected List getFinalJavaOpts(PipelineJob job, RemoteExecutionEngine e return javaOpts; } - public List getJobArgs(File localPipelineDir, File localSerializedJobXmlFile, PipelineJob job, RemoteExecutionEngine engine) + public List getJobArgs(File localPipelineDir, File localSerializedJobXmlFile, PipelineJob job, RemoteExecutionEngine engine) { List ret = new ArrayList<>(); ret.addAll(getFinalJavaOpts(job, engine)); diff --git a/cluster/src/org/labkey/cluster/pipeline/DockerHTCondorExecutionEngineConfig.java b/cluster/src/org/labkey/cluster/pipeline/DockerHTCondorExecutionEngineConfig.java deleted file mode 100644 index 6293818ac..000000000 --- a/cluster/src/org/labkey/cluster/pipeline/DockerHTCondorExecutionEngineConfig.java +++ /dev/null @@ -1,100 +0,0 @@ -package org.labkey.cluster.pipeline; - -import org.labkey.api.pipeline.PipelineJob; -import org.labkey.api.pipeline.RemoteExecutionEngine; - -import java.io.File; -import java.util.ArrayList; -import java.util.List; - -/** - * Created by bimber on 10/31/2015. - */ -public class DockerHTCondorExecutionEngineConfig extends HTCondorExecutionEngineConfig -{ - private String _dockerImageName = "bbimber/discvr-seq"; - private String _configDir = ""; - protected String _activeMqHost = ""; - - public DockerHTCondorExecutionEngineConfig() - { - _remoteExecutable = "docker"; - _labKeyDir = "/labkey"; - } - - @Override - public List getExtraSubmitLines() - { - List ret = super.getExtraSubmitLines(); - ret.add("requirements = (TARGET.IsDockerComputeNode =?= True)"); - - return ret; - } - - @Override - public List getJobArgs(File localPipelineDir, File localSerializedJobFile, PipelineJob job, RemoteExecutionEngine engine) - { - List ret = new ArrayList<>(); - ret.add("run"); - ret.add("--rm=true"); - //TODO: add flag to force rebuild of image - //ret.add("-e"); - //ret.add("ACTIVEMQ_HOST=X"); - //TODO: mount whole file root - ret.add("-v"); - ret.add(getClusterPath(localPipelineDir) + ":/data"); - ret.add("-v"); - ret.add("/mnt/scratch:/work"); - ret.add("-v"); - ret.add(_configDir + ":/labkey/config"); - - ret.add("--add-host=activeMqServer:" + _activeMqHost); - //TODO: add -env for CPUs, memory? 
- ret.add(_dockerImageName); - ret.add("java"); - ret.addAll(getFinalJavaOpts(job, engine)); - - //TODO: support as config param - //ret.add("-Xdebug"); - //ret.add("-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"); - ret.add("-cp"); - ret.add("/labkey/labkeyBootstrap.jar"); - ret.add("org.labkey.bootstrap.ClusterBootstrap"); - ret.add("-modulesdir=" + "/labkey/modules"); - ret.add("-webappdir=" + "/labkey/labkeywebapp"); - ret.add("-configdir=" + "/labkey/config"); - ret.add(getClusterPath(localSerializedJobFile, true)); - - return ret; - } - - public String getDockerImageName() - { - return _dockerImageName; - } - - public void setDockerImageName(String dockerImageName) - { - _dockerImageName = dockerImageName; - } - - public String getConfigDir() - { - return _configDir; - } - - public void setConfigDir(String configDir) - { - _configDir = configDir; - } - - public String getActiveMqHost() - { - return _activeMqHost; - } - - public void setActiveMqHost(String activeMqHost) - { - _activeMqHost = activeMqHost; - } -} diff --git a/singlecell/resources/chunks/RunRiraClassification.R b/singlecell/resources/chunks/RunRiraClassification.R index 106bf8426..986208e64 100644 --- a/singlecell/resources/chunks/RunRiraClassification.R +++ b/singlecell/resources/chunks/RunRiraClassification.R @@ -4,8 +4,9 @@ for (datasetId in names(seuratObjects)) { seuratObj <- RIRA::Classify_ImmuneCells(seuratObj, maxBatchSize = maxBatchSize, retainProbabilityMatrix = retainProbabilityMatrix) seuratObj <- RIRA::Classify_TNK(seuratObj, maxBatchSize = maxBatchSize, retainProbabilityMatrix = retainProbabilityMatrix) - seuratObj$RIRA_TNK_v2.predicted_labels[seuratObj$RIRA_Immune_v2.majority_voting != 'T_NK'] <- 'Other' + seuratObj <- RIRA::Classify_Myeloid(seuratObj, maxBatchSize = maxBatchSize, retainProbabilityMatrix = retainProbabilityMatrix) + saveData(seuratObj, datasetId) } \ No newline at end of file diff --git a/singlecell/resources/chunks/UpdateSeuratPrototype.R b/singlecell/resources/chunks/UpdateSeuratPrototype.R new file mode 100644 index 000000000..e9af18a97 --- /dev/null +++ b/singlecell/resources/chunks/UpdateSeuratPrototype.R @@ -0,0 +1,41 @@ +if (!file.exists('/homeDir/.netrc')) { + print(list.files('/homeDir')) + stop('Unable to find file: /homeDir/.netrc') +} + +invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc')) +Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder) + +for (datasetId in names(seuratObjects)) { + printName(datasetId) + seuratObj <- readSeuratRDS(seuratObjects[[datasetId]]) + + if (reapplyMetadata) { + seuratObj <- Rdiscvr::QueryAndApplyCdnaMetadata(seuratObj) + } + + if (runRira) { + seuratObj <- RIRA::Classify_ImmuneCells(seuratObj, maxBatchSize = maxBatchSize, retainProbabilityMatrix = retainProbabilityMatrix) + seuratObj <- RIRA::Classify_TNK(seuratObj, maxBatchSize = maxBatchSize, retainProbabilityMatrix = retainProbabilityMatrix) + seuratObj <- RIRA::Classify_Myeloid(seuratObj, maxBatchSize = maxBatchSize, retainProbabilityMatrix = retainProbabilityMatrix) + } + + if (applyTCR) { + seuratObj <- Rdiscvr::DownloadAndAppendTcrClonotypes(seuratObj, allowMissing = allowMissingTcr) + } + + if (runTNKClassification) { + # ClassifyTNKByExpression will fail without this, so ignore allowMissingTcr + if (!'HasCDR3Data' %in% names(seuratObj@meta.data)) { + seuratObj <- Rdiscvr::DownloadAndAppendTcrClonotypes(seuratObj) + } + + seuratObj <- Rdiscvr::ClassifyTNKByExpression(seuratObj) + } + + saveData(seuratObj, 
datasetId) + + # Cleanup + rm(seuratObj) + gc() +} \ No newline at end of file diff --git a/singlecell/resources/web/singlecell/panel/LibraryExportPanel.js b/singlecell/resources/web/singlecell/panel/LibraryExportPanel.js index 4978a61db..7bc335470 100644 --- a/singlecell/resources/web/singlecell/panel/LibraryExportPanel.js +++ b/singlecell/resources/web/singlecell/panel/LibraryExportPanel.js @@ -935,17 +935,17 @@ Ext4.define('SingleCell.panel.LibraryExportPanel', { data.push(comment || 'Please QC individually and pool in equal amounts per lane'); } else if (instrument === 'Novogene-New') { - let libraryType = 'Premade-10X Single Cell Transcriptome Library'; + let libraryType = 'UNKNOWN'; switch (suffix) { case 'GEX': - libraryType = 'Premade-10X Single Cell Transcriptome Library'; + libraryType = 'Premade-10X 5 prime Single Cell Transcriptome Library'; break; case 'TCR': libraryType = 'Premade-10X VDJ Library'; break; case 'HTO': case 'CITE': - libraryType = 'Premade-10X Feature Barcode Library'; + libraryType = 'Premade-10X 5 prime Feature Barcode Library'; break; default: console.error('Unknown suffix: ' + suffix); diff --git a/singlecell/src/org/labkey/singlecell/SingleCellModule.java b/singlecell/src/org/labkey/singlecell/SingleCellModule.java index 8dcf1e4cd..18b925e38 100644 --- a/singlecell/src/org/labkey/singlecell/SingleCellModule.java +++ b/singlecell/src/org/labkey/singlecell/SingleCellModule.java @@ -226,6 +226,7 @@ public static void registerPipelineSteps() SequencePipelineService.get().registerPipelineStep(new RunCsCore.Provider()); SequencePipelineService.get().registerPipelineStep(new CustomGSEA.Provider()); SequencePipelineService.get().registerPipelineStep(new StudyMetadata.Provider()); + SequencePipelineService.get().registerPipelineStep(new UpdateSeuratPrototype.Provider()); SequenceAnalysisService.get().registerReadsetListener(new SingleCellReadsetListener()); } diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunSingleR.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunSingleR.java index 7458be6a0..6ca369a04 100644 --- a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunSingleR.java +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/RunSingleR.java @@ -26,7 +26,7 @@ public Provider() SeuratToolParameter.create("nThreads", "# Threads", "If provided, this value will be passed to BiocParallel::MulticoreParam().", "ldk-integerfield", new JSONObject(){{ put("minValue", 0); }}, null), - SeuratToolParameter.create("singleRSpecies", "Tests To Use", "If human, hpca, blueprint, dice, monaco, and immgen will be used. If mouse, MouseRNAseqData will be used.", "ldk-simplecombo", new JSONObject() + SeuratToolParameter.create("singleRSpecies", "Species", "If human, hpca, blueprint, dice, monaco, and immgen will be used. 
If mouse, MouseRNAseqData will be used.", "ldk-simplecombo", new JSONObject() {{ put("multiSelect", false); put("allowBlank", false); diff --git a/singlecell/src/org/labkey/singlecell/pipeline/singlecell/UpdateSeuratPrototype.java b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/UpdateSeuratPrototype.java new file mode 100644 index 000000000..accf676c7 --- /dev/null +++ b/singlecell/src/org/labkey/singlecell/pipeline/singlecell/UpdateSeuratPrototype.java @@ -0,0 +1,146 @@ +package org.labkey.singlecell.pipeline.singlecell; + +import org.apache.commons.io.FileUtils; +import org.json.JSONObject; +import org.labkey.api.pipeline.PipelineJobException; +import org.labkey.api.sequenceanalysis.SequenceOutputFile; +import org.labkey.api.sequenceanalysis.model.Readset; +import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStepProvider; +import org.labkey.api.sequenceanalysis.pipeline.PipelineContext; +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; +import org.labkey.api.singlecell.pipeline.SeuratToolParameter; +import org.labkey.api.singlecell.pipeline.SingleCellStep; +import org.labkey.api.util.FileUtil; +import org.labkey.api.writer.PrintWriters; +import org.labkey.singlecell.analysis.AbstractSingleCellHandler; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.List; + +import static org.labkey.singlecell.analysis.AbstractSingleCellHandler.SEURAT_PROTOTYPE; + +public class UpdateSeuratPrototype extends AbstractRDiscvrStep +{ + public UpdateSeuratPrototype(PipelineContext ctx, UpdateSeuratPrototype.Provider provider) + { + super(provider, ctx); + } + + public static class Provider extends AbstractPipelineStepProvider + { + public Provider() + { + super("UpdateSeuratPrototype", "Update Seurat Prototype", "CellMembrane/Rdiscvr", "This will re-process an existing seurat prototype object and overwrite the original", Arrays.asList( + SeuratToolParameter.create("reapplyMetadata", "Reapply Metadata", "If checked, metadata will be re-applied", "checkbox", null, true), + SeuratToolParameter.create("runRira", "Run RIRA", "If checked, RIRA classification will be re-run", "checkbox", null, true), + SeuratToolParameter.create("runTNKClassification", "Run T/NK Classification", "If checked, T/NK expression-based classification will be re-run", "checkbox", null, true), + SeuratToolParameter.create("applyTCR", "Append TCR Data", "If checked, TCR data will be applied. This will fail if", "checkbox", null, true), + SeuratToolParameter.create("allowMissingTcr", "Allow Missing TCR Data", "Unless checked, an error will be thrown if any sample lacks TCR data", "checkbox", new JSONObject() + {{ + put("checked", false); + }}, false), + SeuratToolParameter.create("keepOriginal", "Keep Copy of Original File", "If checked, the original file will be copied with the file extension '.bk'", "checkbox", new JSONObject() + {{ + put("checked", false); + }}, false) + ), null, null); + } + + @Override + public UpdateSeuratPrototype create(PipelineContext ctx) + { + return new UpdateSeuratPrototype(ctx, this); + } + } + + @Override + public void init(SequenceOutputHandler.JobContext ctx, List inputFiles) throws PipelineJobException + { + if (inputFiles.size() > 1) + { + throw new PipelineJobException("Seurat prototype step expects this job to have a single input. 
Consider selecting the option to run jobs individually instead of merged"); + } + + if (inputFiles.get(0).getReadset() == null) + { + throw new PipelineJobException("Seurat prototype step expects all inputs to have a readset ID."); + } + + if (!SEURAT_PROTOTYPE.equals(inputFiles.get(0).getCategory())) + { + throw new PipelineJobException("Expected the input to be a seurat prototype, found: " + inputFiles.get(0).getCategory()); + } + + if (ctx.getSequenceSupport().getCachedGenomes().size() > 1) + { + throw new PipelineJobException("Expected seurat prototype step to use a single genome"); + } + + Readset rs = ctx.getSequenceSupport().getCachedReadset(inputFiles.get(0).getReadset()); + if (!ctx.getJob().getContainer().getId().equalsIgnoreCase(rs.getContainer())) + { + throw new PipelineJobException("Seurat prototype jobs must be submitted to the same folder as the source readset"); + } + } + + @Override + public Output execute(SequenceOutputHandler.JobContext ctx, List inputObjects, String outputPrefix) throws PipelineJobException + { + Output output = super.execute(ctx, inputObjects, outputPrefix); + + if (ctx.getSequenceSupport().getCachedGenomes().size() > 1) + { + throw new PipelineJobException("Expected seurat prototype step to use a single genome"); + } + + if (output.getSeuratObjects().size() != 1) + { + throw new PipelineJobException("Expected a single output object, found: " + output.getSeuratObjects().size()); + } + + SeuratObjectWrapper inputRDS = inputObjects.get(0); + SeuratObjectWrapper wrapper = output.getSeuratObjects().get(0); + if (wrapper.getReadsetId() == null) + { + throw new PipelineJobException("Missing readset Id: " + wrapper.getDatasetId()); + } + + File toReplace = inputRDS.getSequenceOutputFile().getFile(); + if (!toReplace.exists()) + { + throw new PipelineJobException("Missing file: " + toReplace); + } + try + { + ctx.getLogger().info("Replacing existing prototype: " + toReplace.getPath()); + + if (ctx.getParams().optBoolean("keepOriginal", false)) + { + File backup = new File(toReplace.getPath() + ".orig"); + if (backup.exists()) + { + backup.delete(); + } + + FileUtils.moveFile(toReplace, backup); + } + + if (toReplace.exists()) + { + toReplace.delete(); + } + + FileUtils.moveFile(wrapper.getFile(), toReplace); + } + catch (IOException e) + { + throw new PipelineJobException(e); + } + + return output; + } +} diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java index 6f12b12bb..ba5044721 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleAlignmentStep.java @@ -57,9 +57,6 @@ public static List getToolParameters() put("initialValues", "unstranded"); put("delimiter", ";"); }}, null), - ToolParameterDescriptor.create(ALIGN_OUTPUT, "Create Alignment/Debug Output", "If checked, an alignment-level summary TSV will be created", "checkbox", new JSONObject(){{ - put("checked", true); - }}, true), ToolParameterDescriptor.create(MAX_HITS_TO_REPORT, "Max Hits To Report", "If a given hit has more than this number of references, it is discarded", "ldk-integerfield", new JSONObject(){{ put("minValue", 0); }}, 4) diff --git a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java index f7bbfe34a..9850e99ed 100644 --- a/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java +++ b/singlecell/src/org/labkey/singlecell/run/NimbleHelper.java @@ 
-43,7 +43,6 @@ import java.util.Map; import java.util.stream.Collectors; -import static org.labkey.singlecell.run.NimbleAlignmentStep.ALIGN_OUTPUT; import static org.labkey.singlecell.run.NimbleAlignmentStep.MAX_HITS_TO_REPORT; import static org.labkey.singlecell.run.NimbleAlignmentStep.REF_GENOMES; import static org.labkey.singlecell.run.NimbleAlignmentStep.STRANDEDNESS; @@ -425,14 +424,6 @@ private Map doAlignment(List genomes, List doAlignment(List genomes, List