Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge discvr-23.11 to discvr-24.3 #278

Merged
merged 11 commits into from
Jun 13, 2024
Merged
24 changes: 0 additions & 24 deletions SequenceAnalysis/pipeline_code/sequence_tools_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -335,30 +335,6 @@ else
fi


#
# BisSNP
#
# Downloads the BisSNP 0.82.2 jar into $LKSRC_DIR and installs it as
# $LKTOOLS_DIR/BisSNP.jar. The download is skipped when the jar is already
# installed, unless FORCE_REINSTALL is set to a non-empty value.
#
echo ""
echo ""
echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
echo "Install BisSNP"
echo ""
# Paths are quoted so directories containing spaces or glob characters are handled correctly
cd "$LKSRC_DIR"

if [[ ! -e "${LKTOOLS_DIR}/BisSNP.jar" || -n "$FORCE_REINSTALL" ]];
then
    echo "Cleaning up previous installs"
    # The glob is intentional: remove any previously downloaded BisSNP artifacts in the source dir
    rm -Rf BisSNP*
    rm -Rf "${LKTOOLS_DIR}/BisSNP.jar"

    # WGET_OPTS is deliberately left unquoted so multiple options are split into separate arguments
    wget $WGET_OPTS https://downloads.sourceforge.net/project/bissnp/BisSNP-0.82.2/BisSNP-0.82.2.jar

    install ./BisSNP-0.82.2.jar "${LKTOOLS_DIR}/BisSNP.jar"
else
    echo "Already installed"
fi


#
#mosaik
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,19 @@ Ext4.define('SequenceAnalysis.window.LiftoverWindow', {
maxValue: 1.0,
value: 0.95,
fieldLabel: 'Min Percent Match',
helpPopup: 'In order to lift to the target genome, the feature must have at least this percent match. Lower this value to be more permissive; however, this risks incorrect liftovers',
helpPopup: 'In order to lift to the target genome, the feature must have at least this percent match. Lower this value to be more permissive; however, this risks incorrect liftovers. This is ignored if using bcftools.',
itemId: 'pctField'
},{
xtype: 'checkbox',
itemId: 'dropGenotypes',
checked: false,
helpPopup: 'If checked, no genotypes will be written to the output file (applies to VCFs only). This can be useful (and necessary) when lifting VCFs with extremely high sample number.',
fieldLabel: 'Drop Genotypes'
},{
xtype: 'checkbox',
itemId: 'useBcfTools',
checked: false,
fieldLabel: 'Use bcftools'
}].concat(SequenceAnalysis.window.OutputHandlerWindow.getCfgForToolParameters(this.toolParameters)),
buttons: [{
text: 'Submit',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
import org.labkey.sequenceanalysis.run.alignment.BowtieWrapper;
import org.labkey.sequenceanalysis.run.alignment.GSnapWrapper;
import org.labkey.sequenceanalysis.run.alignment.MosaikWrapper;
import org.labkey.sequenceanalysis.run.alignment.ParagraphStep;
import org.labkey.sequenceanalysis.run.alignment.Pbmm2Wrapper;
import org.labkey.sequenceanalysis.run.alignment.StarWrapper;
import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper;
Expand Down Expand Up @@ -113,6 +114,7 @@
import org.labkey.sequenceanalysis.run.util.FastqcRunner;
import org.labkey.sequenceanalysis.run.util.GenomicsDBAppendHandler;
import org.labkey.sequenceanalysis.run.util.GenomicsDBImportHandler;
import org.labkey.sequenceanalysis.run.util.SVAnnotateStep;
import org.labkey.sequenceanalysis.run.variant.*;
import org.labkey.sequenceanalysis.util.Barcoder;
import org.labkey.sequenceanalysis.util.ChainFileValidator;
Expand Down Expand Up @@ -300,6 +302,7 @@ public static void registerPipelineSteps()
SequencePipelineService.get().registerPipelineStep(new MendelianViolationReportStep.Provider());
SequencePipelineService.get().registerPipelineStep(new SummarizeGenotypeQualityStep.Provider());
SequencePipelineService.get().registerPipelineStep(new BcftoolsFillTagsStep.Provider());
SequencePipelineService.get().registerPipelineStep(new SVAnnotateStep.Provider());

//handlers
SequenceAnalysisService.get().registerFileHandler(new LiftoverHandler());
Expand Down Expand Up @@ -334,6 +337,7 @@ public static void registerPipelineSteps()
SequenceAnalysisService.get().registerFileHandler(new PbsvJointCallingHandler());
SequenceAnalysisService.get().registerFileHandler(new DeepVariantHandler());
SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler());
SequenceAnalysisService.get().registerFileHandler(new ParagraphStep());

SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler());
SequenceAnalysisService.get().registerReadsetHandler(new RestoreSraDataHandler());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper;
import org.labkey.api.util.FileType;
import org.labkey.api.util.FileUtil;
import org.labkey.api.view.ActionURL;
import org.labkey.api.writer.PrintWriters;
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler;
import org.labkey.sequenceanalysis.run.util.LiftoverBcfToolsWrapper;
import org.labkey.sequenceanalysis.run.util.LiftoverVcfWrapper;
import org.labkey.sequenceanalysis.util.SequenceUtil;

Expand All @@ -49,7 +51,7 @@
/**
* Created by bimber on 8/26/2014.
*/
public class LiftoverHandler implements SequenceOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
public class LiftoverHandler implements SequenceOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>, VariantProcessingStep.SupportsScatterGather
{
private final FileType _bedFileType = new FileType(".bed", false);
//private FileType _gffFileType = new FileType("gff", false);
Expand All @@ -60,6 +62,12 @@ public LiftoverHandler()

}

/**
 * Indicates that merged output should be sorted.
 * NOTE(review): presumably this is consulted by the scatter/gather machinery
 * (VariantProcessingStep.SupportsScatterGather) after per-job outputs are merged,
 * since lifted records may no longer be in coordinate order -- confirm against the interface.
 *
 * @return always true
 */
@Override
public boolean doSortAfterMerge()
{
return true;
}

@Override
public String getName()
{
Expand Down Expand Up @@ -167,8 +175,9 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
JSONObject params = ctx.getParams();

boolean dropGenotypes = params.optBoolean("dropGenotypes", false);
boolean useBcfTools = params.optBoolean("useBcfTools", false);

Integer chainFileId = params.getInt("chainFileId");
int chainFileId = params.getInt("chainFileId");
File chainFile = ctx.getSequenceSupport().getCachedData(chainFileId);
int targetGenomeId = params.getInt("targetGenomeId");

Expand Down Expand Up @@ -217,7 +226,7 @@ else if (_vcfFileType.isType(f.getFile()))
{
ReferenceGenome targetGenome = ctx.getSequenceSupport().getCachedGenome(targetGenomeId);
ReferenceGenome sourceGenome = ctx.getSequenceSupport().getCachedGenome(f.getLibrary_id());
liftOverVcf(ctx, targetGenome, sourceGenome, chainFile, f.getFile(), lifted, unmappedOutput, job, pct, dropGenotypes);
liftOverVcf(ctx, targetGenome, sourceGenome, chainFile, f.getFile(), lifted, unmappedOutput, job, pct, dropGenotypes, useBcfTools);
}
}
catch (Exception e)
Expand Down Expand Up @@ -293,7 +302,7 @@ else if (!SequenceUtil.hasLineCount(unmappedOutput))
}
}

public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceGenome sourceGenome, File chain, File input, File output, @Nullable File unmappedOutput, PipelineJob job, double pct, boolean dropGenotypes) throws IOException, PipelineJobException
public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceGenome sourceGenome, File chain, File input, File output, @Nullable File unmappedOutput, PipelineJob job, double pct, boolean dropGenotypes, boolean useBcfTools) throws IOException, PipelineJobException
{
File currentVCF = input;
if (dropGenotypes)
Expand All @@ -315,8 +324,16 @@ public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceG
ctx.getFileManager().addIntermediateFile(new File(outputFile.getPath() + ".tbi"));
}

LiftoverVcfWrapper wrapper = new LiftoverVcfWrapper(job.getLogger());
wrapper.doLiftover(currentVCF, chain, targetGenome.getWorkingFastaFile(), unmappedOutput, output, pct);
if (useBcfTools)
{
LiftoverBcfToolsWrapper wrapper = new LiftoverBcfToolsWrapper(job.getLogger());
wrapper.doLiftover(currentVCF, chain, sourceGenome.getWorkingFastaFile(), targetGenome.getWorkingFastaFile(), unmappedOutput, output);
}
else
{
LiftoverVcfWrapper wrapper = new LiftoverVcfWrapper(job.getLogger());
wrapper.doLiftover(currentVCF, chain, targetGenome.getWorkingFastaFile(), unmappedOutput, output, pct);
}

Long mapped = null;
if (output.exists())
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
package org.labkey.sequenceanalysis.run.alignment;

import org.json.JSONObject;
import org.labkey.api.module.ModuleLoader;
import org.labkey.api.pipeline.PipelineJob;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.pipeline.RecordedAction;
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
import org.labkey.api.util.FileUtil;
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
import org.labkey.sequenceanalysis.run.variant.DepthOfCoverageHandler;
import org.labkey.sequenceanalysis.util.SequenceUtil;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Output handler that genotypes structural variants (SVs) in BAM/CRAM files using paraGRAPH.
 * <p>
 * For each selected alignment file, this runs <code>idxdepth</code> followed by <code>multigrmpy.py</code>
 * against the VCF supplied through the "svVCF" parameter, then registers the resulting VCF as a sequence output.
 * All work is performed remotely (see {@link #doRunRemote()} / {@link #doRunLocal()}).
 */
public class ParagraphStep extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
{
    public ParagraphStep()
    {
        super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Paragraph SV Genotyping", "This will run paraGRAPH on one or more BAM files to genotype SVs", null, Arrays.asList(
                ToolParameterDescriptor.createExpDataParam("svVCF", "Input VCF", "This is the DataId of the VCF containing the SVs to genotype", "ldk-expdatafield", new JSONObject()
                {{
                    put("allowBlank", false);
                }}, null)
        ));
    }

    /**
     * Accepts any output file that exists on disk and is a BAM or CRAM.
     */
    @Override
    public boolean canProcess(SequenceOutputFile o)
    {
        return o.getFile() != null && o.getFile().exists() && SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(o.getFile());
    }

    @Override
    public boolean doRunRemote()
    {
        return true;
    }

    @Override
    public boolean doRunLocal()
    {
        return false;
    }

    /**
     * Returns this handler's own {@link Processor}.
     * The previous implementation returned DepthOfCoverageHandler.Processor, so the paraGRAPH logic below was never executed.
     */
    @Override
    public SequenceOutputProcessor getProcessor()
    {
        return new Processor();
    }

    /**
     * Runs idxdepth and multigrmpy.py for each input alignment file.
     */
    public static class Processor implements SequenceOutputProcessor
    {
        @Override
        public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
        {
            // Intentionally empty: nothing is done on the webserver for this handler
        }

        /**
         * Genotypes the SVs from the "svVCF" parameter against each input file.
         *
         * @param inputFiles the BAM/CRAM files to genotype
         * @param ctx the job context, providing parameters, cached data/genomes, the working directory and the file manager
         * @throws PipelineJobException if the SV VCF cannot be resolved, or if an expected tool output is missing
         */
        @Override
        public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
        {
            // The SV VCF is the same for every input, so resolve and validate it once, before any tool is run
            int svVcfId = ctx.getParams().optInt("svVCF");
            if (svVcfId == 0)
            {
                throw new PipelineJobException("Missing svVCF ID");
            }

            File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId);
            if (svVcf == null)
            {
                throw new PipelineJobException("File not found for ID: " + svVcfId);
            }
            else if (!svVcf.exists())
            {
                throw new PipelineJobException("Missing file: " + svVcf.getPath());
            }

            // NOTE(review): coverage.txt, paragraph.log and genotypes.vcf.gz use fixed names in the working directory,
            // so when more than one input file is processed each iteration appears to reuse the same paths -- confirm this is intended.
            for (SequenceOutputFile so : inputFiles)
            {
                File referenceFasta = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile();

                List<String> depthArgs = new ArrayList<>();
                depthArgs.add("idxdepth");
                depthArgs.add("-d");
                depthArgs.add(so.getFile().getPath());

                File coverageFile = new File(ctx.getWorkingDirectory(), "coverage.txt");
                depthArgs.add("-o");
                depthArgs.add(coverageFile.getPath());

                depthArgs.add("-r");
                depthArgs.add(referenceFasta.getPath());

                new SimpleScriptWrapper(ctx.getLogger()).execute(depthArgs);

                if (!coverageFile.exists())
                {
                    throw new PipelineJobException("Missing file: " + coverageFile.getPath());
                }

                // Should produce a simple text file:
                // id path depth read length
                // TNPRC-IB18 ../IB18.cram 29.77 150

                List<String> paragraphArgs = new ArrayList<>();
                paragraphArgs.add("multigrmpy.py");
                paragraphArgs.add("--verbose");

                File paragraphOut = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".paragraph.txt");
                paragraphArgs.add("-o");
                paragraphArgs.add(paragraphOut.getPath());

                paragraphArgs.add("-i");
                paragraphArgs.add(svVcf.getPath());

                paragraphArgs.add("-m");
                paragraphArgs.add(coverageFile.getPath());

                paragraphArgs.add("-r");
                paragraphArgs.add(referenceFasta.getPath());

                paragraphArgs.add("--scratch-dir");
                paragraphArgs.add(SequencePipelineService.get().getJavaTempDir());

                // Only pass --threads when a thread limit is configured
                Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
                if (threads != null)
                {
                    paragraphArgs.add("--threads");
                    paragraphArgs.add(threads.toString());
                }

                paragraphArgs.add("--logfile");
                paragraphArgs.add(new File(ctx.getWorkingDirectory(), "paragraph.log").getPath());

                new SimpleScriptWrapper(ctx.getLogger()).execute(paragraphArgs);

                // NOTE(review): the tool is given paragraphOut via -o, yet the result is expected directly in the
                // working directory -- verify where multigrmpy.py actually writes genotypes.vcf.gz.
                File genotypes = new File(ctx.getWorkingDirectory(), "genotypes.vcf.gz");
                if (!genotypes.exists())
                {
                    throw new PipelineJobException("Missing file: " + genotypes.getPath());
                }

                try
                {
                    SequenceAnalysisService.get().ensureVcfIndex(genotypes, ctx.getLogger());
                }
                catch (IOException e)
                {
                    throw new PipelineJobException(e);
                }

                // NOTE(review): the category string below reads "Genoypes" (likely a typo for "Genotypes"); left as-is
                // because it is a runtime value that may be used for filtering/grouping elsewhere.
                ctx.getFileManager().addSequenceOutput(genotypes, "paraGRAPH Genotypes: " + so.getName(), "paraGRAPH Genoypes", so.getReadset(), null, so.getLibrary_id(), "Input VCF: " + svVcf.getName() + " (" + svVcfId + ")");
            }
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public StarWrapper(@Nullable Logger logger)

public static class StarAlignmentStep extends AbstractAlignmentPipelineStep<StarWrapper> implements AlignmentStep
{
public StarAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx)
public StarAlignmentStep(AlignmentStepProvider<?> provider, PipelineContext ctx)
{
super(provider, ctx, new StarWrapper(ctx.getLogger()));
}
Expand Down
Loading