Skip to content

Commit

Permalink
Implemented loading of pre-trained classifier. Close #13
Browse files Browse the repository at this point in the history
  • Loading branch information
mikessh committed Feb 17, 2015
1 parent 3707346 commit 6f2ed81
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 6 deletions.
32 changes: 27 additions & 5 deletions src/main/java/com/milaboratory/oncomigec/pipeline/MigecCli.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import com.milaboratory.oncomigec.core.io.misc.MigReaderParameters;
import com.milaboratory.oncomigec.core.io.misc.UmiHistogram;
import com.milaboratory.oncomigec.model.classifier.BaseVariantClassifier;
import com.milaboratory.oncomigec.model.classifier.VariantClassifier;
import org.apache.commons.cli.*;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
Expand Down Expand Up @@ -52,7 +54,8 @@ public static void main(String[] args) throws Exception {
OPT_IMPORT_PRESET = "import-preset", OPT_EXPORT_PRESET = "export-preset",
OPT_EXOME_LONG = "exome-mode", OPT_EXOME_SHORT = "E",
OPT_TEST_LONG = "test-mode", OPT_TEST_SHORT = "T",
OPT_APPEND_LONG = "append-mode", OPT_APPEND_SHORT = "A",
OPT_APPEND = "append-mode",
OPT_CLASSIFIER_FILE = "load-classifier",
OPT_BARCODES_LONG = "barcodes", OPT_BARCODES_SHORT = "B",
OPT_NO_BARCODES_LONG = "no-barcodes", OPT_NO_BARCODES_SHORT = "N",
OPT_REFERENCES_LONG = "references", OPT_REFERENCES_SHORT = "R",
Expand Down Expand Up @@ -101,6 +104,14 @@ public static void main(String[] args) throws Exception {
.withDescription("output current parameter preset to the specified XML file")
.withLongOpt(OPT_EXPORT_PRESET)
.create()
)
.addOption(
OptionBuilder
.withArgName("file")
.hasArg(true)
.withDescription("specifies a pre-trained classifier binary file (Weka model)")
.withLongOpt(OPT_CLASSIFIER_FILE)
.create()
)
//
// modes
Expand All @@ -109,8 +120,8 @@ public static void main(String[] args) throws Exception {
.hasArg(false)
.withDescription("append mode, " +
"will not overwrite files if specified")
.withLongOpt(OPT_APPEND_LONG)
.create(OPT_APPEND_SHORT)
.withLongOpt(OPT_APPEND)
.create()
)
.addOption(
OptionBuilder
Expand Down Expand Up @@ -207,6 +218,7 @@ public static void main(String[] args) throws Exception {
// create the parser
CommandLineParser parser = new BasicParser();
MigecPipeline pipeline = null;
VariantClassifier variantClassifier = null;
File outputFolder = null;
double dumpFreq = -1;

Expand Down Expand Up @@ -249,10 +261,15 @@ public static void main(String[] args) throws Exception {
System.exit(0);
}

if (commandLine.hasOption(OPT_CLASSIFIER_FILE)) {
File classifierFile = new File(commandLine.getOptionValue(OPT_CLASSIFIER_FILE));
variantClassifier = BaseVariantClassifier.pretrained(classifierFile);
}

// mode
if (!commandLine.hasOption(OPT_EXOME_SHORT) && !commandLine.hasOption(OPT_TEST_SHORT))
throw new ParseException("No mode has been set");
if (commandLine.hasOption(OPT_APPEND_SHORT))
if (commandLine.hasOption(OPT_APPEND))
appendMode = true;

// barcodes
Expand Down Expand Up @@ -298,7 +315,7 @@ else if (!appendMode)
// =================
// Pipeline creation
// =================
print1("Running MiGEC v" + MigecCli.class.getPackage().getImplementationVersion() +
print1("Running OncoMIGEC v" + MigecCli.class.getPackage().getImplementationVersion() +
" for " +
(paired ?
(commandLine.getOptionValue(OPT_FASTQ1_SHORT) +
Expand Down Expand Up @@ -405,6 +422,11 @@ else if (!appendMode)
return;
}

if (variantClassifier != null) {
// user-defined classifier
pipeline.setVariantClassifier(variantClassifier);
}

runSecondStage(pipeline, outputFolder);

print2("Finished");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public class MigecPipeline {
protected final Map<String, HaplotypeTree> haplotypeTreeBySample;
protected final List<String> sampleNames, skippedSamples;
protected final MigecParameterSet migecParameterSet;
protected final VariantClassifier variantClassifier = BaseVariantClassifier.BUILT_IN; // todo: implement loading from file
protected VariantClassifier variantClassifier;

protected MigecPipeline(MigReader reader,
AssemblerFactory assemblerFactory,
Expand All @@ -60,6 +60,7 @@ protected MigecPipeline(MigReader reader,
assemblerBySample.put(sampleName, assemblerFactory.create());
alignerBySample.put(sampleName, consensusAlignerFactory.create());
}
this.variantClassifier = BaseVariantClassifier.BUILT_IN;
}

public void skipSamples(List<String> samplesToSkip) {
Expand Down Expand Up @@ -216,6 +217,14 @@ public String getHaplotypeTreeFastaOutput(String sampleName) {
return "";
}

/**
 * Returns the variant classifier currently held by this pipeline.
 * <p>
 * The constructor assigns {@code BaseVariantClassifier.BUILT_IN}; the value
 * changes only if {@link #setVariantClassifier(VariantClassifier)} is called.
 *
 * @return the classifier instance stored in this pipeline
 */
public VariantClassifier getVariantClassifier() {
return this.variantClassifier;
}

/**
 * Replaces the variant classifier held by this pipeline.
 * <p>
 * The argument is stored as-is; no validation or null check is performed here.
 *
 * @param variantClassifier the classifier to store in place of the current one
 */
public void setVariantClassifier(VariantClassifier variantClassifier) {
this.variantClassifier = variantClassifier;
}

public String getMinorVariantDump(double threshold) {
String dump = "#SampleName\t" + Variant.HEADER;
for (String sample : sampleNames) {
Expand Down

0 comments on commit 6f2ed81

Please sign in to comment.