Skip to content

Commit

Permalink
Merge pull request #6 from Zymo-Research/bacteriaOnly
Browse files Browse the repository at this point in the history
Appears to be working as desired
  • Loading branch information
michael-weinstein authored Feb 26, 2020
2 parents 3352145 + 4f900dd commit 6c992d9
Show file tree
Hide file tree
Showing 9 changed files with 308 additions and 8 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ WORKINGFOLDER | string | /data/working | Path to working folder for temporary fi
OUTPUTFOLDER | string | /data/output | Folder within the container to write output data
REFERENCEGENOME | string | [folderWithPackage]/reference/zrCommunityStandard.fa | File containing the reference sequence for the standard within the container
FILENAMINGSTANDARD | string | ZYMO | How sequence files will be named (other option is "illumina")
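BACTERIAONLY | bool | FALSE | When true, analyze and calculate the MIQ score using only the bacterial members of the standard (non-bacterial members are left out of the expected composition).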
MODE | string | PE | Running mode. PE for paired-end, SE for single-end, LONG for Nanopore reads. PE and SE use similar alignment but different analysis logic. SE and LONG use the same analysis logic, but different aligners.


Expand Down
52 changes: 44 additions & 8 deletions analyzeStandardReads.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,16 @@ def getApplicationParametersSE():
parameters.addParameter("referenceGenome", str, default=default.referenceGenome, expectedFile=True)
parameters.addParameter("fileNamingStandard", str, default="zymo", externalValidation=True)
parameters.addParameter("referenceDataFile", str, default = default.referenceDataFile, expectedFile=True)
parameters.addParameter("goodMiqExample", str, default = default.goodMiqExample, expectedFile=True)
parameters.addParameter("badMiqExample", str, default=default.badMiqExample, expectedFile=True)
parameters.addParameter("bacteriaOnly", bool, required=False, default=False)
if parameters.bacteriaOnly.value:
print("ANALYZING BACTERIA ONLY")
defaultBadExample = default.badMiqExampleBacteriaOnly
defaultGoodExample = default.goodMiqExampleBacteriaOnly
else:
defaultBadExample = default.badMiqExample
defaultGoodExample = default.goodMiqExample
parameters.addParameter("goodMiqExample", str, default=defaultGoodExample, expectedFile=True)
parameters.addParameter("badMiqExample", str, default=defaultBadExample, expectedFile=True)
if not validSampleName(parameters.sampleName.value):
logger.error("Invalid sample name given: %s" %parameters.sampleName.value)
raise ValueError("Invalid sample name given: %s" %parameters.sampleName.value)
Expand All @@ -48,8 +56,16 @@ def getApplicationParametersPE():
parameters.addParameter("referenceGenome", str, default=default.referenceGenome, expectedFile=True)
parameters.addParameter("fileNamingStandard", str, default="zymo", externalValidation=True)
parameters.addParameter("referenceDataFile", str, default=default.referenceDataFile, expectedFile=True)
parameters.addParameter("goodMiqExample", str, default = default.goodMiqExample, expectedFile=True)
parameters.addParameter("badMiqExample", str, default=default.badMiqExample, expectedFile=True)
parameters.addParameter("bacteriaOnly", bool, required=False, default=False)
if parameters.bacteriaOnly.value:
print("ANALYZING BACTERIA ONLY")
defaultBadExample = default.badMiqExampleBacteriaOnly
defaultGoodExample = default.goodMiqExampleBacteriaOnly
else:
defaultBadExample = default.badMiqExample
defaultGoodExample = default.goodMiqExample
parameters.addParameter("goodMiqExample", str, default=defaultGoodExample, expectedFile=True)
parameters.addParameter("badMiqExample", str, default=defaultBadExample, expectedFile=True)
if not validSampleName(parameters.sampleName.value):
logger.error("Invalid sample name given: %s" %parameters.sampleName.value)
raise ValueError("Invalid sample name given: %s" %parameters.sampleName.value)
Expand All @@ -68,8 +84,16 @@ def getApplicationParametersLong():
parameters.addParameter("referenceGenome", str, default=default.referenceGenome, expectedFile=True)
parameters.addParameter("fileNamingStandard", str, default="zymo", externalValidation=True)
parameters.addParameter("referenceDataFile", str, default=default.referenceDataFileHMW, expectedFile=True)
parameters.addParameter("goodMiqExample", str, default = default.goodMiqExampleHMW, expectedFile=True)
parameters.addParameter("badMiqExample", str, default=default.badMiqExampleHMW, expectedFile=True)
parameters.addParameter("bacteriaOnly", bool, required=False, default=False)
if parameters.bacteriaOnly.value:
print("ANALYZING BACTERIA ONLY")
defaultBadExample = default.badMiqExampleBacteriaOnlyHMW
defaultGoodExample = default.goodMiqExampleBacteriaOnlyHMW
else:
defaultBadExample = default.badMiqExampleHMW
defaultGoodExample = default.goodMiqExampleHMW
parameters.addParameter("goodMiqExample", str, default=defaultGoodExample, expectedFile=True)
parameters.addParameter("badMiqExample", str, default=defaultBadExample, expectedFile=True)
if not validSampleName(parameters.sampleName.value):
logger.error("Invalid sample name given: %s" %parameters.sampleName.value)
raise ValueError("Invalid sample name given: %s" %parameters.sampleName.value)
Expand Down Expand Up @@ -178,8 +202,12 @@ def getReadLengthsFromFastq(path:str):


def analyzeStandardResult(resultTable:dict):
if not parameters.bacteriaOnly.value:
analysisMethod = "Genomic"
else:
analysisMethod = "GenomicBacteriaOnly"
referenceData = miqScoreNGSReadCountPublic.referenceHandler.StandardReference(parameters.referenceDataFile.value)
calculator = miqScoreNGSReadCountPublic.MiqScoreCalculator(referenceData, analysisMethod="Genomic", floor=0)
calculator = miqScoreNGSReadCountPublic.MiqScoreCalculator(referenceData, analysisMethod=analysisMethod, floor=0)
miqScoreResult = calculator.calculateMiq(resultTable, parameters.sampleName.value)
miqScoreResult.makeReadFateChart(readFatePrintNames=readFatePrintNames)
miqScoreResult.makeRadarPlots()
Expand Down Expand Up @@ -219,7 +247,11 @@ def generateReport(result:miqScoreNGSReadCountPublic.MiqScoreData):
templateFile = open(templateFilePath, 'r')
template = templateFile.read()
templateFile.close()
goodMiq, badMiq = miqScoreNGSReadCountPublic.loadExampleData(parameters.goodMiqExample.value, parameters.badMiqExample.value, referenceData, "Genomic")
if not parameters.bacteriaOnly.value:
analysisMethod = "Genomic"
else:
analysisMethod = "GenomicBacteriaOnly"
goodMiq, badMiq = miqScoreNGSReadCountPublic.loadExampleData(parameters.goodMiqExample.value, parameters.badMiqExample.value, referenceData, analysisMethod)
replacementTable = generateReportReplacementTable(result, goodMiq, badMiq, readFatePrintNames=readFatePrintNames)
report = miqScoreNGSReadCountPublic.reportGeneration.generateReport(template, replacementTable)
reportFilePath = os.path.join(parameters.outputFolder.value, "%s.html" % parameters.sampleName.value)
Expand All @@ -243,6 +275,8 @@ def generateReport(result:miqScoreNGSReadCountPublic.MiqScoreData):
parameters = getApplicationParametersSE()
elif applicationMode == "LONG":
parameters = getApplicationParametersLong()
else:
raise RuntimeError("This should never be reached as the application mode checker should have caught an invalid mode. Please investigate how this happened. Application mode: %s" %applicationMode)
logger.debug("Starting analysis")
bamFilePath = os.path.join(parameters.outputFolder.value, "%s.bam" %parameters.sampleName.value)
if applicationMode == "PE":
Expand All @@ -254,6 +288,8 @@ def generateReport(result:miqScoreNGSReadCountPublic.MiqScoreData):
elif applicationMode == "LONG":
miqScoreShotgunPublicSupport.alignmentAnalysis.minimap2.minimapAlign(parameters.reads.value, parameters.workingFolder.value, bamFilePath, parameters.referenceGenome.value)
readTable = miqScoreShotgunPublicSupport.alignmentAnalysis.alignmentAnalysisSE.bamFileProcessor(bamFilePath)
else:
raise RuntimeError("This should never be reached as the application mode checker should have caught an invalid mode. Please investigate how this happened. Application mode: %s" %applicationMode)
standardAnalysisResults = analyzeStandardResult(readTable)
saveResult(standardAnalysisResults)
generateReport(standardAnalysisResults)
Expand Down
4 changes: 4 additions & 0 deletions defaults/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,8 @@
# Default example-result JSON files; every path below is resolved inside referenceFolder.
badMiqExample = os.path.join(referenceFolder, "badMiq.json")
# HMW variants of the good/bad example files.
goodMiqExampleHMW = os.path.join(referenceFolder, "goodMiqHMW.json")
badMiqExampleHMW = os.path.join(referenceFolder, "badMiqHMW.json")
# Bacteria-only variants of the good/bad example files.
goodMiqExampleBacteriaOnly = os.path.join(referenceFolder, "goodMiqBacteriaOnly.json")
badMiqExampleBacteriaOnly = os.path.join(referenceFolder, "badMiqBacteriaOnly.json")
# Bacteria-only variants of the HMW example files.
goodMiqExampleBacteriaOnlyHMW = os.path.join(referenceFolder, "goodMiqBacteriaOnlyHMW.json")
badMiqExampleBacteriaOnlyHMW = os.path.join(referenceFolder, "badMiqBacteriaOnlyHMW.json")
# Log file path inside outputFolder; the timestamp is interpolated into the file name.
# NOTE(review): the "dada2" prefix looks inherited from a different pipeline — confirm it is the intended log name here.
logFile = os.path.join(outputFolder, "dada2.%s.log" %timestamp)
60 changes: 60 additions & 0 deletions reference/badMiqBacteriaOnly.json

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions reference/badMiqBacteriaOnlyHMW.json

Large diffs are not rendered by default.

61 changes: 61 additions & 0 deletions reference/goodMiqBacteriaOnly.json

Large diffs are not rendered by default.

60 changes: 60 additions & 0 deletions reference/goodMiqBacteriaOnlyHMW.json

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions reference/zrCommunityStandard.json
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,16 @@
"scerevisiae": 2,
"cneoformans": 2
},
"GenomicBacteriaOnly": {
"paeruginosa": 12.5,
"ecoli": 12.5,
"senterica": 12.5,
"lfermentum": 12.5,
"efaecalis": 12.5,
"saureus": 12.5,
"lmonocytogenes": 12.5,
"bsubtilis": 12.5
},
"16s": {
"paeruginosa": 4.2,
"ecoli": 10.1,
Expand Down
9 changes: 9 additions & 0 deletions reference/zrCommunityStandardHMW.json
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,15 @@
"lmonocytogenes": 14,
"bsubtilis": 14,
"scerevisiae": 2
},
"GenomicBacteriaOnly": {
"paeruginosa": 14.29,
"ecoli": 14.29,
"senterica": 14.29,
"efaecalis": 14.29,
"saureus": 14.29,
"lmonocytogenes": 14.29,
"bsubtilis": 14.29
}
}
}

0 comments on commit 6c992d9

Please sign in to comment.