Skip to content

Commit

Permalink
working version
Browse files Browse the repository at this point in the history
this is working….4 million reads in 2.5 mins on a 2012 macbook
  • Loading branch information
crmackay committed Oct 18, 2015
1 parent 3b891b2 commit ece73b3
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 41 deletions.
16 changes: 9 additions & 7 deletions cmd/sblade/main.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package main

import (
"fmt"
//"fmt"
bio "github.com/crmackay/gobioinfo"
"github.com/crmackay/switchblade/workers"
"runtime"
Expand All @@ -11,7 +11,7 @@ import (
/*
func work (input chan FASTARead, ouput chan FASTARead) {
//processRead.Process
fmt.Println("Starting Worker:")
//fmt.Println("Starting Worker:")
}*/

func main() {
Expand All @@ -25,13 +25,15 @@ func main() {
runtime.GOMAXPROCS(totalCPUS)

CPUWorkers := totalCPUS - 1
CPUWorkers = 1
// CPUWorkers = 1
//rawReads := make(chan bio.FASTQRead, 1000)

//processedReads := make(chan bio.FASTQRead, 1000)

// TODO set path to input file
inFile := "/Users/christophermackay/Desktop/deepseq_data/pir1/hits-clip/working_data/sample_data/fastq/sblade-test/sample_25000_2.fastq"
// inFile := "/Users/christophermackay/Desktop/deepseq_data/pir1/hits-clip/working_data/sample_data/fastq/sblade-test/sample_10.fastq"

inFile := "/Users/christophermackay/Desktop/deepseq_data/pir1/hits-clip/working_data/sample_data/fastq/sblade-test/2014_06_HITS_CLIP_1_2_3_4+PhiX_NoIndex_L003_R1_001.fastq.gz"

outFile := "/Users/christophermackay/Desktop/deepseq_data/pir1/hits-clip/working_data/sample_data/fastq/sblade-test/output.fastq"
// TODO set path to output file
Expand All @@ -57,17 +59,17 @@ func main() {

// wait until the are all done
numDones := 0
//fmt.Println("numCPUWorkers", CPUWorkers)
for numDones < CPUWorkers {
<-doneSignal
numDones++
wg.Done()
fmt.Println("Number of Dones: ", numDones)
//fmt.Println("Number of Dones: ", numDones)
}
close(finishedReads)
close(outputData)
<-doneSignal
wg.Done()
wg.Wait()

fmt.Println("this is the main function")
//fmt.Println("this is the main function")
}
18 changes: 18 additions & 0 deletions cmd/test-crosscompile/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package main

import (
"fmt"
"runtime"
)

func main() {
// find the number of logical CPUs on the system
totalCPUS := runtime.NumCPU()

fmt.Println("total CPUs", totalCPUS)
// set the golang runtime to use all the available processors
runtime.GOMAXPROCS(totalCPUS)

CPUWorkers := totalCPUS - 1
fmt.Println("CPU workers", CPUWorkers)
}
Binary file added cmd/test-crosscompile/test.linux
Binary file not shown.
12 changes: 6 additions & 6 deletions fiveprime/barcodes.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package fiveprime

import (
"fmt"
// "fmt"
bio "github.com/crmackay/gobioinfo"
config "github.com/crmackay/switchblade/config"
sw "github.com/crmackay/switchblade/types"
Expand Down Expand Up @@ -195,7 +195,7 @@ func inferBarcode(b string, q []uint8) string {
seqProbs := make(map[string]float64)

for k := range config.Barcodes {
fmt.Println(k)
// fmt.Println(k)
probSeq := float64(1)
bcode := []rune(k)
for i, elem := range b {
Expand All @@ -207,7 +207,7 @@ func inferBarcode(b string, q []uint8) string {
}

}
fmt.Println(probSeq)
// fmt.Println(probSeq)
seqProbs[k] = probSeq
}

Expand All @@ -217,15 +217,15 @@ func inferBarcode(b string, q []uint8) string {
denominator += v * config.BarcodeRatios[k]

}
fmt.Println("denominator: ", denominator)
// fmt.Println("denominator: ", denominator)
bcProbs := make(map[string]float64)
for k, v := range seqProbs {
// TODO change this to the correct ration (0.2 and 0.3)
probBarcodeGivenSeq := (v * config.BarcodeRatios[k]) / denominator
bcProbs[k] = probBarcodeGivenSeq

}
fmt.Println(bcProbs)
// fmt.Println(bcProbs)

trueBarcode := maxProbBarcode(bcProbs)

Expand All @@ -248,7 +248,7 @@ func maxProbBarcode(m map[string]float64) string {
}
i++
}
fmt.Println(maxK, maxV)
// fmt.Println(maxK, maxV)
return maxK

}
Expand Down
15 changes: 7 additions & 8 deletions threeprime/prob.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ determine whether the sequence in question is a contaminant or not.
package threeprime

import (
"fmt"
//"fmt"
bio "github.com/crmackay/gobioinfo"
//sw "github.com/crmackay/switchblade"
"github.com/crmackay/switchblade/config"
Expand Down Expand Up @@ -119,12 +119,12 @@ func threePLinkerTest(a bio.PairWiseAlignment, r bio.FASTQRead, testNum int) boo

queryPosition := testStart

fmt.Println(a.ExpandedCIGAR)
// fmt.Println(a.ExpandedCIGAR)
for _, elem := range a.ExpandedCIGAR {
// track position along query string, especially to keep track of indels

//fmt.Println(queryPosition, string(elem))
fmt.Println(queryPosition, string(elem))
// fmt.Println(queryPosition, string(elem))
// fmt.Println(queryPosition, string(elem))
switch {
case string(elem) == "m":
probSeqGivenContam *= probContamGivenMatch(r.PHRED.Decoded[queryPosition])
Expand Down Expand Up @@ -155,7 +155,6 @@ func threePLinkerTest(a bio.PairWiseAlignment, r bio.FASTQRead, testNum int) boo
// in the case of a calculated deletion in the query seqeuence, we
// do not increment queryPosition, since we are effectively in a
// "gap" in the query string
fmt.Println("hello")

case string(elem) == "j":
probSeqGivenContam *= probContamGivenIndel()
Expand Down Expand Up @@ -195,10 +194,10 @@ func threePLinkerTest(a bio.PairWiseAlignment, r bio.FASTQRead, testNum int) boo
((probSeqGivenContam * probContam) + (probSeqGivenChance * (1 - probContam)))

// fmt.Printf("probChanceGivenSeq: %20.400f", probChanceGivenSeq)
fmt.Println("probChanceGivenSeq", probChanceGivenSeq)
// fmt.Println("probChanceGivenSeq", probChanceGivenSeq)
// fmt.Printf("probContamGivenSeq: %20.400f", probContamGivenSeq)
fmt.Println("probContamGivenSeq", probContamGivenSeq)
fmt.Println()
// fmt.Println("probContamGivenSeq", probContamGivenSeq)
// fmt.Println()
// test P(L|S) > P(C|S)

if probContamGivenSeq > probChanceGivenSeq {
Expand Down
14 changes: 6 additions & 8 deletions threeprime/trim.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package threeprime

import (
//"errors"
"fmt"
//"fmt"
//bio "github.com/crmackay/gobioinfo"
sw "github.com/crmackay/switchblade/types"
//data "github.com/crmackay/switchblade/research"
Expand Down Expand Up @@ -59,19 +59,17 @@ func Process3p(r *sw.Read) {

still3pContam := true

pos3p := len(r.Sequence) - 1
fmt.Println(pos3p)
fmt.Println(len(config.Linker5p))
// fmt.Println(pos3p)
// fmt.Println(len(config.Linker5p))
for still3pContam {
// if len(config.Linker5p) < pos3p {
// still3pContam, pos3p = next3pAlign(r)
// fmt.Println(string(r.Sequence))
// fmt.Println(still3pContam, pos3p)
// }

still3pContam, pos3p = next3pAlign(r)
fmt.Println(string(r.Sequence))
fmt.Println(still3pContam, pos3p)
still3pContam, _ = next3pAlign(r)
// fmt.Println(string(r.Sequence))
// fmt.Println(still3pContam, pos3p)
}

trim3p(r)
Expand Down
22 changes: 10 additions & 12 deletions workers/workers.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package workers

import (
"encoding/csv"
"fmt"
//"fmt"
bio "github.com/crmackay/gobioinfo"
"github.com/crmackay/switchblade/fiveprime"
"github.com/crmackay/switchblade/research"
Expand All @@ -18,7 +18,7 @@ import (
// and puts some meta data about the trimming work that was completed in a
//tab-deliminted format into a
func Trimmer(rawReads chan *bio.FASTQRead, finishedReads chan<- *bio.FASTQRead, outputData chan<- []string, doneChan chan bool) {
fmt.Println("starting worker")
//fmt.Println("starting worker")
for rawRead := range rawReads {

inProcessRead := types.NewRead(rawRead)
Expand All @@ -29,9 +29,9 @@ func Trimmer(rawReads chan *bio.FASTQRead, finishedReads chan<- *bio.FASTQRead,

data := research.GetDataCSV(inProcessRead)

fmt.Println(inProcessRead.DNASequence.Sequence)
fmt.Println(inProcessRead.End5p)
fmt.Println(inProcessRead.End3p)
//fmt.Println(inProcessRead.DNASequence.Sequence)
//fmt.Println(inProcessRead.End5p)
//fmt.Println(inProcessRead.End3p)
var finishedRead *bio.FASTQRead
if inProcessRead.End5p < inProcessRead.End3p {
finishedRead = &bio.FASTQRead{
Expand All @@ -47,7 +47,7 @@ func Trimmer(rawReads chan *bio.FASTQRead, finishedReads chan<- *bio.FASTQRead,
Misc: inProcessRead.Barcode + ":" + inProcessRead.DegenBases,
}
} else {
fmt.Println(string(inProcessRead.DNASequence.Sequence))
//fmt.Println(string(inProcessRead.DNASequence.Sequence))
finishedRead = &bio.FASTQRead{
DNASequence: bio.DNASequence{
Sequence: inProcessRead.DNASequence.Sequence[inProcessRead.End5p:inProcessRead.End5p],
Expand All @@ -68,7 +68,7 @@ func Trimmer(rawReads chan *bio.FASTQRead, finishedReads chan<- *bio.FASTQRead,

}
doneChan <- true
fmt.Println("Sending Done")
//fmt.Println("Sending Done")
return
}

Expand All @@ -86,7 +86,7 @@ func ReadWriter(inFile string, outFile string, rawReads chan *bio.FASTQRead, fin

csvfile, err := os.Create(dataPath + ".csv")
if err != nil {
fmt.Println("Error:", err)
//fmt.Println("Error:", err)
return
}
defer csvfile.Close()
Expand Down Expand Up @@ -125,12 +125,10 @@ func ReadWriter(inFile string, outFile string, rawReads chan *bio.FASTQRead, fin

for read := range finishedReads {
doneWriter.Write(*read)

}
for data := range outputData {
data := <-outputData
dataWriter.WriteAll([][]string{data})

}
doneSignal <- true

return
}

0 comments on commit ece73b3

Please sign in to comment.