Skip to content

Commit

Permalink
All sequence data lines are now the same length in exported GFF3
Browse files Browse the repository at this point in the history
  • Loading branch information
kyostiebi committed Sep 25, 2023
1 parent fefa9af commit dd856e1
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 6 deletions.
5 changes: 4 additions & 1 deletion packages/apollo-collaboration-server/.development.env
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,10 @@ MICROSOFT_CLIENT_SECRET=~Gr8Q~h6RTU7SMC-fjNxXy_~nabTD-ME_rFyLa.M
LOG_LEVELS=error,warn,log,debug

# Reference sequence chunk size, defaults to 262144 (256 KiB)
# CHUNK_SIZE=5000
# CHUNK_SIZE=500

# Sequence line length in exported GFF3 file
SEQ_LINE_LEN=90

# Default new user role, possible values are admin, user, readOnly, and none
# Defaults to none
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,12 @@ export class FeaturesService {
const refSeqs = await this.refSeqModel.find({ assembly }).exec()
const refSeqIds = refSeqs.map((refSeq) => refSeq._id)
let printFasta = true

let printSeqName = true
const { SEQ_LINE_LEN } = process.env
if (!SEQ_LINE_LEN) {
throw new NotFoundException()
}
const seqLineLenght = Number(SEQ_LINE_LEN)
const headerStream = new Readable({ objectMode: true })
const sequenceStream = new Readable({ objectMode: true })

Expand All @@ -229,21 +234,51 @@ export class FeaturesService {
headerStream.push(
`##sequence-region ${refSeqDoc.name} 1 ${refSeqDoc.length}\n`,
)
let remainingLastLine = ''
if (printFasta) {
sequenceStream.push('##FASTA\n')
}
for await (const doc of this.refSeqChunksModel
.find({ refSeq: refSeqDoc.id })
.sort({ n: 1 })
.cursor()) {
if (printFasta) {
sequenceStream.push('##FASTA\n')
if (printSeqName) {
refSeqDoc.description
? sequenceStream.push(
`>${refSeqDoc.name} ${refSeqDoc.description}\n`,
)
: sequenceStream.push(`>${refSeqDoc.name}\n`)
}
sequenceStream.push(`${this.splitStringIntoChunks(doc.sequence, 60)}\n`)
printFasta = false
let seqLine = doc.sequence
// If the previous chunk's last line was shorter than "seqLineLenght" characters, take the leading characters of this chunk and append them to that leftover to make one full line
if (remainingLastLine.length > 0) {
const tmp1: string = doc.sequence.slice(
0,
seqLineLenght - remainingLastLine.length,
)
sequenceStream.push(`${remainingLastLine}${tmp1}\n`)
seqLine = doc.sequence.slice(seqLineLenght - remainingLastLine.length)
remainingLastLine = ''
}
const seqData = this.splitStringIntoChunks(seqLine, seqLineLenght)
const lines: string[] = seqData.split('\n')
const lastLine: string = lines.at(-1) ?? ''
if (lastLine.length === seqLineLenght) {
sequenceStream.push(`${seqData}\n`)
} else {
for (let i = 0; i < lines.length - 1; i++) {
sequenceStream.push(`${lines[i]}\n`)
}
remainingLastLine = lastLine
}
printSeqName = false
}
if (remainingLastLine.length > 0) {
sequenceStream.push(`${remainingLastLine}\n`)
remainingLastLine = ''
}
printFasta = false
printSeqName = true
}
headerStream.push(null)
sequenceStream.push(null)
Expand Down

0 comments on commit dd856e1

Please sign in to comment.