Skip to content

Commit

Permalink
Remove stat
Browse files Browse the repository at this point in the history
Misc

Misc
  • Loading branch information
cmdcolin committed Nov 8, 2024
1 parent 6d4c522 commit a42fb33
Show file tree
Hide file tree
Showing 10 changed files with 10,215 additions and 1,482,271 deletions.
3 changes: 3 additions & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
test/data
dist
esm
20 changes: 2 additions & 18 deletions src/cramFile/container/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ export default class CramContainer {

// if there are no records in the container, there will be no compression
// header
- if (!containerHeader?.numRecords) {
- return null
+ if (!containerHeader.numRecords) {
+ return undefined
}
const { majorVersion } = await this.file.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
Expand Down Expand Up @@ -52,9 +52,6 @@ export default class CramContainer {

async getFirstBlock() {
const containerHeader = await this.getHeader()
if (!containerHeader) {
return undefined
}
return this.file.readBlock(containerHeader._endPosition)
}

Expand All @@ -79,26 +76,13 @@ export default class CramContainer {
const { majorVersion } = await this.file.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
const { cramContainerHeader1, cramContainerHeader2 } = sectionParsers
const { size: fileSize } = await this.file.stat()

if (position >= fileSize) {
console.warn(`pos:${position}>=fileSize:${fileSize} in cram container`)
return undefined
}

// parse the container header. do it in 2 pieces because you cannot tell
// how much to buffer until you read numLandmarks
const bytes1 = Buffer.allocUnsafe(cramContainerHeader1.maxLength)
await this.file.read(bytes1, 0, cramContainerHeader1.maxLength, position)
const header1 = parseItem(bytes1, cramContainerHeader1.parser)
const numLandmarksSize = itf8Size(header1.numLandmarks)
if (position + header1.length >= fileSize) {
// header indicates container goes beyond fileSize
console.warn(
`container at ${position} is beyond fileSize:${fileSize}, skipping`,
)
return undefined
}
const bytes2 = Buffer.allocUnsafe(
cramContainerHeader2.maxLength(header1.numLandmarks),
)
Expand Down
52 changes: 10 additions & 42 deletions src/cramFile/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,6 @@ export default class CramFile {
return this.file.read(buffer, offset, length, position)
}

// can just stat this object like a filehandle
stat() {
return this.file.stat()
}

// memoized
async getDefinition() {
const { maxLength, parser } = cramFileDefinition()
Expand Down Expand Up @@ -153,25 +148,13 @@ export default class CramFile {
const { majorVersion } = await this.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
let position = sectionParsers.cramFileDefinition.maxLength
const { size: fileSize } = await this.file.stat()
const { cramContainerHeader1 } = sectionParsers

// skip with a series of reads to the proper container
let currentContainer: CramContainer | undefined
for (let i = 0; i <= containerNumber; i++) {
// if we are about to go off the end of the file
// and have not found that container, it does not exist
if (position + cramContainerHeader1.maxLength + 8 >= fileSize) {
return undefined
}

currentContainer = this.getContainerAtPosition(position)
const currentHeader = await currentContainer.getHeader()
if (!currentHeader) {
throw new CramMalformedError(
`container ${containerNumber} not found in file`,
)
}

// if this is the first container, read all the blocks in the container
// to determine its length, because we cannot trust the container
// header's given length due to a bug somewhere in htslib
Expand All @@ -185,8 +168,7 @@ export default class CramFile {
position = block._endPosition
}
} else {
- // otherwise, just traverse to the next container using the container's
- // length
+ // otherwise, just traverse to the next container using the container's length
position += currentHeader._size + currentHeader.length
}
}
Expand All @@ -213,20 +195,18 @@ export default class CramFile {
/**
* @returns {Promise[number]} the number of containers in the file
*/
- async containerCount(): Promise<number | undefined> {
+ async containerCount() {
const { majorVersion } = await this.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
const { size: fileSize } = await this.file.stat()
const { cramContainerHeader1 } = sectionParsers

let containerCount = 0
let position = sectionParsers.cramFileDefinition.maxLength
while (position + cramContainerHeader1.maxLength + 8 < fileSize) {
let i = 0
while (i++ < 5000) {
const currentHeader =
await this.getContainerAtPosition(position).getHeader()
if (!currentHeader) {
break
}
// console.log({ currentHeader }, currentHeader._endPosition)

// if this is the first container, read all the blocks in the container,
// because we cannot trust the container header's given length due to a
// bug somewhere in htslib
Expand All @@ -244,6 +224,9 @@ export default class CramFile {
// length
position += currentHeader._size + currentHeader.length
}
if (currentHeader.refSeqId === -1) {
return containerCount
}
containerCount += 1
}

Expand All @@ -258,11 +241,6 @@ export default class CramFile {
const { majorVersion } = await this.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
const { cramBlockHeader } = sectionParsers
const { size: fileSize } = await this.file.stat()

if (position + cramBlockHeader.maxLength >= fileSize) {
return undefined
}

const buffer = Buffer.allocUnsafe(cramBlockHeader.maxLength)
await this.file.read(buffer, 0, cramBlockHeader.maxLength, position)
Expand All @@ -282,10 +260,6 @@ export default class CramFile {
if (preReadBuffer) {
buffer = preReadBuffer
} else {
const { size: fileSize } = await this.file.stat()
if (position + size >= fileSize) {
return undefined
}
buffer = Buffer.allocUnsafe(size)
await this.file.read(buffer, 0, size, position)
}
Expand Down Expand Up @@ -348,9 +322,6 @@ export default class CramFile {
const { majorVersion } = await this.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
const blockHeader = await this.readBlockHeader(position)
if (blockHeader === undefined) {
return undefined
}
const blockContentPosition = blockHeader._endPosition

const uncompressedData = Buffer.allocUnsafe(blockHeader.uncompressedSize)
Expand Down Expand Up @@ -391,9 +362,6 @@ export default class CramFile {
sectionParsers.cramBlockCrc32,
blockContentPosition + blockHeader.compressedSize,
)
if (crc === undefined) {
return undefined
}
block.crc32 = crc.crc32

// check the block data crc32
Expand Down
6 changes: 3 additions & 3 deletions src/cramFile/record.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ export interface ReadFeature {
function decodeReadSequence(cramRecord: CramRecord, refRegion: RefRegion) {
// if it has no length, it has no sequence
if (!cramRecord.lengthOnRef && !cramRecord.readLength) {
- return null
+ return undefined
}

if (cramRecord.isUnknownBases()) {
- return null
+ return undefined
}

// remember: all coordinates are 1-based closed
Expand Down Expand Up @@ -437,7 +437,7 @@ export default class CramRecord {
}
return tmp.join('')
}
- return null
+ return undefined
}

/**
Expand Down
6 changes: 5 additions & 1 deletion src/cramFile/sectionParsers.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import type { Buffer } from 'buffer'
import { TupleOf } from '../typescript'
import { parseItf8, parseLtf8 } from './util'
import { DataSeriesEncodingMap } from './codecs/dataSeriesTypes'
Expand Down Expand Up @@ -92,7 +93,10 @@ export function cramBlockHeader() {
},
}
}
- return { parser, maxLength: 17 }
+ return {
+ parser,
+ maxLength: 17,
+ }
}

export function cramBlockCrc32() {
Expand Down
3 changes: 0 additions & 3 deletions src/cramFile/slice/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,6 @@ export default class CramSlice {
const { majorVersion } = await this.file.getDefinition()
const sectionParsers = getSectionParsers(majorVersion)
const containerHeader = await this.container.getHeader()
if (!containerHeader) {
throw new Error('no container header detected')
}

const header = await this.file.readBlock(
containerHeader._endPosition + this.containerPosition,
Expand Down
Loading

0 comments on commit a42fb33

Please sign in to comment.