diff --git a/README.md b/README.md
index cea4b1d..b127d17 100644
--- a/README.md
+++ b/README.md
@@ -107,7 +107,7 @@ const bamHeader: BamHeader = await reader.getHeaderData();
 * Run `yarn build` to build.
 
 ### Testing
-You must have Node.js and docker-compose installed.
+You must have [Node.js](https://www.npmjs.com/get-npm) and [docker-compose](https://www.docker.com/products/docker-desktop) installed.
 * `scripts/test.sh` to run automated tests.
 * `scripts/run-dependencies.sh` to stand up a web server to host static sample BigWig and BigBed files. `scripts/test.sh` runs this for you.
 * `scripts/stop-dependencies.sh` to bring down the server.
\ No newline at end of file
diff --git a/package.json b/package.json
index f756cc9..82391b9 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "genomic-reader",
-  "version": "1.3.2",
+  "version": "1.4.0",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
   "license": "MIT",
diff --git a/resources/static/testbb-broadpeak.bigBed b/resources/static/testbb-broadpeak.bigBed
new file mode 100644
index 0000000..b004d0e
Binary files /dev/null and b/resources/static/testbb-broadpeak.bigBed differ
diff --git a/resources/static/testbb-idrpeak.bigbed b/resources/static/testbb-idrpeak.bigbed
new file mode 100644
index 0000000..b214e04
Binary files /dev/null and b/resources/static/testbb-idrpeak.bigbed differ
diff --git a/resources/static/testbb-methyl.bigbed b/resources/static/testbb-methyl.bigbed
new file mode 100644
index 0000000..7d4530c
Binary files /dev/null and b/resources/static/testbb-methyl.bigbed differ
diff --git a/resources/static/testbb-narrowpeak.bigBed b/resources/static/testbb-narrowpeak.bigBed
new file mode 100644
index 0000000..4048daa
Binary files /dev/null and b/resources/static/testbb-narrowpeak.bigBed differ
diff --git a/resources/static/testbb-tss.bigbed b/resources/static/testbb-tss.bigbed
new file mode 100644
index 0000000..06242bf
Binary files /dev/null and b/resources/static/testbb-tss.bigbed differ
diff --git a/src/bigwig/BigWigReader.ts b/src/bigwig/BigWigReader.ts
index 0408c6e..e56b086 100644
--- a/src/bigwig/BigWigReader.ts
+++ b/src/bigwig/BigWigReader.ts
@@ -51,6 +51,8 @@ interface RPLeafNode {
     dataSize: number;
 }
 
+export type ParseFunction<T> = (chrom: string, startBase: number, endBase: number, rest: string) => T;
+
 const IDX_MAGIC = 0x2468ACE0;
 const RPTREE_HEADER_SIZE = 48;
 const RPTREE_NODE_LEAF_ITEM_SIZE = 32;
@@ -77,10 +79,10 @@ export class BigWigReader {
      * Gets the type of the underlying file.
      */
     async fileType(): Promise<FileType> {
-        let header: HeaderData = await this.getHeader();
-        return header.fileType;
+        let header: HeaderData = await this.getHeader();
+        return header.fileType;
     }
-    
+
     /**
      * Method for getting all header data for dataLoader's file. Data is loaded on demand and cached for subsequent requests.
      */
@@ -97,14 +99,14 @@ export class BigWigReader {
      * @param chrom the name of the chromosome or other sequence to retrieve.
      */
     async getSequenceRecord(chrom: string): Promise<SequenceRecord> {
-        let header: HeaderData = await this.getHeader();
-        if (header.fileType !== FileType.TwoBit) throw new FileFormatError("getSequenceRecord is not valid on " + header.fileType + " files.");
-        if (!this.cachedSequenceRecords[chrom]) {
+        let header: HeaderData = await this.getHeader();
+        if (header.fileType !== FileType.TwoBit) throw new FileFormatError("getSequenceRecord is not valid on " + header.fileType + " files.");
+        if (!this.cachedSequenceRecords[chrom]) {
             this.cachedSequenceRecords[chrom] = await loadSequenceRecord(this.dataLoader, header, chrom);
         }
-        return this.cachedSequenceRecords[chrom];
+        return this.cachedSequenceRecords[chrom];
     }
-    
+
     /**
      * Method for reading unzoomed wig data from BigWig files.
      *
@@ -114,9 +116,9 @@ export class BigWigReader {
      * @param endBase Ending base pair
      * @param zoomLevelIndex The ZoomLevelHeader.index from the zoom level you want to read from.
      */
-    async readBigWigData(startChrom: string, startBase: number, endChrom: string, 
-            endBase: number): Promise<Array<BigWigData>> {
-        return this.readData(startChrom, startBase, endChrom, endBase, 
+    async readBigWigData(startChrom: string, startBase: number, endChrom: string,
+            endBase: number): Promise<Array<BigWigData>> {
+        return this.readData(startChrom, startBase, endChrom, endBase,
             (await this.getHeader()).common!.fullIndexOffset, decodeWigData);
     }
 
@@ -129,9 +131,9 @@ export class BigWigReader {
      * @param endBase Ending base pair
      * @param zoomLevelIndex The ZoomLevelHeader.index from the zoom level you want to read from.
      */
-    async streamBigWigData(startChrom: string, startBase: number, endChrom: string, 
-            endBase: number): Promise<Readable> {
-        return this.streamData(startChrom, startBase, endChrom, endBase, 
+    async streamBigWigData(startChrom: string, startBase: number, endChrom: string,
+            endBase: number): Promise<Readable> {
+        return this.streamData(startChrom, startBase, endChrom, endBase,
             (await this.getHeader()).common!.fullIndexOffset, decodeWigData);
     }
 
@@ -142,11 +144,13 @@ export class BigWigReader {
      * @param startBase Starting base pair
      * @param endChrom Ending chromosome
      * @param endBase Ending base pair
+     * @param [restParser] parser applied to the remaining "rest" fields of each record; defaults to parseBigBed
      */
-    async readBigBedData(startChrom: string, startBase: number, endChrom: string, 
-            endBase: number): Promise<Array<BigBedData>> {
-        return this.readData(startChrom, startBase, endChrom, endBase, 
-            (await this.getHeader()).common!.fullIndexOffset, decodeBedData);
+    async readBigBedData(startChrom: string, startBase: number, endChrom: string, endBase: number): Promise<Array<BigBedData>>;
+    async readBigBedData<T>(startChrom: string, startBase: number, endChrom: string, endBase: number, restParser: ParseFunction<T>): Promise<Array<T>>;
+    async readBigBedData<T>(startChrom: string, startBase: number, endChrom: string, endBase: number, restParser?: ParseFunction<T>): Promise<Array<T>> {
+        return this.readData(startChrom, startBase, endChrom, endBase,
+            (await this.getHeader()).common!.fullIndexOffset, decodeBedData(restParser || parseBigBed as any));
     }
 
@@ -156,11 +160,13 @@ export class BigWigReader {
      * @param startBase Starting base pair
      * @param endChrom Ending chromosome
      * @param endBase Ending base pair
+     * @param [restParser] parser applied to the remaining "rest" fields of each record; defaults to parseBigBed
      */
-    async streamBigBedData(startChrom: string, startBase: number, endChrom: string, 
-            endBase: number): Promise<Readable> {
-        return this.streamData(startChrom, startBase, endChrom, endBase, 
-            (await this.getHeader()).common!.fullIndexOffset, decodeBedData);
+    async streamBigBedData(startChrom: string, startBase: number, endChrom: string, endBase: number): Promise<Readable>;
+    async streamBigBedData<T>(startChrom: string, startBase: number, endChrom: string, endBase: number, restParser: ParseFunction<T>): Promise<Readable>;
+    async streamBigBedData<T>(startChrom: string, startBase: number, endChrom: string, endBase: number, restParser?: ParseFunction<T>): Promise<Readable> {
+        return this.streamData(startChrom, startBase, endChrom, endBase,
+            (await this.getHeader()).common!.fullIndexOffset, decodeBedData(restParser || parseBigBed as any));
     }
 
     /**
@@ -171,7 +177,7 @@ export class BigWigReader {
      * @param endBase the ending base.
      */
     async readTwoBitData(chrom: string, startBase: number, endBase: number): Promise<string> {
-        const sequence: SequenceRecord = await this.getSequenceRecord(chrom);
+        const sequence: SequenceRecord = await this.getSequenceRecord(chrom);
         return loadSequence(this.dataLoader, this.cachedHeader!, sequence, startBase, endBase);
     }
 
@@ -196,14 +202,14 @@ export class BigWigReader {
      * @param endBase Ending base pair
      * @param zoomLevelIndex index of the zoom level. You can call getHeader() for a list of these values under HeaderData.zoomLevelHeaders.
      */
-    async readZoomData(startChrom: string, startBase: number, endChrom: string, endBase: number, 
-            zoomLevelIndex: number): Promise<Array<BigZoomData>> {
+    async readZoomData(startChrom: string, startBase: number, endChrom: string, endBase: number,
+            zoomLevelIndex: number): Promise<Array<BigZoomData>> {
         const header = await this.getHeader();
         if (undefined == header.zoomLevelHeaders || !(zoomLevelIndex in header.zoomLevelHeaders)) {
             throw new FileFormatError("Given zoomLevelIndex not found in zoom level headers.");
         }
         const treeOffset = header.zoomLevelHeaders[zoomLevelIndex].indexOffset;
-        return this.readData(startChrom, startBase, endChrom, endBase, 
+        return this.readData(startChrom, startBase, endChrom, endBase,
             treeOffset, decodeZoomData);
     }
 
@@ -216,14 +222,14 @@ export class BigWigReader {
      * @param endBase Ending base pair
      * @param zoomLevelIndex index of the zoom level. You can call getHeader() for a list of these values under HeaderData.zoomLevelHeaders.
      */
-    async streamZoomData(startChrom: string, startBase: number, endChrom: string, endBase: number, 
-            zoomLevelIndex: number): Promise<Readable> {
+    async streamZoomData(startChrom: string, startBase: number, endChrom: string, endBase: number,
+            zoomLevelIndex: number): Promise<Readable> {
         const header = await this.getHeader();
         if (undefined == header.zoomLevelHeaders || !(zoomLevelIndex in header.zoomLevelHeaders)) {
             throw new FileFormatError("Given zoomLevelIndex not found in zoom level headers.");
         }
         const treeOffset = header.zoomLevelHeaders[zoomLevelIndex].indexOffset;
-        return this.streamData(startChrom, startBase, endChrom, endBase, 
+        return this.streamData(startChrom, startBase, endChrom, endBase,
             treeOffset, decodeZoomData);
     }
 
@@ -237,9 +243,9 @@ export class BigWigReader {
      * @param treeOffset Location of the R+ tree that stores the data we're interested in.
      * @param decodeFunction function used to decode the raw leaf data into typed records
      */
-    private async loadData<T>(startChrom: string, startBase: number, endChrom: string, endBase: number, 
-            treeOffset: number, streamMode: boolean, decodeFunction: DecodeFunction<T>, 
-            loadFunction: LoadFunction<T>): Promise<void> {
+    private async loadData<T>(startChrom: string, startBase: number, endChrom: string, endBase: number,
+            treeOffset: number, streamMode: boolean, decodeFunction: DecodeFunction<T>,
+            loadFunction: LoadFunction<T>): Promise<void> {
         const header = await this.getHeader();
         if (undefined == header.chromTree) {
             throw new FileFormatError("No chromosome tree found in file header.");
@@ -260,7 +266,7 @@ export class BigWigReader {
             throw new FileFormatError(`R+ tree not found at offset ${treeOffset}`);
         }
         const rootNodeOffset = treeOffset + RPTREE_HEADER_SIZE;
-        const leafNodes: Array<RPLeafNode> = await loadLeafNodesForRPNode(bufferedLoader, header.littleEndian, rootNodeOffset, 
+        const leafNodes: Array<RPLeafNode> = await loadLeafNodesForRPNode(bufferedLoader, header.littleEndian, rootNodeOffset,
             startChromIndex, startBase, endChromIndex, endBase);
 
         // Iterate through filtered leaf nodes, load the data, and decode it
@@ -269,23 +275,23 @@ export class BigWigReader {
             if (header.common!.uncompressBuffSize > 0) {
                 leafData = inflate(leafData);
             }
-            let leafDecodedData = decodeFunction(leafData.buffer as ArrayBuffer, startChromIndex, startBase, endChromIndex, 
-                endBase, header.chromTree.idToChrom);
+
+            let leafDecodedData = decodeFunction(leafData.buffer as ArrayBuffer, startChromIndex, startBase, endChromIndex, endBase, header.chromTree.idToChrom);
             loadFunction(leafDecodedData);
         }
     }
 
-    private async readData<T>(startChrom: string, startBase: number, endChrom: string, endBase: number, 
-            treeOffset: number, decodeFunction: DecodeFunction<T>): Promise<Array<T>> {
+    private async readData<T>(startChrom: string, startBase: number, endChrom: string, endBase: number,
+            treeOffset: number, decodeFunction: DecodeFunction<T>): Promise<Array<T>> {
         const data: Array<T> = [];
         const load: LoadFunction<T> = (d: T[]) => data.push(...d);
         await this.loadData(startChrom, startBase, endChrom, endBase, treeOffset, false, decodeFunction, load);
         return data;
-    };
+    }
 
-    private async streamData<T>(startChrom: string, startBase: number, endChrom: string, endBase: number, 
-            treeOffset: number, decodeFunction: DecodeFunction<T>): Promise<Readable> {
-        const stream = new Readable({ objectMode: true, read() {} });
+    private async streamData<T>(startChrom: string, startBase: number, endChrom: string, endBase: number,
+            treeOffset: number, decodeFunction: DecodeFunction<T>): Promise<Readable> {
+        const stream = new Readable({ objectMode: true, read() { } });
         const load: LoadFunction<T> = (d: T[]) => {
             d.forEach((el) => stream.push(el));
         };
@@ -293,9 +299,9 @@ export class BigWigReader {
         stream.push(null);
         return stream;
     }
-    
 }
+
 /**
  * Recursively load a list of R+ tree leaf nodes for the given node (by file offset) within given chr / base bounds.
  *
@@ -308,7 +314,7 @@
 * @returns List of simple representations of leaf nodes for the given node offset.
 */
 async function loadLeafNodesForRPNode(bufferedLoader: BufferedDataLoader, littleEndian: boolean, rpNodeOffset: number, startChromIndex: number, 
-        startBase: number, endChromIndex: number, endBase: number): Promise<Array<RPLeafNode>> {
+        startBase: number, endChromIndex: number, endBase: number): Promise<Array<RPLeafNode>> {
     const nodeHeaderData: ArrayBuffer = await bufferedLoader.load(rpNodeOffset, 4);
     const nodeHeaderParser = new BinaryParser(nodeHeaderData, littleEndian);
     const isLeaf = 1 === nodeHeaderParser.getByte();
@@ -356,7 +362,7 @@ type DecodeFunction<T> = (data: ArrayBuffer, startChromIndex: number, startBase
     endBase: number, chromDict: Array<string>) => Array<T>;
 type LoadFunction<T> = (data: Array<T>) => void;
-    
+
 /**
  * Extract useful data from sections of raw big binary bed data
  *
@@ -367,12 +373,78 @@ type LoadFunction<T> = (data: Array<T>) => void;
  * @param filterEndBase ending base used for filtering
  * @param chromDict dictionary of indices used by the file to chromosome names, conveniently stored as an array.
  */
-function decodeBedData(data: ArrayBuffer, filterStartChromIndex: number, filterStartBase: number, filterEndChromIndex: number, 
-        filterEndBase: number, chromDict: Array<string>): Array<BigBedData> {
-    const decodedData: Array<BigBedData> = [];
-    const binaryParser = new BinaryParser(data);
+export function parseBigBed(chrom: string, startBase: number, endBase: number, rest: string): BigBedData {
+    const entry: BigBedData = {
+        chr: chrom,
+        start: startBase,
+        end: endBase
+    }
+    let tokens = rest.split("\t");
+    if (tokens.length > 0) {
+        entry.name = tokens[0];
+    }
+    if (tokens.length > 1) {
+        entry.score = parseFloat(tokens[1]);
+    }
+    if (tokens.length > 2) {
+        entry.strand = tokens[2];
+    }
+    if (tokens.length > 3) {
+        entry.cdStart = parseInt(tokens[3]);
+    }
+    if (tokens.length > 4) {
+        entry.cdEnd = parseInt(tokens[4]);
+    }
+    if (tokens.length > 5 && tokens[5] !== "." && tokens[5] !== "0") {
+        let color: string;
+        if (tokens[5].includes(",")) {
+            color = tokens[5].startsWith("rgb") ? tokens[5] : "rgb(" + tokens[5] + ")";
+        } else {
+            color = tokens[5];
+        }
+        entry.color = color;
+    }
+    if (tokens.length > 8) {
+        const exonCount = parseInt(tokens[6]);
+        const exonSizes = tokens[7].split(',');
+        const exonStarts = tokens[8].split(',');
+        const exons: Array<{ start: number, end: number }> = [];
+
+        for (var i = 0; i < exonCount; i++) {
+            const eStart = startBase + parseInt(exonStarts[i]);
+            const eEnd = eStart + parseInt(exonSizes[i]);
+            exons.push({ start: eStart, end: eEnd });
+        }
+
+        entry.exons = exons;
+    }
+
+    return entry;
+}
+
+/**
+ * Builds the decode function used to extract records from sections of raw binary BigBed data.
+ *
+ * @template T
+ * @param restParser parser applied to each record's chrom, start, end, and remaining "rest" fields
+ * @returns the BigBed decode function, which takes:
+ *   data - raw bed data,
+ *   filterStartChromIndex - starting chromosome index used for filtering,
+ *   filterStartBase - starting base used for filtering,
+ *   filterEndChromIndex - ending chromosome index used for filtering,
+ *   filterEndBase - ending base used for filtering,
+ *   chromDict - dictionary of indices used by the file to chromosome names, conveniently stored as an array.
+ */
+const decodeBedData = <T>(restParser: ParseFunction<T>) => (data: ArrayBuffer, filterStartChromIndex: number, filterStartBase: number, filterEndChromIndex: number,
+    filterEndBase: number, chromDict: Array<string>): Array<T> => {
+    const decodedData: Array<T> = [];
+    const binaryParser = new BinaryParser(data);
 
     const minSize = 3 * 4 + 1; // Minimum # of bytes required for a bed record
+
     while (binaryParser.remLength() >= minSize) {
         const chromIndex = binaryParser.getInt();
         const chrom = chromDict[chromIndex];
@@ -386,51 +458,7 @@ function decodeBedData(data: ArrayBuffer, filterStartChromIndex: number, filterS
             break;
         }
 
-        const entry: BigBedData = {
-            chr: chrom,
-            start: startBase,
-            end: endBase
-        }
-
-        let tokens = rest.split("\t");
-        if (tokens.length > 0) {
-            entry.name = tokens[0];
-        }
-        if (tokens.length > 1) {
-            entry.score = parseFloat(tokens[1]);
-        }
-        if (tokens.length > 2) {
-            entry.strand = tokens[2];
-        }
-        if (tokens.length > 3) {
-            entry.cdStart = parseInt(tokens[3]);
-        }
-        if (tokens.length > 4) {
-            entry.cdEnd = parseInt(tokens[4]);
-        }
-        if (tokens.length > 5 && tokens[5] !== "." && tokens[5] !== "0") {
-            let color: string;
-            if (tokens[5].includes(",")) {
-                color = tokens[5].startsWith("rgb") ? tokens[5] : "rgb(" + tokens[5] + ")";
-            } else {
-                color = tokens[5];
-            }
-            entry.color = color;
-        }
-        if (tokens.length > 8) {
-            const exonCount = parseInt(tokens[6]);
-            const exonSizes = tokens[7].split(',');
-            const exonStarts = tokens[8].split(',');
-            const exons: Array<{ start: number, end: number }> = [];
-
-            for (var i = 0; i < exonCount; i++) {
-                const eStart = startBase + parseInt(exonStarts[i]);
-                const eEnd = eStart + parseInt(exonSizes[i]);
-                exons.push({ start: eStart, end: eEnd });
-            }
-
-            entry.exons = exons;
-        }
+        const entry: T = restParser(chrom, startBase, endBase, rest);
 
         decodedData.push(entry);
     }
@@ -448,7 +476,7 @@ function decodeBedData(data: ArrayBuffer, filterStartChromIndex: number, filterS
  * @param chromDict dictionary of indices used by the file to chromosome names, conveniently stored as an array.
  */
 function decodeWigData(data: ArrayBuffer, filterStartChromIndex: number, filterStartBase: number, filterEndChromIndex: number, 
-        filterEndBase: number, chromDict: Array<string>): Array<BigWigData> {
+        filterEndBase: number, chromDict: Array<string>): Array<BigWigData> {
     const decodedData: Array<BigWigData> = [];
     const binaryParser = new BinaryParser(data);
 
@@ -484,22 +512,22 @@ function decodeWigData(data: ArrayBuffer, filterStartChromIndex: number, filterS
             endBase = startBase + itemSpan;
         }
 
-        if (chromIndex > filterEndChromIndex || (chromIndex === filterEndChromIndex && startBase >= filterEndBase)) {
-            break; // past the end of the range; exit
-        } else if (!(chromIndex < filterStartChromIndex || (chromIndex === filterStartChromIndex && endBase < filterStartBase))) {
-            decodedData.push({
-                chr: chrom,
-                start: startBase,
-                end: endBase,
-                value: value
+        if (chromIndex > filterEndChromIndex || (chromIndex === filterEndChromIndex && startBase >= filterEndBase)) {
+            break; // past the end of the range; exit
+        } else if (!(chromIndex < filterStartChromIndex || (chromIndex === filterStartChromIndex && endBase < filterStartBase))) {
+            decodedData.push({
+                chr: chrom,
+                start: startBase,
+                end: endBase,
+                value: value
             }); // this is within the range (i.e. not before the first requested base); add this datapoint
         }
-        if (1 !== type && 2 !== type) {
-            // data is stored in Fixed Step format
-            // only increment the start base once the last entry has been pushed
-            startBase += itemStep;
-        }
+        if (1 !== type && 2 !== type) {
+            // data is stored in Fixed Step format
+            // only increment the start base once the last entry has been pushed
+            startBase += itemStep;
+        }
     }
     return decodedData;
 }
@@ -515,7 +543,7 @@ function decodeWigData(data: ArrayBuffer, filterStartChromIndex: number, filterS
  * @param chromDict dictionary of indices used by the file to chromosome names, conveniently stored as an array.
  */
 function decodeZoomData(data: ArrayBuffer, filterStartChromIndex: number, filterStartBase: number, filterEndChromIndex: number, 
-        filterEndBase: number, chromDict: Array<string>): Array<BigZoomData> {
+        filterEndBase: number, chromDict: Array<string>): Array<BigZoomData> {
     const decodedData: Array<BigZoomData> = [];
     const binaryParser = new BinaryParser(data);
 
diff --git a/src/bigwig/encodeBigBed.ts b/src/bigwig/encodeBigBed.ts
new file mode 100644
index 0000000..6c72112
--- /dev/null
+++ b/src/bigwig/encodeBigBed.ts
@@ -0,0 +1,280 @@
+export interface BigBedDataNarrowPeak {
+    chr: string,
+    start: number,
+    end: number,
+    name?: string,
+    score?: number,
+    // + or - or . for unknown
+    strand?: string,
+    // Measurement of average enrichment for the region
+    signalValue?: number,
+    // Statistical significance of signal value (-log10). Set to -1 if not used
+    pValue?: number,
+    // Statistical significance with multiple-test correction applied (FDR -log10). Set to -1 if not used
+    qValue?: number,
+    // Point-source called for this peak; 0-based offset from chromStart. Set to -1 if no point-source called
+    peak?: number,
+}
+
+export interface BigBedDataBroadPeak {
+    chr: string,
+    start: number,
+    end: number,
+    name?: string,
+    score?: number,
+    // + or - or . for unknown
+    strand?: string,
+    // Measurement of average enrichment for the region
+    signalValue?: number,
+    // Statistical significance of signal value (-log10). Set to -1 if not used
+    pValue?: number,
+    // Statistical significance with multiple-test correction applied (FDR -log10). Set to -1 if not used
+    qValue?: number,
+}
+
+export interface BigBedDataMethyl {
+    chr: string,
+    start: number,
+    end: number,
+    name?: string,
+    score?: number,
+    // + or - or . for unknown
+    strand?: string,
+    // Start of where display should be thick (start codon)
+    thickStart?: number,
+    // End of where display should be thick (stop codon)
+    thickEnd?: number,
+    // Color value R,G,B
+    reserved?: number,
+    // Number of reads or coverage
+    readCount?: number,
+    // Percentage of reads that show methylation at this position in the genome
+    percentMeth?: number
+}
+
+export interface BigBedDataTssPeak {
+    chr: string,
+    start: number,
+    end: number,
+    name?: string,
+    score?: number,
+    // + or - or . for unknown
+    strand?: string,
+    // Count of reads mapping to this peak
+    count?: number,
+    // Gene identifier
+    gene_id?: string,
+    // Gene name
+    gene_name?: string,
+    // TSS identifier
+    tss_id?: string,
+    // base by base read coverage of the peak
+    peak_cov?: string,
+}
+
+export interface BigBedDataIdrPeak {
+    chr: string,
+    start: number,
+    end: number,
+    name?: string,
+    score?: number,
+    // + or - or . for unknown
+    strand?: string,
+    // Local IDR value
+    localIDR?: number,
+    // Global IDR value
+    globalIDR?: number,
+    // Start position in chromosome of replicate 1 peak
+    rep1_chromStart?: number,
+    // End position in chromosome of replicate 1 peak
+    rep1_chromEnd?: number,
+    // Count (used for ranking) replicate 1
+    rep1_count?: number,
+    // Start position in chromosome of replicate 2 peak
+    rep2_chromStart?: number,
+    // End position in chromosome of replicate 2 peak
+    rep2_chromEnd?: number,
+    // Count (used for ranking) replicate 2
+    rep2_count?: number,
+}
+
+export const parseBigBedNarrowPeak = function (chrom: string, startBase: number, endBase: number, rest: string) {
+    const entry: BigBedDataNarrowPeak = {
+        chr: chrom,
+        start: startBase,
+        end: endBase
+    }
+
+    let tokens = rest.split("\t");
+    if (tokens.length > 0) {
+        entry.name = tokens[0];
+    }
+    if (tokens.length > 1) {
+        entry.score = parseFloat(tokens[1]);
+    }
+    if (tokens.length > 2) {
+        entry.strand = tokens[2];
+    }
+    if (tokens.length > 3) {
+        entry.signalValue = parseFloat(tokens[3]);
+    }
+    if (tokens.length > 4) {
+        entry.pValue = parseFloat(tokens[4]);
+    }
+    if (tokens.length > 5) {
+        entry.qValue = parseFloat(tokens[5]);
+    }
+    if (tokens.length > 6) {
+        entry.peak = parseInt(tokens[6]);
+    }
+
+    return entry;
+}
+
+export const parseBigBedBroadPeak = function (chrom: string, startBase: number, endBase: number, rest: string) {
+    const entry: BigBedDataBroadPeak = {
+        chr: chrom,
+        start: startBase,
+        end: endBase
+    }
+
+    let tokens = rest.split("\t");
+    if (tokens.length > 0) {
+        entry.name = tokens[0];
+    }
+    if (tokens.length > 1) {
+        entry.score = parseFloat(tokens[1]);
+    }
+    if (tokens.length > 2) {
+        entry.strand = tokens[2];
+    }
+    if (tokens.length > 3) {
+        entry.signalValue = parseFloat(tokens[3]);
+    }
+    if (tokens.length > 4) {
+        entry.pValue = parseFloat(tokens[4]);
+    }
+    if (tokens.length > 5) {
+        entry.qValue = parseFloat(tokens[5]);
+    }
+
+    return entry;
+}
+
+export const parseBigBedMethyl = function (chrom: string, startBase: number, endBase: number, rest: string) {
+    const entry: BigBedDataMethyl = {
+        chr: chrom,
+        start: startBase,
+        end: endBase
+    }
+
+    let tokens = rest.split("\t");
+    if (tokens.length > 0) {
+        entry.name = tokens[0];
+    }
+    if (tokens.length > 1) {
+        entry.score = parseInt(tokens[1]);
+    }
+    if (tokens.length > 2) {
+        entry.strand = tokens[2];
+    }
+    if (tokens.length > 3) {
+        entry.thickStart = parseInt(tokens[3]);
+    }
+    if (tokens.length > 4) {
+        entry.thickEnd = parseInt(tokens[4]);
+    }
+    if (tokens.length > 5) {
+        entry.reserved = parseInt(tokens[5]);
+    }
+    if (tokens.length > 6) {
+        entry.readCount = parseInt(tokens[6]);
+    }
+    if (tokens.length > 7) {
+        entry.percentMeth = parseInt(tokens[7]);
+    }
+
+    return entry;
+}
+
+export const parseBigBedTssPeak = function (chrom: string, startBase: number, endBase: number, rest: string) {
+    const entry: BigBedDataTssPeak = {
+        chr: chrom,
+        start: startBase,
+        end: endBase
+    }
+
+    let tokens = rest.split("\t");
+    if (tokens.length > 0) {
+        entry.name = tokens[0];
+    }
+    if (tokens.length > 1) {
+        entry.score = parseFloat(tokens[1]);
+    }
+    if (tokens.length > 2) {
+        entry.strand = tokens[2];
+    }
+    if (tokens.length > 3) {
+        entry.count = parseFloat(tokens[3]);
+    }
+    if (tokens.length > 4) {
+        entry.gene_id = tokens[4];
+    }
+    if (tokens.length > 5) {
+        entry.gene_name = tokens[5];
+    }
+    if (tokens.length > 6) {
+        entry.tss_id = tokens[6];
+    }
+    if (tokens.length > 7) {
+        entry.peak_cov = tokens[7];
+    }
+
+    return entry;
+}
+
+export const parseBigBedIdrPeak = function (chrom: string, startBase: number, endBase: number, rest: string) {
+    const entry: BigBedDataIdrPeak = {
+        chr: chrom,
+        start: startBase,
+        end: endBase
+    }
+
+    let tokens = rest.split("\t");
+    if (tokens.length > 0) {
+        entry.name = tokens[0];
+    }
+    if (tokens.length > 1) {
+        entry.score = parseInt(tokens[1]);
+    }
+    if (tokens.length > 2) {
+        entry.strand = tokens[2];
+    }
+    if (tokens.length > 3) {
+        entry.localIDR = parseFloat(tokens[3]);
+    }
+    if (tokens.length > 4) {
+        entry.globalIDR = parseFloat(tokens[4]);
+    }
+    if (tokens.length > 5) {
+        entry.rep1_chromStart = parseInt(tokens[5]);
+    }
+    if (tokens.length > 6) {
+        entry.rep1_chromEnd = parseInt(tokens[6]);
+    }
+    if (tokens.length > 7) {
+        entry.rep1_count = parseFloat(tokens[7]);
+    }
+    if (tokens.length > 8) {
+        entry.rep2_chromStart = parseInt(tokens[8]);
+    }
+    if (tokens.length > 9) {
+        entry.rep2_chromEnd = parseInt(tokens[9]);
+    }
+    if (tokens.length > 10) {
+        entry.rep2_count = parseFloat(tokens[10]);
+    }
+
+    return entry;
+}
diff --git a/src/bigwig/index.ts b/src/bigwig/index.ts
index e82550b..18121ba 100644
--- a/src/bigwig/index.ts
+++ b/src/bigwig/index.ts
@@ -1,2 +1,6 @@
 export { HeaderData, FileType, CommonHeader, ZoomLevelHeader, BWTotalSummary, ChromTree } from "./BigWigHeaderReader";
-export { BigWigData, BigBedData, BigZoomData, BigWigReader } from "./BigWigReader";
\ No newline at end of file
+export {
+    BigBedDataNarrowPeak, BigBedDataBroadPeak, BigBedDataMethyl, BigBedDataTssPeak, BigBedDataIdrPeak,
+    parseBigBedBroadPeak, parseBigBedIdrPeak, parseBigBedMethyl, parseBigBedNarrowPeak, parseBigBedTssPeak
+} from "./encodeBigBed";
+export { BigWigData, BigBedData, BigZoomData, BigWigReader, parseBigBed } from "./BigWigReader";
\ No newline at end of file
diff --git a/test/BigWigReader.test.ts b/test/BigWigReader.test.ts
index c924e70..8781e33 100644
--- a/test/BigWigReader.test.ts
+++ b/test/BigWigReader.test.ts
@@ -1,12 +1,20 @@
 import Axios from "axios";
 import { AxiosDataLoader, BigWigReader, HeaderData, BigWigData } from "../src/";
+import { parseBigBed } from "../src/bigwig/BigWigReader";
+import { parseBigBedBroadPeak, parseBigBedIdrPeak, parseBigBedMethyl, parseBigBedNarrowPeak, parseBigBedTssPeak } from "../src/bigwig/encodeBigBed";
 import { streamToArray } from "./testUtils";
 
 const testBWFilename = "testbw.bigwig";
 const testBWFixedStepName = "test.fixedstep.bigwig";
 const testBBFilename = "testbb.bigbed";
+const testBBNarrowPeakFilename = "testbb-narrowpeak.bigBed";
+const testBBBroadPeakFilename = "testbb-broadpeak.bigBed";
+const testBBMethylFilename = "testbb-methyl.bigbed";
+const testBBIdrPeakFilename = "testbb-idrpeak.bigbed";
+const testBBTssFilename = "testbb-tss.bigbed";
 const testLargeBWFilename = "testbw-large.bigwig";
+
 describe("BigWigReader", () => {
     it("should get header", async () => {
         const loader = new AxiosDataLoader(`http://localhost:8001/${testBWFilename}`, Axios.create());
@@ -92,10 +100,24 @@
         });
     });
 
-    it("should read unzoomed bigbed data", async () => {
+    it("should read unzoomed bigbed data, without parseBigBed provided", async () => {
         const loader = new AxiosDataLoader(`http://localhost:8001/${testBBFilename}`, Axios.create());
         const reader = new BigWigReader(loader);
         const data = await reader.readBigBedData("chr21", 10_000_000, "chr21", 20_000_000);
+
+        expect(data.length).toBe(46);
+        expect(data[0].chr).toBe("chr21");
+        expect(data[0].start).toBe(9_928_613);
+        expect(data[0].end).toBe(10_012_791);
+        expect(data[0].exons!.length).toBe(22);
+        expect(data[0].exons![0].start).toBe(9_928_613);
+        expect(data[0].exons![0].end).toBe(9_928_911);
+    });
+
+    it("should read unzoomed bigbed data", async () => {
+        const loader = new AxiosDataLoader(`http://localhost:8001/${testBBFilename}`, Axios.create());
+        const reader = new BigWigReader(loader);
+        const data = await reader.readBigBedData("chr21", 10_000_000, "chr21", 20_000_000, parseBigBed);
         expect(data.length).toBe(46);
         expect(data[0].chr).toBe("chr21");
         expect(data[0].start).toBe(9_928_613);
@@ -105,6 +127,95 @@
         expect(data[0].exons![0].end).toBe(9_928_911);
     });
 
+    it("should read unzoomed narrow peak bigbed data", async () => {
+        const loader = new AxiosDataLoader(`http://localhost:8001/${testBBNarrowPeakFilename}`, Axios.create());
+        const reader = new BigWigReader(loader);
+        const data = await reader.readBigBedData("chr21", 33_037_487, "chr21", 33_047_461, parseBigBedNarrowPeak);
+        expect(data.length).toBe(2);
+        expect(data[0].chr).toBe("chr21");
+        expect(data[0].start).toBe(33_039_438);
+        expect(data[0].end).toBe(33_039_560);
+        expect(data[0].name).toBe('chr21.1228');
+        expect(data[0].score).toBe(577);
+        expect(data[0].strand).toBe('.');
+        expect(data[0].signalValue).toBe(0);
+        expect(data[0].pValue).toBe(1);
+        expect(data[0].qValue).toBe(-1);
+        expect(data[0].peak).toBe(84);
+    });
+
+    it("should read unzoomed broad peak bigbed data", async () => {
+        const loader = new AxiosDataLoader(`http://localhost:8001/${testBBBroadPeakFilename}`, Axios.create());
+        const reader = new BigWigReader(loader);
+        const data = await reader.readBigBedData("chr1", 11_169_025, "chr1", 11_333_936, parseBigBedBroadPeak);
+        expect(data.length).toBe(10);
+        expect(data[0].chr).toBe("chr1");
+        expect(data[0].start).toBe(11_176_299);
+        expect(data[0].end).toBe(11_176_669);
+        expect(data[0].name).toBe('id-773');
+        expect(data[0].score).toBe(22);
+        expect(data[0].strand).toBe('.');
+        expect(data[0].signalValue).toBe(-1);
+        expect(data[0].pValue).toBe(-1);
+        expect(data[0].qValue).toBe(2);
+    });
+
+    it("should read unzoomed methyl bigbed data", async () => {
+        const loader = new AxiosDataLoader(`http://localhost:8001/${testBBMethylFilename}`, Axios.create());
+        const reader = new BigWigReader(loader);
+        const data = await reader.readBigBedData("chr3", 21_109_025, "chr3", 21_433_936, parseBigBedMethyl);
+        expect(data.length).toBe(4);
+        expect(data[0].chr).toBe("chr3");
+        expect(data[0].start).toBe(21_267_378);
+        expect(data[0].end).toBe(21_267_379);
+        expect(data[0].name).toBe('AG09319_0__JS_');
+        expect(data[0].score).toBe(1);
+        expect(data[0].strand).toBe('+');
+        expect(data[0].thickStart).toBe(21_267_378);
+        expect(data[0].thickEnd).toBe(21_267_379);
+        expect(data[0].reserved).toBe(255);
+        expect(data[0].readCount).toBe(1);
+        expect(data[0].percentMeth).toBe(100);
+    });
+
+    it("should read unzoomed tss bigbed data", async () => {
+        const loader = new AxiosDataLoader(`http://localhost:8001/${testBBTssFilename}`, Axios.create());
+        const reader = new BigWigReader(loader);
+        const data = await reader.readBigBedData("chr1", 1_109_025, "chr1", 1_433_936, parseBigBedTssPeak);
+        expect(data.length).toBe(23);
+        expect(data[0].chr).toBe("chr1");
+        expect(data[0].start).toBe(1_167_323);
+        expect(data[0].end).toBe(1_167_429);
+        expect(data[0].name).toBe('TSS_chr1_minus_1115669_1176476_pk1');
+        expect(data[0].score).toBe(1000);
+        expect(data[0].strand).toBe('-');
+        expect(data[0].count).toBe(928);
+        expect(data[0].gene_id).toBe('chr1_minus_1115669_1176476');
+        expect(data[0].tss_id).toBe('TSS_chr1_minus_1115669_1176476_pk1');
+        expect(data[0].peak_cov).toBe('7.0,0.0,0.0,4.0,0.0,0.0,5.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,18.0,0.0,0.0,25.0,2.0,0.0,8.0,18.0,0.0,26.0,31.0,45.0,100.0,102.0,314.0,6.0,1.0,1.0,97.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,57.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,22.0,4.0,0.0,1.0,12.0');
+    });
+
+    it("should read unzoomed idr peak bigbed data", async () => {
+        const loader = new AxiosDataLoader(`http://localhost:8001/${testBBIdrPeakFilename}`, Axios.create());
+        const reader = new BigWigReader(loader);
+        const data = await reader.readBigBedData("chr1", 1_109_025, "chr1", 1_433_936, parseBigBedIdrPeak);
+        expect(data.length).toBe(5);
+        expect(data[0].chr).toBe("chr1");
+        expect(data[0].start).toBe(1_116_078);
+        expect(data[0].end).toBe(1_116_107);
+        expect(data[0].name).toBe('.');
+        expect(data[0].score).toBe(164);
+        expect(data[0].strand).toBe('-');
+        expect(data[0].localIDR).toBe(-0);
+        expect(data[0].globalIDR).toBe(0.4);
+        expect(data[0].rep1_chromStart).toBe(1_116_078);
+        expect(data[0].rep1_chromEnd).toBe(1_116_107);
+        expect(data[0].rep1_count).toBe(21);
+        expect(data[0].rep2_chromStart).toBe(1_116_097);
+        expect(data[0].rep2_count).toBe(10);
+    });
+
     it("should read fixed step bigwig data", async () => {
         const loader = new AxiosDataLoader(`http://localhost:8001/${testBWFixedStepName}`, Axios.create());
         const reader = new BigWigReader(loader);
@@ -154,4 +265,4 @@
         expect(data.length).toBe(147);
     });
 
-});
\ No newline at end of file
+});
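For reviewers, a minimal usage sketch for the new optional `restParser` argument. This is not part of the diff; it assumes the package's root index re-exports the bigwig module (as the existing root imports in the tests suggest) and that the sample files added above are served on port 8001 by `scripts/run-dependencies.sh`.

```typescript
import Axios from "axios";
import {
    AxiosDataLoader, BigWigReader,
    parseBigBedNarrowPeak, BigBedDataNarrowPeak,
} from "genomic-reader"; // assumed root re-export of src/bigwig/index.ts

async function demo() {
    // testbb-narrowpeak.bigBed is one of the fixtures added by this PR.
    const loader = new AxiosDataLoader("http://localhost:8001/testbb-narrowpeak.bigBed", Axios.create());
    const reader = new BigWigReader(loader);

    // No restParser: falls back to parseBigBed and yields generic BigBedData records.
    const generic = await reader.readBigBedData("chr21", 33_037_487, "chr21", 33_047_461);

    // With an ENCODE parser, the same range decodes into typed narrowPeak records.
    const peaks: Array<BigBedDataNarrowPeak> = await reader.readBigBedData(
        "chr21", 33_037_487, "chr21", 33_047_461, parseBigBedNarrowPeak);

    console.log(generic.length, peaks[0].signalValue, peaks[0].peak);
}

demo().catch(console.error);
```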
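Because `ParseFunction<T>` is just `(chrom, startBase, endBase, rest) => T`, callers are not limited to the bundled ENCODE parsers. A hypothetical custom parser for illustration — the `BedGraphLike` shape and its two extra columns are invented here, not part of the library:

```typescript
// Illustrative record type: BED3 plus a label and a numeric signal column.
interface BedGraphLike {
    chr: string;
    start: number;
    end: number;
    label?: string;
    signal?: number;
}

// Structurally matches ParseFunction<BedGraphLike>: chrom/start/end arrive
// already decoded, and "rest" holds the record's remaining tab-separated columns.
function parseBedGraphLike(chrom: string, startBase: number, endBase: number, rest: string): BedGraphLike {
    const entry: BedGraphLike = { chr: chrom, start: startBase, end: endBase };
    const tokens = rest.split("\t");
    if (tokens.length > 0) {
        entry.label = tokens[0];
    }
    if (tokens.length > 1) {
        entry.signal = parseFloat(tokens[1]);
    }
    return entry;
}

// reader.readBigBedData("chr1", 0, "chr1", 1_000_000, parseBedGraphLike) then
// resolves to Promise<Array<BedGraphLike>>, and the same parser can be passed
// to streamBigBedData for object-mode streaming.
```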