Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CNVpytor Track: Improved Code and Added Features #1930

Merged
merged 1 commit into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 43 additions & 26 deletions js/cnvpytor/CombinedCaller.js
Original file line number Diff line number Diff line change
@@ -1,25 +1,33 @@
import t_dist from './t_dist.js'
import g_utils from './GeneralUtil.js'


class CombinedCaller{
constructor(wigFeatures, binSize) {
this.wigFeatures = wigFeatures
this.binSize = binSize
import gUtils from './GeneralUtil.js'
import baseCNVpytorVCF from './baseCNVpytorVCF.js'


class CombinedCaller extends baseCNVpytorVCF {
/**
* Creates an instance of CombinedCaller.
*
* @param {Array} wigFeatures - An array of arrays containing wig formatted data for each chromosome and bin.
* @param {number} binSize - The size of the bins used in the wig data.
* @param {string} refGenome - GC content data indexed by chromosome and bin.
*/
constructor(wigFeatures, binSize, refGenome) {
super(wigFeatures, binSize, refGenome)
}
get_fit(){
var fit_info = new g_utils.GetFit(this.wigFeatures)
var [globalMean, globalStd] = fit_info.fit_data()

return {globalMean:globalMean, globalStd:globalStd}

}
async call_2d(omin=null, mcount=null, event_type="both", max_distance=0.1, baf_threshold=0, max_copy_number=10, min_cell_fraction=0.0){

let fit_obj = this.get_fit()
this.globalMean = fit_obj.globalMean
this.globalStd = fit_obj.globalStd
// let fit_obj = this.get_fit_v2()
// this.globalMean = fit_obj.globalMean
// this.globalStd = fit_obj.globalStd

// this.getGcCorrectionSignal(this.globalMean)

// applying gc correction
await this.apply_gcCorrection()

let binScoreField = this.gcFlag ? "gcCorrectedBinScore": "binScore" ;

let overlap_min = omin==null ? 0.05 * this.binSize / 3e9: omin ;
let min_count = mcount == null ? parseInt(this.binSize / 10000) : mcount ;

Expand All @@ -41,11 +49,14 @@ class CombinedCaller{
if (bin.hets_count > 4 ){

if( bin.dp_count > min_count ){
segments.push([bin_idx])
levels.push(bin.binScore)
likelihoods.push(bin.likelihood_score)
delete bin.likelihood_score

if(bin[binScoreField]){
segments.push([bin_idx])
levels.push(bin[binScoreField])
likelihoods.push(bin.likelihood_score)
delete bin.likelihood_score

}

}
}
})
Expand Down Expand Up @@ -234,7 +245,7 @@ class CombinedCaller{
if(points == 0){
points = 1
}
let x = g_utils.linspace(min_cell_fraction, 1, points)
let x = gUtils.linspace(min_cell_fraction, 1, points)
let master_lh = {}
let germline_lh = {}
for(let cn=10; cn > -1; cn--){
Expand Down Expand Up @@ -330,12 +341,17 @@ class CombinedCaller{
}

var rawbinScore = this.formatDataStructure(this.wigFeatures, 'binScore', this.globalMean)

let gcCorrectedBinScore = [];
if (this.gcFlag) {
gcCorrectedBinScore = this.formatDataStructure(this.wigFeatures, 'gcCorrectedBinScore', this.globalMean);
}
var callScore = this.formatDataStructure(this.wigFeatures, 'segment_score', this.globalMean)

return {binScore: rawbinScore, segment_score: callScore}
return {binScore: rawbinScore, gcCorrectedBinScore: gcCorrectedBinScore, segmentScore: callScore}

}

formatDataStructure(wigFeatures, feature_column, scaling_factor = 1) {
const results = []
for (const [chr, wig] of Object.entries(wigFeatures)) {
Expand All @@ -351,7 +367,7 @@ class CombinedCaller{

return results
}

/*
formatDataStructure_BAF(feature_column, scaling_factor = -1) {
const baf1 = []
const baf2 = []
Expand All @@ -375,7 +391,7 @@ class CombinedCaller{


return [baf1, baf2]
}
}*/
}

function arrayMax(arr) {
Expand Down Expand Up @@ -424,6 +440,7 @@ function normal_overlap_approx(m1, s1, m2, s2){
* @returns {float} - Overlap area.
*/
function likelihood_overlap(likelihood_1, likelihood_2){
// console.log(likelihood_1, likelihood_2)
let sum;
try{
sum = likelihood_1.reduce((accumulator, currentValue, currentIndex) => {return accumulator + Math.min(currentValue, likelihood_2[currentIndex])});
Expand Down
188 changes: 115 additions & 73 deletions js/cnvpytor/GeneralUtil.js
Original file line number Diff line number Diff line change
@@ -1,99 +1,141 @@

class GetFit {
constructor(allBins) {
this.allBins = allBins
}
getValues() {
const bins = Object.values(this.allBins).reduce(
(binResult, bin) => { return binResult.concat(bin.filter(a => a.binScore > 0).map(a => a.binScore)) }, [])
return bins
}
getMean(data) {
return (data.reduce(function (a, b) { return a + b; }) / data.length);
}
fit_data() {
let rd_list = this.getValues()
let distParmas = getDistParams(rd_list)
return distParmas
}

histogram(data, bins) {
const step = bins[1] - bins[0];
const hist_bins = [];

data.forEach((value, index) => {
bins.forEach((bin_value, bin_index) => {
if (!hist_bins[bin_value]) {
hist_bins[bin_value] = { count: 0 };
}
if (bin_value <= value && value < bin_value + step) {
hist_bins[bin_value].count++;
return false;
}
});
});
const dist_p = []
hist_bins.forEach((bin, index) => { dist_p.push(bin.count); });
return dist_p
}
/**
* Creates an instance of GetFit.
* @param {Object} allBins - An object containing all the bins with their respective data.
*/
constructor(allBins) {
this.allBins = allBins // Stores all bins data
}

/**
* Extracts bin scores greater than zero from all bins.
* @returns {Array} An array of bin scores.
*/

getValues() {
const bins = Object.values(this.allBins).reduce(
(binResult, bin) => { return binResult.concat(bin.filter(a => a.binScore > 0).map(a => a.binScore)) }, [])
return bins
}

/**
* Calculates the mean of the given data.
* @param {Array} data - The data array to calculate the mean from.
* @returns {number} The mean value of the data.
*/
getMean(data) {
return (data.reduce(function (a, b) { return a + b; }) / data.length);
}

fit_data() {
let rd_list = this.getValues()
let distParmas = getDistParams(rd_list)
return distParmas
}


histogram(data, bins) {
const step = bins[1] - bins[0];
const hist_bins = [];

data.forEach((value, index) => {
bins.forEach((bin_value, bin_index) => {
if (!hist_bins[bin_value]) {
hist_bins[bin_value] = { count: 0 };
}
if (bin_value <= value && value < bin_value + step) {
hist_bins[bin_value].count++;
return false;
}
});
});
const dist_p = []
hist_bins.forEach((bin, index) => { dist_p.push(bin.count); });
return dist_p
}

}


function range_function(start, stop, step) {
const data_array = Array(Math.ceil((stop - start) / step))
.fill(start)
.map((x, y) => x + y * step);
return data_array;
const data_array = Array(Math.ceil((stop - start) / step))
.fill(start)
.map((x, y) => x + y * step);
return data_array;
}


function Gaussian([a, x0, sigma]) {
return x =>
(a * Math.exp(-Math.pow(x - x0, 2) / (2 * Math.pow(sigma, 2)))) / (Math.sqrt(2 * Math.PI) * sigma);
return x =>
(a * Math.exp(-Math.pow(x - x0, 2) / (2 * Math.pow(sigma, 2)))) / (Math.sqrt(2 * Math.PI) * sigma);
}

function filterOutliers(someArray) {

if (someArray.length < 4)
return someArray;
if (someArray.length < 4)
return someArray;

let values, q1, q3, iqr, maxValue, minValue;
let values, q1, q3, iqr, maxValue, minValue;

values = someArray.slice().sort((a, b) => a - b); //copy array fast and sort
values = someArray.slice().sort((a, b) => a - b); //copy array fast and sort

if ((values.length / 4) % 1 === 0) { //find quartiles
q1 = 1 / 2 * (values[(values.length / 4)] + values[(values.length / 4) + 1]);
q3 = 1 / 2 * (values[(values.length * (3 / 4))] + values[(values.length * (3 / 4)) + 1]);
} else {
q1 = values[Math.floor(values.length / 4 + 1)];
q3 = values[Math.ceil(values.length * (3 / 4) + 1)];
}
if ((values.length / 4) % 1 === 0) { //find quartiles
q1 = 1 / 2 * (values[(values.length / 4)] + values[(values.length / 4) + 1]);
q3 = 1 / 2 * (values[(values.length * (3 / 4))] + values[(values.length * (3 / 4)) + 1]);
} else {
q1 = values[Math.floor(values.length / 4 + 1)];
q3 = values[Math.ceil(values.length * (3 / 4) + 1)];
}

iqr = q3 - q1;
maxValue = q3 + iqr * 1.5;
minValue = q1 - iqr * 1.5;
iqr = q3 - q1;
maxValue = q3 + iqr * 1.5;
minValue = q1 - iqr * 1.5;

return values.filter((x) => (x >= minValue) && (x <= maxValue));
return values.filter((x) => (x >= minValue) && (x <= maxValue));
}

function getDistParams(bins) {
let filteredBins = filterOutliers(bins)
const n = filteredBins.length
const mean = filteredBins.reduce((a, b) => a + b) / n
const std = Math.sqrt(filteredBins.map(x => Math.pow(x - mean, 2)).reduce((a, b) => a + b) / n)
return [mean, std]
let filteredBins = filterOutliers(bins)
const n = filteredBins.length
const mean = filteredBins.reduce((a, b) => a + b) / n
const std = Math.sqrt(filteredBins.map(x => Math.pow(x - mean, 2)).reduce((a, b) => a + b) / n)
return [mean, std]
}

function linspace(a, b, n) {
if (typeof n === "undefined") n = Math.max(Math.round(b - a) + 1, 1);
if (n < 2) {
return n === 1 ? [a] : [];
}
var ret = Array(n);
n--;
for (let i = n; i >= 0; i--) {
ret[i] = (i * b + (n - i) * a) / n;
}
return ret;
if (typeof n === "undefined") n = Math.max(Math.round(b - a) + 1, 1);
if (n < 2) {
return n === 1 ? [a] : [];
}
var ret = Array(n);
n--;
for (let i = n; i >= 0; i--) {
ret[i] = (i * b + (n - i) * a) / n;
}
return ret;
}

export default { range_function, getDistParams, linspace, GetFit};
export function histogram2d(data1, data2, binsX, binsY) {
// Calculate bin sizes
const minX = math.min(data1);
const maxX = math.max(data1);
const minY = math.min(data2);
const maxY = math.max(data2);
const binSizeX = (maxX - minX) / binsX;
const binSizeY = (maxY - minY) / binsY;

// Create the histogram array
const histogram = math.zeros(binsX, binsY);

// Populate the histogram
for (let i = 0; i < data1.length; i++) {
const xBin = Math.floor((data1[i] - minX) / binSizeX);
const yBin = Math.floor((data2[i] - minY) / binSizeY);
histogram.set([xBin, yBin], histogram.get([xBin, yBin]) + 1);
}

return histogram;
}

export default { range_function, getDistParams, linspace, GetFit, filterOutliers };
Loading