Skip to content

Commit

Permalink
remove KR
Browse files Browse the repository at this point in the history
  • Loading branch information
sa501428 committed Feb 7, 2022
1 parent dbee3c1 commit 3892e76
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 174 deletions.
145 changes: 2 additions & 143 deletions src/juicebox/tools/utils/norm/NormalizationCalculations.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2011-2021 Broad Institute, Aiden Lab, Rice University, Baylor College of Medicine
* Copyright (c) 2011-2022 Broad Institute, Aiden Lab, Rice University, Baylor College of Medicine
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -32,9 +32,7 @@
import javastraw.reader.type.NormalizationType;
import juicebox.tools.clt.old.NormalizationBuilder;
import juicebox.tools.utils.norm.scale.ScaleHandler;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
Expand Down Expand Up @@ -244,9 +242,7 @@ boolean isEnoughMemory() {

public ListOfFloatArrays getNorm(NormalizationType normOption) {
ListOfFloatArrays norm;
if (NormalizationBuilder.usesKR(normOption)) {
norm = computeKR();
} else if (NormalizationBuilder.usesVC(normOption)) {
if (NormalizationBuilder.usesVC(normOption)) {
norm = computeVC();
} else if (NormalizationBuilder.usesSCALE(normOption)) {
norm = computeMMBA();
Expand Down Expand Up @@ -336,143 +332,6 @@ public int getNumberOfValidEntriesInVector(double[] norm) {
return counter;
}


ListOfFloatArrays computeKR() {

boolean recalculate = true;
ListOfIntArrays offset = getOffset(0);
ListOfFloatArrays kr = null;
int iteration = 1;

while (recalculate && iteration <= 6) {
// create new matrix indices upon every iteration, because we've thrown out rows
// newSize is size of new sparse matrix (non-sparse rows)
long newSize = 0;
for (int[] array : offset.getValues()) {
for (int offset1 : array) {
if (offset1 != -1) newSize++;
}
}

// initialize x0 for call the compute KR norm
ListOfDoubleArrays x0 = new ListOfDoubleArrays(newSize, 1);

x0 = computeKRNormVector(offset, 0.000001, x0, 0.1);

// assume all went well and we don't need to recalculate
recalculate = false;
int rowsTossed = 0;

if (x0 == null || iteration == 5) {
// if x0 is no good, throw out some percentage of rows and reset the offset array that gives those rows
recalculate = true;
if (iteration < 5) {
offset = getOffset(iteration);
} else {
offset = getOffset(10);
}
// System.out.print(" " + iteration + "%");
} else {
// otherwise, check to be sure there are no tiny KR values
// create true KR vector
kr = new ListOfFloatArrays(matrixSize);
int krIndex = 0;
for (int[] offsetArray : offset.getValues()) {
for (int offset1 : offsetArray) {
if (offset1 == -1) {
kr.set(krIndex++, Float.NaN);
} else {
kr.set(krIndex++, (float) (1.0f / x0.get(offset1)));
}
}
}
// find scaling factor
double mySum = getSumFactor(kr);

// if any values are too small, recalculate. set those rows to be thrown out and reset the offset
// note that if no rows are thrown out, the offset should not change
int index = 0;
for (long i = 0; i < kr.getLength(); i++) {
if (kr.get(i) * mySum < 0.01) {
offset.set(i, -1);
rowsTossed++;
recalculate = true;
} else {
if (offset.get(i) != -1) {
offset.set(i, index++);
}
}
}
// if (recalculate) System.out.print(" " + rowsTossed);
}
iteration++;
System.gc();
}
if (iteration > 6 && recalculate) {
kr = new ListOfFloatArrays(matrixSize, Float.NaN);
}

return kr;
}

private ListOfIntArrays getOffset(double percent) {
ListOfDoubleArrays rowSums = new ListOfDoubleArrays(matrixSize, 0);

Iterator<ContactRecord> iterator = getIterator();
while (iterator.hasNext()) {
ContactRecord cr = iterator.next();
int x = cr.getBinX();
int y = cr.getBinY();
float value = cr.getCounts();
rowSums.addTo(x, value);
if (x != y) {
rowSums.addTo(y, value);
}
}

double thresh = 0;
if (percent > 0) {
// Get percent threshold from positive row sums (nonzero)
DescriptiveStatistics stats = new DescriptiveStatistics();
rowSums.getValues().forEach(sum -> Arrays.stream(sum).filter(i-> i != 0).forEach(stats::addValue));
thresh = stats.getPercentile( percent);
/*
int j = 0;
for (double[] array : rowSums.getValues()) {
for (double sum : array) {
if (sum != 0) {
j++;
}
}
}
double[] posRowSums = new double[j];
j = 0;
for (double[] array : rowSums.getValues()) {
for (double sum : array) {
if (sum != 0) {
posRowSums[j++] = sum;
}
}
}
thresh = StatUtils.percentile(posRowSums, percent);
*/
}

ListOfIntArrays offset = new ListOfIntArrays(rowSums.getLength());
int index = 0;
for (long i = 0; i < rowSums.getLength(); i++) {
if (rowSums.get(i) <= thresh) {
offset.set(i, -1);
} else {
offset.set(i, index++);
}
}

return offset;

}

public ListOfFloatArrays computeMMBA() {
ListOfFloatArrays tempTargetVector = new ListOfFloatArrays(matrixSize, 1);
return ScaleHandler.mmbaScaleToVector(ic, tempTargetVector, resolution);
Expand Down
32 changes: 1 addition & 31 deletions src/juicebox/tools/utils/norm/NormalizationVectorUpdater.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2011-2021 Broad Institute, Aiden Lab, Rice University, Baylor College of Medicine
* Copyright (c) 2011-2022 Broad Institute, Aiden Lab, Rice University, Baylor College of Medicine
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -54,15 +54,12 @@ public class NormalizationVectorUpdater extends NormVectorUpdater {
protected List<ExpectedValueCalculation> expectedValueCalculations = new ArrayList<>();

// Keep track of chromosomes that fail to converge, so we don't try them at higher resolutions.
protected Set<Chromosome> krBPFailedChromosomes = new HashSet<>();
protected Set<Chromosome> krFragFailedChromosomes = new HashSet<>();
protected Set<Chromosome> mmbaBPFailedChromosomes = new HashSet<>();
protected Set<Chromosome> mmbaFragFailedChromosomes = new HashSet<>();

// norms to build; gets overwritten
protected boolean weShouldBuildVC = true;
protected boolean weShouldBuildVCSqrt = true;
protected boolean weShouldBuildKR = true;
protected boolean weShouldBuildScale = true;

protected static void printNormTiming(String norm, Chromosome chr, HiCZoom zoom, long currentTime) {
Expand All @@ -84,7 +81,6 @@ protected static void updateExpectedValueCalculationForChr(final int chrIdx, Nor
protected void reEvaluateWhichIntraNormsToBuild(List<NormalizationType> normalizationsToBuild) {
weShouldBuildVC = normalizationsToBuild.contains(NormalizationHandler.VC);
weShouldBuildVCSqrt = normalizationsToBuild.contains(NormalizationHandler.VC_SQRT);
weShouldBuildKR = normalizationsToBuild.contains(NormalizationHandler.KR);
weShouldBuildScale = normalizationsToBuild.contains(NormalizationHandler.SCALE);
}

Expand All @@ -108,23 +104,6 @@ protected void buildVCOrVCSQRT(boolean weShouldBuildVC, boolean weShouldBuildVCS
}
}

protected void buildKR(Chromosome chr, NormalizationCalculations nc, HiCZoom zoom, MatrixZoomData zd, ExpectedValueCalculation evKR) throws IOException {
Set<Chromosome> failureSetKR = zoom.getUnit() == HiCZoom.HiCUnit.FRAG ? krFragFailedChromosomes : krBPFailedChromosomes;
final int chrIdx = chr.getIndex();

long currentTime = System.currentTimeMillis();
if (!failureSetKR.contains(chr)) {
ListOfFloatArrays kr = nc.computeKR();
if (kr == null) {
failureSetKR.add(chr);
printNormTiming("FAILED KR", chr, zoom, currentTime);
} else {
updateExpectedValueCalculationForChr(chrIdx, nc, kr, NormalizationHandler.KR, zoom, zd, evKR, normVectorBuffers, normVectorIndices);
printNormTiming("KR", chr, zoom, currentTime);
}
}
}

public void updateHicFile(String path, List<NormalizationType> normalizationsToBuild,
Map<NormalizationType, Integer> resolutionsToBuildTo, int genomeWideLowestResolutionAllowed, boolean noFrag) throws IOException {

Expand Down Expand Up @@ -171,7 +150,6 @@ public void updateHicFile(String path, List<NormalizationType> normalizationsToB

ExpectedValueCalculation evVC = new ExpectedValueCalculation(chromosomeHandler, zoom.getBinSize(), fcm, NormalizationHandler.VC);
ExpectedValueCalculation evVCSqrt = new ExpectedValueCalculation(chromosomeHandler, zoom.getBinSize(), fcm, NormalizationHandler.VC_SQRT);
ExpectedValueCalculation evKR = new ExpectedValueCalculation(chromosomeHandler, zoom.getBinSize(), fcm, NormalizationHandler.KR);
ExpectedValueCalculation evSCALE = new ExpectedValueCalculation(chromosomeHandler, zoom.getBinSize(), fcm, NormalizationHandler.SCALE);

// Loop through chromosomes
Expand All @@ -196,11 +174,6 @@ public void updateHicFile(String path, List<NormalizationType> normalizationsToB
chr, nc, zoom, zd, evVC, evVCSqrt);
}

// KR normalization
if (weShouldBuildKR && zoom.getBinSize() >= resolutionsToBuildTo.get(NormalizationHandler.KR)) {
buildKR(chr, nc, zoom, zd, evKR);
}

// Fast scaling normalization
if (weShouldBuildScale && zoom.getBinSize() >= resolutionsToBuildTo.get(NormalizationHandler.SCALE)) {
buildScale(chr, nc, zoom, zd, evSCALE);
Expand All @@ -215,9 +188,6 @@ public void updateHicFile(String path, List<NormalizationType> normalizationsToB
if (weShouldBuildVCSqrt && evVCSqrt.hasData() && zoom.getBinSize() >= resolutionsToBuildTo.get(NormalizationHandler.VC_SQRT)) {
expectedValueCalculations.add(evVCSqrt);
}
if (weShouldBuildKR && evKR.hasData() && zoom.getBinSize() >= resolutionsToBuildTo.get(NormalizationHandler.KR)) {
expectedValueCalculations.add(evKR);
}
if (weShouldBuildScale && evSCALE.hasData() && zoom.getBinSize() >= resolutionsToBuildTo.get(NormalizationHandler.SCALE)) {
expectedValueCalculations.add(evSCALE);
}
Expand Down

0 comments on commit 3892e76

Please sign in to comment.