Skip to content

Commit

Permalink
Addressing the feedback from Oct 11, 2023, except implementing the Stripe Flag in RANS Nx16 encoder
Browse files Browse the repository at this point in the history
  • Loading branch information
yash-puligundla committed Oct 19, 2023
1 parent 43145d4 commit f7e6c57
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 169 deletions.
2 changes: 1 addition & 1 deletion scripts/install-samtools.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/sh
set -ex
wget https://github.com/samtools/samtools/releases/download/1.14/samtools-1.14.tar.bz2
# CRAM Interop Tests are dependent on the test files in samtools-1.14/htslib-1.14/htscodes/tests/dat
# Note that the CRAM Interop Tests are dependent on the test files in samtools-1.14/htslib-1.14/htscodecs/tests/dat
tar -xjvf samtools-1.14.tar.bz2
cd samtools-1.14 && ./configure --prefix=/usr && make && sudo make install
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import htsjdk.samtools.cram.compression.rans.Constants;
import htsjdk.samtools.cram.compression.rans.RANSDecode;
import htsjdk.samtools.cram.compression.rans.RANSDecodingSymbol;
import htsjdk.samtools.cram.compression.rans.RANSParams;
import htsjdk.samtools.cram.compression.rans.Utils;

import java.nio.ByteBuffer;
Expand All @@ -17,17 +16,17 @@ public class RANSNx16Decode extends RANSDecode {
private static final int FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK = 0x01;

public ByteBuffer uncompress(final ByteBuffer inBuffer) {

// For RANS decoding, the bytes are read in little endian from the input stream
inBuffer.order(ByteOrder.LITTLE_ENDIAN);
return uncompress(inBuffer, 0);
}

public ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
if (inBuffer.remaining() == 0) {
return EMPTY_BUFFER;
}

// For RANS decoding, the bytes are read in little endian from the input stream
inBuffer.order(ByteOrder.LITTLE_ENDIAN);

// the first byte of compressed stream gives the formatFlags
final int formatFlags = inBuffer.get() & 0xFF;
final RANSNx16Params ransNx16Params = new RANSNx16Params(formatFlags);
Expand Down Expand Up @@ -70,7 +69,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
uncompressedRLEOutputLength = outSize;
outSize = Utils.readUint7(inBuffer);
// TODO: maybe move decodeRLEMeta in-line
uncompressedRLEMetaData = decodeRLEMeta(inBuffer, ransNx16Params, uncompressedRLEMetaDataLength, rleSymbols);
uncompressedRLEMetaData = decodeRLEMeta(inBuffer, uncompressedRLEMetaDataLength, rleSymbols);
}

ByteBuffer outBuffer = ByteBuffer.allocate(outSize);
Expand All @@ -86,7 +85,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
uncompressOrder0WayN(inBuffer, outBuffer, outSize, ransNx16Params);
break;
case ONE:
uncompressOrder1WayN(inBuffer, outBuffer, outSize, ransNx16Params);
uncompressOrder1WayN(inBuffer, outBuffer, ransNx16Params);
break;
default:
throw new RuntimeException("Unknown rANS order: " + ransNx16Params.getOrder());
Expand Down Expand Up @@ -167,7 +166,6 @@ private ByteBuffer uncompressOrder0WayN(
private ByteBuffer uncompressOrder1WayN(
final ByteBuffer inBuffer,
final ByteBuffer outBuffer,
final int outSize,
final RANSNx16Params ransNx16Params) {
initializeRANSDecoder();

Expand Down Expand Up @@ -286,7 +284,7 @@ private void readFrequencyTableOrder0(

private void readFrequencyTableOrder1(
final ByteBuffer cp,
int shift) {
final int shift) {
final int[][] frequencies = new int[Constants.NUMBER_OF_SYMBOLS][Constants.NUMBER_OF_SYMBOLS];
final ArithmeticDecoder[] D = getD();
final RANSDecodingSymbol[][] decodingSymbols = getDecodingSymbols();
Expand Down Expand Up @@ -349,7 +347,10 @@ private static int[] readAlphabet(final ByteBuffer cp){
return alphabet;
}

private ByteBuffer decodeRLEMeta(final ByteBuffer inBuffer , final RANSParams ransParams, final int uncompressedRLEMetaDataLength, final int[] rleSymbols) {
private ByteBuffer decodeRLEMeta(
final ByteBuffer inBuffer,
final int uncompressedRLEMetaDataLength,
final int[] rleSymbols) {
ByteBuffer uncompressedRLEMetaData;
final int compressedRLEMetaDataLength;
if ((uncompressedRLEMetaDataLength & 0x01)!=0) {
Expand All @@ -370,15 +371,19 @@ private ByteBuffer decodeRLEMeta(final ByteBuffer inBuffer , final RANSParams ra

int numRLESymbols = uncompressedRLEMetaData.get() & 0xFF;
if (numRLESymbols == 0) {
numRLESymbols = 256;
numRLESymbols = Constants.NUMBER_OF_SYMBOLS;
}
for (int i = 0; i< numRLESymbols; i++) {
rleSymbols[uncompressedRLEMetaData.get() & 0xFF] = 1;
}
return uncompressedRLEMetaData;
}

private ByteBuffer decodeRLE(ByteBuffer inBuffer , final int[] rleSymbols, final ByteBuffer uncompressedRLEMetaData, int uncompressedRLEOutputLength) {
private ByteBuffer decodeRLE(
ByteBuffer inBuffer,
final int[] rleSymbols,
final ByteBuffer uncompressedRLEMetaData,
final int uncompressedRLEOutputLength) {
ByteBuffer rleOutBuffer = ByteBuffer.allocate(uncompressedRLEOutputLength);
int j = 0;
for(int i = 0; j< uncompressedRLEOutputLength; i++){
Expand All @@ -396,7 +401,11 @@ private ByteBuffer decodeRLE(ByteBuffer inBuffer , final int[] rleSymbols, final
return inBuffer;
}

private ByteBuffer decodePack(ByteBuffer inBuffer, final int[] packMappingTable, int numSymbols, int uncompressedPackOutputLength) {
private ByteBuffer decodePack(
ByteBuffer inBuffer,
final int[] packMappingTable,
final int numSymbols,
final int uncompressedPackOutputLength) {
ByteBuffer outBufferPack = ByteBuffer.allocate(uncompressedPackOutputLength);
int j = 0;

Expand Down Expand Up @@ -445,38 +454,35 @@ else if (numSymbols <= 16){
return inBuffer;
}

private ByteBuffer decodeStripe(ByteBuffer inBuffer, final int outSize){

private ByteBuffer decodeStripe(final ByteBuffer inBuffer, final int outSize){
final int numInterleaveStreams = inBuffer.get() & 0xFF;

// retrieve lengths of compressed interleaved streams
int[] clen = new int[numInterleaveStreams];
final int[] compressedLengths = new int[numInterleaveStreams];
for ( int j=0; j<numInterleaveStreams; j++ ){
clen[j] = Utils.readUint7(inBuffer);
compressedLengths[j] = Utils.readUint7(inBuffer);
}

// Decode the compressed interleaved stream
int[] ulen = new int[numInterleaveStreams];
ByteBuffer[] T = new ByteBuffer[numInterleaveStreams];

final int[] uncompressedLengths = new int[numInterleaveStreams];
final ByteBuffer[] TransposedData = new ByteBuffer[numInterleaveStreams];
for ( int j=0; j<numInterleaveStreams; j++){
ulen[j] = (int) Math.floor(((double) outSize)/numInterleaveStreams);
uncompressedLengths[j] = (int) Math.floor(((double) outSize)/numInterleaveStreams);
if ((outSize % numInterleaveStreams) > j){
ulen[j]++;
uncompressedLengths[j]++;
}

T[j] = uncompress(inBuffer, ulen[j]);
TransposedData[j] = uncompress(inBuffer, uncompressedLengths[j]);
}

// Transpose
ByteBuffer out = ByteBuffer.allocate(outSize);
final ByteBuffer outBuffer = ByteBuffer.allocate(outSize);
for (int j = 0; j <numInterleaveStreams; j++) {
for (int i = 0; i < ulen[j]; i++) {
out.put((i*numInterleaveStreams)+j, T[j].get(i));
for (int i = 0; i < uncompressedLengths[j]; i++) {
outBuffer.put((i*numInterleaveStreams)+j, TransposedData[j].get(i));
}
}

return out;
return outBuffer;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN
// NoSize
if (!ransNx16Params.isNosz()) {
// the NoSize flag is not set, so the original (uncompressed) size is written to the output
int insize = inBuffer.remaining();
Utils.writeUint7(insize,outBuffer);
Utils.writeUint7(inBuffer.remaining(),outBuffer);
}

ByteBuffer inputBuffer = inBuffer;
Expand Down Expand Up @@ -68,7 +67,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN

// RLE
if (ransNx16Params.isRLE()){
inputBuffer = encodeRLE(inputBuffer, ransNx16Params, outBuffer);
inputBuffer = encodeRLE(inputBuffer, outBuffer);
}


Expand Down Expand Up @@ -480,13 +479,10 @@ private static void writeAlphabet(final ByteBuffer cp, final int[] F) {
}

private void buildSymsOrder0(final int[] F) {
final RANSEncodingSymbol[] syms = getEncodingSymbols()[0];
// updates the RANSEncodingSymbol array for all the symbols

// TODO: commented out to suppress spotBugs warning
//final int[] C = new int[Constants.NUMBER_OF_SYMBOLS];
// updates all the encodingSymbols
final RANSEncodingSymbol[] syms = getEncodingSymbols()[0];

// T = running sum of frequencies including the current symbol
// F[j] = frequency of symbol "j"
// cumulativeFreq = cumulative frequency of all the symbols preceding "j" (excluding the frequency of symbol "j")
int cumulativeFreq = 0;
Expand Down Expand Up @@ -515,44 +511,44 @@ private void buildSymsOrder1(final int[][] F) {
}
}

private ByteBuffer encodeRLE(final ByteBuffer inBuffer ,final RANSParams ransParams, final ByteBuffer outBuffer){
private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuffer){

// Find the symbols that benefit from RLE, i.e., the symbols that occur more than 2 times in succession.
// spec: For symbols that occur many times in succession, we can replace them with a single symbol and a count.
final int[] rleSymbols = new int[Constants.NUMBER_OF_SYMBOLS];
final int[] runCounts = new int[Constants.NUMBER_OF_SYMBOLS];
int inputSize = inBuffer.remaining();

int lastSymbol = -1;
for (int i = 0; i < inputSize; i++) {
int currentSymbol = inBuffer.get(i)&0xFF;
rleSymbols[currentSymbol] += (currentSymbol==lastSymbol ? 1:-1);
runCounts[currentSymbol] += (currentSymbol==lastSymbol ? 1:-1);
lastSymbol = currentSymbol;
}

// numRLESymbols is the number of symbols that are run length encoded
int numRLESymbols = 0;
for (int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i++) {
if (rleSymbols[i]>0) {
if (runCounts[i]>0) {
numRLESymbols++;
}
}

if (numRLESymbols==0) {
// Format cannot cope with zero RLE symbols, so pick one!
numRLESymbols = 1;
rleSymbols[0] = 1;
runCounts[0] = 1;
}

// create rleMetaData buffer to store rle metadata.
// This buffer will be compressed using compressOrder0WayN towards the end of this method
// TODO: How did we come up with this calculation for Buffer size? numRLESymbols+1+inputSize
ByteBuffer rleMetaData = ByteBuffer.allocate(numRLESymbols+1+inputSize); // rleMetaData

// write number of symbols that are run length encoded to the outBuffer
// write number of symbols that are run length encoded
rleMetaData.put((byte) numRLESymbols);

for (int i=0; i<256; i++){
if (rleSymbols[i] >0){
for (int i=0; i<Constants.NUMBER_OF_SYMBOLS; i++){
if (runCounts[i] >0){
// write the symbols that are run length encoded
rleMetaData.put((byte) i);
}
Expand All @@ -566,7 +562,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer ,final RANSParams ransPar

for (int i = 0; i < inputSize; i++) {
encodedData.put(encodedDataIdx++,inBuffer.get(i));
if (rleSymbols[inBuffer.get(i)&0xFF]>0) {
if (runCounts[inBuffer.get(i)&0xFF]>0) {
lastSymbol = inBuffer.get(i) & 0xFF;
int run = 0;

Expand All @@ -585,7 +581,6 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer ,final RANSParams ransPar

encodedData.limit(encodedDataIdx);
// limit and rewind
// TODO: check if position of rleMetadata is at the end of the buffer as expected
rleMetaData.limit(rleMetaData.position());
rleMetaData.rewind();

Expand Down
Loading

0 comments on commit f7e6c57

Please sign in to comment.