diff --git a/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java b/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java new file mode 100644 index 0000000000..6d9a725696 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/compression/CompressionUtils.java @@ -0,0 +1,179 @@ +package htsjdk.samtools.cram.compression; + +import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.compression.rans.Constants; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +public class CompressionUtils { + public static void writeUint7(final int i, final ByteBuffer cp) { + int s = 0; + int X = i; + do { + s += 7; + X >>= 7; + } while (X > 0); + do { + s -= 7; + //writeByte + final int s_ = (s > 0) ? 1 : 0; + cp.put((byte) (((i >> s) & 0x7f) + (s_ << 7))); + } while (s > 0); + } + + public static int readUint7(final ByteBuffer cp) { + int i = 0; + int c; + do { + //read byte + c = cp.get(); + i = (i << 7) | (c & 0x7f); + } while ((c & 0x80) != 0); + return i; + } + + public static ByteBuffer encodePack( + final ByteBuffer inBuffer, + final ByteBuffer outBuffer, + final int[] frequencyTable, + final int[] packMappingTable, + final int numSymbols){ + final int inSize = inBuffer.remaining(); + final ByteBuffer encodedBuffer; + if (numSymbols <= 1) { + encodedBuffer = CompressionUtils.allocateByteBuffer(0); + } else if (numSymbols <= 2) { + + // 1 bit per value + final int encodedBufferSize = (int) Math.ceil((double) inSize/8); + encodedBuffer = CompressionUtils.allocateByteBuffer(encodedBufferSize); + int j = -1; + for (int i = 0; i < inSize; i ++) { + if (i % 8 == 0) { + encodedBuffer.put(++j, (byte) 0); + } + encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << (i % 8)))); + } + } else if (numSymbols <= 4) { + + // 2 bits per value + final int encodedBufferSize = (int) Math.ceil((double) inSize/4); + encodedBuffer = CompressionUtils.allocateByteBuffer(encodedBufferSize); + int j = -1; + for (int i = 0; i < inSize; i ++) { + if (i % 4 == 0) { + encodedBuffer.put(++j, (byte) 0); + } + encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 4) * 2)))); + } + } else { + + // 4 bits per value + final int encodedBufferSize = (int) Math.ceil((double)inSize/2); + encodedBuffer = CompressionUtils.allocateByteBuffer(encodedBufferSize); + int j = -1; + for (int i = 0; i < inSize; i ++) { + if (i % 2 == 0) { + encodedBuffer.put(++j, (byte) 0); + } + encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 2) * 4)))); + } + } + + // write numSymbols + outBuffer.put((byte) numSymbols); + + // write mapping table "packMappingTable" that converts mapped value to original symbol + for(int i = 0; i < Constants.NUMBER_OF_SYMBOLS; i ++) { + if (frequencyTable[i] > 0) { + outBuffer.put((byte) i); + } + } + + // write the length of data + CompressionUtils.writeUint7(encodedBuffer.limit(), outBuffer); + return encodedBuffer; // Here position = 0 since we have always accessed the data buffer using index + } + + public static ByteBuffer decodePack( + final ByteBuffer inBuffer, + final byte[] packMappingTable, + final int numSymbols, + final int uncompressedPackOutputLength) { + final ByteBuffer outBufferPack = CompressionUtils.allocateByteBuffer(uncompressedPackOutputLength); + int j = 0; + if (numSymbols <= 1) { + for (int i=0; i < uncompressedPackOutputLength; i++){ + outBufferPack.put(i, packMappingTable[0]); + } + } + + // 1 bit per value + else if (numSymbols <= 2) { + int v = 0; + for (int i=0; i < uncompressedPackOutputLength; i++){ + if (i % 8 == 0){ + v = inBuffer.get(j++); + } + outBufferPack.put(i, packMappingTable[v & 1]); + v >>=1; + } + } + + // 2 bits per value + else if (numSymbols <= 4){ + int v = 0; + for(int i=0; i < uncompressedPackOutputLength; i++){ + if (i % 4 == 0){ + v = inBuffer.get(j++); + } + outBufferPack.put(i, packMappingTable[v & 3]); + v >>=2; + } + } + + // 4 bits per value + else if (numSymbols <= 16){ + int v = 0; + for(int i=0; i < uncompressedPackOutputLength; i++){ + if (i % 2 == 0){ + v = inBuffer.get(j++); + } + outBufferPack.put(i, packMappingTable[v & 15]); + v >>=4; + } + } + return outBufferPack; + } + + + + public static ByteBuffer allocateOutputBuffer(final int inSize) { + // This calculation is identical to the one in samtools rANS_static.c + // Presumably the frequency table (always big enough for order 1) = 257*257, + // then * 3 for each entry (byte->symbol, 2 bytes -> scaled frequency), + // + 9 for the header (order byte, and 2 int lengths for compressed/uncompressed lengths). + final int compressedSize = (int) (inSize + 257 * 257 * 3 + 9); + final ByteBuffer outputBuffer = ByteBuffer.allocate(compressedSize).order(ByteOrder.LITTLE_ENDIAN); + if (outputBuffer.remaining() < compressedSize) { + throw new CRAMException("Failed to allocate sufficient buffer size for RANS coder."); + } + return outputBuffer; + } + + // returns a new LITTLE_ENDIAN ByteBuffer of size = bufferSize + public static ByteBuffer allocateByteBuffer(final int bufferSize){ + return ByteBuffer.allocate(bufferSize).order(ByteOrder.LITTLE_ENDIAN); + } + + // returns a LITTLE_ENDIAN ByteBuffer that is created by wrapping a byte[] + public static ByteBuffer wrap(final byte[] inputBytes){ + return ByteBuffer.wrap(inputBytes).order(ByteOrder.LITTLE_ENDIAN); + } + + // returns a LITTLE_ENDIAN ByteBuffer that is created by inputBuffer.slice() + public static ByteBuffer slice(final ByteBuffer inputBuffer){ + return inputBuffer.slice().order(ByteOrder.LITTLE_ENDIAN); + } +} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java index de7272eb53..06abbca89d 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/Utils.java @@ -1,8 +1,6 @@ package htsjdk.samtools.cram.compression.rans; -import htsjdk.samtools.cram.CRAMException; import java.nio.ByteBuffer; -import java.nio.ByteOrder; final public class Utils { @@ -54,32 +52,6 @@ public static long RANSDecodeRenormalizeNx16(final long r, final ByteBuffer byte return ret; } - public static void writeUint7(final int i, final ByteBuffer cp) { - int s = 0; - int X = i; - do { - s += 7; - X >>= 7; - } while (X > 0); - do { - s -= 7; - //writeByte - final int s_ = (s > 0) ? 1 : 0; - cp.put((byte) (((i >> s) & 0x7f) + (s_ << 7))); - } while (s > 0); - } - - public static int readUint7(final ByteBuffer cp) { - int i = 0; - int c; - do { - //read byte - c = cp.get(); - i = (i << 7) | (c & 0x7f); - } while ((c & 0x80) != 0); - return i; - } - public static void normaliseFrequenciesOrder0(final int[] F, final int bits) { // Returns an array of normalised Frequencies, // such that the frequencies add up to 1<symbol, 2 bytes -> scaled frequency), - // + 9 for the header (order byte, and 2 int lengths for compressed/uncompressed lengths). - final int compressedSize = (int) (inSize + 257 * 257 * 3 + 9); - final ByteBuffer outputBuffer = ByteBuffer.allocate(compressedSize).order(ByteOrder.LITTLE_ENDIAN); - if (outputBuffer.remaining() < compressedSize) { - throw new CRAMException("Failed to allocate sufficient buffer size for RANS coder."); - } - return outputBuffer; - } - - // returns a new LITTLE_ENDIAN ByteBuffer of size = bufferSize - public static ByteBuffer allocateByteBuffer(final int bufferSize){ - return ByteBuffer.allocate(bufferSize).order(ByteOrder.LITTLE_ENDIAN); - } - - // returns a LITTLE_ENDIAN ByteBuffer that is created by wrapping a byte[] - public static ByteBuffer wrap(final byte[] inputBytes){ - return ByteBuffer.wrap(inputBytes).order(ByteOrder.LITTLE_ENDIAN); - } - - // returns a LITTLE_ENDIAN ByteBuffer that is created by inputBuffer.slice() - public static ByteBuffer slice(final ByteBuffer inputBuffer){ - return inputBuffer.slice().order(ByteOrder.LITTLE_ENDIAN); - } - } \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java index 6efcc31868..25b9b773e9 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Decode.java @@ -1,6 +1,7 @@ package htsjdk.samtools.cram.compression.rans.rans4x8; import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.compression.CompressionUtils; import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; import htsjdk.samtools.cram.compression.rans.Constants; import htsjdk.samtools.cram.compression.rans.RANSDecode; @@ -15,7 +16,7 @@ public class RANS4x8Decode extends RANSDecode { private static final int RAW_BYTE_LENGTH = 4; - private static final ByteBuffer EMPTY_BUFFER = Utils.allocateByteBuffer(0); + private static final ByteBuffer EMPTY_BUFFER = CompressionUtils.allocateByteBuffer(0); // This method assumes that inBuffer is already rewound. // It uncompresses the data in the inBuffer, leaving it consumed. @@ -39,7 +40,7 @@ public ByteBuffer uncompress(final ByteBuffer inBuffer) { // uncompressed bytes length final int outSize = inBuffer.getInt(); - final ByteBuffer outBuffer = Utils.allocateByteBuffer(outSize); + final ByteBuffer outBuffer = CompressionUtils.allocateByteBuffer(outSize); initializeRANSDecoder(); switch (order) { case ZERO: diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java index 8ac4b618b3..638882fb67 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/rans4x8/RANS4x8Encode.java @@ -1,6 +1,7 @@ package htsjdk.samtools.cram.compression.rans.rans4x8; import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.compression.CompressionUtils; import htsjdk.samtools.cram.compression.rans.Constants; import htsjdk.samtools.cram.compression.rans.RANSEncode; import htsjdk.samtools.cram.compression.rans.RANSEncodingSymbol; @@ -15,7 +16,7 @@ public class RANS4x8Encode extends RANSEncode { // streams smaller than this value don't have sufficient symbol context for ORDER-1 encoding, // so always use ORDER-0 private static final int MINIMUM_ORDER_1_SIZE = 4; - private static final ByteBuffer EMPTY_BUFFER = Utils.allocateByteBuffer(0); + private static final ByteBuffer EMPTY_BUFFER = CompressionUtils.allocateByteBuffer(0); // This method assumes that inBuffer is already rewound. // It compresses the data in the inBuffer, leaving it consumed. @@ -44,7 +45,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANS4x8Params params private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { final int inputSize = inBuffer.remaining(); - final ByteBuffer outBuffer = Utils.allocateOutputBuffer(inputSize); + final ByteBuffer outBuffer = CompressionUtils.allocateOutputBuffer(inputSize); // move the output buffer ahead to the start of the frequency table (we'll come back and // write the output stream prefix at the end of this method) @@ -55,7 +56,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { // using the normalised frequencies, set the RANSEncodingSymbols buildSymsOrder0(normalizedFreq); - final ByteBuffer cp = Utils.slice(outBuffer); + final ByteBuffer cp = CompressionUtils.slice(outBuffer); // write Frequency table final int frequencyTableSize = writeFrequenciesOrder0(cp, normalizedFreq); @@ -65,7 +66,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { final RANSEncodingSymbol[] syms = getEncodingSymbols()[0]; final int in_size = inBuffer.remaining(); long rans0, rans1, rans2, rans3; - final ByteBuffer ptr = Utils.slice(cp); + final ByteBuffer ptr = CompressionUtils.slice(cp); rans0 = Constants.RANS_4x8_LOWER_BOUND; rans1 = Constants.RANS_4x8_LOWER_BOUND; rans2 = Constants.RANS_4x8_LOWER_BOUND; @@ -112,7 +113,7 @@ private ByteBuffer compressOrder0Way4(final ByteBuffer inBuffer) { private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { final int inSize = inBuffer.remaining(); - final ByteBuffer outBuffer = Utils.allocateOutputBuffer(inSize); + final ByteBuffer outBuffer = CompressionUtils.allocateOutputBuffer(inSize); // move to start of frequency outBuffer.position(Constants.RANS_4x8_PREFIX_BYTE_LENGTH); @@ -123,7 +124,7 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { // using the normalised frequencies, set the RANSEncodingSymbols buildSymsOrder1(normalizedFreq); - final ByteBuffer cp = Utils.slice(outBuffer); + final ByteBuffer cp = CompressionUtils.slice(outBuffer); final int frequencyTableSize = writeFrequenciesOrder1(cp, normalizedFreq); inBuffer.rewind(); final int in_size = inBuffer.remaining(); @@ -156,7 +157,7 @@ private ByteBuffer compressOrder1Way4(final ByteBuffer inBuffer) { byte l3 = inBuffer.get(in_size - 1); // Slicing is needed for buffer reversing later - final ByteBuffer ptr = Utils.slice(cp); + final ByteBuffer ptr = CompressionUtils.slice(cp); final RANSEncodingSymbol[][] syms = getEncodingSymbols(); for (i3 = in_size - 2; i3 > 4 * isz4 - 2 && i3 >= 0; i3--) { final byte c3 = inBuffer.get(i3); diff --git a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java index abd4ac85da..dcb81c8d5f 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/rans/ransnx16/RANSNx16Decode.java @@ -1,6 +1,7 @@ package htsjdk.samtools.cram.compression.rans.ransnx16; import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.compression.CompressionUtils; import htsjdk.samtools.cram.compression.rans.ArithmeticDecoder; import htsjdk.samtools.cram.compression.rans.Constants; import htsjdk.samtools.cram.compression.rans.RANSDecode; @@ -12,7 +13,7 @@ import java.util.Arrays; public class RANSNx16Decode extends RANSDecode { - private static final ByteBuffer EMPTY_BUFFER = Utils.allocateByteBuffer(0); + private static final ByteBuffer EMPTY_BUFFER = CompressionUtils.allocateByteBuffer(0); private static final int FREQ_TABLE_OPTIONALLY_COMPRESSED_MASK = 0x01; private static final int RLE_META_OPTIONALLY_COMPRESSED_MASK = 0x01; @@ -36,7 +37,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { final RANSNx16Params ransNx16Params = new RANSNx16Params(formatFlags); // if nosz flag is set, then uncompressed size is not recorded. - int uncompressedSize = ransNx16Params.isNosz() ? outSize : Utils.readUint7(inBuffer); + int uncompressedSize = ransNx16Params.isNosz() ? outSize : CompressionUtils.readUint7(inBuffer); // if stripe, then decodeStripe if (ransNx16Params.isStripe()) { @@ -57,7 +58,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { for (int i = 0; i < numSymbols; i++) { packMappingTable[i] = inBuffer.get(); } - uncompressedSize = Utils.readUint7(inBuffer); + uncompressedSize = CompressionUtils.readUint7(inBuffer); } else { throw new CRAMException("Bit Packing is not permitted when number of distinct symbols is greater than 16 or equal to 0. " + "Number of distinct symbols: " + numSymbols); @@ -70,9 +71,9 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { ByteBuffer uncompressedRLEMetaData = null; if (ransNx16Params.isRLE()) { rleSymbols = new int[Constants.NUMBER_OF_SYMBOLS]; - final int uncompressedRLEMetaDataLength = Utils.readUint7(inBuffer); + final int uncompressedRLEMetaDataLength = CompressionUtils.readUint7(inBuffer); uncompressedRLEOutputLength = uncompressedSize; - uncompressedSize = Utils.readUint7(inBuffer); + uncompressedSize = CompressionUtils.readUint7(inBuffer); uncompressedRLEMetaData = decodeRLEMeta(inBuffer, uncompressedRLEMetaDataLength, rleSymbols, ransNx16Params); } @@ -80,14 +81,14 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { // If CAT is set then, the input is uncompressed if (ransNx16Params.isCAT()) { - outBuffer = Utils.slice(inBuffer); + outBuffer = CompressionUtils.slice(inBuffer); // While resetting the position to the end is not strictly necessary, // it is being done for the sake of completeness and // to meet the requirements of the tests that verify the boundary conditions. inBuffer.position(inBuffer.limit()); } else { - outBuffer = Utils.allocateByteBuffer(uncompressedSize); + outBuffer = CompressionUtils.allocateByteBuffer(uncompressedSize); // uncompressedSize is 0 in cases where Pack flag is used // and number of distinct symbols in the raw data is 1 @@ -112,7 +113,7 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { // if pack, then decodePack if (ransNx16Params.isPack()) { - outBuffer = decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); + outBuffer = CompressionUtils.decodePack(outBuffer, packMappingTable, numSymbols, packDataLength); } return outBuffer; } @@ -186,16 +187,16 @@ private void uncompressOrder1WayN( // so is optionally compressed using the order-0 rANSNx16 codec with a fixed 4-way interleaving. // if optionalCompressFlag is true, the frequency table was compressed using RANS Nx16, N=4 Order 0 - final int uncompressedLength = Utils.readUint7(inBuffer); - final int compressedLength = Utils.readUint7(inBuffer); + final int uncompressedLength = CompressionUtils.readUint7(inBuffer); + final int compressedLength = CompressionUtils.readUint7(inBuffer); byte[] compressedFreqTable = new byte[compressedLength]; // read compressedLength bytes into compressedFreqTable byte array inBuffer.get(compressedFreqTable,0,compressedLength); // decode the compressedFreqTable to get the uncompressedFreqTable using RANS Nx16, N=4 Order 0 uncompress - freqTableSource = Utils.allocateByteBuffer(uncompressedLength); - final ByteBuffer compressedFrequencyTableBuffer = Utils.wrap(compressedFreqTable); + freqTableSource = CompressionUtils.allocateByteBuffer(uncompressedLength); + final ByteBuffer compressedFrequencyTableBuffer = CompressionUtils.wrap(compressedFreqTable); // uncompress using RANSNx16 Order 0, Nway = 4 // formatFlags = (~RANSNx16Params.ORDER_FLAG_MASK & ~RANSNx16Params.N32_FLAG_MASK) = ~(RANSNx16Params.ORDER_FLAG_MASK | RANSNx16Params.N32_FLAG_MASK) @@ -269,7 +270,7 @@ private void readFrequencyTableOrder0( // read frequencies, normalise frequencies for (int j = 0; j < Constants.NUMBER_OF_SYMBOLS; j++) { if (alphabet[j] > 0) { - decoder.frequencies[j] = Utils.readUint7(cp); + decoder.frequencies[j] = CompressionUtils.readUint7(cp); } } Utils.normaliseFrequenciesOrder0Shift(decoder.frequencies, Constants.TOTAL_FREQ_SHIFT); @@ -303,7 +304,7 @@ private void readFrequencyTableOrder1( if (run > 0) { run--; } else { - D[i].frequencies[j] = Utils.readUint7(cp); + D[i].frequencies[j] = CompressionUtils.readUint7(cp); if (D[i].frequencies[j] == 0){ run = cp.get() & 0xFF; } @@ -363,13 +364,13 @@ private ByteBuffer decodeRLEMeta( if ((uncompressedRLEMetaDataLength & RLE_META_OPTIONALLY_COMPRESSED_MASK)!=0) { final byte[] uncompressedRLEMetaDataArray = new byte[(uncompressedRLEMetaDataLength-1)/2]; inBuffer.get(uncompressedRLEMetaDataArray, 0, (uncompressedRLEMetaDataLength-1)/2); - uncompressedRLEMetaData = Utils.wrap(uncompressedRLEMetaDataArray); + uncompressedRLEMetaData = CompressionUtils.wrap(uncompressedRLEMetaDataArray); } else { - final int compressedRLEMetaDataLength = Utils.readUint7(inBuffer); + final int compressedRLEMetaDataLength = CompressionUtils.readUint7(inBuffer); final byte[] compressedRLEMetaDataArray = new byte[compressedRLEMetaDataLength]; inBuffer.get(compressedRLEMetaDataArray,0,compressedRLEMetaDataLength); - final ByteBuffer compressedRLEMetaData = Utils.wrap(compressedRLEMetaDataArray); - uncompressedRLEMetaData = Utils.allocateByteBuffer(uncompressedRLEMetaDataLength / 2); + final ByteBuffer compressedRLEMetaData = CompressionUtils.wrap(compressedRLEMetaDataArray); + uncompressedRLEMetaData = CompressionUtils.allocateByteBuffer(uncompressedRLEMetaDataLength / 2); // uncompress using Order 0 and N = Nway uncompressOrder0WayN( compressedRLEMetaData, @@ -393,12 +394,12 @@ private ByteBuffer decodeRLE( final int[] rleSymbols, final ByteBuffer uncompressedRLEMetaData, final int uncompressedRLEOutputLength) { - final ByteBuffer rleOutBuffer = Utils.allocateByteBuffer(uncompressedRLEOutputLength); + final ByteBuffer rleOutBuffer = CompressionUtils.allocateByteBuffer(uncompressedRLEOutputLength); int j = 0; for(int i = 0; j< uncompressedRLEOutputLength; i++){ final byte sym = inBuffer.get(i); if (rleSymbols[sym & 0xFF]!=0){ - final int run = Utils.readUint7(uncompressedRLEMetaData); + final int run = CompressionUtils.readUint7(uncompressedRLEMetaData); for (int r=0; r<= run; r++){ rleOutBuffer.put(j++, sym); } @@ -409,63 +410,12 @@ private ByteBuffer decodeRLE( return rleOutBuffer; } - private ByteBuffer decodePack( - final ByteBuffer inBuffer, - final byte[] packMappingTable, - final int numSymbols, - final int uncompressedPackOutputLength) { - final ByteBuffer outBufferPack = Utils.allocateByteBuffer(uncompressedPackOutputLength); - int j = 0; - if (numSymbols <= 1) { - for (int i=0; i < uncompressedPackOutputLength; i++){ - outBufferPack.put(i, packMappingTable[0]); - } - } - - // 1 bit per value - else if (numSymbols <= 2) { - int v = 0; - for (int i=0; i < uncompressedPackOutputLength; i++){ - if (i % 8 == 0){ - v = inBuffer.get(j++); - } - outBufferPack.put(i, packMappingTable[v & 1]); - v >>=1; - } - } - - // 2 bits per value - else if (numSymbols <= 4){ - int v = 0; - for(int i=0; i < uncompressedPackOutputLength; i++){ - if (i % 4 == 0){ - v = inBuffer.get(j++); - } - outBufferPack.put(i, packMappingTable[v & 3]); - v >>=2; - } - } - - // 4 bits per value - else if (numSymbols <= 16){ - int v = 0; - for(int i=0; i < uncompressedPackOutputLength; i++){ - if (i % 2 == 0){ - v = inBuffer.get(j++); - } - outBufferPack.put(i, packMappingTable[v & 15]); - v >>=4; - } - } - return outBufferPack; - } - private ByteBuffer decodeStripe(final ByteBuffer inBuffer, final int outSize){ final int numInterleaveStreams = inBuffer.get() & 0xFF; // read lengths of compressed interleaved streams for ( int j=0; j { // Stripe flag is not implemented in the write implementation ///////////////////////////////////////////////////////////////////////////////////////////////// - private static final ByteBuffer EMPTY_BUFFER = Utils.allocateByteBuffer(0); + private static final ByteBuffer EMPTY_BUFFER = CompressionUtils.allocateByteBuffer(0); // This method assumes that inBuffer is already rewound. // It compresses the data in the inBuffer, leaving it consumed. @@ -24,14 +25,14 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN if (inBuffer.remaining() == 0) { return EMPTY_BUFFER; } - final ByteBuffer outBuffer = Utils.allocateOutputBuffer(inBuffer.remaining()); + final ByteBuffer outBuffer = CompressionUtils.allocateOutputBuffer(inBuffer.remaining()); final int formatFlags = ransNx16Params.getFormatFlags(); outBuffer.put((byte) (formatFlags)); // one byte for formatFlags // NoSize if (!ransNx16Params.isNosz()) { // original size is not recorded - Utils.writeUint7(inBuffer.remaining(),outBuffer); + CompressionUtils.writeUint7(inBuffer.remaining(),outBuffer); } ByteBuffer inputBuffer = inBuffer; @@ -59,7 +60,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RANSNx16Params ransN // skip Packing if numSymbols = 0 or numSymbols > 16 if (numSymbols !=0 && numSymbols <= 16) { - inputBuffer = encodePack(inputBuffer, outBuffer, frequencyTable, packMappingTable, numSymbols); + inputBuffer = CompressionUtils.encodePack(inputBuffer, outBuffer, frequencyTable, packMappingTable, numSymbols); } else { // unset pack flag in the first byte of the outBuffer outBuffer.put(0,(byte)(outBuffer.get(0) & ~RANSNx16Params.PACK_FLAG_MASK)); @@ -117,7 +118,7 @@ private void compressOrder0WayN ( } final int prefix_size = outBuffer.position(); final int[] F = buildFrequenciesOrder0(inBuffer); - final ByteBuffer cp = Utils.slice(outBuffer); + final ByteBuffer cp = CompressionUtils.slice(outBuffer); // Normalize Frequencies such that sum of Frequencies = 1 << bitsize Utils.normaliseFrequenciesOrder0(F, bitSize); @@ -150,7 +151,7 @@ private void compressOrder0WayN ( for (int r=0; r0){ @@ -198,8 +199,8 @@ private void compressOrder1WayN ( Utils.normaliseFrequenciesOrder1(frequencies, Constants.TOTAL_FREQ_SHIFT); final int prefix_size = outBuffer.position(); - ByteBuffer frequencyTable = Utils.allocateOutputBuffer(1); - final ByteBuffer compressedFrequencyTable = Utils.allocateOutputBuffer(1); + ByteBuffer frequencyTable = CompressionUtils.allocateOutputBuffer(1); + final ByteBuffer compressedFrequencyTable = CompressionUtils.allocateOutputBuffer(1); // uncompressed frequency table final int uncompressedFrequencyTableSize = writeFrequenciesOrder1(frequencyTable,frequencies); @@ -216,7 +217,7 @@ private void compressOrder1WayN ( // TODO: we should work on a more permanent solution for this issue! initializeRANSEncoder(); final int compressedFrequencyTableSize = compressedFrequencyTable.limit(); - final ByteBuffer cp = Utils.slice(outBuffer); + final ByteBuffer cp = CompressionUtils.slice(outBuffer); // spec: The order-1 frequency table itself may still be quite large, // so is optionally compressed using the order-0 rANSNx16 codec with a fixed 4-way interleaving. @@ -224,8 +225,8 @@ private void compressOrder1WayN ( // first byte cp.put((byte) (1 | Constants.TOTAL_FREQ_SHIFT << 4 )); - Utils.writeUint7(uncompressedFrequencyTableSize,cp); - Utils.writeUint7(compressedFrequencyTableSize,cp); + CompressionUtils.writeUint7(uncompressedFrequencyTableSize,cp); + CompressionUtils.writeUint7(compressedFrequencyTableSize,cp); // write bytes from compressedFrequencyTable to cp int i=0; @@ -283,7 +284,7 @@ private void compressOrder1WayN ( } // Slicing is needed for buffer reversing later. - final ByteBuffer ptr = Utils.slice(cp); + final ByteBuffer ptr = CompressionUtils.slice(cp); final RANSEncodingSymbol[][] ransEncodingSymbols = getEncodingSymbols(); final byte[] context = new byte[Nway]; @@ -419,7 +420,7 @@ private static int writeFrequenciesOrder1(final ByteBuffer cp, final int[][] F) if (run > 0) { run--; } else { - Utils.writeUint7(F[i][j],cp); + CompressionUtils.writeUint7(F[i][j],cp); if (F[i][j] == 0) { // Count how many more zero-freqs we have for (int k = j+1; k < Constants.NUMBER_OF_SYMBOLS; k++) { @@ -502,7 +503,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff // create rleMetaData buffer to store rle metadata. // This buffer will be compressed using compressOrder0WayN towards the end of this method // TODO: How did we come up with this calculation for Buffer size? numRLESymbols+1+inputSize - final ByteBuffer rleMetaData = Utils.allocateByteBuffer(numRLESymbols+1+inputSize); // rleMetaData + final ByteBuffer rleMetaData = CompressionUtils.allocateByteBuffer(numRLESymbols+1+inputSize); // rleMetaData // write number of symbols that are run length encoded rleMetaData.put((byte) numRLESymbols); @@ -517,7 +518,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff // Apply RLE // encodedBuffer -> input src data without repetition - final ByteBuffer encodedBuffer = Utils.allocateByteBuffer(inputSize); // rleInBuffer + final ByteBuffer encodedBuffer = CompressionUtils.allocateByteBuffer(inputSize); // rleInBuffer int encodedBufferIdx = 0; // rleInBufferIndex for (int i = 0; i < inputSize; i++) { @@ -532,7 +533,7 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff } // write the run value to metadata - Utils.writeUint7(run, rleMetaData); + CompressionUtils.writeUint7(run, rleMetaData); // go to the next element that is not equal to its previous element i += run; @@ -545,15 +546,15 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff rleMetaData.rewind(); // compress the rleMetaData Buffer - final ByteBuffer compressedRleMetaData = Utils.allocateOutputBuffer(rleMetaData.remaining()); + final ByteBuffer compressedRleMetaData = CompressionUtils.allocateOutputBuffer(rleMetaData.remaining()); // compress using Order 0 and N = Nway compressOrder0WayN(rleMetaData, new RANSNx16Params(0x00 | ransNx16Params.getFormatFlags() & RANSNx16Params.N32_FLAG_MASK),compressedRleMetaData); // write to compressedRleMetaData to outBuffer - Utils.writeUint7(rleMetaData.limit()*2, outBuffer); - Utils.writeUint7(encodedBufferIdx, outBuffer); - Utils.writeUint7(compressedRleMetaData.limit(),outBuffer); + CompressionUtils.writeUint7(rleMetaData.limit()*2, outBuffer); + CompressionUtils.writeUint7(encodedBufferIdx, outBuffer); + CompressionUtils.writeUint7(compressedRleMetaData.limit(),outBuffer); outBuffer.put(compressedRleMetaData); @@ -565,67 +566,4 @@ private ByteBuffer encodeRLE(final ByteBuffer inBuffer, final ByteBuffer outBuff return encodedBuffer; } - private ByteBuffer encodePack( - final ByteBuffer inBuffer , - final ByteBuffer outBuffer, - final int[] frequencyTable, - final int[] packMappingTable, - final int numSymbols){ - final int inSize = inBuffer.remaining(); - final ByteBuffer encodedBuffer; - if (numSymbols <= 1) { - encodedBuffer = Utils.allocateByteBuffer(0); - } else if (numSymbols <= 2) { - - // 1 bit per value - final int encodedBufferSize = (int) Math.ceil((double) inSize/8); - encodedBuffer = Utils.allocateByteBuffer(encodedBufferSize); - int j = -1; - for (int i = 0; i < inSize; i ++) { - if (i % 8 == 0) { - encodedBuffer.put(++j, (byte) 0); - } - encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << (i % 8)))); - } - } else if (numSymbols <= 4) { - - // 2 bits per value - final int encodedBufferSize = (int) Math.ceil((double) inSize/4); - encodedBuffer = Utils.allocateByteBuffer(encodedBufferSize); - int j = -1; - for (int i = 0; i < inSize; i ++) { - if (i % 4 == 0) { - encodedBuffer.put(++j, (byte) 0); - } - encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 4) * 2)))); - } - } else { - - // 4 bits per value - final int encodedBufferSize = (int) Math.ceil((double)inSize/2); - encodedBuffer = Utils.allocateByteBuffer(encodedBufferSize); - int j = -1; - for (int i = 0; i < inSize; i ++) { - if (i % 2 == 0) { - encodedBuffer.put(++j, (byte) 0); - } - encodedBuffer.put(j, (byte) (encodedBuffer.get(j) + (packMappingTable[inBuffer.get(i) & 0xFF] << ((i % 2) * 4)))); - } - } - - // write numSymbols - outBuffer.put((byte) numSymbols); - - // write mapping table "packMappingTable" that converts mapped value to original symbol - for(int i = 0 ; i < Constants.NUMBER_OF_SYMBOLS; i ++) { - if (frequencyTable[i] > 0) { - outBuffer.put((byte) i); - } - } - - // write the length of data - Utils.writeUint7(encodedBuffer.limit(), outBuffer); - return encodedBuffer; // Here position = 0 since we have always accessed the data buffer using index - } - } \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index c8379e8f63..9c1abafc29 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -1,10 +1,10 @@ package htsjdk.samtools.cram; import htsjdk.HtsjdkTest; +import htsjdk.samtools.cram.compression.CompressionUtils; import htsjdk.samtools.cram.compression.rans.RANSDecode; import htsjdk.samtools.cram.compression.rans.RANSEncode; import htsjdk.samtools.cram.compression.rans.RANSParams; -import htsjdk.samtools.cram.compression.rans.Utils; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; @@ -177,7 +177,7 @@ public void testRANSRoundTrip( // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results - final ByteBuffer uncompressedInteropBytes = Utils.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final ByteBuffer uncompressedInteropBytes = CompressionUtils.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); // Stripe Flag is not implemented in RANSNx16 Encoder. // The encoder throws CRAMException if Stripe Flag is used. @@ -208,8 +208,8 @@ public void testDecodeOnly( // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results - final ByteBuffer uncompressedInteropBytes = Utils.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); - final ByteBuffer preCompressedInteropBytes = Utils.wrap(IOUtils.toByteArray(preCompressedInteropStream)); + final ByteBuffer uncompressedInteropBytes = CompressionUtils.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final ByteBuffer preCompressedInteropBytes = CompressionUtils.wrap(IOUtils.toByteArray(preCompressedInteropStream)); // Use htsjdk to uncompress the precompressed file from htscodecs repo final ByteBuffer uncompressedHtsjdkBytes = ransDecode.uncompress(preCompressedInteropBytes); diff --git a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java index 4e8a1e6bd2..9495d826ef 100644 --- a/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java +++ b/src/test/java/htsjdk/samtools/cram/compression/rans/RansTest.java @@ -2,6 +2,7 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.cram.compression.CompressionUtils; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; @@ -174,7 +175,7 @@ public void testRoundTripTinySmallLarge( final TestDataEnvelope td, final Integer lowerLimit, final Integer upperLimit){ - final ByteBuffer in = Utils.wrap(td.testArray); + final ByteBuffer in = CompressionUtils.wrap(td.testArray); for (int rawSize = lowerLimit; rawSize < upperLimit; rawSize++) { in.position(0); in.limit(rawSize); @@ -188,7 +189,7 @@ public void testRans4x8BuffersMeetBoundaryExpectations( final RANS4x8Decode ransDecode, final RANS4x8Params params) { final int rawSize = 1001; - final ByteBuffer rawData = Utils.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); + final ByteBuffer rawData = CompressionUtils.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(rawSize,rawData,ransEncode, ransDecode,params); Assert.assertTrue(compressed.limit() > Constants.RANS_4x8_PREFIX_BYTE_LENGTH); // minimum prefix len when input is not Empty Assert.assertEquals(compressed.get(), (byte) params.getOrder().ordinal()); @@ -202,7 +203,7 @@ public void testRansNx16BuffersMeetBoundaryExpectations( final RANSNx16Decode ransDecode, final RANSNx16Params params) { final int rawSize = 1001; - final ByteBuffer rawData = Utils.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); + final ByteBuffer rawData = CompressionUtils.wrap(randomBytesFromGeometricDistribution(rawSize, 0.01)); final ByteBuffer compressed = ransBufferMeetBoundaryExpectations(rawSize,rawData,ransEncode,ransDecode,params); rawData.rewind(); Assert.assertTrue(compressed.limit() > 1); // minimum prefix len when input is not Empty @@ -227,7 +228,7 @@ public void testRansNx16BuffersMeetBoundaryExpectations( } // if nosz flag is not set, then the uncompressed size is recorded if (!params.isNosz()){ - Assert.assertEquals(Utils.readUint7(compressed), rawSize); + Assert.assertEquals(CompressionUtils.readUint7(compressed), rawSize); } } @@ -237,7 +238,7 @@ public void testRoundTrip( final RANSDecode ransDecode, final RANSParams params, final TestDataEnvelope td) { - ransRoundTrip(ransEncode, ransDecode, params, Utils.wrap(td.testArray)); + ransRoundTrip(ransEncode, ransDecode, params, CompressionUtils.wrap(td.testArray)); } @Test( @@ -251,7 +252,7 @@ public void testRansNx16RejectEncodeStripe( // When td is not Empty, Encoding with Stripe Flag should throw an Exception // as Encode Stripe is not implemented - ransEncode.compress(Utils.wrap(td.testArray), params); + ransEncode.compress(CompressionUtils.wrap(td.testArray), params); } @Test( @@ -261,7 +262,7 @@ public void testRansNx16RejectEncodeStripe( expectedExceptionsMessageRegExp = "Bit Packing is not permitted when number " + "of distinct symbols is greater than 16 or equal to 0. Number of distinct symbols: 0") public void testRANSNx16RejectDecodePack(){ - final ByteBuffer compressedData = Utils.wrap(new byte[]{(byte) RANSNx16Params.PACK_FLAG_MASK, (byte) 0x00, (byte) 0x00}); + final ByteBuffer compressedData = CompressionUtils.wrap(new byte[]{(byte) RANSNx16Params.PACK_FLAG_MASK, (byte) 0x00, (byte) 0x00}); final RANSNx16Decode ransDecode = new RANSNx16Decode(); ransDecode.uncompress(compressedData); }