Skip to content

Commit

Permalink
Adding support for gzip extra subfields.
Browse files Browse the repository at this point in the history
  • Loading branch information
Danny Deschenes committed Nov 15, 2024
1 parent 28c1441 commit 8a491d6
Show file tree
Hide file tree
Showing 5 changed files with 318 additions and 10 deletions.
209 changes: 209 additions & 0 deletions src/main/java/org/apache/commons/compress/compressors/gzip/Extra.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/


package org.apache.commons.compress.compressors.gzip;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Carrier collection for the subfields of a gzip header "extra" field.
 * <p>
 * Each subfield is encoded as two id bytes (SI1, SI2), a 16-bit little-endian payload length and then the payload
 * itself. The encoded size of all subfields together is limited to {@value #MAX_SIZE} bytes because the length of the
 * extra field is itself stored in 16 bits.
 * </p>
 */
public class Extra {

    /** Maximum encoded size in bytes of the whole extra field (largest value of a 16-bit length). */
    static final int MAX_SIZE = 0xFFFF;

    /** Shared empty array returned by {@link #toBytes()} when there is no subfield. */
    static final byte[] ZERO_BYTES = new byte[0];

    /** Bytes used by the header of one subfield: SI1, SI2 and the 2 bytes of payload length. */
    private static final int SUBFIELD_HEADER_SIZE = 4;

    /**
     * Parses the raw bytes of an extra field (without its own 16-bit size prefix) into its subfields.
     *
     * @param ba the encoded subfields, may be {@code null}
     * @return the parsed extra, or {@code null} if {@code ba} is {@code null}
     * @throws IOException if a subfield declares more payload bytes than remain, or if trailing bytes are left that
     *                     are too few to form a subfield header
     */
    static Extra fromBytes(byte[] ba) throws IOException {
        if (ba == null) {
            return null;
        }

        final Extra e = new Extra();

        int pos = 0;
        // Keep going as long as a complete subfield header (id + length) is available.
        while (pos <= ba.length - SUBFIELD_HEADER_SIZE) {
            final SubField f = new SubField();
            f.si1 = ba[pos++];
            f.si2 = ba[pos++];

            // The payload length is stored little endian.
            final int sublen = (ba[pos++] & 0xff) | ((ba[pos++] & 0xff) << 8);
            if (sublen > ba.length - pos) {
                throw new IOException("Extra subfield length exceeds remaining bytes in extra: " + sublen + " > "
                        + (ba.length - pos));
            }

            f.payload = new byte[sublen];
            System.arraycopy(ba, pos, f.payload, 0, sublen);
            pos += sublen;

            e.fieldsList.add(f);
            e.totalSize = pos;
        }

        // Between 1 and 3 bytes are left over: not enough for another subfield header.
        if (pos < ba.length) {
            throw new IOException("" + (ba.length - pos) + " remaining bytes not used to parse an extra subfield.");
        }

        return e;
    }

    // --------------

    /** The subfields, in the order they were parsed or appended. */
    final List<SubField> fieldsList = new ArrayList<>();

    /** Encoded size in bytes of all subfields (payloads plus 4 header bytes each). */
    int totalSize = 0;

    /**
     * Creates an empty extra.
     */
    public Extra() {
    }

    /**
     * Tells whether this extra contains no subfield.
     *
     * @return {@code true} if there is no subfield
     */
    public boolean isEmpty() {
        return fieldsList.isEmpty();
    }

    /**
     * Removes all subfields and resets the encoded size to 0.
     */
    public void clear() {
        fieldsList.clear();
        totalSize = 0;
    }

    /**
     * The bytes count of this extra payload when encoded. This does not include its own 16 bits size. For N subfields,
     * the total is all subfields payloads + 4N.
     *
     * @return the encoded size in bytes
     */
    public int getEncodedSize() {
        return totalSize;
    }

    /**
     * The count of subfields contained in this extra.
     *
     * @return the number of subfields
     */
    public int getSize() {
        return fieldsList.size();
    }

    /**
     * Appends a subfield identified by a 2-chars ISO-8859-1 string. The chars at index 0 and 1 are respectively si1
     * and si2. The payload array is stored as given, it is not copied.
     *
     * @param subfieldId the 2-chars ISO-8859-1 subfield id
     * @param payload    the subfield payload, may be empty but not {@code null}
     * @return this instance, to allow chaining
     * @throws IllegalArgumentException if the subfield id is {@code null}, is not 2 chars long, contains a char outside
     *                                  of ISO-8859-1, or if the payload is {@code null}
     * @throws IOException              if appending this subfield would exceed the max size 65535 of the extra header
     */
    public Extra appendSubField(String subfieldId, byte[] payload) throws IOException {
        // A null id is reported like any other malformed id rather than as a NullPointerException.
        if (subfieldId == null || subfieldId.length() != 2) {
            throw new IllegalArgumentException("subfield id must be a 2-chars iso-8859-1 string.");
        }
        if (payload == null) {
            throw new IllegalArgumentException("payload was null");
        }

        final char si1 = subfieldId.charAt(0);
        final char si2 = subfieldId.charAt(1);
        if ((si1 & 0xff00) != 0 || (si2 & 0xff00) != 0) {
            throw new IllegalArgumentException("subfield id must be a 2-chars iso-8859-1 string.");
        }

        final SubField f = new SubField((byte) (si1 & 0xff), (byte) (si2 & 0xff), payload);
        // Compare the payload against the remaining room instead of summing the sizes, so that a huge payload
        // cannot overflow the int arithmetic and slip through the check.
        if (payload.length > MAX_SIZE - SUBFIELD_HEADER_SIZE - totalSize) {
            throw new IOException(
                    "extra subfield '" + f.getId() + "' too big (extras total size is already at " + totalSize + ")");
        }

        fieldsList.add(f);
        totalSize += SUBFIELD_HEADER_SIZE + payload.length;

        return this;
    }

    /**
     * Encodes all subfields, in order, as si1, si2, 16-bit little-endian payload length, payload.
     *
     * @return the encoded bytes (without the 16-bit size prefix of the extra field), an empty array if there is no
     *         subfield
     */
    byte[] toBytes() {
        if (fieldsList.isEmpty()) {
            return ZERO_BYTES;
        }

        final byte[] ba = new byte[totalSize];

        int pos = 0;
        for (final SubField f : fieldsList) {
            ba[pos++] = f.si1;
            ba[pos++] = f.si2;
            ba[pos++] = (byte) (f.payload.length & 0xff); // little endian expected
            ba[pos++] = (byte) (f.payload.length >>> 8);
            System.arraycopy(f.payload, 0, ba, pos, f.payload.length);
            pos += f.payload.length;
        }
        return ba;
    }

    /**
     * Gives all 2-chars ISO-8859-1 strings denoting the subfields. Note that this is imprecise as ids can repeat. Use
     * the methods with indexes to find a specific occurrence.
     *
     * @return the ids of the subfields, in subfield order
     */
    public List<String> listIds() {
        return fieldsList.stream().map(SubField::getId).collect(Collectors.toList());
    }

    /**
     * Finds the 1st subfield that matches the id.
     *
     * @param subfieldId the 2-chars id to look for
     * @return the first matching subfield, or {@code null} if there is none
     */
    public SubField findFirstSubField(String subfieldId) {
        return fieldsList.stream().filter(f -> f.getId().equals(subfieldId)).findFirst().orElse(null);
    }

    /**
     * Finds the subfield at the given index.
     *
     * @param i the 0-based index of the subfield
     * @return the subfield at that index
     * @throws IndexOutOfBoundsException if the index is out of range
     */
    public SubField subFieldAt(int i) {
        return fieldsList.get(i);
    }

    // =============

    /**
     * The carrier for a subfield in the gzip extra.
     */
    public static class SubField {
        /** First byte of the subfield id. */
        byte si1;
        /** Second byte of the subfield id. */
        byte si2;
        /** The subfield payload. */
        byte[] payload;

        /** Creates a subfield whose members are filled in afterwards, as done while parsing. */
        SubField() {
        }

        /**
         * Creates a subfield from its id bytes and payload. The payload array is stored as given.
         */
        SubField(byte si1, byte si2, byte[] payload) {
            this.si1 = si1;
            this.si2 = si2;
            this.payload = payload;
        }

        /**
         * The 2 char iso-8859-1 string made from the si1 and si2 bytes of the subfield id.
         *
         * @return the subfield id
         */
        public String getId() {
            return "" + ((char) (si1 & 0xff)) + ((char) (si2 & 0xff));
        }

        /**
         * The subfield payload. The internal array is returned, not a copy.
         *
         * @return the payload bytes
         */
        public byte[] getPayload() {
            return payload;
        }

    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -244,17 +244,13 @@ private boolean init(final boolean isFirstMember) throws IOException {
}
parameters.setOperatingSystem(inData.readUnsignedByte());

// Extra field, ignored
// Extra field
if ((flg & FEXTRA) != 0) {
int xlen = inData.readUnsignedByte();
xlen |= inData.readUnsignedByte() << 8;

// This isn't as efficient as calling in.skip would be,
// but it's lazier to handle unexpected end of input this way.
// Most files don't have an extra field anyway.
while (xlen-- > 0) {
inData.readUnsignedByte();
}
byte[] extra = new byte[xlen];
inData.readFully(extra);
parameters.setExtra(Extra.fromBytes(extra));
}

// Original file name
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@
* @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP File Format Specification</a>
*/
public class GzipCompressorOutputStream extends CompressorOutputStream<OutputStream> {


/** Header flag indicating an EXTRA subfields collection follows the header */
private static final int FEXTRA = 1 << 2;

/** Header flag indicating a file name follows the header */
private static final int FNAME = 1 << 3;

Expand Down Expand Up @@ -170,11 +173,12 @@ private void write(final String value, final Charset charset) throws IOException
private void writeHeader(final GzipParameters parameters) throws IOException {
final String fileName = parameters.getFileName();
final String comment = parameters.getComment();
final byte[] extra = parameters.getExtra()!=null ? parameters.getExtra().toBytes() : null;
final ByteBuffer buffer = ByteBuffer.allocate(10);
buffer.order(ByteOrder.LITTLE_ENDIAN);
buffer.putShort((short) GZIPInputStream.GZIP_MAGIC);
buffer.put((byte) Deflater.DEFLATED); // compression method (8: deflate)
buffer.put((byte) ((fileName != null ? FNAME : 0) | (comment != null ? FCOMMENT : 0))); // flags
buffer.put((byte) ((extra != null ? FEXTRA : 0) |(fileName != null ? FNAME : 0) | (comment != null ? FCOMMENT : 0))); // flags
buffer.putInt((int) (parameters.getModificationTime() / 1000));
// extra flags
final int compressionLevel = parameters.getCompressionLevel();
Expand All @@ -187,6 +191,11 @@ private void writeHeader(final GzipParameters parameters) throws IOException {
}
buffer.put((byte) parameters.getOperatingSystem());
out.write(buffer.array());
if (extra != null) {
out.write(extra.length & 0xff); // little endian
out.write((extra.length >>> 8) & 0xff);
out.write(extra);
}
write(fileName, parameters.getFileNameCharset());
write(comment, parameters.getFileNameCharset());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import org.apache.commons.io.Charsets;


/**
* Parameters for the GZIP compressor.
*
Expand Down Expand Up @@ -289,6 +290,7 @@ public int type() {
* </p>
*/
private Instant modificationTime = Instant.EPOCH;
private Extra extra;
private String fileName;
private Charset fileNameCharset = GzipUtils.GZIP_ENCODING;
private String comment;
Expand Down Expand Up @@ -340,6 +342,16 @@ public int getCompressionLevel() {
public int getDeflateStrategy() {
return deflateStrategy;
}

/**
* Gets the Extra subfields collection of the gzip header.
*
* @return the extra subfields collection, may be {@code null} when none was set.
* @since 1.28.0
*/
public Extra getExtra() {
return extra;
}

/**
* Gets the file name.
Expand Down Expand Up @@ -467,6 +479,17 @@ public void setCompressionLevel(final int compressionLevel) {
public void setDeflateStrategy(final int deflateStrategy) {
this.deflateStrategy = deflateStrategy;
}


/**
* Sets the Extra subfields collection of the gzip header.
*
* @param extra the collection of extra subfields, stored as given without any check.
* @since 1.28.0
*/
public void setExtra(Extra extra) {
this.extra = extra;
}

/**
* Sets the name of the compressed file.
Expand Down
Loading

0 comments on commit 8a491d6

Please sign in to comment.