Skip to content

Commit

Permalink
Use commons-io BOMInputStream instead of our custom one
Browse files Browse the repository at this point in the history
  • Loading branch information
mdoering committed Jun 23, 2015
1 parent d91d8b2 commit 33d41ea
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 9 deletions.
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@

<properties>
<commons-io.version>2.4</commons-io.version>
<commons-lang.version>3.3.2</commons-lang.version>
<commons-lang.version>3.4</commons-lang.version>
<dwc-api.version>1.12</dwc-api.version>
<freemarker.version>2.3.21</freemarker.version>
<gbif-common.version>0.17</gbif-common.version>
<gbif-common.version>0.18-SNAPSHOT</gbif-common.version>
<gbif-registry-metadata.version>2.26</gbif-registry-metadata.version>
<guava.version>18.0</guava.version>
<junit.version>4.11</junit.version>
Expand Down
12 changes: 6 additions & 6 deletions src/main/java/org/gbif/dwca/io/ArchiveFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import org.gbif.io.CSVReader;
import org.gbif.io.CSVReaderFactory;
import org.gbif.io.DownloadUtil;
import org.gbif.utils.file.BomSafeInputStreamWrapper;
import org.gbif.utils.file.CompressionUtil;

import java.io.File;
Expand All @@ -25,6 +24,7 @@
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
Expand All @@ -33,6 +33,7 @@
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.commons.io.filefilter.HiddenFileFilter;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.commons.io.input.BOMInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
Expand Down Expand Up @@ -341,7 +342,7 @@ public static Archive openArchive(File dwcaFolder) throws IOException, Unsupport
File mf = new File(dwcaFolder, "meta.xml");
if (mf.exists()) {
// read metafile
readMetaDescriptor(archive, new FileInputStream(mf), true);
readMetaDescriptor(archive, new FileInputStream(mf));

} else {
// meta.xml lacking.
Expand Down Expand Up @@ -438,15 +439,14 @@ private static ArchiveFile readFileHeaders(File dataFile) throws UnsupportedArch
return dwcFile;
}

private static void readMetaDescriptor(Archive archive, InputStream metaDescriptor, boolean normaliseTerms)
throws UnsupportedArchiveException {
@VisibleForTesting
protected static void readMetaDescriptor(Archive archive, InputStream metaDescriptor) throws UnsupportedArchiveException {

try {
SAXParser p = SAX_FACTORY.newSAXParser();
MetaHandler mh = new MetaHandler(archive);
LOG.debug("Reading archive metadata file");
// p.parse(metaDescriptor, mh);
p.parse(new BomSafeInputStreamWrapper(metaDescriptor), mh);
p.parse(new BOMInputStream(metaDescriptor), mh);
} catch (Exception e1) {
LOG.warn("Exception caught", e1);
throw new UnsupportedArchiveException(e1);
Expand Down
13 changes: 13 additions & 0 deletions src/test/java/org/gbif/dwca/io/ArchiveFactoryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,16 @@
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import javax.xml.parsers.SAXParser;

import com.google.common.collect.Sets;
import org.apache.commons.io.input.BOMInputStream;
import org.junit.Ignore;
import org.junit.Test;

Expand All @@ -38,6 +42,15 @@ private void assertNumberOfCoreRecords(Archive arch, int expectedRecords) throws
assertEquals(expectedRecords, rows);
}

/**
 * Parses several meta descriptors from the test classpath via
 * {@link ArchiveFactory#readMetaDescriptor}; the test passes when none of them
 * causes an exception. Judging by its name, {@code /meta-utf16le.xml} is the
 * UTF-16LE encoded case (the resource is binary, so confirm if in doubt).
 *
 * @throws Exception if a descriptor cannot be read or parsed
 */
@Test
public void testMetaHandlerUtf16le() throws Exception {
  String[] descriptors = {"/meta/meta.xml", "/meta-utf16le.xml", "/xml-entity-meta/meta.xml"};
  for (String fn : descriptors) {
    // Printed so a failure can be attributed to the descriptor being parsed.
    System.out.println(fn);
    // Resolve against this test class rather than java.lang.ClassLoader, so the
    // lookup uses the loader that actually holds the test resources. The stream
    // is closed on every iteration, including when parsing fails.
    try (InputStream is = getClass().getResourceAsStream(fn)) {
      if (is == null) {
        // Fail with the resource name instead of an opaque error inside the parser.
        throw new AssertionError("Test resource not found on classpath: " + fn);
      }
      ArchiveFactory.readMetaDescriptor(new Archive(), is);
    }
  }
}

@Test
public void testBuildReaderFile() throws IOException {
Archive arch = ArchiveFactory.openArchive(FileUtils.getClasspathFile("TDB_104.csv"));
Expand Down
Binary file added src/test/resources/meta-utf16le.xml
Binary file not shown.
2 changes: 1 addition & 1 deletion src/test/resources/meta/meta.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml version="1.0" encoding="UTF-8"?>
<archive xmlns="http://rs.tdwg.org/dwc/text/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://rs.tdwg.org/dwc/text/ http://darwincore.googlecode.com/svn/trunk/text/tdwg_dwc_text.xsd"
Expand Down

0 comments on commit 33d41ea

Please sign in to comment.