forked from gbif/dwca-io
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Extract value cleaning methods into a new utils class and implement the HTML entity replacement as per POR-155
- Loading branch information
Showing
8 changed files
with
113 additions
and
45 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package org.gbif.dwc.record; | ||
|
||
import java.util.regex.Pattern; | ||
|
||
import org.apache.commons.lang3.StringEscapeUtils; | ||
|
||
public class CleanUtils { | ||
private static final Pattern NULL_REPL = Pattern.compile("^\\s*(null|\\\\N)?\\s*$", Pattern.CASE_INSENSITIVE); | ||
|
||
private CleanUtils() { | ||
} | ||
|
||
/** | ||
* Does basic entity replacments if requested to string values. | ||
* @param value the original string | ||
* @param nulls if true replaces common, literal NULL values with real nulls, e.g. "\N" or "NULL" | ||
* @param entities if true replaces html4, xml and numerical entities with their unicode character | ||
*/ | ||
public static String clean(String value, boolean nulls, boolean entities) { | ||
if (value == null || (nulls && NULL_REPL.matcher(value).find()) ) { | ||
return null; | ||
} | ||
return entities ? StringEscapeUtils.unescapeHtml4(value) : value; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
package org.gbif.dwc.record; | ||
|
||
import junit.framework.TestCase; | ||
import org.junit.Test; | ||
|
||
public class CleanUtilsTest extends TestCase { | ||
|
||
@Test | ||
public void testCleanFalse() throws Exception { | ||
for (String x : new String[]{"", " ", " ", ".", "a ", " Me & Bobby McGee"}) { | ||
assertEquals(x, CleanUtils.clean(x, false, false)); | ||
} | ||
} | ||
|
||
@Test | ||
public void testClean() throws Exception { | ||
assertNull(CleanUtils.clean("", true, true)); | ||
assertNull(CleanUtils.clean(null, true, true)); | ||
assertNull(CleanUtils.clean(" ", true, true)); | ||
assertNull(CleanUtils.clean("\\N", true, true)); | ||
assertNull(CleanUtils.clean("NULL", true, true)); | ||
|
||
assertEquals(" Me & Bobby McGee", CleanUtils.clean(" Me & Bobby McGee", true, true)); | ||
assertEquals("Me & Bobby McGee", CleanUtils.clean("Me & Bobby McGee", true, true)); | ||
assertEquals("Me & Bobby McGee", CleanUtils.clean("Me & Bobby McGee", true, true)); | ||
assertEquals("Me & Bobby McGee", CleanUtils.clean("Me & Bobby McGee", true, true)); | ||
assertEquals("Me & Bobby McGee", CleanUtils.clean("Me & Bobby McGee", true, true)); | ||
|
||
assertEquals("Me &", CleanUtils.clean("Me &", true, true)); | ||
assertEquals("Me & ;", CleanUtils.clean("Me & ;", true, true)); | ||
assertEquals("Me & amp;", CleanUtils.clean("Me & amp;", true, true)); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters