-
Notifications
You must be signed in to change notification settings - Fork 296
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add LineReader for reading lines with terminators
- Loading branch information
Showing
2 changed files
with
277 additions
and
0 deletions.
There are no files selected for viewing
149 changes: 149 additions & 0 deletions
149
commonmark/src/main/java/org/commonmark/internal/util/LineReader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
package org.commonmark.internal.util; | ||
|
||
import java.io.Closeable; | ||
import java.io.IOException; | ||
import java.io.Reader; | ||
|
||
/** | ||
* Reads lines from a reader like {@link java.io.BufferedReader} but also returns the line terminators. | ||
* <p> | ||
* Line terminators can be either a line feed {@code "\n"}, carriage return {@code "\r"}, or a carriage return followed | ||
* by a line feed {@code "\r\n"}. Call {@link #getLineTerminator()} after {@link #readLine()} to obtain the | ||
* corresponding line terminator. If a stream has a line at the end without a terminator, {@link #getLineTerminator()} | ||
* returns {@code null}. | ||
*/ | ||
public class LineReader implements Closeable { | ||
|
||
// Same as java.io.BufferedReader | ||
static final int CHAR_BUFFER_SIZE = 8192; | ||
static final int EXPECTED_LINE_LENGTH = 80; | ||
|
||
private Reader reader; | ||
private char[] cbuf; | ||
|
||
private int position = 0; | ||
private int limit = 0; | ||
|
||
private String lineTerminator = null; | ||
|
||
public LineReader(Reader reader) { | ||
this.reader = reader; | ||
this.cbuf = new char[CHAR_BUFFER_SIZE]; | ||
} | ||
|
||
/** | ||
* Read a line of text. | ||
* | ||
* @return the line, or {@code null} when the end of the stream has been reached and no more lines can be read | ||
*/ | ||
public String readLine() throws IOException { | ||
StringBuilder sb = null; | ||
boolean cr = false; | ||
|
||
while (true) { | ||
if (position >= limit) { | ||
fill(); | ||
} | ||
|
||
if (cr) { | ||
// We saw a CR before, check if we have CR LF or just CR. | ||
if (position < limit && cbuf[position] == '\n') { | ||
position++; | ||
return line(sb.toString(), "\r\n"); | ||
} else { | ||
return line(sb.toString(), "\r"); | ||
} | ||
} | ||
|
||
if (position >= limit) { | ||
// End of stream, return either the last line without terminator or null for end. | ||
return line(sb != null ? sb.toString() : null, null); | ||
} | ||
|
||
int start = position; | ||
int i = position; | ||
for (; i < limit; i++) { | ||
char c = cbuf[i]; | ||
if (c == '\n') { | ||
position = i + 1; | ||
return line(finish(sb, start, i), "\n"); | ||
} else if (c == '\r') { | ||
if (i + 1 < limit) { | ||
// We know what the next character is, so we can check now whether we have | ||
// a CR LF or just a CR and return. | ||
if (cbuf[i + 1] == '\n') { | ||
position = i + 2; | ||
return line(finish(sb, start, i), "\r\n"); | ||
} else { | ||
position = i + 1; | ||
return line(finish(sb, start, i), "\r"); | ||
} | ||
} else { | ||
// We don't know what the next character is yet, check on next iteration. | ||
cr = true; | ||
position = i + 1; | ||
break; | ||
} | ||
} | ||
} | ||
|
||
if (position < i) { | ||
position = i; | ||
} | ||
|
||
// Haven't found a finished line yet, copy the data from the buffer so that we can fill | ||
// the buffer again. | ||
if (sb == null) { | ||
sb = new StringBuilder(EXPECTED_LINE_LENGTH); | ||
} | ||
sb.append(cbuf, start, i - start); | ||
} | ||
} | ||
|
||
/** | ||
* Return the line terminator of the last read line from {@link #readLine()}. | ||
* | ||
* @return {@code "\n"}, {@code "\r"}, {@code "\r\n"}, or {@code null} | ||
*/ | ||
public String getLineTerminator() { | ||
return lineTerminator; | ||
} | ||
|
||
@Override | ||
public void close() throws IOException { | ||
if (reader == null) { | ||
return; | ||
} | ||
try { | ||
reader.close(); | ||
} finally { | ||
reader = null; | ||
cbuf = null; | ||
} | ||
} | ||
|
||
private void fill() throws IOException { | ||
int read; | ||
do { | ||
read = reader.read(cbuf, 0, cbuf.length); | ||
} while (read == 0); | ||
if (read > 0) { | ||
limit = read; | ||
position = 0; | ||
} | ||
} | ||
|
||
private String line(String line, String lineTerminator) { | ||
this.lineTerminator = lineTerminator; | ||
return line; | ||
} | ||
|
||
private String finish(StringBuilder sb, int start, int end) { | ||
int len = end - start; | ||
if (sb == null) { | ||
return new String(cbuf, start, len); | ||
} else { | ||
return sb.append(cbuf, start, len).toString(); | ||
} | ||
} | ||
} |
128 changes: 128 additions & 0 deletions
128
commonmark/src/test/java/org/commonmark/internal/util/LineReaderTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
package org.commonmark.internal.util; | ||
|
||
import org.junit.Test; | ||
|
||
import java.io.*; | ||
import java.nio.charset.StandardCharsets; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.Objects; | ||
|
||
import static java.util.stream.Collectors.joining; | ||
import static org.commonmark.internal.util.LineReader.CHAR_BUFFER_SIZE; | ||
import static org.junit.Assert.*; | ||
|
||
public class LineReaderTest { | ||
|
||
@Test | ||
public void testReadLine() throws IOException { | ||
assertLines(); | ||
|
||
assertLines("", "\n"); | ||
assertLines("foo", "\n", "bar", "\n"); | ||
assertLines("foo", "\n", "bar", null); | ||
assertLines("", "\n", "", "\n"); | ||
assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\n"); | ||
assertLines(repeat("a", CHAR_BUFFER_SIZE), "\n"); | ||
assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\n"); | ||
|
||
assertLines("", "\r\n"); | ||
assertLines("foo", "\r\n", "bar", "\r\n"); | ||
assertLines("foo", "\r\n", "bar", null); | ||
assertLines("", "\r\n", "", "\r\n"); | ||
assertLines(repeat("a", CHAR_BUFFER_SIZE - 2), "\r\n"); | ||
assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\r\n"); | ||
assertLines(repeat("a", CHAR_BUFFER_SIZE), "\r\n"); | ||
assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\r\n"); | ||
|
||
assertLines("", "\r"); | ||
assertLines("foo", "\r", "bar", "\r"); | ||
assertLines("foo", "\r", "bar", null); | ||
assertLines("", "\r", "", "\r"); | ||
assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\r"); | ||
assertLines(repeat("a", CHAR_BUFFER_SIZE), "\r"); | ||
assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\r"); | ||
|
||
assertLines("", "\n", "", "\r", "", "\r\n", "", "\n"); | ||
assertLines("what", "\r", "are", "\r", "", "\r", "you", "\r\n", "", "\r\n", "even", "\n", "doing", null); | ||
} | ||
|
||
@Test | ||
public void testClose() throws IOException { | ||
var reader = new InputStreamReader(new ByteArrayInputStream("test".getBytes(StandardCharsets.UTF_8))); | ||
var lineReader = new LineReader(reader); | ||
lineReader.close(); | ||
lineReader.close(); | ||
try { | ||
reader.read(); | ||
fail("Expected read to throw after closing reader"); | ||
} catch (IOException e) { | ||
// Expected | ||
} | ||
} | ||
|
||
private void assertLines(String... s) throws IOException { | ||
assertTrue("Expected parts needs to be even (pairs of content and terminator)", s.length % 2 == 0); | ||
var input = Arrays.stream(s).filter(Objects::nonNull).collect(joining("")); | ||
|
||
assertLines(new StringReader(input), s); | ||
assertLines(new SlowStringReader(input), s); | ||
} | ||
|
||
private static void assertLines(Reader reader, String... expectedParts) throws IOException { | ||
try (var lineReader = new LineReader(reader)) { | ||
var lines = new ArrayList<>(); | ||
String line; | ||
while ((line = lineReader.readLine()) != null) { | ||
lines.add(line); | ||
lines.add(lineReader.getLineTerminator()); | ||
} | ||
assertNull(lineReader.getLineTerminator()); | ||
assertEquals(Arrays.asList(expectedParts), lines); | ||
} | ||
} | ||
|
||
private static String repeat(String s, int count) { | ||
StringBuilder sb = new StringBuilder(s.length() * count); | ||
for (int i = 0; i < count; i++) { | ||
sb.append(s); | ||
} | ||
return sb.toString(); | ||
} | ||
|
||
/** | ||
* Reader that only reads 0 or 1 chars at a time to test the corner cases. | ||
*/ | ||
private static class SlowStringReader extends Reader { | ||
|
||
private final String s; | ||
private int position = 0; | ||
private boolean empty = false; | ||
|
||
private SlowStringReader(String s) { | ||
this.s = s; | ||
} | ||
|
||
@Override | ||
public int read(char[] cbuf, int off, int len) throws IOException { | ||
Objects.checkFromIndexSize(off, len, cbuf.length); | ||
if (len == 0) { | ||
return 0; | ||
} | ||
empty = !empty; | ||
if (empty) { | ||
// Return 0 every other time to test handling of 0. | ||
return 0; | ||
} | ||
if (position >= s.length()) { | ||
return -1; | ||
} | ||
cbuf[off] = s.charAt(position++); | ||
return 1; | ||
} | ||
|
||
@Override | ||
public void close() throws IOException { | ||
} | ||
} | ||
} |