Skip to content

Commit

Permalink
Add LineReader for reading lines with terminators
Browse files Browse the repository at this point in the history
  • Loading branch information
robinst committed Oct 12, 2024
1 parent a08d760 commit 6e93f85
Show file tree
Hide file tree
Showing 2 changed files with 277 additions and 0 deletions.
149 changes: 149 additions & 0 deletions commonmark/src/main/java/org/commonmark/internal/util/LineReader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
package org.commonmark.internal.util;

import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;

/**
 * Reads lines from a reader like {@link java.io.BufferedReader} but also returns the line terminators.
 * <p>
 * Line terminators can be either a line feed {@code "\n"}, carriage return {@code "\r"}, or a carriage return followed
 * by a line feed {@code "\r\n"}. Call {@link #getLineTerminator()} after {@link #readLine()} to obtain the
 * corresponding line terminator. If a stream has a line at the end without a terminator, {@link #getLineTerminator()}
 * returns {@code null}.
 * <p>
 * This class does no synchronization of its own. Once {@link #close()} has been called, {@link #readLine()} throws
 * an {@link IOException}.
 */
public class LineReader implements Closeable {

    // Same as java.io.BufferedReader
    static final int CHAR_BUFFER_SIZE = 8192;
    static final int EXPECTED_LINE_LENGTH = 80;

    // Both are set to null by close(); a null reader marks this instance as closed.
    private Reader reader;
    private char[] cbuf;

    // Unread characters live in cbuf[position..limit).
    private int position = 0;
    private int limit = 0;

    private String lineTerminator = null;

    /**
     * Create a line reader on top of the given reader.
     *
     * @param reader the reader to read characters from; it is closed when {@link #close()} is called
     */
    public LineReader(Reader reader) {
        this.reader = reader;
        this.cbuf = new char[CHAR_BUFFER_SIZE];
    }

    /**
     * Read a line of text.
     *
     * @return the line, or {@code null} when the end of the stream has been reached and no more lines can be read
     * @throws IOException if reading from the underlying reader fails, or if this reader has been closed
     */
    public String readLine() throws IOException {
        ensureOpen();

        // Only allocated when a line spans more than one buffer fill.
        StringBuilder sb = null;
        // True when the previous buffer ended with a CR and we still need to look for a following LF.
        boolean cr = false;

        while (true) {
            if (position >= limit) {
                fill();
            }

            if (cr) {
                // We saw a CR before, check if we have CR LF or just CR.
                // sb is never null here: it was created when the pending CR was recorded.
                if (position < limit && cbuf[position] == '\n') {
                    position++;
                    return line(sb.toString(), "\r\n");
                } else {
                    return line(sb.toString(), "\r");
                }
            }

            if (position >= limit) {
                // End of stream, return either the last line without terminator or null for end.
                return line(sb != null ? sb.toString() : null, null);
            }

            int start = position;
            int i = position;
            for (; i < limit; i++) {
                char c = cbuf[i];
                if (c == '\n') {
                    position = i + 1;
                    return line(finish(sb, start, i), "\n");
                } else if (c == '\r') {
                    if (i + 1 < limit) {
                        // We know what the next character is, so we can check now whether we have
                        // a CR LF or just a CR and return.
                        if (cbuf[i + 1] == '\n') {
                            position = i + 2;
                            return line(finish(sb, start, i), "\r\n");
                        } else {
                            position = i + 1;
                            return line(finish(sb, start, i), "\r");
                        }
                    } else {
                        // We don't know what the next character is yet, check on next iteration.
                        cr = true;
                        position = i + 1;
                        break;
                    }
                }
            }

            // No terminator in the rest of the buffer: mark everything scanned as consumed.
            // (When we broke out on a CR, position is already past it and stays there.)
            if (position < i) {
                position = i;
            }

            // Haven't found a finished line yet, copy the data from the buffer so that we can fill
            // the buffer again.
            if (sb == null) {
                sb = new StringBuilder(EXPECTED_LINE_LENGTH);
            }
            sb.append(cbuf, start, i - start);
        }
    }

    /**
     * Return the line terminator of the last read line from {@link #readLine()}.
     *
     * @return {@code "\n"}, {@code "\r"}, {@code "\r\n"}, or {@code null} if the last line had no terminator, the
     * end of the stream was reached, or no line has been read yet
     */
    public String getLineTerminator() {
        return lineTerminator;
    }

    /**
     * Close the underlying reader and release the internal buffer. Calling this more than once has no effect.
     *
     * @throws IOException if closing the underlying reader fails
     */
    @Override
    public void close() throws IOException {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } finally {
            // Drop the references even if close() threw, so a second close() is a no-op.
            reader = null;
            cbuf = null;
        }
    }

    /**
     * Fail with an {@link IOException} instead of a {@link NullPointerException} when used after {@link #close()}.
     */
    private void ensureOpen() throws IOException {
        if (reader == null) {
            throw new IOException("Stream closed");
        }
    }

    /**
     * Refill the buffer from the underlying reader. Reads that return 0 characters are retried. At the end of the
     * stream, {@code position} and {@code limit} are left untouched so that the caller still sees
     * {@code position >= limit}.
     */
    private void fill() throws IOException {
        int read;
        do {
            read = reader.read(cbuf, 0, cbuf.length);
        } while (read == 0);
        if (read > 0) {
            limit = read;
            position = 0;
        }
    }

    /**
     * Remember the terminator for {@link #getLineTerminator()} and pass the line through.
     */
    private String line(String line, String lineTerminator) {
        this.lineTerminator = lineTerminator;
        return line;
    }

    /**
     * Build the final line from what was accumulated so far (if anything) plus {@code cbuf[start..end)}.
     */
    private String finish(StringBuilder sb, int start, int end) {
        int len = end - start;
        if (sb == null) {
            return new String(cbuf, start, len);
        } else {
            return sb.append(cbuf, start, len).toString();
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
package org.commonmark.internal.util;

import org.junit.Test;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Objects;

import static java.util.stream.Collectors.joining;
import static org.commonmark.internal.util.LineReader.CHAR_BUFFER_SIZE;
import static org.junit.Assert.*;

/**
 * Tests for {@link LineReader}: line splitting for all three terminator kinds (including lines that
 * straddle the internal buffer boundary) and closing behavior.
 */
public class LineReaderTest {

    @Test
    public void testReadLine() throws IOException {
        // Empty input: no lines at all.
        assertLines();

        assertLines("", "\n");
        assertLines("foo", "\n", "bar", "\n");
        assertLines("foo", "\n", "bar", null);
        assertLines("", "\n", "", "\n");
        // Line lengths around CHAR_BUFFER_SIZE exercise the buffer refill paths.
        assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\n");
        assertLines(repeat("a", CHAR_BUFFER_SIZE), "\n");
        assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\n");

        assertLines("", "\r\n");
        assertLines("foo", "\r\n", "bar", "\r\n");
        assertLines("foo", "\r\n", "bar", null);
        assertLines("", "\r\n", "", "\r\n");
        assertLines(repeat("a", CHAR_BUFFER_SIZE - 2), "\r\n");
        // Here the CR is the last char of one buffer fill and the LF the first char of the next.
        assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\r\n");
        assertLines(repeat("a", CHAR_BUFFER_SIZE), "\r\n");
        assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\r\n");

        assertLines("", "\r");
        assertLines("foo", "\r", "bar", "\r");
        assertLines("foo", "\r", "bar", null);
        assertLines("", "\r", "", "\r");
        assertLines(repeat("a", CHAR_BUFFER_SIZE - 1), "\r");
        assertLines(repeat("a", CHAR_BUFFER_SIZE), "\r");
        assertLines(repeat("a", CHAR_BUFFER_SIZE) + "b", "\r");

        // Mixed terminators.
        assertLines("", "\n", "", "\r", "", "\r\n", "", "\n");
        assertLines("what", "\r", "are", "\r", "", "\r", "you", "\r\n", "", "\r\n", "even", "\n", "doing", null);
    }

    @Test
    public void testClose() throws IOException {
        // Decode with the same charset the bytes were encoded with instead of the platform default.
        var reader = new InputStreamReader(
                new ByteArrayInputStream("test".getBytes(StandardCharsets.UTF_8)), StandardCharsets.UTF_8);
        var lineReader = new LineReader(reader);
        lineReader.close();
        // Closing a second time must not throw.
        lineReader.close();
        try {
            reader.read();
            fail("Expected read to throw after closing reader");
        } catch (IOException e) {
            // Expected
        }
    }

    /**
     * Assert that reading the concatenation of the given parts yields exactly those parts.
     *
     * @param s alternating line content and line terminator; a {@code null} terminator means the last line
     * has none
     */
    private void assertLines(String... s) throws IOException {
        assertTrue("Expected parts needs to be even (pairs of content and terminator)", s.length % 2 == 0);
        var input = Arrays.stream(s).filter(Objects::nonNull).collect(joining(""));

        // Run once with a normal reader and once with one that trickles out a char at a time.
        assertLines(new StringReader(input), s);
        assertLines(new SlowStringReader(input), s);
    }

    /**
     * Read all lines from {@code reader} through a {@link LineReader} and compare the (content, terminator)
     * sequence with {@code expectedParts}.
     */
    private static void assertLines(Reader reader, String... expectedParts) throws IOException {
        try (var lineReader = new LineReader(reader)) {
            // Explicit element type: a bare diamond with var would infer ArrayList<Object>.
            var lines = new ArrayList<String>();
            String line;
            while ((line = lineReader.readLine()) != null) {
                lines.add(line);
                lines.add(lineReader.getLineTerminator());
            }
            // After the end of the stream there is no terminator to report.
            assertNull(lineReader.getLineTerminator());
            assertEquals(Arrays.asList(expectedParts), lines);
        }
    }

    /**
     * Return {@code s} concatenated {@code count} times.
     */
    private static String repeat(String s, int count) {
        StringBuilder sb = new StringBuilder(s.length() * count);
        for (int i = 0; i < count; i++) {
            sb.append(s);
        }
        return sb.toString();
    }

    /**
     * Reader that only reads 0 or 1 chars at a time to test the corner cases.
     */
    private static class SlowStringReader extends Reader {

        private final String s;
        private int position = 0;
        // Toggled on every read; when true the current read returns 0 chars.
        private boolean empty = false;

        private SlowStringReader(String s) {
            this.s = s;
        }

        @Override
        public int read(char[] cbuf, int off, int len) throws IOException {
            Objects.checkFromIndexSize(off, len, cbuf.length);
            if (len == 0) {
                return 0;
            }
            empty = !empty;
            if (empty) {
                // Return 0 every other time to test handling of 0.
                return 0;
            }
            if (position >= s.length()) {
                return -1;
            }
            cbuf[off] = s.charAt(position++);
            return 1;
        }

        @Override
        public void close() throws IOException {
            // Nothing to release.
        }
    }
}

0 comments on commit 6e93f85

Please sign in to comment.