Skip to content

Commit

Permalink
feat: revamp file IO, this time Windows compatible (leanprover#4950)
Browse files Browse the repository at this point in the history
This implements a naive version of `getline` because Windows does not
have `getline`. Given the fact that `FILE` has buffered IO, calling
`fgetc` in a loop is not as big of a performance hazard as it might seem
at first glance.

The proper solution to this would of course be to have our own buffered
IO so we are fully in charge of the buffer. In this situation we could
check the entire buffer for a newline at once instead of char by char.
However that is not going to happen for the near future so I propose we
stay with this implementation. If reading individual lines of a file
does truly end up being the performance bottle neck we have already
won^^.
  • Loading branch information
hargoniX authored Aug 7, 2024
1 parent 63c4de5 commit 7776852
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 54 deletions.
48 changes: 31 additions & 17 deletions src/Init/System/IO.lean
Original file line number Diff line number Diff line change
Expand Up @@ -470,31 +470,23 @@ def withFile (fn : FilePath) (mode : Mode) (f : Handle → IO α) : IO α :=
def Handle.putStrLn (h : Handle) (s : String) : IO Unit :=
h.putStr (s.push '\n')

partial def Handle.readBinToEnd (h : Handle) : IO ByteArray := do
partial def Handle.readBinToEndInto (h : Handle) (buf : ByteArray) : IO ByteArray := do
let rec loop (acc : ByteArray) : IO ByteArray := do
let buf ← h.read 1024
if buf.isEmpty then
return acc
else
loop (acc ++ buf)
loop ByteArray.empty
loop buf

partial def Handle.readToEnd (h : Handle) : IO String := do
let rec loop (s : String) := do
let line ← h.getLine
if line.isEmpty then
return s
else
loop (s ++ line)
loop ""

def readBinFile (fname : FilePath) : IO ByteArray := do
let h ← Handle.mk fname Mode.read
h.readBinToEnd
partial def Handle.readBinToEnd (h : Handle) : IO ByteArray := do
h.readBinToEndInto .empty

def readFile (fname : FilePath) : IO String := do
let h ← Handle.mk fname Mode.read
h.readToEnd
def Handle.readToEnd (h : Handle) : IO String := do
let data ← h.readBinToEnd
match String.fromUTF8? data with
| some s => return s
| none => throw <| .userError s!"Tried to read from handle containing non UTF-8 data."

partial def lines (fname : FilePath) : IO (Array String) := do
let h ← Handle.mk fname Mode.read
Expand Down Expand Up @@ -600,6 +592,28 @@ end System.FilePath

namespace IO

namespace FS

def readBinFile (fname : FilePath) : IO ByteArray := do
-- Requires metadata so defined after metadata
let mdata ← fname.metadata
let size := mdata.byteSize.toUSize
let handle ← IO.FS.Handle.mk fname .read
let buf ←
if size > 0 then
handle.read mdata.byteSize.toUSize
else
pure <| ByteArray.mkEmpty 0
handle.readBinToEndInto buf

def readFile (fname : FilePath) : IO String := do
let data ← readBinFile fname
match String.fromUTF8? data with
| some s => return s
| none => throw <| .userError s!"Tried to read file '{fname}' containing non UTF-8 data."

end FS

def withStdin [Monad m] [MonadFinally m] [MonadLiftT BaseIO m] (h : FS.Stream) (x : m α) : m α := do
let prev ← setStdin h
try x finally discard <| setStdin prev
Expand Down
47 changes: 20 additions & 27 deletions src/runtime/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -485,43 +485,36 @@ extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_write(b_obj_arg h, b_obj_arg
}
}

/*
Handle.getLine : (@& Handle) → IO Unit
The line returned by `lean_io_prim_handle_get_line`
is truncated at the first '\0' character and the
rest of the line is discarded. */
/* Handle.getLine : (@& Handle) → IO Unit */
extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_get_line(b_obj_arg h, obj_arg /* w */) {
FILE * fp = io_get_handle(h);
const int buf_sz = 64;
char buf_str[buf_sz]; // NOLINT

std::string result;
bool first = true;
while (true) {
char * out = std::fgets(buf_str, buf_sz, fp);
if (out != nullptr) {
if (strlen(buf_str) < buf_sz-1 || buf_str[buf_sz-2] == '\n') {
if (first) {
return io_result_mk_ok(mk_string(out));
} else {
result.append(out);
return io_result_mk_ok(mk_string(result));
}
}
result.append(out);
} else if (std::feof(fp)) {
clearerr(fp);
return io_result_mk_ok(mk_string(result));
} else {
return io_result_mk_error(decode_io_error(errno, nullptr));
int c; // Note: int, not char, required to handle EOF
while ((c = std::fgetc(fp)) != EOF) {
result.push_back(c);
if (c == '\n') {
break;
}
first = false;
}

if (std::ferror(fp)) {
return io_result_mk_error(decode_io_error(errno, nullptr));
} else if (std::feof(fp)) {
clearerr(fp);
return io_result_mk_ok(mk_string(result));
} else {
obj_res ret = io_result_mk_ok(mk_string(result));
return ret;
}
}

/* Handle.putStr : (@& Handle) → (@& String) → IO Unit */
extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_put_str(b_obj_arg h, b_obj_arg s, obj_arg /* w */) {
FILE * fp = io_get_handle(h);
if (std::fputs(lean_string_cstr(s), fp) != EOF) {
usize n = lean_string_size(s) - 1; // - 1 to ignore the terminal NULL byte.
usize m = std::fwrite(lean_string_cstr(s), 1, n, fp);
if (m == n) {
return io_result_mk_ok(box(0));
} else {
return io_result_mk_error(decode_io_error(errno, nullptr));
Expand Down
24 changes: 15 additions & 9 deletions stage0/src/runtime/io.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion stage0/src/runtime/object.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions tests/lean/run/3546.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
def test : IO Unit := do
let tmpFile := "3546.tmp"
let firstLine := "foo\u0000bar\n"
let content := firstLine ++ "hello world\nbye"
IO.FS.writeFile tmpFile content
let handle ← IO.FS.Handle.mk tmpFile .read
let firstReadLine ← handle.getLine
let cond := firstLine == firstReadLine && firstReadLine.length == 8 -- paranoid
IO.println cond
IO.FS.removeFile tmpFile

/-- info: true -/
#guard_msgs in
#eval test

0 comments on commit 7776852

Please sign in to comment.