Skip to content

Commit

Permalink
Update nixpkgs
Browse files Browse the repository at this point in the history
This requires two main changes:
- An update to the nixpkgs patch. It is now based on this draft PR:
  NixOS/nixpkgs#352629
- An update to the Lean runtime, as Lean is updated to v4.10.0. The
  changes come from Lean commit
  0a1a855ba80e51515570439f3d73d3d9414ac053.
  • Loading branch information
kuruczgy committed Nov 1, 2024
1 parent 63615e1 commit fa30d2d
Show file tree
Hide file tree
Showing 8 changed files with 343 additions and 168 deletions.
6 changes: 3 additions & 3 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@
'';
hash = "sha256-yJ54S+HB3CH5/wb+GFwNkz3/990J8Z6GbuttDhWqN5Q=";
};
patches = finalAttrs.patches ++ [ ./patches/qemu.patch ];
patches = [ ./patches/qemu.patch ];
buildInputs = finalAttrs.buildInputs ++ [ pkgsNative.libgcrypt ];
configureFlags = finalAttrs.configureFlags ++ [ "--enable-gcrypt" ];
});
Expand Down
288 changes: 205 additions & 83 deletions patches/nixpkgs.patch

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions platform/lean/lean.h
Original file line number Diff line number Diff line change
Expand Up @@ -1119,7 +1119,12 @@ static inline size_t lean_string_byte_size(lean_object *o) {
}
/* instance : inhabited char := ⟨'A'⟩ */
static inline uint32_t lean_char_default_value() { return 'A'; }
LEAN_EXPORT lean_obj_res lean_mk_string_unchecked(char const *s, size_t sz,
size_t len);
LEAN_EXPORT lean_obj_res lean_mk_string_from_bytes(char const *s, size_t sz);
LEAN_EXPORT lean_obj_res lean_mk_string_from_bytes_unchecked(char const *s,
size_t sz);
LEAN_EXPORT lean_obj_res lean_mk_ascii_string_unchecked(char const *s);
LEAN_EXPORT lean_obj_res lean_mk_string(char const *s);
static inline char const *lean_string_cstr(b_lean_obj_arg o) {
assert(lean_is_string(o));
Expand Down
96 changes: 66 additions & 30 deletions platform/runtime/object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,13 +221,15 @@ extern "C" LEAN_EXPORT lean_object *lean_array_data(lean_obj_arg a) {
}

extern "C" LEAN_EXPORT lean_obj_res lean_array_get_panic(lean_obj_arg def_val) {
return lean_panic_fn(def_val, lean_mk_string("Error: index out of bounds"));
return lean_panic_fn(
def_val, lean_mk_ascii_string_unchecked("Error: index out of bounds"));
}

extern "C" LEAN_EXPORT lean_obj_res lean_array_set_panic(lean_obj_arg a,
lean_obj_arg v) {
lean_dec(v);
return lean_panic_fn(a, lean_mk_string("Error: index out of bounds"));
return lean_panic_fn(
a, lean_mk_ascii_string_unchecked("Error: index out of bounds"));
}

// =======================================
Expand Down Expand Up @@ -825,9 +827,9 @@ extern "C" LEAN_EXPORT lean_obj_res lean_float_to_string(double a) {
if (isnan(a))
// override NaN because we don't want NaNs to be distinguishable
// because the sign bit / payload bits can be architecture-dependent
return mk_string("NaN");
return mk_ascii_string_unchecked("NaN");
else
return mk_string(std::to_string(a));
return mk_ascii_string_unchecked(std::to_string(a));
}

extern "C" LEAN_EXPORT double lean_float_scaleb(double a, b_lean_obj_arg b) {
Expand Down Expand Up @@ -880,31 +882,66 @@ static object *string_ensure_capacity(object *o, size_t extra) {
}
}

extern "C" LEAN_EXPORT object *lean_mk_string_core(char const *s, size_t sz,
size_t len) {
extern "C" LEAN_EXPORT object *lean_mk_string_unchecked(char const *s,
size_t sz, size_t len) {
size_t rsz = sz + 1;
object *r = lean_alloc_string(rsz, rsz, len);
memcpy(w_string_cstr(r), s, sz);
w_string_cstr(r)[sz] = 0;
return r;
}

object *lean_mk_string_lossy_recover(char const *s, size_t sz, size_t pos,
size_t i) {
std::string str(s, pos);
size_t start = pos;
while (pos < sz) {
if (!validate_utf8_one((const uint8_t *)s, sz, pos)) {
str.append(s + start, pos - start);
str.append("\ufffd"); // U+FFFD REPLACEMENT CHARACTER
do
pos++;
while (pos < sz && (s[pos] & 0xc0) == 0x80);
start = pos;
}
i++;
}
str.append(s + start, pos - start);
return lean_mk_string_unchecked(str.data(), str.size(), i);
}

extern "C" LEAN_EXPORT object *lean_mk_string_from_bytes(char const *s,
size_t sz) {
return lean_mk_string_core(s, sz, utf8_strlen(s, sz));
size_t pos = 0, i = 0;
if (validate_utf8((const uint8_t *)s, sz, pos, i)) {
return lean_mk_string_unchecked(s, pos, i);
} else {
return lean_mk_string_lossy_recover(s, sz, pos, i);
}
}

extern "C" LEAN_EXPORT object *
lean_mk_string_from_bytes_unchecked(char const *s, size_t sz) {
return lean_mk_string_unchecked(s, sz, utf8_strlen(s, sz));
}

extern "C" LEAN_EXPORT object *lean_mk_string(char const *s) {
return lean_mk_string_from_bytes(s, strlen(s));
}

extern "C" LEAN_EXPORT obj_res lean_string_from_utf8(b_obj_arg a) {
return lean_mk_string_from_bytes(
extern "C" LEAN_EXPORT object *lean_mk_ascii_string_unchecked(char const *s) {
size_t len = strlen(s);
return lean_mk_string_unchecked(s, len, len);
}

extern "C" LEAN_EXPORT obj_res lean_string_from_utf8_unchecked(b_obj_arg a) {
return lean_mk_string_from_bytes_unchecked(
reinterpret_cast<char *>(lean_sarray_cptr(a)), lean_sarray_size(a));
}

extern "C" LEAN_EXPORT uint8 lean_string_validate_utf8(b_obj_arg a) {
return validate_utf8(lean_sarray_cptr(a), lean_sarray_size(a));
size_t pos = 0, i = 0;
return validate_utf8(lean_sarray_cptr(a), lean_sarray_size(a), pos, i);
}

extern "C" LEAN_EXPORT obj_res lean_string_to_utf8(b_obj_arg s) {
Expand All @@ -918,8 +955,8 @@ object *mk_string(std::string const &s) {
return lean_mk_string_from_bytes(s.data(), s.size());
}

object *mk_ascii_string(std::string const &s) {
return lean_mk_string_core(s.data(), s.size(), s.size());
object *mk_ascii_string_unchecked(std::string const &s) {
return lean_mk_string_unchecked(s.data(), s.size(), s.size());
}

std::string string_to_std(b_obj_arg o) {
Expand Down Expand Up @@ -990,16 +1027,6 @@ extern "C" LEAN_EXPORT bool lean_string_lt(object *s1, object *s2) {
return r < 0 || (r == 0 && sz1 < sz2);
}

static std::string list_as_string(b_obj_arg lst) {
std::string s;
b_obj_arg o = lst;
while (!lean_is_scalar(o)) {
push_unicode_scalar(s, lean_unbox_uint32(lean_ctor_get(o, 0)));
o = lean_ctor_get(o, 1);
}
return s;
}

static obj_res string_to_list_core(std::string const &s, bool reverse = false) {
std::vector<unsigned> tmp;
utf8_decode(s, tmp);
Expand All @@ -1018,9 +1045,16 @@ static obj_res string_to_list_core(std::string const &s, bool reverse = false) {
}

extern "C" LEAN_EXPORT obj_res lean_string_mk(obj_arg cs) {
std::string s = list_as_string(cs);
std::string s;
b_obj_arg o = cs;
size_t len = 0;
while (!lean_is_scalar(o)) {
push_unicode_scalar(s, lean_unbox_uint32(lean_ctor_get(o, 0)));
o = lean_ctor_get(o, 1);
len++;
}
lean_dec(cs);
return mk_string(s);
return lean_mk_string_unchecked(s.data(), s.size(), len);
}

extern "C" LEAN_EXPORT obj_res lean_string_data(obj_arg s) {
Expand Down Expand Up @@ -1159,7 +1193,8 @@ extern "C" LEAN_EXPORT obj_res lean_string_utf8_get_opt(b_obj_arg s,

static uint32 lean_string_utf8_get_panic() {
lean_panic_fn(lean_box(0),
lean_mk_string("Error: invalid `String.Pos` at `String.get!`"));
lean_mk_ascii_string_unchecked(
"Error: invalid `String.Pos` at `String.get!`"));
return lean_char_default_value();
}

Expand Down Expand Up @@ -1258,11 +1293,11 @@ extern "C" LEAN_EXPORT obj_res lean_string_utf8_extract(b_obj_arg s,
char const *str = lean_string_cstr(s);
usize sz = lean_string_size(s) - 1;
if (b >= e || b >= sz)
return lean_mk_string("");
return lean_mk_string_unchecked("", 0, 0);
/* In the reference implementation if `b` is not pointing to a valid UTF8
character start position, the result is the empty string. */
if (!is_utf8_first_byte(str[b]))
return lean_mk_string("");
return lean_mk_string_unchecked("", 0, 0);
if (e > sz)
e = sz;
lean_assert(b < e);
Expand All @@ -1273,7 +1308,7 @@ extern "C" LEAN_EXPORT obj_res lean_string_utf8_extract(b_obj_arg s,
e = sz;
usize new_sz = e - b;
lean_assert(new_sz > 0);
return lean_mk_string_from_bytes(lean_string_cstr(s) + b, new_sz);
return lean_mk_string_from_bytes_unchecked(lean_string_cstr(s) + b, new_sz);
}

extern "C" LEAN_EXPORT obj_res lean_string_utf8_prev(b_obj_arg s,
Expand Down Expand Up @@ -1347,9 +1382,10 @@ extern "C" LEAN_EXPORT obj_res lean_string_utf8_set(obj_arg s, b_obj_arg i0,
std::string tmp;
push_unicode_scalar(tmp, c);
std::string new_s = string_to_std(s);
usize len = lean_string_len(s);
dec(s);
new_s.replace(i, get_utf8_char_size_at(new_s, i), tmp);
return mk_string(new_s);
return lean_mk_string_unchecked(new_s.data(), new_s.size(), len);
}

extern "C" LEAN_EXPORT uint64 lean_string_hash(b_obj_arg s) {
Expand All @@ -1359,7 +1395,7 @@ extern "C" LEAN_EXPORT uint64 lean_string_hash(b_obj_arg s) {
}

extern "C" LEAN_EXPORT obj_res lean_string_of_usize(size_t n) {
return mk_ascii_string(std::to_string(n));
return mk_ascii_string_unchecked(std::to_string(n));
}

// =======================================
Expand Down
1 change: 1 addition & 0 deletions platform/runtime/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,7 @@ inline obj_res alloc_string(size_t size, size_t capacity, size_t len) {
return lean_alloc_string(size, capacity, len);
}
inline obj_res mk_string(char const *s) { return lean_mk_string(s); }
LEAN_EXPORT obj_res mk_ascii_string_unchecked(std::string const &s);
LEAN_EXPORT obj_res mk_string(std::string const &s);
LEAN_EXPORT std::string string_to_std(b_obj_arg o);
inline char const *string_cstr(b_obj_arg o) { return lean_string_cstr(o); }
Expand Down
106 changes: 56 additions & 50 deletions platform/runtime/utf8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,62 +118,68 @@ void utf8_decode(std::string const &str, std::vector<unsigned> &out) {
}
}

bool validate_utf8(uint8_t const *str, size_t size) {
size_t i = 0;
while (i < size) {
unsigned c = str[i];
if ((c & 0x80) == 0) {
/* zero continuation (0 to 0x7F) */
i++;
} else if ((c & 0xe0) == 0xc0) {
/* one continuation (0x80 to 0x7FF) */
if (i + 1 >= size)
return false;

unsigned c1 = str[i + 1];
if ((c1 & 0xc0) != 0x80)
return false;

unsigned r = ((c & 0x1f) << 6) | (c1 & 0x3f);
if (r < 0x80)
return false;
bool validate_utf8_one(uint8_t const *str, size_t size, size_t &pos) {
unsigned c = str[pos];
if ((c & 0x80) == 0) {
/* zero continuation (0 to 0x7F) */
pos++;
} else if ((c & 0xe0) == 0xc0) {
/* one continuation (0x80 to 0x7FF) */
if (pos + 1 >= size)
return false;

i += 2;
} else if ((c & 0xf0) == 0xe0) {
/* two continuations (0x800 to 0xD7FF and 0xE000 to 0xFFFF) */
if (i + 2 >= size)
return false;
unsigned c1 = str[pos + 1];
if ((c1 & 0xc0) != 0x80)
return false;

unsigned c1 = str[i + 1];
unsigned c2 = str[i + 2];
if ((c1 & 0xc0) != 0x80 || (c2 & 0xc0) != 0x80)
return false;
unsigned r = ((c & 0x1f) << 6) | (c1 & 0x3f);
if (r < 0x80)
return false;

unsigned r = ((c & 0x0f) << 12) | ((c1 & 0x3f) << 6) | (c2 & 0x3f);
if (r < 0x800 || (r >= 0xD800 && r <= 0xDFFF))
return false;
pos += 2;
} else if ((c & 0xf0) == 0xe0) {
/* two continuations (0x800 to 0xD7FF and 0xE000 to 0xFFFF) */
if (pos + 2 >= size)
return false;

i += 3;
} else if ((c & 0xf8) == 0xf0) {
/* three continuations (0x10000 to 0x10FFFF) */
if (i + 3 >= size)
return false;

unsigned c1 = str[i + 1];
unsigned c2 = str[i + 2];
unsigned c3 = str[i + 3];
if ((c1 & 0xc0) != 0x80 || (c2 & 0xc0) != 0x80 || (c3 & 0xc0) != 0x80)
return false;

unsigned r = ((c & 0x07) << 18) | ((c1 & 0x3f) << 12) |
((c2 & 0x3f) << 6) | (c3 & 0x3f);
if (r < 0x10000 || r > 0x10FFFF)
return false;
unsigned c1 = str[pos + 1];
unsigned c2 = str[pos + 2];
if ((c1 & 0xc0) != 0x80 || (c2 & 0xc0) != 0x80)
return false;

i += 4;
} else {
unsigned r = ((c & 0x0f) << 12) | ((c1 & 0x3f) << 6) | (c2 & 0x3f);
if (r < 0x800 || (r >= 0xD800 && r <= 0xDFFF))
return false;
}

pos += 3;
} else if ((c & 0xf8) == 0xf0) {
/* three continuations (0x10000 to 0x10FFFF) */
if (pos + 3 >= size)
return false;

unsigned c1 = str[pos + 1];
unsigned c2 = str[pos + 2];
unsigned c3 = str[pos + 3];
if ((c1 & 0xc0) != 0x80 || (c2 & 0xc0) != 0x80 || (c3 & 0xc0) != 0x80)
return false;

unsigned r = ((c & 0x07) << 18) | ((c1 & 0x3f) << 12) | ((c2 & 0x3f) << 6) |
(c3 & 0x3f);
if (r < 0x10000 || r > 0x10FFFF)
return false;

pos += 4;
} else {
return false;
}
return true;
}

bool validate_utf8(uint8_t const *str, size_t size, size_t &pos, size_t &i) {
while (pos < size) {
if (!validate_utf8_one(str, size, pos))
return false;
i++;
}
return true;
}
Expand Down
7 changes: 6 additions & 1 deletion platform/runtime/utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,13 @@ LEAN_EXPORT size_t utf8_strlen(char const *str, size_t sz);
LEAN_EXPORT void utf8_decode(std::string const &str,
std::vector<unsigned> &out);

/* Returns true if the given character is valid UTF-8 */
LEAN_EXPORT bool validate_utf8_one(uint8_t const *str, size_t size,
size_t &pos);

/* Returns true if the provided string is valid UTF-8 */
LEAN_EXPORT bool validate_utf8(uint8_t const *str, size_t size);
LEAN_EXPORT bool validate_utf8(uint8_t const *str, size_t size, size_t &pos,
size_t &i);

/* Push a unicode scalar value into a utf-8 encoded string */
LEAN_EXPORT void push_unicode_scalar(std::string &s, unsigned code);
Expand Down

0 comments on commit fa30d2d

Please sign in to comment.