Skip to content

Commit

Permalink
Add error handling and update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
eelmafia committed Sep 29, 2024
1 parent 2b6bdb8 commit 8d1772a
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 102 deletions.
3 changes: 2 additions & 1 deletion src/gleamlz_string.gleam
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import gleam/bit_array
import gleam/dict
import gleam/list
import gleam/result
import gleam/string
import internal_lib/lib

Expand All @@ -24,7 +25,7 @@ pub fn decompress_from_base64(string: String) {
|> list.index_map(fn(x, i) { #(x, i) })
|> dict.from_list()
|> lib.decode_base64(string, _, <<>>)
|> lib.decompress
|> result.try(lib.decompress)
}

pub fn compress_to_encoded_uri(string: String) {
Expand Down
193 changes: 121 additions & 72 deletions src/internal_lib/lib.gleam
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import gleam/bit_array
import gleam/dict.{type Dict}
import gleam/int
import gleam/result
import gleam/string

pub type DecodeType {
/// Error type carried in the `Error` side of the `Result` values returned by
/// the decoding and decompression functions of this module.
pub type DecompressError {
/// The input could not be decoded: for example a character is missing from
/// the base64 key dictionary, the bit stream ends before a full segment can
/// be read, a dictionary index is unknown, or a UTF-16 surrogate pair is
/// broken.
EInvalidInput
}

type DecodeType {
Char(#(BitArray, BitArray, Dict(Int, BitArray)))
Index(#(Int, BitArray))
EOF
Expand All @@ -15,15 +20,19 @@ pub fn decode_base64(
bitstring: BitArray,
) {
case string.length(string) {
0 -> bitstring
0 -> Ok(bitstring)
_ -> {
let assert Ok(#(char, rest)) = string.pop_grapheme(string)
let assert Ok(num) = dict.get(key_dict, char)
decode_base64(
rest,
key_dict,
bit_array.append(bitstring, <<num:size(6)>>),
)
case dict.get(key_dict, char) {
Ok(num) -> {
decode_base64(
rest,
key_dict,
bit_array.append(bitstring, <<num:size(6)>>),
)
}
_ -> Error(EInvalidInput)
}
}
}
}
Expand Down Expand Up @@ -120,98 +129,137 @@ fn w_output(w: String, dict: Dict(String, #(Int, Bool)), char_just_added: Bool)
}
}

pub fn decompress(bstring) {
let assert Char(char) = decode_next_segment(bstring, dict.new())

decompress_string(char.0, char.1, char.2, <<>>)
|> to_utf16("")
/// Decompresses `bstring` into a `String`.
///
/// An empty bit array yields `Ok("")`. Otherwise the first segment is decoded
/// against an empty dictionary and must be a literal character (`Char`); the
/// rest of the stream is expanded by `decompress_string` and the resulting
/// bit array is converted to a `String` by `to_utf16`.
///
/// Returns `Error(EInvalidInput)` when the first segment is not a literal
/// character, or when any of the steps above reports an error.
pub fn decompress(bstring) -> Result(String, DecompressError) {
  case bstring {
    <<>> -> Ok("")
    _ -> {
      use first <- result.try(decode_next_segment(bstring, dict.new()))
      case first {
        Char(#(w, rest, entries)) -> {
          use expanded <- result.try(decompress_string(w, rest, entries, <<>>))
          to_utf16(expanded, "")
        }
        // NOTE(review): a leading `EOF` or `Index` segment is rejected here.
        // Presumably a stream consisting only of the end-of-stream marker
        // should also be an error rather than an empty string — confirm
        // against the compressor's output for "".
        _ -> Error(EInvalidInput)
      }
    }
  }
}

fn decompress_string(
w: BitArray,
str: BitArray,
dict: Dict(Int, BitArray),
final_str: BitArray,
) -> BitArray {
case decode_next_segment(str, dict) {
Char(char) -> {
let dict =
dict.insert(char.2, dict.size(char.2) + 3, bit_array.append(w, char.0))
decompress_string(char.0, char.1, dict, <<final_str:bits, w:bits>>)
}
Index(seq) -> {
let c = case dict.get(dict, seq.0) {
Ok(value) -> value
Error(Nil) -> {
case { dict.size(dict) + 3 } == seq.0 {
True -> bit_array.append(w, <<w:bits-size(16)>>)
False -> panic as "Error in decompressing"
) -> Result(BitArray, DecompressError) {
result.try(decode_next_segment(str, dict), fn(return) {
case return {
Char(char) -> {
let dict =
dict.insert(
char.2,
dict.size(char.2) + 3,
bit_array.append(w, char.0),
)
decompress_string(char.0, char.1, dict, <<final_str:bits, w:bits>>)
}
Index(seq) -> {
let c = case dict.get(dict, seq.0) {
Ok(value) -> Ok(value)
Error(Nil) -> {
case { dict.size(dict) + 3 } == seq.0 {
True -> Ok(bit_array.append(w, <<w:bits-size(16)>>))
False -> Error(EInvalidInput)
}
}
}
result.try(c, fn(c) {
let dict =
dict.insert(
dict,
dict.size(dict) + 3,
bit_array.append(w, <<c:bits-size(16)>>),
)
decompress_string(c, seq.1, dict, <<final_str:bits, w:bits>>)
})
}
EOF -> {
Ok(<<final_str:bits, w:bits>>)
}
let dict =
dict.insert(
dict,
dict.size(dict) + 3,
bit_array.append(w, <<c:bits-size(16)>>),
)
decompress_string(c, seq.1, dict, <<final_str:bits, w:bits>>)
}
EOF -> {
<<final_str:bits, w:bits>>
}
}
})
}

fn decode_next_segment(bitstring, dict) -> DecodeType {
fn decode_next_segment(bitstring, dict) -> Result(DecodeType, DecompressError) {
let size = { dict.size(dict) + 3 } |> find_bits
let assert <<dict_entry:size(size), rest:bits>> = bitstring
let assert <<dict_entry:size(size)>> = reverse(<<dict_entry:size(size)>>)

case dict_entry {
0 -> {
let assert <<c:size(8), rest:bits>> = rest
let assert <<c:size(8)>> = reverse(<<c:size(8)>>)
let assert Ok(codepoint) = string.utf_codepoint(c)
let char = <<codepoint:utf16_codepoint>>
let dict = dict.insert(dict, dict.size(dict) + 3, char)
Char(#(char, rest, dict))
}
1 -> {
let assert <<c:size(16), rest:bits>> = rest
let assert <<c:size(16)>> = reverse(<<c:size(16)>>)
let char = <<c:size(16)>>
let dict = dict.insert(dict, dict.size(dict) + 3, char)
Char(#(char, rest, dict))
}
2 -> {
EOF
}
index -> {
Index(#(index, rest))
case bitstring {
<<dict_entry:size(size), rest:bits>> -> {
let assert <<dict_entry:size(size)>> = reverse(<<dict_entry:size(size)>>)
case dict_entry {
0 -> {
case rest {
<<c:size(8), rest:bits>> -> {
let assert <<c:size(8)>> = reverse(<<c:size(8)>>)
let assert Ok(codepoint) = string.utf_codepoint(c)
let char = <<codepoint:utf16_codepoint>>
let dict = dict.insert(dict, dict.size(dict) + 3, char)
Ok(Char(#(char, rest, dict)))
}
_ -> Error(EInvalidInput)
}
}
1 -> {
case rest {
<<c:size(16), rest:bits>> -> {
let assert <<c:size(16)>> = reverse(<<c:size(16)>>)
let char = <<c:size(16)>>
let dict = dict.insert(dict, dict.size(dict) + 3, char)
Ok(Char(#(char, rest, dict)))
}
_ -> Error(EInvalidInput)
}
}
2 -> {
Ok(EOF)
}
index -> {
Ok(Index(#(index, rest)))
}
}
}
_ -> Error(EInvalidInput)
}
}

// HELPERS

fn to_utf16(bitstring: BitArray, string: String) -> String {
fn to_utf16(bitstring: BitArray, string: String) {
case bitstring {
<<>> -> string
<<>> -> Ok(string)
<<bytes:16, rest:bits>> -> {
case bytes {
surrogate if surrogate >= 0xD800 && surrogate <= 0xDFFF -> {
//check if high or low surrogate
case surrogate {
high if high >= 0xD800 && high <= 0xDBFF -> {
let assert <<low:size(16), rest:bits>> = rest
// Convert surrogates to codepoint - https://www.unicode.org/versions/Unicode3.0.0/ch03.pdf
let codepoint =
{ high - 0xD800 } * 0x400 + { low - 0xDC00 } + 0x10000
let assert Ok(codepoint) = string.utf_codepoint(codepoint)
to_utf16(rest, string <> string.from_utf_codepoints([codepoint]))
case rest {
<<low:size(16), rest:bits>> -> {
// Convert surrogates to codepoint - https://www.unicode.org/versions/Unicode3.0.0/ch03.pdf
let codepoint =
{ high - 0xD800 } * 0x400 + { low - 0xDC00 } + 0x10000
let assert Ok(codepoint) = string.utf_codepoint(codepoint)
to_utf16(
rest,
string <> string.from_utf_codepoints([codepoint]),
)
}
_ -> Error(EInvalidInput)
}
}
_ -> panic as "Invalid UTF16"
_ -> Error(EInvalidInput)
}
}
other -> {
Expand All @@ -232,7 +280,8 @@ fn to_utf16(bitstring: BitArray, string: String) -> String {
}
}
_ -> {
panic as "Not enough bits"
//impossible to reach
Error(EInvalidInput)
}
}
}
Expand Down
25 changes: 9 additions & 16 deletions test/gleamlz_string_test.gleam
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import gleam/bit_array
import gleam/erlang/atom
import gleam/list
import gleam/result
import gleam/string
import gleamlz_string
import gleeunit
Expand All @@ -27,24 +26,18 @@ pub fn known_decompression_test() {
5, 133, 48, 54, 96, 246, 3, 64, 4, 9, 107, 2, 24, 22, 217, 180, 53, 51, 144,
0,
>>)
|> should.equal("hello, i am a 猫")
|> should.equal(Ok("hello, i am a 猫"))
}

pub fn random_compression_test_() {
pub fn every_utf8_char_test_() {
let assert Ok(timeout) = atom.from_string("timeout")
#(timeout, 20.0, [
fn() {
let list =
list.concat([list.range(0, 55_295), list.range(57_344, 65_535)])
let allutf8chars = test_helpers.all_utf8_chars()

let stringlist =
list.map(list, fn(x) { string.utf_codepoint(x) })
|> result.values
|> string.from_utf_codepoints

gleamlz_string.compress_to_uint8(stringlist)
gleamlz_string.compress_to_uint8(allutf8chars)
|> gleamlz_string.decompress_from_uint8
|> should.equal(stringlist)
|> should.equal(Ok(allutf8chars))
},
])
}
Expand All @@ -58,7 +51,7 @@ pub fn repeated_single_byte_test_() {
let string = string.repeat("a", x)
gleamlz_string.compress_to_uint8(string)
|> gleamlz_string.decompress_from_uint8
|> should.equal(string)
|> should.equal(Ok(string))
})
},
])
Expand All @@ -73,7 +66,7 @@ pub fn repeated_double_byte_test_() {
let string = string.repeat("猫", x)
gleamlz_string.compress_to_uint8(string)
|> gleamlz_string.decompress_from_uint8
|> should.equal(string)
|> should.equal(Ok(string))
})
},
])
Expand All @@ -89,7 +82,7 @@ pub fn high_entropy_string_test_() {

gleamlz_string.compress_to_uint8(str)
|> gleamlz_string.decompress_from_uint8
|> should.equal(str)
|> should.equal(Ok(str))
})
},
])
Expand All @@ -104,7 +97,7 @@ pub fn large_low_entropy_string_test_() {

gleamlz_string.compress_to_uint8(str)
|> gleamlz_string.decompress_from_uint8
|> should.equal(str)
|> should.equal(Ok(str))
},
])
}
Loading

0 comments on commit 8d1772a

Please sign in to comment.