diff --git a/CHANGELOG.md b/CHANGELOG.md index 228bc62..f15906b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## v1.3.0 - xxx Breaking changes: + - `GleamIntParseError` and `GleamIntParseErrorReason` have been removed. - `InvalidCharacter` has been renamed to `UnknownCharacter`. - A new `InvalidDigitPosition` error has been introduced. diff --git a/src/lenient_parse.gleam b/src/lenient_parse.gleam index 267d622..339d90e 100644 --- a/src/lenient_parse.gleam +++ b/src/lenient_parse.gleam @@ -1,8 +1,5 @@ -import gleam/float -import gleam/int -import gleam/result import lenient_parse/internal/parse -import parse_error.{type ParseError, GleamFloatParseError, GleamIntParseError} +import parse_error.{type ParseError} /// Converts a string to a float using a more lenient parsing method than gleam's `float.parse()`. It behaves similarly to Python's `float()` built-in function. /// @@ -24,15 +21,7 @@ import parse_error.{type ParseError, GleamFloatParseError, GleamIntParseError} /// lenient_parse.to_float("abc") // -> Error(InvalidCharacter("a", 0)) /// ``` pub fn to_float(text: String) -> Result(Float, ParseError) { - let text = text |> parse.parse_float - use text <- result.try(text) - let res = text |> float.parse |> result.replace_error(GleamFloatParseError) - use <- result.lazy_or(res) - - text - |> int.parse - |> result.map(int.to_float) - |> result.replace_error(GleamIntParseError) + text |> parse.parse_float } /// Converts a string to an integer using a more lenient parsing method than gleam's `int.parse()`. @@ -52,6 +41,5 @@ pub fn to_float(text: String) -> Result(Float, ParseError) { /// lenient_parse.to_int("abc") // -> Error(InvalidCharacter("a", 0)) /// ``` pub fn to_int(text: String) -> Result(Int, ParseError) { - use text <- result.try(text |> parse.parse_int) - text |> int.parse |> result.replace_error(GleamIntParseError) + text |> parse.parse_int } diff --git a/src/lenient_parse/internal/parse.gleam b/src/lenient_parse/internal/parse.gleam index 37d7ab2..9ce3e5d 100644 --- a/src/lenient_parse/internal/parse.gleam +++ b/src/lenient_parse/internal/parse.gleam @@ -1,4 +1,5 @@ import gleam/bool +import gleam/int import gleam/list import gleam/option.{type Option, None, Some} import gleam/result @@ -6,11 +7,11 @@ import lenient_parse/internal/tokenizer.{ type Token, DecimalPoint, Digit, Sign, Underscore, Unknown, Whitespace, } import parse_error.{ - type ParseError, EmptyString, InvalidDecimalPosition, UnknownCharacter, - WhitespaceOnlyString, + type ParseError, EmptyString, InvalidDecimalPosition, + InvalidUnderscorePosition, UnknownCharacter, WhitespaceOnlyString, } -pub fn parse_float(input: String) -> Result(String, ParseError) { +pub fn parse_float(input: String) -> Result(Float, ParseError) { let tokens = input |> tokenizer.tokenize let index = 0 let empty_string = "" @@ -19,28 +20,35 @@ pub fn parse_float(input: String) -> Result(String, ParseError) { use #(leading_whitespace, tokens, index) <- result.try(pre_whitespace_result) let sign_result = parse_sign(tokens, index) - use #(sign, tokens, index) <- result.try(sign_result) + use #(is_positive, tokens, index) <- result.try(sign_result) - let digit_pre_decimal = parse_digit(tokens, empty_string, index, index) - use #(digit_pre_decimal, tokens, index) <- result.try(digit_pre_decimal) + let whole_digit_result = parse_digit(tokens, 0, index, index, 0) + use #(whole_digit, _, tokens, index) <- result.try(whole_digit_result) - let decimal_point_result = parse_decimal_point(tokens, index) - use #(decimal_specified, tokens, index) <- result.try(decimal_point_result) + let decimal_result = parse_decimal_point(tokens, index) + use #(decimal_specified, tokens, index) <- result.try(decimal_result) - let digit_post_decimal = parse_digit(tokens, empty_string, index, index) - use #(digit_post_decimal, tokens, index) <- result.try(digit_post_decimal) + let fractional_digit_result = parse_digit(tokens, 0, index, index, 0) + use #(fractional_digit, fractional_digit_length, tokens, index) <- result.try( + fractional_digit_result, + ) let post_whitespace_result = parse_whitespace(tokens, empty_string, index) use #(_, tokens, index) <- result.try(post_whitespace_result) case tokens |> list.first { - Ok(token) -> Error(tokenizer.error_for_token(token, index)) + Ok(token) -> Error(tokenizer.to_error(token, index)) _ -> { - case digit_pre_decimal, digit_post_decimal { - Some(pre), Some(post) -> Ok(sign <> pre <> "." <> post) - Some(pre), None -> Ok(sign <> pre <> ".0") - None, Some(post) -> Ok(sign <> "0." <> post) + case whole_digit, fractional_digit { + Some(whole), Some(fractional) -> + Ok(form_float(is_positive, whole, fractional, fractional_digit_length)) + Some(whole), None -> + Ok(form_float(is_positive, whole, 0, fractional_digit_length)) + None, Some(fractional) -> + Ok(form_float(is_positive, 0, fractional, fractional_digit_length)) _, _ -> { + // TODO: This sucks - hardcoded to take care of one specific test case during the rewrite: "." + // There is likely a better way to handle this. use <- bool.guard( decimal_specified, Error(InvalidDecimalPosition(index - 1)), @@ -56,7 +64,7 @@ pub fn parse_float(input: String) -> Result(String, ParseError) { } } -pub fn parse_int(input: String) -> Result(String, ParseError) { +pub fn parse_int(input: String) -> Result(Int, ParseError) { let tokens = input |> tokenizer.tokenize let index = 0 let empty_string = "" @@ -65,19 +73,23 @@ pub fn parse_int(input: String) -> Result(String, ParseError) { use #(leading_whitespace, tokens, index) <- result.try(pre_whitespace_result) let sign_result = parse_sign(tokens, index) - use #(sign, tokens, index) <- result.try(sign_result) + use #(is_positive, tokens, index) <- result.try(sign_result) - let digit_result = parse_digit(tokens, empty_string, index, index) - use #(digit, tokens, index) <- result.try(digit_result) + let digit_result = parse_digit(tokens, 0, index, index, 0) + use #(digit, _, tokens, index) <- result.try(digit_result) let post_whitespace_result = parse_whitespace(tokens, empty_string, index) use #(_, tokens, index) <- result.try(post_whitespace_result) case tokens |> list.first { - Ok(token) -> Error(tokenizer.error_for_token(token, index)) + Ok(token) -> Error(tokenizer.to_error(token, index)) _ -> { case leading_whitespace, digit { - Some(_), Some(digit) | None, Some(digit) -> Ok(sign <> digit) + Some(_), Some(digit) | None, Some(digit) -> + case is_positive { + True -> Ok(digit) + False -> Ok(-digit) + } Some(_), None -> Error(WhitespaceOnlyString) _, _ -> Error(EmptyString) } @@ -115,14 +127,14 @@ fn parse_whitespace( fn parse_sign( tokens: List(Token), index: Int, -) -> Result(#(String, List(Token), Int), ParseError) { +) -> Result(#(Bool, List(Token), Int), ParseError) { case tokens { - [] -> Ok(#("+", tokens, index)) + [] -> Ok(#(True, tokens, index)) [first, ..rest] -> { case first { Unknown(character) -> Error(UnknownCharacter(character, index)) - Sign(a) -> Ok(#(a, rest, index + 1)) - _ -> Ok(#("+", tokens, index)) + Sign(is_positive) -> Ok(#(is_positive, rest, index + 1)) + _ -> Ok(#(True, tokens, index)) } } } @@ -146,17 +158,18 @@ fn parse_decimal_point( fn parse_digit( tokens: List(Token), - acc: String, + acc: Int, index: Int, beginning_index: Int, -) -> Result(#(Option(String), List(Token), Int), ParseError) { + digit_length: Int, +) -> Result(#(Option(Int), Int, List(Token), Int), ParseError) { let at_beginning = index == beginning_index case tokens { [] -> - case acc { - "" -> Ok(#(None, tokens, index)) - _ -> Ok(#(Some(acc), tokens, index)) + case digit_length > 0 { + True -> Ok(#(Some(acc), digit_length, tokens, index)) + False -> Ok(#(None, digit_length, tokens, index)) } [first, ..rest] -> { let lookahead = rest |> list.first @@ -170,23 +183,52 @@ fn parse_digit( } case first { - Digit(digit) -> - parse_digit(rest, acc <> digit, index + 1, beginning_index) + Digit(digit) -> { + let acc = acc * 10 + digit + parse_digit(rest, acc, index + 1, beginning_index, digit_length + 1) + } Underscore if next_is_underscore -> - Error(parse_error.InvalidUnderscorePosition(index + 1)) + Error(InvalidUnderscorePosition(index + 1)) Underscore if at_beginning || is_end -> - Error(parse_error.InvalidUnderscorePosition(index)) - Underscore -> parse_digit(rest, acc, index + 1, beginning_index) + Error(InvalidUnderscorePosition(index)) + Underscore -> { + parse_digit(rest, acc, index + 1, beginning_index, digit_length) + } Whitespace(whitespace) if at_beginning -> Error(UnknownCharacter(whitespace, index)) Unknown(character) -> Error(UnknownCharacter(character, index)) _ -> { - case acc { - "" -> Ok(#(None, tokens, index)) - _ -> Ok(#(Some(acc), tokens, index)) + case digit_length > 0 { + True -> Ok(#(Some(acc), digit_length, tokens, index)) + False -> Ok(#(None, digit_length, tokens, index)) } } } } } } + +fn form_float( + is_positive: Bool, + whole_digit: Int, + fractional_digit: Int, + fractional_length: Int, +) -> Float { + let whole_float = whole_digit |> int.to_float + let fractional_float = + fractional_digit + |> int.to_float + |> normalize_fractional_part(fractional_length) + let float_value = whole_float +. fractional_float + case is_positive { + True -> float_value + False -> float_value *. -1.0 + } +} + +fn normalize_fractional_part(value: Float, fractional_length: Int) -> Float { + case fractional_length <= 0 { + True -> value + False -> normalize_fractional_part(value /. 10.0, fractional_length - 1) + } +} diff --git a/src/lenient_parse/internal/tokenizer.gleam b/src/lenient_parse/internal/tokenizer.gleam index 04bb646..c410118 100644 --- a/src/lenient_parse/internal/tokenizer.gleam +++ b/src/lenient_parse/internal/tokenizer.gleam @@ -1,3 +1,4 @@ +import gleam/int import gleam/list import gleam/string import parse_error.{ @@ -6,8 +7,8 @@ import parse_error.{ } pub type Token { - Sign(String) - Digit(String) + Sign(Bool) + Digit(Int) Underscore DecimalPoint Whitespace(String) @@ -23,9 +24,18 @@ fn do_tokenize(characters: List(String), acc: List(Token)) -> List(Token) { [] -> acc |> list.reverse [first, ..rest] -> { let token = case first { - "-" | "+" -> Sign(first) - "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" -> - Digit(first) + "-" -> Sign(False) + "+" -> Sign(True) + "0" -> Digit(0) + "1" -> Digit(1) + "2" -> Digit(2) + "3" -> Digit(3) + "4" -> Digit(4) + "5" -> Digit(5) + "6" -> Digit(6) + "7" -> Digit(7) + "8" -> Digit(8) + "9" -> Digit(9) "." -> DecimalPoint "_" -> Underscore " " | "\n" | "\t" | "\r" | "\f" | "\r\n" -> Whitespace(first) @@ -37,10 +47,14 @@ fn do_tokenize(characters: List(String), acc: List(Token)) -> List(Token) { } } -pub fn error_for_token(token: Token, index) -> ParseError { +pub fn to_error(token: Token, index) -> ParseError { case token { - Digit(digit) -> InvalidDigitPosition(digit, index) - Sign(sign) -> InvalidSignPosition(sign, index) + Digit(digit) -> { + let digit = digit |> int.to_string + InvalidDigitPosition(digit, index) + } + Sign(True) -> InvalidSignPosition("+", index) + Sign(False) -> InvalidSignPosition("-", index) Underscore -> InvalidUnderscorePosition(index) Unknown(character) -> UnknownCharacter(character, index) Whitespace(whitespace) -> UnknownCharacter(whitespace, index) diff --git a/src/parse_error.gleam b/src/parse_error.gleam index 35c9415..aa2c7a4 100644 --- a/src/parse_error.gleam +++ b/src/parse_error.gleam @@ -40,26 +40,11 @@ pub type ParseError { /// - `character`: The invalid character as a `String`. /// - `index`: The position of the invalid character in the input string. UnknownCharacter(character: String, index: Int) - - /// Represents an error when Gleam's `float.parse` fails after custom parsing - /// and coercion. - /// - /// This indicates that the string couldn't be converted to a float even with - /// more permissive rules. - GleamFloatParseError - - /// Represents an error when Gleam's `int.parse` fails after custom parsing - /// and coercion. - /// - /// This indicates that the string couldn't be converted to an integer even - /// with more permissive rules. - GleamIntParseError } @internal pub fn to_string(error: ParseError) -> String { case error { - GleamIntParseError -> "gleam integer parse error" UnknownCharacter(character, index) -> "unknown character \"" <> character @@ -69,7 +54,6 @@ pub fn to_string(error: ParseError) -> String { "invalid underscore at position: " <> index |> int.to_string EmptyString -> "empty string" WhitespaceOnlyString -> "whitespace only string" - GleamFloatParseError -> "gleam float parse error" InvalidDecimalPosition(index) -> "invalid decimal at position: " <> index |> int.to_string InvalidSignPosition(sign, index) -> diff --git a/test/tokenizer_test.gleam b/test/tokenizer_test.gleam index 13749df..f894f34 100644 --- a/test/tokenizer_test.gleam +++ b/test/tokenizer_test.gleam @@ -13,18 +13,18 @@ pub fn tokenize_test() { Whitespace("\r"), Whitespace("\f"), Whitespace("\r\n"), - Sign("+"), - Sign("-"), - Digit("0"), - Digit("1"), - Digit("2"), - Digit("3"), - Digit("4"), - Digit("5"), - Digit("6"), - Digit("7"), - Digit("8"), - Digit("9"), + Sign(True), + Sign(False), + Digit(0), + Digit(1), + Digit(2), + Digit(3), + Digit(4), + Digit(5), + Digit(6), + Digit(7), + Digit(8), + Digit(9), DecimalPoint, Underscore, Unknown("a"),