diff --git a/src/lenient_parse.gleam b/src/lenient_parse.gleam index 45f33f6..267d622 100644 --- a/src/lenient_parse.gleam +++ b/src/lenient_parse.gleam @@ -1,7 +1,8 @@ -import lenient_parse/internal/coerce.{parse_float, parse_int} - +import gleam/float +import gleam/int +import gleam/result import lenient_parse/internal/parse -import parse_error.{type ParseError} +import parse_error.{type ParseError, GleamFloatParseError, GleamIntParseError} /// Converts a string to a float using a more lenient parsing method than gleam's `float.parse()`. It behaves similarly to Python's `float()` built-in function. /// @@ -23,8 +24,15 @@ import parse_error.{type ParseError} /// lenient_parse.to_float("abc") // -> Error(InvalidCharacter("a", 0)) /// ``` pub fn to_float(text: String) -> Result(Float, ParseError) { + let text = text |> parse.parse_float + use text <- result.try(text) + let res = text |> float.parse |> result.replace_error(GleamFloatParseError) + use <- result.lazy_or(res) + text - |> parse.to_float(parse_float) + |> int.parse + |> result.map(int.to_float) + |> result.replace_error(GleamIntParseError) } /// Converts a string to an integer using a more lenient parsing method than gleam's `int.parse()`. @@ -44,6 +52,6 @@ pub fn to_float(text: String) -> Result(Float, ParseError) { /// lenient_parse.to_int("abc") // -> Error(InvalidCharacter("a", 0)) /// ``` pub fn to_int(text: String) -> Result(Int, ParseError) { - text - |> parse.to_int(parse_int) + use text <- result.try(text |> parse.parse_int) + text |> int.parse |> result.replace_error(GleamIntParseError) } diff --git a/src/lenient_parse/internal/coerce.gleam b/src/lenient_parse/internal/coerce.gleam deleted file mode 100644 index f221905..0000000 --- a/src/lenient_parse/internal/coerce.gleam +++ /dev/null @@ -1,205 +0,0 @@ -import gleam/bool -import gleam/list -import gleam/option.{type Option, None, Some} -import gleam/result -import lenient_parse/internal/tokenizer.{ - type Token, DecimalPoint, Digit, Sign, Underscore, Unknown, Whitespace, -} - -import parse_error.{ - type ParseError, EmptyString, InvalidCharacter, InvalidDecimalPosition, - InvalidDigitPosition, InvalidSignPosition, InvalidUnderscorePosition, - WhitespaceOnlyString, -} - -pub fn parse_float(input: String) -> Result(String, ParseError) { - let tokens = input |> tokenizer.tokenize - let index = 0 - let empty_string = "" - - let pre_whitespace_result = parse_whitespace(tokens, empty_string, index) - use #(leading_whitespace, tokens, index) <- result.try(pre_whitespace_result) - - let sign_result = parse_sign(tokens, index) - use #(sign, tokens, index) <- result.try(sign_result) - - let digit_pre_decimal = parse_digit(tokens, empty_string, index, index) - use #(digit_pre_decimal, tokens, index) <- result.try(digit_pre_decimal) - - let decimal_point_result = parse_decimal_point(tokens, index) - use #(decimal_specified, tokens, index) <- result.try(decimal_point_result) - - let digit_post_decimal = parse_digit(tokens, empty_string, index, index) - use #(digit_post_decimal, tokens, index) <- result.try(digit_post_decimal) - - let post_whitespace_result = parse_whitespace(tokens, empty_string, index) - use #(_, tokens, index) <- result.try(post_whitespace_result) - - case tokens |> list.first { - Ok(token) -> Error(extraneous_token_error(token, index)) - _ -> { - case digit_pre_decimal, digit_post_decimal { - Some(pre), Some(post) -> Ok(sign <> pre <> "." <> post) - Some(pre), None -> Ok(sign <> pre <> ".0") - None, Some(post) -> Ok(sign <> "0." <> post) - _, _ -> { - use <- bool.guard( - decimal_specified, - Error(InvalidDecimalPosition(index - 1)), - ) - - case leading_whitespace { - Some(_) -> Error(WhitespaceOnlyString) - _ -> Error(EmptyString) - } - } - } - } - } -} - -pub fn parse_int(input: String) -> Result(String, ParseError) { - let tokens = input |> tokenizer.tokenize - let index = 0 - let empty_string = "" - - let pre_whitespace_result = parse_whitespace(tokens, empty_string, index) - use #(leading_whitespace, tokens, index) <- result.try(pre_whitespace_result) - - let sign_result = parse_sign(tokens, index) - use #(sign, tokens, index) <- result.try(sign_result) - - let digit_result = parse_digit(tokens, empty_string, index, index) - use #(digit, tokens, index) <- result.try(digit_result) - - let post_whitespace_result = parse_whitespace(tokens, empty_string, index) - use #(_, tokens, index) <- result.try(post_whitespace_result) - - case tokens |> list.first { - Ok(token) -> Error(extraneous_token_error(token, index)) - _ -> { - case leading_whitespace, digit { - Some(_), Some(digit) | None, Some(digit) -> Ok(sign <> digit) - Some(_), None -> Error(WhitespaceOnlyString) - _, _ -> Error(EmptyString) - } - } - } -} - -fn parse_whitespace( - tokens: List(Token), - acc: String, - index: Int, -) -> Result(#(Option(String), List(Token), Int), ParseError) { - case tokens { - [] -> - case acc { - "" -> Ok(#(None, tokens, index)) - _ -> Ok(#(Some(acc), tokens, index)) - } - [first, ..rest] -> { - case first { - Unknown(character) -> Error(InvalidCharacter(character, index)) - Whitespace(whitespace) -> - parse_whitespace(rest, acc <> whitespace, index + 1) - _ -> { - case acc { - "" -> Ok(#(None, tokens, index)) - _ -> Ok(#(Some(acc), tokens, index)) - } - } - } - } - } -} - -fn parse_sign( - tokens: List(Token), - index: Int, -) -> Result(#(String, List(Token), Int), ParseError) { - case tokens { - [] -> Ok(#("+", tokens, index)) - [first, ..rest] -> { - case first { - Unknown(character) -> Error(InvalidCharacter(character, index)) - Sign(a) -> Ok(#(a, rest, index + 1)) - _ -> Ok(#("+", tokens, index)) - } - } - } -} - -fn parse_decimal_point( - tokens: List(Token), - index: Int, -) -> Result(#(Bool, List(Token), Int), ParseError) { - case tokens { - [] -> Ok(#(False, tokens, index)) - [first, ..rest] -> { - case first { - Unknown(character) -> Error(InvalidCharacter(character, index)) - DecimalPoint -> Ok(#(True, rest, index + 1)) - _ -> Ok(#(False, rest, index)) - } - } - } -} - -fn parse_digit( - tokens: List(Token), - acc: String, - index: Int, - beginning_index: Int, -) -> Result(#(Option(String), List(Token), Int), ParseError) { - let at_beginning = index == beginning_index - - case tokens { - [] -> - case acc { - "" -> Ok(#(None, tokens, index)) - _ -> Ok(#(Some(acc), tokens, index)) - } - [first, ..rest] -> { - let lookahead = rest |> list.first - let is_end = case lookahead { - Ok(Digit(_)) -> False - _ -> True - } - let next_is_underscore = case lookahead { - Ok(Underscore) -> True - _ -> False - } - - case first { - Digit(digit) -> - parse_digit(rest, acc <> digit, index + 1, beginning_index) - Underscore if next_is_underscore -> - Error(parse_error.InvalidUnderscorePosition(index + 1)) - Underscore if at_beginning || is_end -> - Error(parse_error.InvalidUnderscorePosition(index)) - Underscore -> parse_digit(rest, acc, index + 1, beginning_index) - Whitespace(whitespace) if at_beginning -> - Error(InvalidCharacter(whitespace, index)) - Unknown(character) -> Error(InvalidCharacter(character, index)) - _ -> { - case acc { - "" -> Ok(#(None, tokens, index)) - _ -> Ok(#(Some(acc), tokens, index)) - } - } - } - } - } -} - -fn extraneous_token_error(token: Token, index) -> ParseError { - case token { - Digit(digit) -> InvalidDigitPosition(digit, index) - Sign(sign) -> InvalidSignPosition(sign, index) - Underscore -> InvalidUnderscorePosition(index) - Unknown(character) -> InvalidCharacter(character, index) - Whitespace(whitespace) -> InvalidCharacter(whitespace, index) - DecimalPoint -> InvalidDecimalPosition(index) - } -} diff --git a/src/lenient_parse/internal/parse.gleam b/src/lenient_parse/internal/parse.gleam index 246b659..61b6461 100644 --- a/src/lenient_parse/internal/parse.gleam +++ b/src/lenient_parse/internal/parse.gleam @@ -1,27 +1,204 @@ -import gleam/float -import gleam/int +import gleam/bool +import gleam/list +import gleam/option.{type Option, None, Some} import gleam/result -import parse_error.{type ParseError, GleamFloatParseError, GleamIntParseError} - -pub fn to_float( - text: String, - coerce_strategy: fn(String) -> Result(String, ParseError), -) -> Result(Float, ParseError) { - let text = text |> coerce_strategy - use text <- result.try(text) - let res = text |> float.parse |> result.replace_error(GleamFloatParseError) - use <- result.lazy_or(res) - - text - |> int.parse - |> result.map(int.to_float) - |> result.replace_error(GleamIntParseError) -} - -pub fn to_int( - text: String, - coerce_strategy: fn(String) -> Result(String, ParseError), -) -> Result(Int, ParseError) { - use text <- result.try(text |> coerce_strategy) - text |> int.parse |> result.replace_error(GleamIntParseError) +import lenient_parse/internal/tokenizer.{ + type Token, DecimalPoint, Digit, Sign, Underscore, Unknown, Whitespace, +} +import parse_error.{ + type ParseError, EmptyString, InvalidCharacter, InvalidDecimalPosition, + InvalidDigitPosition, InvalidSignPosition, InvalidUnderscorePosition, + WhitespaceOnlyString, +} + +pub fn parse_float(input: String) -> Result(String, ParseError) { + let tokens = input |> tokenizer.tokenize + let index = 0 + let empty_string = "" + + let pre_whitespace_result = parse_whitespace(tokens, empty_string, index) + use #(leading_whitespace, tokens, index) <- result.try(pre_whitespace_result) + + let sign_result = parse_sign(tokens, index) + use #(sign, tokens, index) <- result.try(sign_result) + + let digit_pre_decimal = parse_digit(tokens, empty_string, index, index) + use #(digit_pre_decimal, tokens, index) <- result.try(digit_pre_decimal) + + let decimal_point_result = parse_decimal_point(tokens, index) + use #(decimal_specified, tokens, index) <- result.try(decimal_point_result) + + let digit_post_decimal = parse_digit(tokens, empty_string, index, index) + use #(digit_post_decimal, tokens, index) <- result.try(digit_post_decimal) + + let post_whitespace_result = parse_whitespace(tokens, empty_string, index) + use #(_, tokens, index) <- result.try(post_whitespace_result) + + case tokens |> list.first { + Ok(token) -> Error(extraneous_token_error(token, index)) + _ -> { + case digit_pre_decimal, digit_post_decimal { + Some(pre), Some(post) -> Ok(sign <> pre <> "." <> post) + Some(pre), None -> Ok(sign <> pre <> ".0") + None, Some(post) -> Ok(sign <> "0." <> post) + _, _ -> { + use <- bool.guard( + decimal_specified, + Error(InvalidDecimalPosition(index - 1)), + ) + + case leading_whitespace { + Some(_) -> Error(WhitespaceOnlyString) + _ -> Error(EmptyString) + } + } + } + } + } +} + +pub fn parse_int(input: String) -> Result(String, ParseError) { + let tokens = input |> tokenizer.tokenize + let index = 0 + let empty_string = "" + + let pre_whitespace_result = parse_whitespace(tokens, empty_string, index) + use #(leading_whitespace, tokens, index) <- result.try(pre_whitespace_result) + + let sign_result = parse_sign(tokens, index) + use #(sign, tokens, index) <- result.try(sign_result) + + let digit_result = parse_digit(tokens, empty_string, index, index) + use #(digit, tokens, index) <- result.try(digit_result) + + let post_whitespace_result = parse_whitespace(tokens, empty_string, index) + use #(_, tokens, index) <- result.try(post_whitespace_result) + + case tokens |> list.first { + Ok(token) -> Error(extraneous_token_error(token, index)) + _ -> { + case leading_whitespace, digit { + Some(_), Some(digit) | None, Some(digit) -> Ok(sign <> digit) + Some(_), None -> Error(WhitespaceOnlyString) + _, _ -> Error(EmptyString) + } + } + } +} + +fn parse_whitespace( + tokens: List(Token), + acc: String, + index: Int, +) -> Result(#(Option(String), List(Token), Int), ParseError) { + case tokens { + [] -> + case acc { + "" -> Ok(#(None, tokens, index)) + _ -> Ok(#(Some(acc), tokens, index)) + } + [first, ..rest] -> { + case first { + Unknown(character) -> Error(InvalidCharacter(character, index)) + Whitespace(whitespace) -> + parse_whitespace(rest, acc <> whitespace, index + 1) + _ -> { + case acc { + "" -> Ok(#(None, tokens, index)) + _ -> Ok(#(Some(acc), tokens, index)) + } + } + } + } + } +} + +fn parse_sign( + tokens: List(Token), + index: Int, +) -> Result(#(String, List(Token), Int), ParseError) { + case tokens { + [] -> Ok(#("+", tokens, index)) + [first, ..rest] -> { + case first { + Unknown(character) -> Error(InvalidCharacter(character, index)) + Sign(a) -> Ok(#(a, rest, index + 1)) + _ -> Ok(#("+", tokens, index)) + } + } + } +} + +fn parse_decimal_point( + tokens: List(Token), + index: Int, +) -> Result(#(Bool, List(Token), Int), ParseError) { + case tokens { + [] -> Ok(#(False, tokens, index)) + [first, ..rest] -> { + case first { + Unknown(character) -> Error(InvalidCharacter(character, index)) + DecimalPoint -> Ok(#(True, rest, index + 1)) + _ -> Ok(#(False, rest, index)) + } + } + } +} + +fn parse_digit( + tokens: List(Token), + acc: String, + index: Int, + beginning_index: Int, +) -> Result(#(Option(String), List(Token), Int), ParseError) { + let at_beginning = index == beginning_index + + case tokens { + [] -> + case acc { + "" -> Ok(#(None, tokens, index)) + _ -> Ok(#(Some(acc), tokens, index)) + } + [first, ..rest] -> { + let lookahead = rest |> list.first + let is_end = case lookahead { + Ok(Digit(_)) -> False + _ -> True + } + let next_is_underscore = case lookahead { + Ok(Underscore) -> True + _ -> False + } + + case first { + Digit(digit) -> + parse_digit(rest, acc <> digit, index + 1, beginning_index) + Underscore if next_is_underscore -> + Error(parse_error.InvalidUnderscorePosition(index + 1)) + Underscore if at_beginning || is_end -> + Error(parse_error.InvalidUnderscorePosition(index)) + Underscore -> parse_digit(rest, acc, index + 1, beginning_index) + Whitespace(whitespace) if at_beginning -> + Error(InvalidCharacter(whitespace, index)) + Unknown(character) -> Error(InvalidCharacter(character, index)) + _ -> { + case acc { + "" -> Ok(#(None, tokens, index)) + _ -> Ok(#(Some(acc), tokens, index)) + } + } + } + } + } +} + +fn extraneous_token_error(token: Token, index) -> ParseError { + case token { + Digit(digit) -> InvalidDigitPosition(digit, index) + Sign(sign) -> InvalidSignPosition(sign, index) + Underscore -> InvalidUnderscorePosition(index) + Unknown(character) -> InvalidCharacter(character, index) + Whitespace(whitespace) -> InvalidCharacter(whitespace, index) + DecimalPoint -> InvalidDecimalPosition(index) + } } diff --git a/test/python/python_parse.gleam b/test/python/python_parse.gleam index 835f396..303f980 100644 --- a/test/python/python_parse.gleam +++ b/test/python/python_parse.gleam @@ -2,14 +2,14 @@ import gleam/result import shellout pub fn to_float(text: String) -> Result(String, Nil) { - text |> coerce("float") + text |> parse("float") } pub fn to_int(text: String) -> Result(String, Nil) { - text |> coerce("int") + text |> parse("int") } -fn coerce(text: String, parse_function_name: String) -> Result(String, Nil) { +fn parse(text: String, parse_function_name: String) -> Result(String, Nil) { shellout.command( run: "uv", with: [ diff --git a/test/python_parse_test.gleam b/test/python_parse_test.gleam index 1053ea8..7dd0a09 100644 --- a/test/python_parse_test.gleam +++ b/test/python_parse_test.gleam @@ -21,14 +21,14 @@ pub fn check_against_python_tests() { let message = case output, python_output { Ok(_), Ok(python_output) -> { - "should_coerce: \"" + "should_parse: \"" <> input_printable_text <> "\" -> \"" <> python_output <> "\"" } Error(_), Error(_) -> { - "should_not_coerce: \"" + "should_not_parse: \"" <> input_printable_text <> "\" -> \"Error\"" } @@ -66,14 +66,14 @@ pub fn check_against_python_tests() { let message = case output, python_output { Ok(_), Ok(python_output) -> { - "should_coerce: \"" + "should_parse: \"" <> input_printable_text <> "\" -> \"" <> python_output <> "\"" } Error(_), Error(_) -> { - "should_not_coerce: \"" + "should_not_parse: \"" <> input_printable_text <> "\" -> \"Error\"" } @@ -109,7 +109,7 @@ fn form_panic_message( python_output: String, ) -> String { "Invalid test data configuration." - <> " Test data for both our's and Python's coerce methods should both expect" + <> " Test data for both our's and Python's parse methods should both expect" <> " to either succeed or fail for the same input.\n" <> "Input: " <> input diff --git a/test/to_float_parse_test.gleam b/test/to_float_parse_test.gleam index 5542006..948e08a 100644 --- a/test/to_float_parse_test.gleam +++ b/test/to_float_parse_test.gleam @@ -7,7 +7,7 @@ import shared_test_data import startest.{describe, it} import startest/expect -pub fn coerce_into_valid_number_string_tests() { +pub fn parse_into_valid_number_string_tests() { describe( "float_test", shared_test_data.float_data @@ -18,7 +18,7 @@ pub fn coerce_into_valid_number_string_tests() { let message = case output { Ok(output) -> { - "should_coerce: \"" + "should_parse: \"" <> input_printable_text <> "\" -> \"" <> output |> float.to_string @@ -26,7 +26,7 @@ pub fn coerce_into_valid_number_string_tests() { } Error(error) -> { let error_string = error |> parse_error.to_string - "should_not_coerce: \"" + "should_not_parse: \"" <> input_printable_text <> "\" -> \"" <> error_string diff --git a/test/to_int_parse_test.gleam b/test/to_int_parse_test.gleam index 3a5645b..c2fd2b7 100644 --- a/test/to_int_parse_test.gleam +++ b/test/to_int_parse_test.gleam @@ -7,7 +7,7 @@ import shared_test_data import startest.{describe, it} import startest/expect -pub fn coerce_into_valid_number_string_tests() { +pub fn parse_into_valid_number_string_tests() { describe( "int_test", shared_test_data.int_data @@ -18,7 +18,7 @@ pub fn coerce_into_valid_number_string_tests() { let message = case output { Ok(output) -> { - "should_coerce: \"" + "should_parse: \"" <> input_printable_text <> "\" -> \"" <> output |> int.to_string @@ -26,7 +26,7 @@ pub fn coerce_into_valid_number_string_tests() { } Error(error) -> { let error_string = error |> parse_error.to_string - "should_not_coerce: \"" + "should_not_parse: \"" <> input_printable_text <> "\" -> \"" <> error_string