Skip to content

Commit

Permalink
Support inferred bases (#60)
Browse files Browse the repository at this point in the history
  • Loading branch information
JosephTLyons authored Nov 11, 2024
1 parent 2e63607 commit 8c0dcc2
Show file tree
Hide file tree
Showing 13 changed files with 749 additions and 342 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## v1.3.0 - XXXX-XX-XX

- Added base 0 support to `lenient_parse.to_int_with_base`. When providing a base of 0, the function will look for a base prefix string (`0b`, `0o`, `0x`) to try to determine the base value. If no prefix is found, the function will default to base 10.

## v1.2.0 - 2024-11-09

- Added arbitrary base support for integer parsing - use `lenient_parse.to_int_with_base`.
Expand Down
28 changes: 22 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import gleam/io
import lenient_parse
pub fn main() {
// Parse a string containing an integer into a float
// Parse a string containing an integer value into a float
"1" |> lenient_parse.to_float |> io.debug // Ok(1.0)
"1" |> float.parse |> io.debug // Error(Nil)
Expand All @@ -33,12 +33,12 @@ pub fn main() {
"-5.001" |> lenient_parse.to_float |> io.debug // Ok(-5.001)
"-5.001" |> float.parse |> io.debug // Ok(-5.001)
// Parse a more complex float with scientific notation
// Parse a string containing a complex float with scientific notation
"-1_234.567_8e-2" |> lenient_parse.to_float |> io.debug // Ok(-12.345678)
"-1_234.567_8e-2" |> float.parse |> io.debug // Error(Nil)
// Parse a string containing an integer into an integer
// Parse a string containing an integer
"123" |> lenient_parse.to_int |> io.debug // Ok(123)
"123" |> int.parse |> io.debug // Ok(123)
Expand All @@ -53,16 +53,32 @@ pub fn main() {
"1_000_000" |> lenient_parse.to_int |> io.debug // Ok(1000000)
"1_000_000" |> int.parse |> io.debug // Error(Nil)
// Parse a binary string with underscores
// Parse a string containing a binary number with underscores
"1000_0000" |> lenient_parse.to_int_with_base(base: 2) |> io.debug // Ok(128)
"1000_0000" |> int.base_parse(2) |> io.debug // Error(Nil)
// Parse a hexadecimal string with underscores
// Parse a string containing a hexadecimal number with underscores
"DEAD_BEEF" |> lenient_parse.to_int_with_base(base: 16) |> io.debug// Ok(3735928559)
"DEAD_BEEF" |> lenient_parse.to_int_with_base(base: 16) |> io.debug // Ok(3735928559)
"DEAD_BEEF" |> int.base_parse(16) |> io.debug // Error(Nil)
// Use base 0 to automatically detect the base when parsing strings with prefix indicators
"0b10" |> lenient_parse.to_int_with_base(base: 0) |> io.debug // Ok(2)
"0b10" |> int.base_parse(0) |> io.debug // Error(Nil)
"0o01234" |> lenient_parse.to_int_with_base(base: 0) |> io.debug // Ok(668)
"0o01234" |> int.base_parse(0) |> io.debug // Error(Nil)
"0xDEADBEEF" |> lenient_parse.to_int_with_base(base: 0) |> io.debug // Ok(3735928559)
"0xDEADBEEF" |> int.base_parse(0) |> io.debug // Error(Nil)
// If no prefix string is present, base 0 defaults to base 10
"-4" |> lenient_parse.to_int_with_base(base: 0) |> io.debug // Ok(-4)
"-4" |> int.base_parse(0) |> io.debug // Error(Nil)
// Nice errors
"12.3e_3" |> lenient_parse.to_float |> io.debug // Error(InvalidUnderscorePosition(5))
Expand Down
5 changes: 3 additions & 2 deletions src/lenient_parse.gleam
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import gleam/bool
import lenient_parse/internal/base_constants.{base_0, base_10}
import lenient_parse/internal/parser
import lenient_parse/internal/tokenizer
import parse_error.{type ParseError, InvalidBaseValue}
Expand All @@ -15,7 +16,7 @@ pub fn to_float(text text: String) -> Result(Float, ParseError) {
/// gleam's `int.parse()`. It behaves similarly to Python's `int()` built-in
/// function, using a default base of 10.
pub fn to_int(text text: String) -> Result(Int, ParseError) {
text |> to_int_with_base(base: 10)
text |> to_int_with_base(base: base_10)
}

/// Converts a string to an integer using a more lenient parsing method than
Expand All @@ -25,7 +26,7 @@ pub fn to_int_with_base(
text text: String,
base base: Int,
) -> Result(Int, ParseError) {
let is_valid_base = base >= 2 && base <= 36
let is_valid_base = base == base_0 || { base >= 2 && base <= 36 }
use <- bool.guard(!is_valid_base, Error(InvalidBaseValue(base)))
let tokens = text |> tokenizer.tokenize_int(base: base)
tokens |> parser.parse_int(base: base)
Expand Down
9 changes: 9 additions & 0 deletions src/lenient_parse/internal/base_constants.gleam
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/// Sentinel base meaning "infer the base from the input".
/// `to_int_with_base` accepts it in addition to the 2..36 range, and
/// `parse_int` falls back to `base_10` when no base prefix token is found.
pub const base_0 = 0

/// Binary. One of the bases for which `parse_int` looks for a base prefix.
pub const base_2 = 2

/// Octal. One of the bases for which `parse_int` looks for a base prefix.
pub const base_8 = 8

/// Decimal. Default base of `to_int`, the base used for every digit run in
/// `parse_float`, and the fallback when base 0 is given without a prefix.
pub const base_10 = 10

/// Hexadecimal. One of the bases for which `parse_int` looks for a base prefix.
pub const base_16 = 16
114 changes: 86 additions & 28 deletions src/lenient_parse/internal/parser.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@ import gleam/option.{type Option, None, Some}
import gleam/order
import gleam/queue.{type Queue}
import gleam/result
import lenient_parse/internal/base_constants.{
base_0, base_10, base_16, base_2, base_8,
}
import lenient_parse/internal/scale
import lenient_parse/internal/token.{
type Token, DecimalPoint, Digit, ExponentSymbol, Sign, Underscore, Unknown,
Whitespace,
type Token, BasePrefix, DecimalPoint, Digit, ExponentSymbol, Sign, Underscore,
Unknown, Whitespace,
}
import parse_error.{
type ParseError, EmptyString, InvalidDecimalPosition,
type ParseError, BasePrefixOnly, EmptyString, InvalidDecimalPosition,
InvalidExponentSymbolPosition, InvalidUnderscorePosition, OutOfBaseRange,
UnknownCharacter, WhitespaceOnlyString,
}
Expand All @@ -29,14 +32,14 @@ pub fn parse_float(tokens tokens: List(Token)) -> Result(Float, ParseError) {
let parse_data = parse_sign(tokens, next_index)
use ParseData(is_positive, next_index, tokens) <- result.try(parse_data)

let parse_data = parse_digits(tokens, next_index)
let parse_data = parse_digits(tokens, next_index, base_10)
use ParseData(whole_digits, next_index, tokens) <- result.try(parse_data)

let parse_data = parse_decimal_point(tokens, next_index)
use ParseData(decimal_specified, next_index, tokens) <- result.try(parse_data)

let parse_data = case decimal_specified {
True -> parse_digits(tokens, next_index)
True -> parse_digits(tokens, next_index, base_10)
False -> Ok(ParseData(queue.new(), next_index, tokens))
}
use ParseData(fractional_digits, next_index, tokens) <- result.try(parse_data)
Expand All @@ -53,22 +56,22 @@ pub fn parse_float(tokens tokens: List(Token)) -> Result(Float, ParseError) {

let parse_data = case missing_digit_parts, exponent_symbol {
True, Some(exponent_symbol) ->
Error(InvalidExponentSymbolPosition(exponent_symbol, next_index - 1))
Error(InvalidExponentSymbolPosition(next_index - 1, exponent_symbol))
_, None -> Ok(ParseData(0, next_index, tokens))
_, Some(exponent_symbol) -> {
let parse_data = parse_sign(tokens, next_index)
use ParseData(exponent_digit_is_positive, next_index, tokens) <- result.try(
parse_data,
)

let parse_data = parse_digits(tokens, next_index)
let parse_data = parse_digits(tokens, next_index, base_10)
use ParseData(exponent_digits, next_index, tokens) <- result.try(
parse_data,
)

let parse_data = case exponent_digits |> queue.is_empty {
True ->
Error(InvalidExponentSymbolPosition(exponent_symbol, next_index - 1))
Error(InvalidExponentSymbolPosition(next_index - 1, exponent_symbol))
False -> Ok(ParseData(exponent_digits, next_index, tokens))
}
use ParseData(exponent_digits, next_index, tokens) <- result.try(
Expand All @@ -91,7 +94,7 @@ pub fn parse_float(tokens tokens: List(Token)) -> Result(Float, ParseError) {

let remaining_token_result = case tokens {
[] -> Ok(Nil)
[token, ..] -> Error(token.to_error(token))
[token, ..] -> Error(token.to_error(token, base_10))
}
use _ <- result.try(remaining_token_result)

Expand Down Expand Up @@ -120,22 +123,54 @@ pub fn parse_int(
let parse_data = parse_sign(tokens, next_index)
use ParseData(is_positive, next_index, tokens) <- result.try(parse_data)

let parse_data = parse_digits(tokens, next_index)
let parse_data = case base {
base
if base == base_0 || base == base_2 || base == base_8 || base == base_16
-> {
let parse_data = parse_base_prefix(tokens, next_index)
use ParseData(base_data, next_index, tokens) <- result.try(parse_data)

let #(base, prefix_data) = case base_data {
Some(#(index_range, prefix, base)) -> #(
base,
Some(#(index_range, prefix)),
)
None -> {
let default_base = case base {
0 -> base_10
_ -> base
}

#(default_base, None)
}
}

Ok(ParseData(#(base, prefix_data), next_index, tokens))
}
_ -> Ok(ParseData(#(base, None), next_index, tokens))
}
use ParseData(#(base, prefix_data), next_index, tokens) <- result.try(
parse_data,
)

let parse_data = parse_digits(tokens, next_index, base)
use ParseData(digits, next_index, tokens) <- result.try(parse_data)

let parse_data = parse_whitespace(tokens, next_index)
use ParseData(_, _, tokens) <- result.try(parse_data)

let remaining_token_result = case tokens {
[] -> Ok(Nil)
[token, ..] -> Error(token.to_error(token))
[token, ..] -> Error(token.to_error(token, base))
}
use _ <- result.try(remaining_token_result)

case leading_whitespace, digits |> queue.is_empty {
None, True -> Error(EmptyString)
Some(_), True -> Error(WhitespaceOnlyString)
_, False -> {
case leading_whitespace, prefix_data, digits |> queue.is_empty {
None, None, True -> Error(EmptyString)
_, Some(#(index_range, prefix)), True ->
Error(BasePrefixOnly(index_range, prefix))
Some(_), _, True -> Error(WhitespaceOnlyString)
_, _, _ -> {
let value = digits |> digits_to_int_with_base(base: base)
let value = case is_positive {
True -> value
Expand All @@ -160,7 +195,7 @@ fn do_parse_whitespace(
) -> Result(ParseData(Option(String)), ParseError) {
case tokens {
[Unknown(#(start_index, _), character), ..] ->
Error(UnknownCharacter(character, start_index))
Error(UnknownCharacter(start_index, character))
[Whitespace(#(_, end_index), whitespace), ..rest] -> {
do_parse_whitespace(
tokens: rest,
Expand All @@ -185,7 +220,7 @@ fn parse_sign(
) -> Result(ParseData(Bool), ParseError) {
case tokens {
[Unknown(#(start_index, _), character), ..] ->
Error(UnknownCharacter(character, start_index))
Error(UnknownCharacter(start_index, character))
[Sign(#(_, end_index), _, is_positive), ..rest] ->
Ok(ParseData(data: is_positive, next_index: end_index, tokens: rest))
_ -> {
Expand All @@ -194,13 +229,31 @@ fn parse_sign(
}
}

/// Consumes an optional base prefix token from the front of `tokens`.
///
/// - If the first token is `Unknown`, returns `Error(UnknownCharacter(..))`
///   built from that token's start index and character.
/// - If the first token is `BasePrefix`, returns
///   `Some(#(index_range, prefix, base))`, sets `next_index` to the second
///   element of the prefix's index range, and removes the token from the
///   remaining token list.
/// - Otherwise returns `None` and passes `index` and `tokens` through
///   unchanged, so the caller can continue parsing from the same position.
fn parse_base_prefix(
tokens tokens: List(Token),
index index: Int,
) -> Result(ParseData(Option(#(#(Int, Int), String, Int))), ParseError) {
case tokens {
// An unrecognised character at this position is reported immediately.
[Unknown(#(start_index, _), character), ..] ->
Error(UnknownCharacter(start_index, character))
// Prefix present: hand back its range, text and base, and skip past it.
[BasePrefix(index_range, prefix, base), ..rest] -> {
Ok(ParseData(
data: Some(#(index_range, prefix, base)),
next_index: index_range.1,
tokens: rest,
))
}
// No prefix (including an empty token list): nothing is consumed.
_ -> Ok(ParseData(data: None, next_index: index, tokens: tokens))
}
}

fn parse_decimal_point(
tokens tokens: List(Token),
index index: Int,
) -> Result(ParseData(Bool), ParseError) {
case tokens {
[Unknown(#(start_index, _), character), ..] ->
Error(UnknownCharacter(character, start_index))
Error(UnknownCharacter(start_index, character))
[DecimalPoint(#(_, end_index)), ..rest] ->
Ok(ParseData(data: True, next_index: end_index, tokens: rest))
_ -> Ok(ParseData(data: False, next_index: index, tokens: tokens))
Expand All @@ -213,7 +266,7 @@ fn parse_exponent_symbol(
) -> Result(ParseData(Option(String)), ParseError) {
case tokens {
[Unknown(#(start_index, _), character), ..] ->
Error(UnknownCharacter(character, start_index))
Error(UnknownCharacter(start_index, character))
[ExponentSymbol(#(_, end_index), exponent_symbol), ..rest] ->
Ok(ParseData(
data: Some(exponent_symbol),
Expand All @@ -227,10 +280,12 @@ fn parse_exponent_symbol(
fn parse_digits(
tokens tokens: List(Token),
index index: Int,
base base: Int,
) -> Result(ParseData(Queue(Int)), ParseError) {
do_parse_digits(
tokens: tokens,
index: index,
base: base,
acc: queue.new(),
at_beginning: True,
)
Expand All @@ -239,18 +294,19 @@ fn parse_digits(
fn do_parse_digits(
tokens tokens: List(Token),
index index: Int,
base base: Int,
acc acc: Queue(Int),
at_beginning at_beginning: Bool,
) -> Result(ParseData(Queue(Int)), ParseError) {
case tokens {
[Unknown(#(start_index, _), character), ..] ->
Error(UnknownCharacter(character, start_index))
Error(UnknownCharacter(start_index, character))
[Whitespace(#(start_index, _), whitespace), ..] if at_beginning ->
Error(UnknownCharacter(whitespace, start_index))
Error(UnknownCharacter(start_index, whitespace))
[Underscore(#(start_index, end_index)), ..rest] -> {
let lookahead = rest |> list.first
let at_end = case lookahead {
Ok(Digit(_, _, _, _)) -> False
Ok(Digit(_, _, _)) -> False
_ -> True
}
let next_is_underscore = case lookahead {
Expand All @@ -271,20 +327,22 @@ fn do_parse_digits(
do_parse_digits(
tokens: rest,
index: end_index,
base: base,
acc: acc,
at_beginning: False,
)
}
[Digit(#(_, end_index), _, value, base), ..rest] if value < base -> {
[Digit(#(_, end_index), _, value), ..rest] if value < base -> {
do_parse_digits(
tokens: rest,
index: end_index,
base: base,
acc: acc |> queue.push_back(value),
at_beginning: False,
)
}
[Digit(#(start_index, _), character, value, base), ..] ->
Error(OutOfBaseRange(character, value, base, start_index))
[Digit(#(start_index, _), character, value), ..] ->
Error(OutOfBaseRange(start_index, character, value, base))
_ -> Ok(ParseData(data: acc, next_index: index, tokens: tokens))
}
}
Expand Down Expand Up @@ -314,7 +372,7 @@ fn form_float(
}

fn digits_to_int(digits digits: Queue(Int)) -> Int {
digits_to_int_with_base(digits: digits, base: 10)
digits_to_int_with_base(digits: digits, base: base_10)
}

fn digits_to_int_with_base(digits digits: Queue(Int), base base: Int) -> Int {
Expand Down Expand Up @@ -345,8 +403,8 @@ fn do_power(
}
}
order.Gt ->
do_power(base, exponent - 1, scale_factor * 10, exponent_is_positive)
do_power(base, exponent - 1, scale_factor * base_10, exponent_is_positive)
order.Lt ->
do_power(base, exponent + 1, scale_factor * 10, exponent_is_positive)
do_power(base, exponent + 1, scale_factor * base_10, exponent_is_positive)
}
}
Loading

0 comments on commit 8c0dcc2

Please sign in to comment.