Skip to content

Commit

Permalink
Merge pull request #420 from koto-lang/underscores-in-numbers
Browse files (browse the repository at this point in the history)
Add support for underscores in numbers
  • Loading branch information
irh authored Feb 1, 2025
2 parents 27d1056 + df0b5d7 commit 9526049
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 34 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ The Koto project adheres to
f x...
# -> 6
```
- Number literals can now include underscores.
[#399](https://github.com/koto-lang/koto/issues/399)
- E.g.
```koto
x = 1_000_000
y = 0xff_aa_bb
```

#### API

Expand Down
45 changes: 31 additions & 14 deletions crates/lexer/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,12 @@ impl<'a> TokenLexer<'a> {
use Token::*;

let has_leading_zero = chars.peek() == Some(&'0');
let mut char_bytes = consume_and_count(&mut chars, is_digit);
let mut char_bytes = if matches!(chars.peek(), Some(c) if c.is_ascii_digit()) {
chars.next();
1 + consume_and_count(&mut chars, is_decimal_digit)
} else {
0
};
let mut allow_exponent = true;

match chars.peek() {
Expand All @@ -557,14 +562,14 @@ impl<'a> TokenLexer<'a> {
chars.next();

match chars.peek() {
Some(c) if is_digit(*c) => {}
Some(c) if c.is_ascii_digit() => {}
Some(&'e') => {
// lookahead to check that this isn't a function call starting with 'e'
// e.g. 1.exp()
let mut lookahead = chars.clone();
lookahead.next();
match lookahead.peek() {
Some(c) if is_digit(*c) => {}
Some(c) if is_decimal_digit(*c) => {}
Some(&'+' | &'-') => {}
_ => {
self.advance_line(char_bytes);
Expand All @@ -578,7 +583,7 @@ impl<'a> TokenLexer<'a> {
}
}

char_bytes += 1 + consume_and_count(&mut chars, is_digit);
char_bytes += 1 + consume_and_count(&mut chars, is_decimal_digit);
}
_ => {}
}
Expand All @@ -592,7 +597,7 @@ impl<'a> TokenLexer<'a> {
char_bytes += 1;
}

char_bytes += consume_and_count(&mut chars, is_digit);
char_bytes += consume_and_count(&mut chars, is_decimal_digit);
}

self.advance_line(char_bytes);
Expand Down Expand Up @@ -858,20 +863,20 @@ impl Iterator for TokenLexer<'_> {
}
}

fn is_digit(c: char) -> bool {
c.is_ascii_digit()
fn is_decimal_digit(c: char) -> bool {
c.is_ascii_digit() || c == '_'
}

fn is_binary_digit(c: char) -> bool {
matches!(c, '0' | '1')
matches!(c, '0' | '1' | '_')
}

fn is_octal_digit(c: char) -> bool {
matches!(c, '0'..='7')
matches!(c, '0'..='7' | '_')
}

fn is_hex_digit(c: char) -> bool {
c.is_ascii_hexdigit()
c.is_ascii_hexdigit() || c == '_'
}

fn is_whitespace(c: char) -> bool {
Expand Down Expand Up @@ -1348,9 +1353,13 @@ r#''bar''#
0.5e+9
-8e8
0xabadcafe
0xABADCAFE
0xABAD_CAFE
0o707606
0b1010101";
0o_707_606_
0b1010101
1_000_000
1_000e_001
1.e9_9";
check_lexer_output(
input,
&[
Expand All @@ -1368,11 +1377,19 @@ r#''bar''#
(NewLine, None, 4),
(Number, Some("0xabadcafe"), 5),
(NewLine, None, 5),
(Number, Some("0xABADCAFE"), 6),
(Number, Some("0xABAD_CAFE"), 6),
(NewLine, None, 6),
(Number, Some("0o707606"), 7),
(NewLine, None, 7),
(Number, Some("0b1010101"), 8),
(Number, Some("0o_707_606_"), 8),
(NewLine, None, 8),
(Number, Some("0b1010101"), 9),
(NewLine, None, 9),
(Number, Some("1_000_000"), 10),
(NewLine, None, 10),
(Number, Some("1_000e_001"), 11),
(NewLine, None, 11),
(Number, Some("1.e9_9"), 12),
],
);
}
Expand Down
41 changes: 23 additions & 18 deletions crates/parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use crate::{
};
use koto_lexer::{LexedToken, Lexer, Span, StringType, Token};
use std::{
borrow::Cow,
collections::HashSet,
iter::Peekable,
str::{Chars, FromStr},
Expand Down Expand Up @@ -1772,6 +1773,12 @@ impl<'source> Parser<'source> {
self.consume_token_with_context(context); // Token::Number

let slice = self.current_token.slice(self.source);
// Strip underscores if necessary
let slice = if slice.contains('_') {
Cow::Owned(slice.chars().filter(|&c| c != '_').collect())
} else {
Cow::Borrowed(slice)
};

let maybe_integer = if let Some(hex) = slice.strip_prefix("0x") {
i64::from_str_radix(hex, 16)
Expand All @@ -1780,37 +1787,35 @@ impl<'source> Parser<'source> {
} else if let Some(binary) = slice.strip_prefix("0b") {
i64::from_str_radix(binary, 2)
} else {
i64::from_str(slice)
i64::from_str(&slice)
};

let number_node = if let Ok(n) = maybe_integer {
// Should we store the number as a SmallInt or as a stored constant?
if u8::try_from(n).is_ok() {
let n = if negate { -n } else { n };
self.push_node(SmallInt(n as i16))?
SmallInt(n as i16)
} else {
let n = if negate { -n } else { n };
match self.constants.add_i64(n) {
Ok(constant_index) => self.push_node(Int(constant_index))?,
Err(_) => return self.error(InternalError::ConstantPoolCapacityOverflow),
if let Ok(constant_index) = self.constants.add_i64(n) {
Int(constant_index)
} else {
return self.error(InternalError::ConstantPoolCapacityOverflow);
}
}
} else {
match f64::from_str(slice) {
Ok(n) => {
let n = if negate { -n } else { n };
match self.constants.add_f64(n) {
Ok(constant_index) => self.push_node(Float(constant_index))?,
Err(_) => return self.error(InternalError::ConstantPoolCapacityOverflow),
}
}
Err(_) => {
return self.error(InternalError::NumberParseFailure);
}
} else if let Ok(n) = f64::from_str(&slice) {
let n = if negate { -n } else { n };
if let Ok(constant_index) = self.constants.add_f64(n) {
Float(constant_index)
} else {
return self.error(InternalError::ConstantPoolCapacityOverflow);
}
} else {
return self.error(InternalError::NumberParseFailure);
};

self.check_for_chain_after_node(number_node, context)
let node = self.push_node(number_node)?;
self.check_for_chain_after_node(node, context)
}

// Parses expressions contained in round parentheses
Expand Down
4 changes: 2 additions & 2 deletions crates/parser/tests/parser_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,11 @@ null"#;
1
0x1
0x100
0xABADCAFE
0xABAD_CAFE
0o1
0o100
0b1
0b100
0b1_0_0
";
check_ast(
source,
Expand Down

0 comments on commit 9526049

Please sign in to comment.