-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
update rust token lexer
- Loading branch information
Showing
10 changed files
with
269 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# CI workflow: builds and tests with cargo on pushes and pull requests that
# target the listed branches.
name: Rust

on:
  push:
    branches: [ "master", "dev" ]
  pull_request:
    branches: [ "master", "dev" ]

env:
  CARGO_TERM_COLOR: always

jobs:
  build:

    runs-on: ubuntu-latest

    # Every `run` step starts in a fresh shell, so a standalone `cd rust` step
    # does not affect the steps after it. Set the working directory for all
    # `run` steps of this job instead.
    defaults:
      run:
        working-directory: rust

    steps:
    - uses: actions/checkout@v3
    - name: Build
      run: cargo build --verbose
    - name: Run tests
      run: cargo test --verbose
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Usage of the Rust version of Trc | ||
|
||
## First written value for Trc | ||
|
||
You can write integer values in the following ways: | ||
|Way|Explain| | ||
|:---|:---| | ||
|12321312|common value| | ||
|122_32432_4324324|use underscores to split the number so it is easier to read| | ||
|0b10110|number in radix 2 (binary)| | ||
|0o324243|number in radix 8 (octal)| | ||
|0x324324|number in radix 16 (hexadecimal)| | ||
|
||
Tip: No matter how large your number is, the Trc compiler will store it in a type large enough to hold it. Don't worry! | ||
|
||
Next is the string value. To make strings easier to write: | ||
|
||
|Way|Explain| | ||
|:---|:---| | ||
|"hello world"|common value| | ||
|"hello_world"|use underline to split the number to read more easily| |
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,32 @@ | ||
mod token; | ||
use std::fs; | ||
|
||
pub struct compiler { | ||
input:fs::File | ||
use std::io; | ||
use std::collections::hash_set; | ||
|
||
/// Pool of constant values; currently it only holds a set of `i32` constants.
#[derive(Debug, Default)]
pub struct ConstPool {
    /// Distinct integer constants stored in the pool.
    const_ints: hash_set::HashSet<i32>,
}

impl ConstPool {
    /// Creates a pool that contains no constants.
    fn new() -> Self {
        ConstPool {
            const_ints: hash_set::HashSet::new(),
        }
    }
}
|
||
pub struct Compiler<T:io::Read> { | ||
input:T, | ||
line: usize, | ||
const_pool: ConstPool | ||
} | ||
|
||
impl compiler { | ||
fn new(f:fs::File) -> compiler { | ||
compiler { | ||
input:f | ||
impl<T:io::Read> Compiler<T> { | ||
fn new(f: T) -> Self { | ||
Compiler { | ||
input:f, | ||
line:1, | ||
const_pool: ConstPool::new(), | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,201 @@ | ||
enum TokenType { | ||
use std::str::{Chars, CharIndices}; | ||
|
||
use super::Compiler; | ||
use std::io; | ||
|
||
/// Kinds of token the lexer distinguishes.
// The variants keep their existing upper-case spelling because other code
// refers to them by these exact names; silence the naming lint instead.
#[allow(non_camel_case_types)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum TokenType {
    /// The `.` symbol.
    DOT,
    /// The `,` symbol.
    COMMA,
    /// Integer literal; carries a `Data::Int` payload.
    INT_VALUE,
    /// String literal.
    STRING_VALUE,
    /// Floating-point literal.
    FLOAT_VALUE,
    /// Integer literal too large for the regular integer payload
    /// (NOTE(review): inferred from the name; confirm the intended meaning).
    LONG_INT_VALUE,
    /// Floating-point counterpart of `LONG_INT_VALUE`
    /// (NOTE(review): inferred from the name; confirm the intended meaning).
    LONG_FLOAT_VALUE,
}
|
||
enum Data { | ||
|
||
/// Payload attached to a token.
#[derive(PartialEq, Clone, Debug)]
pub enum Data {
    /// 32-bit signed integer payload.
    Int(i32),
    /// Owned string payload.
    Str(String),
    /// 64-bit floating-point payload.
    FLOAT(f64),
}
|
||
#[derive(PartialEq, Debug)] | ||
struct Token { | ||
tp:TokenType, | ||
data:Data | ||
} | ||
|
||
struct TokenLex { | ||
|
||
struct TokenLex<'code, T:io::Read> { | ||
code: &'code str, | ||
pos: CharIndices<'code>, | ||
compiler_data:&'code mut Compiler<T> | ||
} | ||
|
||
impl Token { | ||
fn new(tp:TokenType, data:Option<Data>) -> Token { | ||
match data { | ||
Some(data) => Token { | ||
tp, | ||
data | ||
}, | ||
None => Token { | ||
tp, | ||
data:Data::Int(0) | ||
} | ||
} | ||
} | ||
} | ||
|
||
impl<T:io::Read> Iterator for TokenLex<'_, T> { | ||
type Item = Token; | ||
fn next(&mut self) -> Option<Self::Item> { | ||
self.next_token() | ||
} | ||
} | ||
|
||
impl<T:io::Read> TokenLex<'_, T> { | ||
fn new<'a>(code:&'a str, compiler_data:&'a mut Compiler<T>) -> TokenLex<'a, T> { | ||
TokenLex { | ||
code, | ||
pos:code.char_indices(), | ||
compiler_data | ||
} | ||
} | ||
|
||
fn lex_symbol(&mut self, c:char) -> Token { | ||
match c { | ||
'.' => Token::new(TokenType::DOT, None), | ||
',' => Token::new(TokenType::COMMA, None), | ||
_ => panic!("Not a symbol.Compiler error") | ||
} | ||
} | ||
|
||
fn lex_num(&mut self, c:char) -> Token { | ||
// to save the int in str | ||
let mut s = c.to_string(); | ||
// the radix of result | ||
let mut radix = 10; | ||
let mut presecnt_lex: Option<(usize, char)> = self.pos.next(); | ||
match presecnt_lex { | ||
Some(c) => { | ||
let c = c.1; | ||
if c == '0' { | ||
presecnt_lex = self.pos.next(); | ||
match presecnt_lex { | ||
Some(c) => { | ||
let c = c.1; | ||
match c { | ||
'x' | 'X' => { | ||
s += "0x"; | ||
radix = 16; | ||
}, | ||
'b' | 'B' => { | ||
s += "0b"; | ||
radix = 2; | ||
}, | ||
'o' | 'O' => { | ||
s += "0o"; | ||
radix = 8; | ||
}, | ||
_ => {} | ||
} | ||
}, | ||
None => { | ||
return Token::new(TokenType::INT_VALUE, Some(Data::Int(0))); | ||
} | ||
} | ||
} | ||
}, | ||
None => { | ||
panic!("Not a num.Compiler error"); | ||
} | ||
} | ||
loop { | ||
match self.pos.next() { | ||
None => { | ||
break; | ||
}, | ||
Some(c) => { | ||
let c = c.1; | ||
if c.is_digit(10) { | ||
s.push(c); | ||
} else { | ||
self.pos.next_back(); | ||
break; | ||
} | ||
} | ||
} | ||
} | ||
Token::new(TokenType::INT_VALUE, Some(Data::Int(s.parse().expect("wrong string to int")))) | ||
} | ||
|
||
fn next_token(&mut self) -> Option<Token> { | ||
let mut presecnt_lex = self.pos.next(); | ||
loop { | ||
presecnt_lex = self.pos.next(); | ||
match presecnt_lex { | ||
Some(c) => { | ||
let c = c.1; | ||
if c == '\n' || c == '\t' || c == ' ' { | ||
continue; | ||
} | ||
break; | ||
}, | ||
None => { | ||
return None; | ||
} | ||
} | ||
} | ||
let presecnt_lex = presecnt_lex.unwrap().1; | ||
if presecnt_lex.is_digit(10) { | ||
Some(self.lex_num(presecnt_lex)); | ||
} | ||
Some(self.lex_symbol(presecnt_lex)) | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `tokenlex` yields exactly the tokens in `expected_res`,
    /// in order, and then reports the end of the input.
    fn check<T: io::Read>(tokenlex: &mut TokenLex<T>, expected_res: Vec<Token>) {
        for i in expected_res {
            assert_eq!(i, tokenlex.next().unwrap());
        }
        assert_eq!(None, tokenlex.next());
    }

    /// Symbols, decimal/float literals, `_` separators and radix prefixes.
    #[test]
    fn test_next_token() {
        let mut env = Compiler::new(io::stdin());
        let mut t = TokenLex::new(
            r#",,.,100
123.9 232_304904
0b011
0x2aA4
0o2434 0"#,
            &mut env,
        );
        let res = vec![
            Token::new(TokenType::COMMA, None),
            Token::new(TokenType::COMMA, None),
            Token::new(TokenType::DOT, None),
            Token::new(TokenType::COMMA, None),
            // The input contains `100` right after the fourth symbol, so it
            // must appear in the expected stream as well.
            Token::new(TokenType::INT_VALUE, Some(Data::Int(100))),
            Token::new(TokenType::FLOAT_VALUE, Some(Data::FLOAT(123.9))),
            Token::new(TokenType::INT_VALUE, Some(Data::Int(232_304904))),
            Token::new(TokenType::INT_VALUE, Some(Data::Int(0b011))),
            Token::new(TokenType::INT_VALUE, Some(Data::Int(0x2aA4))),
            Token::new(TokenType::INT_VALUE, Some(Data::Int(0o2434))),
            Token::new(TokenType::INT_VALUE, Some(Data::Int(0))),
        ];
        check(&mut t, res);
    }

    /// Literals with digits that are invalid for their radix must panic.
    #[test]
    #[should_panic]
    fn test_token_wrong() {
        let mut env = Compiler::new(io::stdin());
        let t = TokenLex::new(r#"0xtghhy 0b231"#, &mut env);
        for _ in t {}
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.