-
Notifications
You must be signed in to change notification settings - Fork 26
Close #506: Implement preprocessor concatenation #510
base: master
Are you sure you want to change the base?
Changes from all commits
28aaca8
1f32b12
4f0e7d6
6396e51
d128286
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -231,8 +231,9 @@ pub enum Token { | |||||
|
||||||
// Misc | ||||||
Ellipsis, | ||||||
StructDeref, // -> | ||||||
Hash, // #, used for preprocessing | ||||||
StructDeref, // -> | ||||||
Hash, // #, used for preprocessing | ||||||
HashHash(bool), // ##, used for preprocessing (the bool is true unless it is created by `# ## #`) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not clear what the bool is where you use it in the code, might be better to say explicitly (the comment is fine though).
Suggested change
Comment on lines
-234
to
+236
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: I'd mildly prefer to either have inconsistent alignment or to put the comment on the line above. Otherwise it's more work next time you need to update the comments. |
||||||
} | ||||||
|
||||||
/* impls */ | ||||||
|
@@ -393,6 +394,7 @@ impl std::fmt::Display for Token { | |||||
Ellipsis => write!(f, "..."), | ||||||
StructDeref => write!(f, "->"), | ||||||
Hash => write!(f, "#"), | ||||||
HashHash(_) => write!(f, "##"), | ||||||
} | ||||||
} | ||||||
} | ||||||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -1941,4 +1941,50 @@ h", | |||||||||||||||||||||||||||||||||
assert_is_str("__DATE__"); | ||||||||||||||||||||||||||||||||||
assert_is_str("__TIME__"); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
#[test] | ||||||||||||||||||||||||||||||||||
fn hashhash() { | ||||||||||||||||||||||||||||||||||
use crate::data::lex::{AssignmentToken::*, ComparisonToken::*, Keyword, LiteralToken::*}; | ||||||||||||||||||||||||||||||||||
use Token::*; | ||||||||||||||||||||||||||||||||||
fn assert_concat(x: &str, y: &str, cpp_src: Option<Token>) { | ||||||||||||||||||||||||||||||||||
let src = format!("#define tok {} ## {}\ntok", x, y); | ||||||||||||||||||||||||||||||||||
let tok = cpp(&src) | ||||||||||||||||||||||||||||||||||
.next_non_whitespace() | ||||||||||||||||||||||||||||||||||
.unwrap() | ||||||||||||||||||||||||||||||||||
.map(|tok| tok.data) | ||||||||||||||||||||||||||||||||||
.ok(); | ||||||||||||||||||||||||||||||||||
assert_eq!(tok, cpp_src); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
assert_concat("<", "<", Some(ShiftLeft)); | ||||||||||||||||||||||||||||||||||
assert_concat("+", "+", Some(PlusPlus)); | ||||||||||||||||||||||||||||||||||
assert_concat(">>", "=", Some(Assignment(ShrEqual))); | ||||||||||||||||||||||||||||||||||
assert_concat(">", "=", Some(Comparison(GreaterEqual))); | ||||||||||||||||||||||||||||||||||
assert_concat("#", "#", Some(HashHash(false))); | ||||||||||||||||||||||||||||||||||
assert_concat("-", ">", Some(StructDeref)); | ||||||||||||||||||||||||||||||||||
assert_concat("const", "ance", Some(Id("constance".into()))); | ||||||||||||||||||||||||||||||||||
assert_concat("xyz", "123", Some(Id("xyz123".into()))); | ||||||||||||||||||||||||||||||||||
assert_concat("un", "signed", Some(Keyword(Keyword::Unsigned))); | ||||||||||||||||||||||||||||||||||
assert_concat("unsign", "ed", Some(Keyword(Keyword::Unsigned))); | ||||||||||||||||||||||||||||||||||
assert_concat("5", "e5", Some(Literal(Float("5e5".into())))); | ||||||||||||||||||||||||||||||||||
assert_concat("1234", ".5", Some(Literal(Float("1234.5".into())))); | ||||||||||||||||||||||||||||||||||
assert_concat("42", "000", Some(Literal(Int("42000".into())))); | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
assert_concat("+", "/", None); | ||||||||||||||||||||||||||||||||||
assert_concat(r#"'x'"#, r#"'y'"#, None); | ||||||||||||||||||||||||||||||||||
assert_concat(r#""x""#, r#""y""#, None); | ||||||||||||||||||||||||||||||||||
assert_concat("0b1", "6", None); | ||||||||||||||||||||||||||||||||||
assert_concat("/", "/", None); // Not a comment | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||||||||
#[test] | ||||||||||||||||||||||||||||||||||
#[ignore] // Related to https://github.com/jyn514/saltwater/issues/513 | ||||||||||||||||||||||||||||||||||
fn hash_and_hashhash() { | ||||||||||||||||||||||||||||||||||
assert_same( | ||||||||||||||||||||||||||||||||||
"#define hash_hash # ## # | ||||||||||||||||||||||||||||||||||
#define mkstr(a) # a | ||||||||||||||||||||||||||||||||||
#define in_between(a) mkstr(a) | ||||||||||||||||||||||||||||||||||
#define join(c, d) in_between(c hash_hash d) | ||||||||||||||||||||||||||||||||||
join(x, y);", | ||||||||||||||||||||||||||||||||||
r#""X ## Y""#, | ||||||||||||||||||||||||||||||||||
); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
//! | ||
//! This module does no parsing and accepts only tokens. | ||
|
||
use super::{cpp::CppResult, files::FileProcessor}; | ||
use super::{cpp::CppResult, files::FileProcessor, Lexer}; | ||
use crate::{ | ||
error::CppError, CompileError, CompileResult, InternedStr, LiteralToken, Locatable, Location, | ||
Token, | ||
|
@@ -166,56 +166,104 @@ pub fn replace( | |
// - _not_ after every token, since otherwise that won't catch some mutual recursion | ||
// See https://github.com/jyn514/rcc/issues/427 for examples. | ||
let mut ids_seen = HashSet::new(); | ||
let mut replacements = Vec::new(); | ||
let mut replacements: Vec<CompileResult<Locatable<Token>>> = Vec::new(); | ||
let mut pending = VecDeque::new(); | ||
pending.push_back(Ok(location.with(token))); | ||
|
||
let mut pending_hashhash: Option<Token> = None; // Token before ## | ||
|
||
// outer loop: replace all tokens in the replacement list | ||
while let Some(token) = pending.pop_front() { | ||
// first step: perform (recursive) substitution on the ID | ||
if let Ok(Locatable { | ||
data: Token::Id(id), | ||
.. | ||
}) = token | ||
{ | ||
if !ids_seen.contains(&id) { | ||
match definitions.get(&id) { | ||
Some(Definition::Object(replacement_list)) => { | ||
ids_seen.insert(id); | ||
// prepend the new tokens to the pending tokens | ||
// They need to go before, not after. For instance: | ||
// ```c | ||
// #define a b c d | ||
// #define b 1 + 2 | ||
// a | ||
// ``` | ||
// should replace to `1 + 2 c d`, not `c d 1 + 2` | ||
let mut new_pending = VecDeque::new(); | ||
// we need a `clone()` because `self.definitions` needs to keep its copy of the definition | ||
new_pending.extend( | ||
replacement_list | ||
.iter() | ||
.cloned() | ||
.map(|t| Ok(location.with(t))), | ||
); | ||
new_pending.append(&mut pending); | ||
pending = new_pending; | ||
continue; | ||
} | ||
// TODO: so many allocations :( | ||
Some(Definition::Function { .. }) => { | ||
ids_seen.insert(id); | ||
let func_replacements = | ||
replace_function(definitions, id, location, &mut pending, &mut inner); | ||
let mut func_replacements: VecDeque<_> = | ||
func_replacements.into_iter().collect(); | ||
func_replacements.append(&mut pending); | ||
pending = func_replacements; | ||
continue; | ||
match token { | ||
Ok(Locatable { | ||
data: ref succeeding_tok, | ||
.. | ||
}) if pending_hashhash.is_some() => { | ||
if matches!(succeeding_tok, Token::Whitespace(_)) { | ||
continue; | ||
} | ||
let pending_hashhash = pending_hashhash.take().unwrap(); // We just checked that it's some | ||
Comment on lines
+177
to
+185
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better to write this as match (token, pending_hashhash) {
(Ok(..), Some(hashhash)) => { That way you don't have an extra |
||
let concat_token = | ||
concat(&pending_hashhash, succeeding_tok, &location).ok_or_else(|| { | ||
location.with( | ||
CppError::HashHashInvalid( | ||
pending_hashhash.clone(), | ||
succeeding_tok.clone(), | ||
) | ||
.into(), | ||
) | ||
}); | ||
pending.push_back(concat_token); | ||
continue; | ||
} | ||
Ok(Locatable { | ||
data: Token::Id(id), | ||
.. | ||
}) => { | ||
if !ids_seen.contains(&id) { | ||
match definitions.get(&id) { | ||
Some(Definition::Object(replacement_list)) => { | ||
ids_seen.insert(id); | ||
// prepend the new tokens to the pending tokens | ||
// They need to go before, not after. For instance: | ||
// ```c | ||
// #define a b c d | ||
// #define b 1 + 2 | ||
// a | ||
// ``` | ||
// should replace to `1 + 2 c d`, not `c d 1 + 2` | ||
let mut new_pending = VecDeque::new(); | ||
// we need a `clone()` because `self.definitions` needs to keep its copy of the definition | ||
new_pending.extend( | ||
replacement_list | ||
.iter() | ||
.cloned() | ||
.map(|t| Ok(location.with(t))), | ||
); | ||
new_pending.append(&mut pending); | ||
pending = new_pending; | ||
continue; | ||
} | ||
// TODO: so many allocations :( | ||
Some(Definition::Function { .. }) => { | ||
ids_seen.insert(id); | ||
let func_replacements = replace_function( | ||
definitions, | ||
id, | ||
location, | ||
&mut pending, | ||
&mut inner, | ||
); | ||
let mut func_replacements: VecDeque<_> = | ||
func_replacements.into_iter().collect(); | ||
func_replacements.append(&mut pending); | ||
pending = func_replacements; | ||
continue; | ||
} | ||
None => {} | ||
} | ||
None => {} | ||
} | ||
} | ||
Ok(Locatable { | ||
data: Token::HashHash(true), | ||
.. | ||
}) => { | ||
let preceding_tok = loop { | ||
match replacements.pop() { | ||
Some(Ok(Locatable { | ||
data: Token::Whitespace(_), | ||
.. | ||
})) => continue, | ||
Some(Ok(Locatable { data: token, .. })) => break token, | ||
None | Some(Err(_)) => { | ||
return wrap_error(&location, CppError::HashHashMissingParameter(true)) | ||
} | ||
Comment on lines
+258
to
+260
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, in the |
||
} | ||
}; | ||
pending_hashhash = Some(preceding_tok); | ||
continue; | ||
} | ||
_ => {} | ||
} | ||
replacements.push(token); | ||
} | ||
|
@@ -367,16 +415,17 @@ fn replace_function( | |
// and taking no arguments other than knowing the number of parameters. | ||
if !(args.len() == 1 && params.is_empty() && args[0].is_empty()) { | ||
// booo, this is the _only_ error in the whole replacer | ||
return vec![Err( | ||
location.with(CppError::TooFewArguments(params.len(), args.len()).into()) | ||
)]; | ||
return wrap_error( | ||
&location, | ||
CppError::TooFewArguments(params.len(), args.len()), | ||
); | ||
} | ||
} | ||
|
||
let mut pending_hash = false; // Seen a hash? | ||
for token in body { | ||
match *token { | ||
Token::Id(id) => { | ||
match token { | ||
&Token::Id(id) => { | ||
// #define f(a) { a + 1 } \n f(b) => b + 1 | ||
if let Some(index) = params.iter().position(|¶m| param == id) { | ||
let replacement = args[index].clone(); | ||
|
@@ -387,7 +436,7 @@ fn replace_function( | |
replacements.push(stringify(replacement)); | ||
} | ||
} else if pending_hash { | ||
return vec![Err(location.with(CppError::HashMissingParameter.into()))]; | ||
return wrap_error(&location, CppError::HashMissingParameter); | ||
} else { | ||
replacements.push(Token::Id(id)); | ||
} | ||
|
@@ -403,7 +452,7 @@ fn replace_function( | |
} | ||
_ => { | ||
if pending_hash { | ||
return vec![Err(location.with(CppError::HashMissingParameter.into()))]; | ||
return wrap_error(&location, CppError::HashMissingParameter); | ||
} else { | ||
replacements.push(token.clone()); | ||
} | ||
|
@@ -439,3 +488,21 @@ fn stringify(args: Vec<Token>) -> Token { | |
ret.trim() | ||
))])) | ||
} | ||
|
||
fn concat(x: &Token, y: &Token, location: &Location) -> Option<Locatable<Token>> { | ||
let mut lexer = Lexer::new(location.file, format!("{}{}", x, y), false); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You said doing this means we can get rid of the LiteralParser trait, right? Do you want to do that here or in a follow-up PR? |
||
match lexer.next() { | ||
Some(Ok(tok)) if lexer.next().is_none() => Some(match tok { | ||
Locatable { | ||
data: Token::HashHash(_), | ||
location, | ||
} => location.with(Token::HashHash(false)), | ||
tok => tok, | ||
}), | ||
_ => None, | ||
} | ||
} | ||
|
||
fn wrap_error(location: &Location, err: CppError) -> Vec<CppResult<Token>> { | ||
vec![Err(location.with(err.into()))] | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.