Skip to content
This repository has been archived by the owner on Jun 3, 2021. It is now read-only.

Close #506: Implement preprocessor concatenation #510

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/data/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,14 @@ pub enum CppError {
/// '#' in a function macro not followed by function parameter
#[error("'#' is not followed by a macro parameter")]
HashMissingParameter,

/// '##' missing arguments
#[error("'##' cannot appear at {} of macro expansion", if *(.0) { "start" } else { "end"})]
HashHashMissingParameter(bool),

/// The result of '##' is not a valid token
#[error("pasting formed '{0}{1}', an invalid preprocessing token")]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
#[error("pasting formed '{0}{1}', an invalid preprocessing token")]
#[error("token pasting formed '{0}{1}', an invalid preprocessing token")]

HashHashInvalid(Token, Token),
}

/// Lex errors are non-exhaustive and may have new variants added at any time
Expand Down
6 changes: 4 additions & 2 deletions src/data/lex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,8 +231,9 @@ pub enum Token {

// Misc
Ellipsis,
StructDeref, // ->
Hash, // #, used for preprocessing
StructDeref, // ->
Hash, // #, used for preprocessing
HashHash(bool), // ##, used for preprocessing (the bool is true unless it is created by `# ## #`)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not clear what the bool is where you use it in the code, might be better to say explicitly (the comment is fine though).

Suggested change
HashHash(bool), // ##, used for preprocessing (the bool is true unless it is created by `# ## #`)
HashHash { operator: bool }, // ##, used for preprocessing (the bool is true unless it is created by `# ## #`)

Comment on lines -234 to +236
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I'd mildly prefer to either have inconsistent alignment or to put the comment on the line above. Otherwise it's more work next time you need to update the comments.

}

/* impls */
Expand Down Expand Up @@ -393,6 +394,7 @@ impl std::fmt::Display for Token {
Ellipsis => write!(f, "..."),
StructDeref => write!(f, "->"),
Hash => write!(f, "#"),
HashHash(_) => write!(f, "##"),
}
}
}
Expand Down
46 changes: 46 additions & 0 deletions src/lex/cpp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1941,4 +1941,50 @@ h",
assert_is_str("__DATE__");
assert_is_str("__TIME__");
}

#[test]
fn hashhash() {
use crate::data::lex::{AssignmentToken::*, ComparisonToken::*, Keyword, LiteralToken::*};
use Token::*;
fn assert_concat(x: &str, y: &str, cpp_src: Option<Token>) {
let src = format!("#define tok {} ## {}\ntok", x, y);
let tok = cpp(&src)
.next_non_whitespace()
.unwrap()
.map(|tok| tok.data)
.ok();
assert_eq!(tok, cpp_src);
}
assert_concat("<", "<", Some(ShiftLeft));
assert_concat("+", "+", Some(PlusPlus));
assert_concat(">>", "=", Some(Assignment(ShrEqual)));
assert_concat(">", "=", Some(Comparison(GreaterEqual)));
assert_concat("#", "#", Some(HashHash(false)));
assert_concat("-", ">", Some(StructDeref));
assert_concat("const", "ance", Some(Id("constance".into())));
assert_concat("xyz", "123", Some(Id("xyz123".into())));
assert_concat("un", "signed", Some(Keyword(Keyword::Unsigned)));
assert_concat("unsign", "ed", Some(Keyword(Keyword::Unsigned)));
assert_concat("5", "e5", Some(Literal(Float("5e5".into()))));
assert_concat("1234", ".5", Some(Literal(Float("1234.5".into()))));
assert_concat("42", "000", Some(Literal(Int("42000".into()))));

assert_concat("+", "/", None);
assert_concat(r#"'x'"#, r#"'y'"#, None);
assert_concat(r#""x""#, r#""y""#, None);
assert_concat("0b1", "6", None);
assert_concat("/", "/", None); // Not a comment
}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
}
}
#[test]
fn gnu_weirdness() {
// #506
let program = "
#define __GLIBC_USE(F) __GLIBC_USE_ ## F
# define __GLIBC_USE_IEC_60559_TYPES_EXT 1
# define __GLIBC_USE_ISOC2X 1
#if __GLIBC_USE (IEC_60559_BFP_EXT) || __GLIBC_USE (ISOC2X)
int main() {}
#endif
";
assert_same(program, "int main() {}");
}

#[test]
#[ignore] // Related to https://github.com/jyn514/saltwater/issues/513
fn hash_and_hashhash() {
assert_same(
"#define hash_hash # ## #
#define mkstr(a) # a
#define in_between(a) mkstr(a)
#define join(c, d) in_between(c hash_hash d)
join(x, y);",
r#""X ## Y""#,
);
}
}
8 changes: 7 additions & 1 deletion src/lex/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,13 @@ impl Iterator for Lexer {
let span_start = self.location.offset - c.len_utf8() as u32;
// this giant switch is most of the logic
let data = match c {
'#' => Token::Hash,
'#' => match self.peek() {
Some('#') => {
self.next_char();
Token::HashHash(true)
}
_ => Token::Hash,
},
'+' => match self.peek() {
Some('=') => {
self.next_char();
Expand Down
167 changes: 117 additions & 50 deletions src/lex/replace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//!
//! This module does no parsing and accepts only tokens.

use super::{cpp::CppResult, files::FileProcessor};
use super::{cpp::CppResult, files::FileProcessor, Lexer};
use crate::{
error::CppError, CompileError, CompileResult, InternedStr, LiteralToken, Locatable, Location,
Token,
Expand Down Expand Up @@ -166,56 +166,104 @@ pub fn replace(
// - _not_ after every token, since otherwise that won't catch some mutual recursion
// See https://github.com/jyn514/rcc/issues/427 for examples.
let mut ids_seen = HashSet::new();
let mut replacements = Vec::new();
let mut replacements: Vec<CompileResult<Locatable<Token>>> = Vec::new();
let mut pending = VecDeque::new();
pending.push_back(Ok(location.with(token)));

let mut pending_hashhash: Option<Token> = None; // Token before ##

// outer loop: replace all tokens in the replacement list
while let Some(token) = pending.pop_front() {
// first step: perform (recursive) substitution on the ID
if let Ok(Locatable {
data: Token::Id(id),
..
}) = token
{
if !ids_seen.contains(&id) {
match definitions.get(&id) {
Some(Definition::Object(replacement_list)) => {
ids_seen.insert(id);
// prepend the new tokens to the pending tokens
// They need to go before, not after. For instance:
// ```c
// #define a b c d
// #define b 1 + 2
// a
// ```
// should replace to `1 + 2 c d`, not `c d 1 + 2`
let mut new_pending = VecDeque::new();
// we need a `clone()` because `self.definitions` needs to keep its copy of the definition
new_pending.extend(
replacement_list
.iter()
.cloned()
.map(|t| Ok(location.with(t))),
);
new_pending.append(&mut pending);
pending = new_pending;
continue;
}
// TODO: so many allocations :(
Some(Definition::Function { .. }) => {
ids_seen.insert(id);
let func_replacements =
replace_function(definitions, id, location, &mut pending, &mut inner);
let mut func_replacements: VecDeque<_> =
func_replacements.into_iter().collect();
func_replacements.append(&mut pending);
pending = func_replacements;
continue;
match token {
Ok(Locatable {
data: ref succeeding_tok,
..
}) if pending_hashhash.is_some() => {
if matches!(succeeding_tok, Token::Whitespace(_)) {
continue;
}
let pending_hashhash = pending_hashhash.take().unwrap(); // We just checked that it's some
Comment on lines +177 to +185
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better to write this as

match (token, pending_hashhash) {
    (Ok(..), Some(hashhash)) => {

That way you don't have an extra unwrap.

let concat_token =
concat(&pending_hashhash, succeeding_tok, &location).ok_or_else(|| {
location.with(
CppError::HashHashInvalid(
pending_hashhash.clone(),
succeeding_tok.clone(),
)
.into(),
)
});
pending.push_back(concat_token);
continue;
}
Ok(Locatable {
data: Token::Id(id),
..
}) => {
if !ids_seen.contains(&id) {
match definitions.get(&id) {
Some(Definition::Object(replacement_list)) => {
ids_seen.insert(id);
// prepend the new tokens to the pending tokens
// They need to go before, not after. For instance:
// ```c
// #define a b c d
// #define b 1 + 2
// a
// ```
// should replace to `1 + 2 c d`, not `c d 1 + 2`
let mut new_pending = VecDeque::new();
// we need a `clone()` because `self.definitions` needs to keep its copy of the definition
new_pending.extend(
replacement_list
.iter()
.cloned()
.map(|t| Ok(location.with(t))),
);
new_pending.append(&mut pending);
pending = new_pending;
continue;
}
// TODO: so many allocations :(
Some(Definition::Function { .. }) => {
ids_seen.insert(id);
let func_replacements = replace_function(
definitions,
id,
location,
&mut pending,
&mut inner,
);
let mut func_replacements: VecDeque<_> =
func_replacements.into_iter().collect();
func_replacements.append(&mut pending);
pending = func_replacements;
continue;
}
None => {}
}
None => {}
}
}
Ok(Locatable {
data: Token::HashHash(true),
..
}) => {
let preceding_tok = loop {
match replacements.pop() {
Some(Ok(Locatable {
data: Token::Whitespace(_),
..
})) => continue,
Some(Ok(Locatable { data: token, .. })) => break token,
None | Some(Err(_)) => {
return wrap_error(&location, CppError::HashHashMissingParameter(true))
}
Comment on lines +258 to +260
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, in the Some(Err) case I think we should return the original error instead of swallowing it.

}
};
pending_hashhash = Some(preceding_tok);
continue;
}
_ => {}
}
replacements.push(token);
}
Expand Down Expand Up @@ -367,16 +415,17 @@ fn replace_function(
// and taking no arguments other than knowing the number of parameters.
if !(args.len() == 1 && params.is_empty() && args[0].is_empty()) {
// booo, this is the _only_ error in the whole replacer
return vec![Err(
location.with(CppError::TooFewArguments(params.len(), args.len()).into())
)];
return wrap_error(
&location,
CppError::TooFewArguments(params.len(), args.len()),
);
}
}

let mut pending_hash = false; // Seen a hash?
for token in body {
match *token {
Token::Id(id) => {
match token {
&Token::Id(id) => {
// #define f(a) { a + 1 } \n f(b) => b + 1
if let Some(index) = params.iter().position(|&param| param == id) {
let replacement = args[index].clone();
Expand All @@ -387,7 +436,7 @@ fn replace_function(
replacements.push(stringify(replacement));
}
} else if pending_hash {
return vec![Err(location.with(CppError::HashMissingParameter.into()))];
return wrap_error(&location, CppError::HashMissingParameter);
} else {
replacements.push(Token::Id(id));
}
Expand All @@ -403,7 +452,7 @@ fn replace_function(
}
_ => {
if pending_hash {
return vec![Err(location.with(CppError::HashMissingParameter.into()))];
return wrap_error(&location, CppError::HashMissingParameter);
} else {
replacements.push(token.clone());
}
Expand Down Expand Up @@ -439,3 +488,21 @@ fn stringify(args: Vec<Token>) -> Token {
ret.trim()
))]))
}

fn concat(x: &Token, y: &Token, location: &Location) -> Option<Locatable<Token>> {
let mut lexer = Lexer::new(location.file, format!("{}{}", x, y), false);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You said doing this means we can get rid of the LiteralParser trait, right? Do you want to do that here or in a follow-up PR?

match lexer.next() {
Some(Ok(tok)) if lexer.next().is_none() => Some(match tok {
Locatable {
data: Token::HashHash(_),
location,
} => location.with(Token::HashHash(false)),
tok => tok,
}),
_ => None,
}
}

fn wrap_error(location: &Location, err: CppError) -> Vec<CppResult<Token>> {
vec![Err(location.with(err.into()))]
}