From 28aaca80a45785d56fa3d4c4cbc28bb6f9107da5 Mon Sep 17 00:00:00 2001 From: Hunter Damron Date: Sun, 16 Aug 2020 14:00:10 -0400 Subject: [PATCH] Initial WIP on preprocessor-hashhash Collected before and after tokens and passed them to a concat function --- src/data/error.rs | 4 ++ src/data/lex.rs | 2 + src/lex/mod.rs | 8 ++- src/lex/replace.rs | 146 ++++++++++++++++++++++++++++++--------------- 4 files changed, 110 insertions(+), 50 deletions(-) diff --git a/src/data/error.rs b/src/data/error.rs index fef5b59e..89894721 100644 --- a/src/data/error.rs +++ b/src/data/error.rs @@ -506,6 +506,10 @@ pub enum CppError { /// '#' in a function macro not followed by function parameter #[error("'#' is not followed by a macro parameter")] HashMissingParameter, + + /// '##' missing arguments + #[error("'##' cannot appear at {} of macro expansion", if *(.0) { "start" } else { "end"})] + HashHashMissingParameter(bool), } /// Lex errors are non-exhaustive and may have new variants added at any time diff --git a/src/data/lex.rs b/src/data/lex.rs index fb9a3a59..3680846d 100644 --- a/src/data/lex.rs +++ b/src/data/lex.rs @@ -233,6 +233,7 @@ pub enum Token { Ellipsis, StructDeref, // -> Hash, // #, used for preprocessing + HashHash, // ##, used for preprocessing } /* impls */ @@ -393,6 +394,7 @@ impl std::fmt::Display for Token { Ellipsis => write!(f, "..."), StructDeref => write!(f, "->"), Hash => write!(f, "#"), + HashHash => write!(f, "##"), } } } diff --git a/src/lex/mod.rs b/src/lex/mod.rs index 7fb3cd9c..034c7ed3 100644 --- a/src/lex/mod.rs +++ b/src/lex/mod.rs @@ -362,7 +362,13 @@ impl Iterator for Lexer { let span_start = self.location.offset - c.len_utf8() as u32; // this giant switch is most of the logic let data = match c { - '#' => Token::Hash, + '#' => match self.peek() { + Some('#') => { + self.next_char(); + Token::HashHash + } + _ => Token::Hash, + }, '+' => match self.peek() { Some('=') => { self.next_char(); diff --git a/src/lex/replace.rs b/src/lex/replace.rs 
index 6359afa2..3ccb72ec 100644 --- a/src/lex/replace.rs +++ b/src/lex/replace.rs @@ -166,56 +166,95 @@ pub fn replace( // - _not_ after every token, since otherwise that won't catch some mutual recursion // See https://github.com/jyn514/rcc/issues/427 for examples. let mut ids_seen = HashSet::new(); - let mut replacements = Vec::new(); + let mut replacements: Vec<CompileResult<Locatable<Token>>> = Vec::new(); let mut pending = VecDeque::new(); pending.push_back(Ok(location.with(token))); + let mut pending_hashhash: Option<Token> = None; // Token before ## + // outer loop: replace all tokens in the replacement list while let Some(token) = pending.pop_front() { - // first step: perform (recursive) substitution on the ID - if let Ok(Locatable { - data: Token::Id(id), - .. - }) = token - { - if !ids_seen.contains(&id) { - match definitions.get(&id) { - Some(Definition::Object(replacement_list)) => { - ids_seen.insert(id); - // prepend the new tokens to the pending tokens - // They need to go before, not after. For instance: - // ```c - // #define a b c d - // #define b 1 + 2 - // a - // ``` - // should replace to `1 + 2 c d`, not `c d 1 + 2` - let mut new_pending = VecDeque::new(); - // we need a `clone()` because `self.definitions` needs to keep its copy of the definition - new_pending.extend( - replacement_list - .iter() - .cloned() - .map(|t| Ok(location.with(t))), - ); - new_pending.append(&mut pending); - pending = new_pending; - continue; - } - // TODO: so many allocations :( - Some(Definition::Function { .. }) => { - ids_seen.insert(id); - let func_replacements = - replace_function(definitions, id, location, &mut pending, &mut inner); - let mut func_replacements: VecDeque<_> = - func_replacements.into_iter().collect(); - func_replacements.append(&mut pending); - pending = func_replacements; - continue; + match token { + Ok(Locatable { + data: ref succeeding_tok, + ..
+ }) if pending_hashhash.is_some() => { + if matches!(succeeding_tok, Token::Whitespace(_)) { + continue; + } + let pending_hashhash = pending_hashhash.take().unwrap(); // We just checked that it's some + let concat_token = concat(pending_hashhash, succeeding_tok.clone(), &location); + replacements.push(concat_token); // TODO don't bypass pending + continue; + } + Ok(Locatable { + data: Token::Id(id), + .. + }) => { + if !ids_seen.contains(&id) { + match definitions.get(&id) { + Some(Definition::Object(replacement_list)) => { + ids_seen.insert(id); + // prepend the new tokens to the pending tokens + // They need to go before, not after. For instance: + // ```c + // #define a b c d + // #define b 1 + 2 + // a + // ``` + // should replace to `1 + 2 c d`, not `c d 1 + 2` + let mut new_pending = VecDeque::new(); + // we need a `clone()` because `self.definitions` needs to keep its copy of the definition + new_pending.extend( + replacement_list + .iter() + .cloned() + .map(|t| Ok(location.with(t))), + ); + new_pending.append(&mut pending); + pending = new_pending; + continue; + } + // TODO: so many allocations :( + Some(Definition::Function { .. }) => { + ids_seen.insert(id); + let func_replacements = replace_function( + definitions, + id, + location, + &mut pending, + &mut inner, + ); + let mut func_replacements: VecDeque<_> = + func_replacements.into_iter().collect(); + func_replacements.append(&mut pending); + pending = func_replacements; + continue; + } + None => {} } - None => {} } } + Ok(Locatable { + data: Token::HashHash, + .. + }) => { + let preceding_tok = loop { + match replacements.pop() { + Some(Ok(Locatable { + data: Token::Whitespace(_), + .. + })) => continue, + Some(Ok(Locatable { data: token, .. 
})) => break token, + None | Some(Err(_)) => { + return wrap_error(&location, CppError::HashHashMissingParameter(true)) + } + } + }; + pending_hashhash = Some(preceding_tok); + continue; + } + _ => {} } replacements.push(token); } @@ -367,16 +406,17 @@ fn replace_function( // and taking no arguments other than knowing the number of parameters. if !(args.len() == 1 && params.is_empty() && args[0].is_empty()) { // booo, this is the _only_ error in the whole replacer - return vec![Err( - location.with(CppError::TooFewArguments(params.len(), args.len()).into()) - )]; + return wrap_error( + &location, + CppError::TooFewArguments(params.len(), args.len()).into(), + ); } } let mut pending_hash = false; // Seen a hash? for token in body { - match *token { - Token::Id(id) => { + match token { + &Token::Id(id) => { // #define f(a) { a + 1 } \n f(b) => b + 1 if let Some(index) = params.iter().position(|&param| param == id) { let replacement = args[index].clone(); @@ -387,7 +427,7 @@ fn replace_function( replacements.push(stringify(replacement)); } } else if pending_hash { - return vec![Err(location.with(CppError::HashMissingParameter.into()))]; + return wrap_error(&location, CppError::HashMissingParameter); } else { replacements.push(Token::Id(id)); } @@ -403,7 +443,7 @@ fn replace_function( } _ => { if pending_hash { - return vec![Err(location.with(CppError::HashMissingParameter.into()))]; + return wrap_error(&location, CppError::HashMissingParameter); } else { replacements.push(token.clone()); } @@ -439,3 +479,11 @@ fn stringify(args: Vec<Token>) -> Token { ret.trim() ))])) } + +fn concat(x: Token, b: Token, location: &Location) -> CompileResult<Locatable<Token>> { + todo!(); +} + +fn wrap_error(location: &Location, err: CppError) -> Vec<CppResult<Token>> { + vec![Err(location.with(err.into()))] +}