This repository has been archived by the owner on Jun 3, 2021. It is now read-only.

Commit

Convert code from shared_string to arcstr (#514)
* Convert code to use arcstr
* Respond to review feedback
thomcc authored Aug 19, 2020
1 parent b51c6e9 commit 6d41e7f
Showing 10 changed files with 89 additions and 75 deletions.
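
In broad strokes, the commit swaps std::rc::Rc<str> and shared_str::RcStr for arcstr's ArcStr and Substr everywhere a source buffer or token string is held. As a rough orientation, here is a minimal sketch (mine, not part of the commit) of the two properties the new code leans on, assuming arcstr 0.2:

    use arcstr::ArcStr;

    fn main() {
        // Cloning an ArcStr, like cloning an Rc<str>, only bumps a
        // reference count: both handles share a single allocation.
        let a = ArcStr::from("hello world");
        let b = ArcStr::clone(&a);
        assert!(ArcStr::ptr_eq(&a, &b));

        // Unlike Rc<str>, an ArcStr can be built from a literal in a
        // const context, with no allocation at runtime.
        const GREETING: ArcStr = arcstr::literal!("hello");
        assert_eq!(GREETING, "hello");
    }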
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -40,7 +40,7 @@ atty = { version = "0.2", default-features = false, optional = true }
 git-testament = { version = "0.1", optional = true }
 rand = { version = "0.7", optional = true }
 rodio = { version = "0.11.0", optional = true }
-shared_str = "0.1.1"
+arcstr = "0.2.2"
 time = "0.2"
 
 [dev-dependencies]
8 changes: 4 additions & 4 deletions benches/parens.rs
@@ -1,16 +1,16 @@
+use arcstr::ArcStr;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use rcc::codespan::Files;
 use rcc::{Lexer, Locatable, Parser, Token};
-use std::rc::Rc;
 
 fn parens(c: &mut Criterion) {
     // should take no more than n stack frames
     let n = 3000;
-    let the_biggun = Rc::from(format!("{}1 + 2{}", "(".repeat(n), ")".repeat(n)));
+    let the_biggun = arcstr::format!("{}1 + 2{}", "(".repeat(n), ")".repeat(n));
     let parse = |s| {
         let mut files = Files::new();
-        let file_id = files.add("<bench>", Rc::clone(s));
-        let mut lexer = Lexer::new(file_id, Rc::clone(s), false);
+        let file_id = files.add("<bench>", ArcStr::clone(s));
+        let mut lexer = Lexer::new(file_id, ArcStr::clone(s), false);
         let first: Locatable<Token> = lexer.next().unwrap().unwrap();
         let mut p: Parser<Lexer> = Parser::new(first, lexer, false);
         p.expr()
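
The one-line change to the_biggun shows the crate's formatting macro. A small sketch of the pattern (mine, not from the commit), assuming arcstr 0.2's arcstr::format!:

    fn main() {
        let n = 3;
        // arcstr::format! produces an ArcStr straight from the format
        // arguments, replacing the two-step Rc::from(format!(...)).
        let s: arcstr::ArcStr = arcstr::format!("{}1 + 2{}", "(".repeat(n), ")".repeat(n));
        assert_eq!(s, "(((1 + 2)))");
    }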
39 changes: 16 additions & 23 deletions src/data/lex.rs
@@ -7,7 +7,7 @@ use proptest_derive::Arbitrary;
 use crate::data::hir::BinaryOp;
 use crate::intern::InternedStr;
 
-use shared_str::RcStr;
+use arcstr::Substr;
 
 // holds where a piece of code came from
 // should almost always be immutable
@@ -166,11 +166,11 @@ pub enum ComparisonToken {
 #[derive(Clone, Debug)]
 pub enum LiteralToken {
     // literals
-    Int(RcStr),
-    UnsignedInt(RcStr),
-    Float(RcStr),
-    Str(Vec<RcStr>),
-    Char(RcStr),
+    Int(Substr),
+    UnsignedInt(Substr),
+    Float(Substr),
+    Str(Vec<Substr>),
+    Char(Substr),
 }
 
 impl PartialEq for LiteralToken {
@@ -404,15 +404,8 @@ impl std::fmt::Display for LiteralToken {
             Int(i) => write!(f, "{}", i),
             UnsignedInt(u) => write!(f, "{}", u),
             Float(n) => write!(f, "{}", n),
-            Str(rcstr) => {
-                let joined = rcstr
-                    .iter()
-                    .map(RcStr::as_str)
-                    .collect::<Vec<_>>()
-                    .join(" ");
-                write!(f, "{}", joined)
-            }
-            Char(rcstr) => write!(f, "{}", rcstr.as_str()),
+            Str(s) => write!(f, "{}", s.join(" ")),
+            Char(s) => write!(f, "{}", s),
         }
     }
 }
@@ -463,28 +456,28 @@ impl From<ComparisonToken> for Token {
 #[cfg(test)]
 mod proptest_impl {
     use super::LiteralToken;
+    use arcstr::Substr;
     use proptest::prelude::*;
-    use shared_str::RcStr;
 
     impl Arbitrary for LiteralToken {
         type Parameters = ();
         type Strategy = BoxedStrategy<LiteralToken>;
         fn arbitrary_with(_: Self::Parameters) -> Self::Strategy {
             prop_oneof![
                 // TODO give regex of all possible literals
-                any::<i64>().prop_map(|x| LiteralToken::Int(RcStr::from(x.to_string()))),
-                any::<u64>().prop_map(|x| LiteralToken::UnsignedInt(RcStr::from(x.to_string()))),
-                any::<f64>().prop_map(|x| LiteralToken::Float(RcStr::from(x.to_string()))),
-                any::<u8>().prop_map(|c| LiteralToken::Char(RcStr::from(format!(
+                any::<i64>().prop_map(|x| LiteralToken::Int(Substr::from(x.to_string()))),
+                any::<u64>().prop_map(|x| LiteralToken::UnsignedInt(Substr::from(x.to_string()))),
+                any::<f64>().prop_map(|x| LiteralToken::Float(Substr::from(x.to_string()))),
+                any::<u8>().prop_map(|c| LiteralToken::Char(Substr::from(arcstr::format!(
                     "\'{}\'",
                     (c as char).escape_default()
                 )))),
                 prop::collection::vec(".*", 1..10).prop_map(|strs| {
-                    let rcstrs = strs
+                    let substrs = strs
                         .into_iter()
-                        .map(|s| RcStr::from(format!("\"{}\"", s.escape_default())))
+                        .map(|s| Substr::from(arcstr::format!("\"{}\"", s.escape_default())))
                         .collect();
-                    LiteralToken::Str(rcstrs)
+                    LiteralToken::Str(substrs)
                 }),
             ]
             .boxed()
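
LiteralToken now stores arcstr::Substr: a slice that shares, and keeps alive, its parent ArcStr, and that derefs to str, which is why the Display arm can call s.join(" ") directly instead of mapping through RcStr::as_str. A short sketch of the type's behavior (mine, not from the commit), assuming arcstr 0.2's substr API:

    use arcstr::{ArcStr, Substr};

    fn main() {
        let src = ArcStr::from("int x = 42;");
        // substr() slices without copying; the Substr keeps a reference
        // to the parent ArcStr, so the token text stays valid even after
        // every other handle to the source buffer is dropped.
        let tok: Substr = src.substr(8..10);
        assert_eq!(&*tok, "42");

        // Substr derefs to str, so Display, join, etc. work unchanged.
        let parts = vec![src.substr(0..3), src.substr(4..5)];
        assert_eq!(parts.join(" "), "int x");
    }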
32 changes: 17 additions & 15 deletions src/lex/cpp.rs
@@ -25,12 +25,11 @@
 use lazy_static::lazy_static;
 
-use shared_str::RcStr;
+use arcstr::{ArcStr, Substr};
 use std::borrow::Cow;
 use std::collections::{HashMap, VecDeque};
 use std::convert::TryFrom;
 use std::path::{Path, PathBuf};
-use std::rc::Rc;
 
 use super::files::FileProcessor;
 use super::replace::{replace, replace_iter, Definition, Definitions};
@@ -55,7 +54,7 @@ use crate::Files;
 /// ```
 pub struct PreProcessorBuilder<'a> {
     /// The buffer for the starting file
-    buf: Rc<str>,
+    buf: ArcStr,
     /// The name of the file
     filename: PathBuf,
     /// Whether to print each token before replacement
@@ -67,7 +66,7 @@ pub struct PreProcessorBuilder<'a> {
 }
 
 impl<'a> PreProcessorBuilder<'a> {
-    pub fn new<S: Into<Rc<str>>>(buf: S) -> PreProcessorBuilder<'a> {
+    pub fn new<S: Into<ArcStr>>(buf: S) -> PreProcessorBuilder<'a> {
         PreProcessorBuilder {
             debug: false,
             filename: PathBuf::default(),
@@ -163,9 +162,9 @@ impl From<Vec<Token>> for Definition {
 impl TryFrom<&str> for Definition {
     type Error = error::LexError;
     fn try_from(value: &str) -> Result<Self, Self::Error> {
-        let value = Rc::from(format!("{}\n", value));
+        let value = arcstr::format!("{}\n", value);
         let mut files = codespan::Files::new();
-        let dummy_id = files.add("<impl TryFrom<&str> for Definition>", Rc::clone(&value));
+        let dummy_id = files.add("<impl TryFrom<&str> for Definition>", ArcStr::clone(&value));
         let lexer = Lexer::new(dummy_id, value, false);
         lexer
             .map(|res| match res {
@@ -315,7 +314,7 @@ impl<'a> PreProcessor<'a> {
     /// but will never delete a file.
     ///
    /// The `debug` parameter specifies whether to print out tokens before replacement.
-    pub fn new<'search: 'a, I: IntoIterator<Item = Cow<'search, Path>>, S: Into<Rc<str>>>(
+    pub fn new<'search: 'a, I: IntoIterator<Item = Cow<'search, Path>>, S: Into<ArcStr>>(
         chars: S,
         filename: impl Into<std::ffi::OsString>,
         debug: bool,
@@ -683,6 +682,9 @@ impl<'a> PreProcessor<'a> {
     where
         L: Iterator<Item = Locatable<Token>>,
     {
+        const ONE: LiteralToken = LiteralToken::Int(arcstr::literal_substr!("1"));
+        const ZERO: LiteralToken = LiteralToken::Int(arcstr::literal_substr!("0"));
+
         let mut cpp_tokens = Vec::with_capacity(lex_tokens.size_hint().1.unwrap_or_default());
         let defined = "defined".into();
 
@@ -695,9 +697,9 @@
             } if name == defined => {
                 let def = Self::defined(&mut lex_tokens, location)?;
                 let literal = if definitions.contains_key(&def) {
-                    LiteralToken::Int(RcStr::from("1"))
+                    ONE
                 } else {
-                    LiteralToken::Int(RcStr::from("0"))
+                    ZERO
                 };
                 location.with(Token::Literal(literal))
             }
@@ -713,7 +715,7 @@
             if let Ok(tok) = &mut token {
                 expr_location = Some(location.maybe_merge(expr_location));
                 if let Token::Id(_) = tok.data {
-                    tok.data = Token::Literal(LiteralToken::Int(RcStr::from("0")));
+                    tok.data = Token::Literal(ZERO);
                 }
             }
             token
@@ -1096,15 +1098,15 @@
                     Some(file) => {
                         let mut path = PathBuf::from("<builtin>");
                         path.push(filename);
-                        (path, Rc::from(file))
+                        (path, ArcStr::from(file))
                     }
                     None => return Err(not_found),
                 }
             }
         };
         let source = crate::Source {
             path,
-            code: Rc::clone(&src),
+            code: ArcStr::clone(&src),
         };
         self.file_processor.add_file(filename, source);
         Ok(())
@@ -1166,11 +1168,11 @@
 }
 
 fn int_def(i: i32) -> Definition {
-    Definition::Object(vec![LiteralToken::Int(RcStr::from(i.to_string())).into()])
+    Definition::Object(vec![LiteralToken::Int(Substr::from(i.to_string())).into()])
 }
 fn str_def<S: Into<String>>(s: S) -> Definition {
-    let rcstr = RcStr::from(format!("\"{}\"", s.into().replace(r#"""#, r#"\""#)));
-    Definition::Object(vec![LiteralToken::Str(vec![rcstr]).into()])
+    let substr = Substr::from(arcstr::format!("\"{}\"", s.into().replace(r#"""#, r#"\""#)));
+    Definition::Object(vec![LiteralToken::Str(vec![substr]).into()])
 }
 
 macro_rules! built_in_headers {
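
The new const ONE and const ZERO items work because arcstr::literal_substr! yields a Substr that is legal in const position; an Rc-based string never could be, so the old code re-allocated its "0" and "1" tokens on every evaluation. A minimal sketch (mine, not from the commit), assuming arcstr 0.2:

    use arcstr::Substr;

    // A Substr built by literal_substr! points at static data, so it can
    // live in a const item and costs no runtime allocation.
    const ONE: Substr = arcstr::literal_substr!("1");

    fn main() {
        // Each use of ONE is effectively free, unlike the old code,
        // which ran RcStr::from("1") on every `defined(...)` test.
        assert_eq!(&*ONE, "1");
    }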
8 changes: 4 additions & 4 deletions src/lex/files.rs
@@ -4,8 +4,8 @@ use crate::{
     ErrorHandler, Location,
 };
 use crate::{Files, Source};
+use arcstr::ArcStr;
 use std::path::{Path, PathBuf};
-use std::rc::Rc;
 
 // TODO: this API is absolutely terrible, there's _no_ encapsulation
 pub(super) struct FileProcessor {
@@ -49,15 +49,15 @@ impl Iterator for FileProcessor {
 
 impl FileProcessor {
     pub(super) fn new(
-        chars: impl Into<Rc<str>>,
+        chars: impl Into<ArcStr>,
         filename: impl Into<std::ffi::OsString>,
         debug: bool,
     ) -> Self {
         let mut files = Files::new();
         let chars = chars.into();
         let filename = filename.into();
         let source = crate::Source {
-            code: Rc::clone(&chars),
+            code: ArcStr::clone(&chars),
             path: filename.clone().into(),
         };
         let file = files.add(filename, source);
@@ -87,7 +87,7 @@ impl FileProcessor {
         self.includes.last_mut().unwrap_or(&mut self.first_lexer)
     }
     pub(super) fn add_file(&mut self, filename: PathBuf, source: Source) {
-        let code = Rc::clone(&source.code);
+        let code = ArcStr::clone(&source.code);
         let id = self.files.add(filename, source);
         self.includes
             .push(Lexer::new(id, code, self.first_lexer.debug));
41 changes: 29 additions & 12 deletions src/lex/mod.rs
@@ -1,5 +1,4 @@
 use std::convert::{TryFrom, TryInto};
-use std::rc::Rc;
 
 use codespan::FileId;
 
@@ -9,7 +8,7 @@ use super::data::{
     *,
 };
 use super::intern::InternedStr;
-use shared_str::RcStr;
+use arcstr::{ArcStr, Substr};
 
 mod cpp;
 mod files;
@@ -36,7 +35,7 @@ type LexResult<T = Token> = Result<T, Locatable<LexError>>;
 #[derive(Debug)]
 pub struct Lexer {
     location: SingleLocation,
-    chars: Rc<str>,
+    chars: ArcStr,
     /// used for 2-character tokens
     current: Option<char>,
     /// used for 3-character tokens
@@ -78,7 +77,7 @@ pub(crate) struct SingleLocation {
 
 impl Lexer {
     /// Creates a Lexer from a filename and the contents of a file
-    pub fn new<S: Into<Rc<str>>>(file: FileId, chars: S, debug: bool) -> Lexer {
+    pub fn new<S: Into<ArcStr>>(file: FileId, chars: S, debug: bool) -> Lexer {
         Lexer {
             given_newline_error: false,
             debug,
@@ -118,14 +117,12 @@ impl Lexer {
         self.chars[self.location.offset as usize..].chars()
    }
 
-    fn slice(&self, span_start: u32) -> RcStr {
+    fn slice(&self, span_start: u32) -> Substr {
         use std::ops::Range;
-        RcStr::from(self.chars.clone())
-            .slice_with(|s| {
-                s.get::<Range<usize>>(self.span(span_start).span.into())
-                    .unwrap_or("")
-            })
-            .unwrap()
+        self.chars.substr_using(|s| {
+            s.get::<Range<usize>>(self.span(span_start).span.into())
+                .unwrap_or("")
+        })
     }
 
     /// Parse a number literal, given the starting character and whether floats are allowed.
@@ -199,7 +196,7 @@ impl Lexer {
         Ok(Token::Literal(literal))
     }
     // at this point we've already seen a '.', if we see one again it's an error
-    fn parse_float(&mut self, radix: Radix, span_start: u32) -> Result<RcStr, LexError> {
+    fn parse_float(&mut self, radix: Radix, span_start: u32) -> Result<Substr, LexError> {
         // parse fraction: second {digits} in regex
         while let Some(c) = self.peek() {
             let c = c as char;
@@ -1062,3 +1059,23 @@ impl LiteralToken {
         }
     }
 }
+
+#[cfg(test)]
+mod test {
+    use super::Lexer;
+    use arcstr::ArcStr;
+    #[test]
+    fn test_lexer_slice_parent() {
+        let mut files = codespan::Files::new();
+        let astr = arcstr::literal!("int main() { return 0; }\n");
+        let dummy_id = files.add("dummy main", ArcStr::clone(&astr));
+        let mut lexer = Lexer::new(dummy_id, &astr, false);
+        let _ = lexer.next();
+        let sliced = lexer.slice(0);
+        // Note that the parent is not guaranteed to be equal for empty strings,
+        // since we don't want to have an empty substr which is the last
+        // remaining thing keeping some non-empty ArcStr from being freed.
+        assert!(!sliced.is_empty(), "{:?}", sliced);
+        assert!(ArcStr::ptr_eq(sliced.parent(), &astr));
+    }
+}
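
Lexer::slice is the heart of the change: ArcStr::substr_using hands the parent &str to a closure, which must return a subslice of it, and wraps the result as a zero-copy Substr; the new test then checks the parent linkage with Substr::parent and ArcStr::ptr_eq. A compact sketch of that API (mine, not from the commit), assuming arcstr 0.2:

    use arcstr::ArcStr;

    fn main() {
        let chars = ArcStr::from("let x = 1;");
        // The closure sees &chars[..] and returns a subslice of it; the
        // result comes back as a Substr borrowing the same allocation,
        // just like Lexer::slice above.
        let ident = chars.substr_using(|s| &s[4..5]);
        assert_eq!(&*ident, "x");
        assert!(ArcStr::ptr_eq(ident.parent(), &chars));
    }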
4 changes: 2 additions & 2 deletions src/lex/replace.rs
@@ -9,7 +9,7 @@ use crate::{
 };
 use std::collections::{HashMap, HashSet, VecDeque};
 
-use shared_str::RcStr;
+use arcstr::Substr;
 
 /// All known macro definitions.
 ///
@@ -434,7 +434,7 @@ fn stringify(args: Vec<Token>) -> Token {
             }
         })
         .collect();
-    Token::Literal(LiteralToken::Str(vec![RcStr::from(format!(
+    Token::Literal(LiteralToken::Str(vec![Substr::from(arcstr::format!(
         "\"{}\"",
         ret.trim()
    ))]))
