Skip to content

Commit

Permalink
feat: Handle regex pattern for Value type (#118)
Browse files Browse the repository at this point in the history
* wip: add regex value type

* feat: handle value type for regex patterns

* fix: fix test and handle regex in match fn

* chore: optimize the var usage and refactor fn

* fix: use regress instead of regex

* fix: flag handling with regress library

---------

Co-authored-by: Tommy Rogers <[email protected]>
  • Loading branch information
deepakraog and tommy authored Nov 5, 2024
1 parent 6e3e83e commit 8b75dea
Show file tree
Hide file tree
Showing 11 changed files with 224 additions and 102 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ dtoa = "1.0.9"
base64 = "0.22.1"
serde_json = "1.0.117"
rand = "0.8.5"
regex = "1.11.1"
regress = "0.10.1"
num-format = "0.4.4"
uuid = { version = "1.8.0", features = ["fast-rng", "v4", "v7"] }

[dev-dependencies]
test-case = "3.3.1"
test-generator = "0.3.1"
regex = "1.5.4"
regress = "0.10.1"

[build-dependencies]
glob = "0.3"
2 changes: 1 addition & 1 deletion src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ impl fmt::Display for Error {
S0302UnterminatedRegex(ref p) =>
write!(f, "{}: No terminating / in regular expression", p),
S0303InvalidRegex(ref p, ref message) =>
// The error message from `regex::Regex` has a "regex parse error: " prefix, so don't be redundant here.
// The error message from `regress::Regex` has a "regex parse error: " prefix, so don't be redundant here.
write!(f, "{}: {}", p, message),

// Runtime errors
Expand Down
4 changes: 4 additions & 0 deletions src/evaluator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ impl<'a> Evaluator<'a> {
ref update,
ref delete,
} => Value::transformer(self.arena, pattern, update, delete),
AstKind::Regex(ref regex_literal) => {
// Wrap the regex literal in a `Value::Regex` and return it
self.arena.alloc(Value::Regex(regex_literal.clone()))
}
_ => unimplemented!("TODO: node kind not yet supported: {:#?}", node.kind),
};

Expand Down
81 changes: 68 additions & 13 deletions src/evaluator/functions.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
use base64::Engine;
use chrono::{TimeZone, Utc};
use hashbrown::{DefaultHashBuilder, HashMap};
use rand::Rng;
use regex::Regex;
use std::borrow::{Borrow, Cow};
use std::collections::HashSet;
use std::time::{SystemTime, UNIX_EPOCH};
use uuid::Uuid;

use crate::datetime::{format_custom_date, parse_custom_format, parse_timezone_offset};
use crate::evaluator::RegexLiteral;
use crate::parser::expressions::check_balanced_brackets;

use bumpalo::collections::CollectIn;
use bumpalo::collections::String as BumpString;
use bumpalo::collections::Vec as BumpVec;
use bumpalo::Bump;

Expand Down Expand Up @@ -185,6 +187,7 @@ pub fn fn_boolean<'a>(
Value::bool(false)
}
},
Value::Regex(_) => Value::bool(true),
Value::Lambda { .. } | Value::NativeFn { .. } | Value::Transformer { .. } => {
Value::bool(false)
}
Expand Down Expand Up @@ -1741,7 +1744,7 @@ pub fn fn_pad<'a>(
Ok(Value::string(context.arena, &result))
}

pub fn fn_match_regex<'a>(
pub fn fn_match<'a>(
context: FunctionContext<'a, '_>,
args: &[&'a Value<'a>],
) -> Result<&'a Value<'a>> {
Expand All @@ -1752,20 +1755,72 @@ pub fn fn_match_regex<'a>(
assert_arg!(value_to_validate.is_string(), context, 1);

let pattern_value = match args.get(1).copied() {
Some(val) if val.is_string() => val,
Some(val) => val,
_ => return Err(Error::D3010EmptyPattern(context.char_index)),
};

let regex_pattern = Regex::new(&pattern_value.as_str())
.map_err(|_| Error::D3010EmptyPattern(context.char_index))?;
let regex_literal = match pattern_value {
Value::Regex(ref regex_literal) => regex_literal,
Value::String(ref s) => {
let regex = RegexLiteral::new(s.as_str(), false, false)
.map_err(|_| Error::D3010EmptyPattern(context.char_index))?;
&*context.arena.alloc(regex)
}
_ => return Err(Error::D3010EmptyPattern(context.char_index)),
};

if regex_pattern.is_match(&value_to_validate.as_str()) {
Ok(value_to_validate) // Return input if it matches
} else {
Err(Error::D3137Error(format!(
"Invalid format: '{}' does not match the expected pattern '{}'",
value_to_validate.as_str(),
pattern_value.as_str()
)))
let limit = args
.get(2)
.and_then(|val| {
if val.is_number() {
Some(val.as_f64() as usize)
} else {
None
}
})
.unwrap_or(usize::MAX);

let key_match = BumpString::from_str_in("match", context.arena);
let key_index = BumpString::from_str_in("index", context.arena);
let key_groups = BumpString::from_str_in("groups", context.arena);

let mut matches: bumpalo::collections::Vec<&Value<'a>> =
bumpalo::collections::Vec::new_in(context.arena);

for (i, m) in regex_literal
.get_regex()
.find_iter(&value_to_validate.as_str())
.enumerate()
{
if i >= limit {
break;
}

let matched_text = &value_to_validate.as_str()[m.start()..m.end()];
let match_str = context
.arena
.alloc(Value::string(context.arena, matched_text));

let index_val = context
.arena
.alloc(Value::number(context.arena, m.start() as f64));

let group_vec: bumpalo::collections::Vec<&Value<'a>> =
bumpalo::collections::Vec::new_in(context.arena);
let groups_val = context
.arena
.alloc(Value::Array(group_vec, ArrayFlags::empty()));

let mut match_obj: HashMap<BumpString, &Value<'a>, DefaultHashBuilder, &Bump> =
HashMap::with_capacity_and_hasher_in(3, DefaultHashBuilder::default(), context.arena);
match_obj.insert(key_match.clone(), match_str);
match_obj.insert(key_index.clone(), index_val);
match_obj.insert(key_groups.clone(), groups_val);

matches.push(context.arena.alloc(Value::Object(match_obj)));
}

Ok(context
.arena
.alloc(Value::Array(matches, ArrayFlags::empty())))
}
5 changes: 4 additions & 1 deletion src/evaluator/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use hashbrown::HashMap;

use super::frame::Frame;
use super::functions::FunctionContext;
use crate::parser::ast::{Ast, AstKind};
use crate::parser::ast::{Ast, AstKind, RegexLiteral};
use crate::{Error, Result};

pub mod impls;
Expand Down Expand Up @@ -51,6 +51,7 @@ pub enum Value<'a> {
Number(f64),
Bool(bool),
String(BumpString<'a>),
Regex(RegexLiteral),
Array(BumpVec<'a, &'a Value<'a>>, ArrayFlags),
Object(HashMap<BumpString<'a>, &'a Value<'a>, DefaultHashBuilder, &'a Bump>),
Range(Range<'a>),
Expand Down Expand Up @@ -309,6 +310,7 @@ impl<'a> Value<'a> {
}
},
Value::Object(ref o) => !o.is_empty(),
Value::Regex(_) => true, // Treat Regex as truthy if it exists
Value::Lambda { .. } | Value::NativeFn { .. } | Value::Transformer { .. } => false,
Value::Range(ref r) => !r.is_empty(),
}
Expand Down Expand Up @@ -516,6 +518,7 @@ impl<'a> Value<'a> {
delete,
} => Value::transformer(arena, pattern, update, delete),
Self::Range(range) => Value::range_from(arena, range),
Self::Regex(regex) => arena.alloc(Value::Regex(regex.clone())),
}
}

Expand Down
8 changes: 7 additions & 1 deletion src/evaluator/value/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ impl<'a> PartialEq<Value<'a>> for Value<'a> {
(Value::Array(l, ..), Value::Array(r, ..)) => *l == *r,
(Value::Object(l), Value::Object(r)) => *l == *r,
(Value::Range(l), Value::Range(r)) => *l == *r,
(Value::Regex(l), Value::Regex(r)) => l == r,
_ => false,
}
}
Expand Down Expand Up @@ -91,6 +92,7 @@ impl std::fmt::Debug for Value<'_> {
Self::String(s) => s.fmt(f),
Self::Array(a, _) => a.fmt(f),
Self::Object(o) => o.fmt(f),
Self::Regex(r) => write!(f, "<regex({:?})>", r),
Self::Lambda { .. } => write!(f, "<lambda>"),
Self::NativeFn { .. } => write!(f, "<nativefn>"),
Self::Transformer { .. } => write!(f, "<transformer>"),
Expand All @@ -101,7 +103,10 @@ impl std::fmt::Debug for Value<'_> {

impl std::fmt::Display for Value<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:#?}", self)
match self {
Self::Regex(r) => write!(f, "<regex({:?})>", r),
_ => write!(f, "{:#?}", self),
}
}
}

Expand All @@ -123,6 +128,7 @@ impl Hash for Value<'_> {
map.get(key).hash(state);
}
}
Value::Regex(r) => r.hash(state),
Value::Range(r) => r.hash(state),
Value::Lambda { .. } => generate_random_hash(state),
Value::NativeFn { name, .. } => name.hash(state),
Expand Down
4 changes: 4 additions & 0 deletions src/evaluator/value/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,10 @@ impl<T: Formatter> Serializer<T> {
Value::Bool(false) => self.write(b"false"),
Value::Array(..) | Value::Range(..) => self.write_array(value)?,
Value::Object(..) => self.write_object(value)?,
Value::Regex(ref regex) => {
let pattern = format!("\"{}\"", regex.as_pattern());
self.write(pattern.as_bytes());
}
Value::Lambda { .. } | Value::NativeFn { .. } | Value::Transformer { .. } => {
self.write(b"\"\"")
}
Expand Down
Loading

0 comments on commit 8b75dea

Please sign in to comment.