diff --git a/Cargo.toml b/Cargo.toml index 5e301e6..271e160 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "min_shark" -version = "0.5.0" +version = "0.6.0" edition = "2021" authors = ["Dov Reshef "] license = "MIT OR Apache-2.0" @@ -17,12 +17,12 @@ exclude = [ # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -bstr = "1.9" -regex = "1.10" -derive_more = "0.99" -ipnet = "2.9" -memchr = "2.7" +bstr = "1.11" +regex = "1.11.1" +derive_more = { version = "1.0.0", features = ["full"] } +ipnet = "2.10.1" +memchr = "2.7.4" [dev-dependencies] -tracing = "0.1" -tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tracing = "0.1.41" +tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } diff --git a/docs/syntax.md b/docs/syntax.md index 20ab762..c1f84a7 100644 --- a/docs/syntax.md +++ b/docs/syntax.md @@ -4,7 +4,8 @@ * tcp: bool * udp: bool -* vlan: bool +* vlan: bool +* arp: bool * eth.addr: byte-string | regex * eth.dst: byte-string | regex * eth.src: byte-string | regex @@ -27,7 +28,8 @@ use the fields name with or without logical operations. Example: * 'tcp' * 'not udp' - +* '!arp' + ### byte-string hexadecimal numbers separated by ':'. diff --git a/rustfmt.toml b/rustfmt.toml index d50fdf0..670b25c 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,4 +1,3 @@ -edition = "2021" -version = "Two" +style_edition = "2024" imports_layout = "Vertical" imports_granularity = "Crate" diff --git a/src/driver.rs b/src/driver.rs index c9c1ded..5d3c6c0 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -4,6 +4,7 @@ //! * Parse the list of tokens into an Expression object, or return the parsing error. //! * For errors, try to figure out the root and to present it in a human friendly manner. 
use crate::{ + Expression, input::Input, lexer::{ Lexer, @@ -11,7 +12,6 @@ use crate::{ TokenKind, }, parser::Parser, - Expression, }; /// What we expected to find where we found the error @@ -152,6 +152,7 @@ pub fn parse(filter_expr: &str) -> Result { mod tests { use super::parse; use crate::{ + Expression, driver::ErrorKind, expression::{ Clause, @@ -164,7 +165,6 @@ mod tests { }, mac_addr::MacAddr, test_utils::init_test_logging, - Expression, }; use regex::bytes::Regex; use tracing::info; diff --git a/src/expression.rs b/src/expression.rs index dcd642b..c1bde5e 100644 --- a/src/expression.rs +++ b/src/expression.rs @@ -32,32 +32,32 @@ impl Eq for RegexMatcher {} /// List of the supported comparison operations #[derive(Debug, Clone, PartialEq, Eq, derive_more::Display)] pub enum CmpOp { - #[display(fmt = "==")] + #[display("==")] Equal, - #[display(fmt = "!=")] + #[display("!=")] NotEqual, - #[display(fmt = "<")] + #[display("<")] LessThan, - #[display(fmt = "<=")] + #[display("<=")] LessEqual, - #[display(fmt = ">")] + #[display(">")] GreaterThan, - #[display(fmt = ">=")] + #[display(">=")] GreaterEqual, } /// List of supported ethernet operations #[derive(Debug, Clone, PartialEq, Eq, derive_more::Display)] pub enum EthOp { - #[display(fmt = "{op} {val}")] + #[display("{op} {val}")] Compare { op: CmpOp, val: MacAddr }, - #[display(fmt = "in {_0:?}")] + #[display("in {_0:?}")] MatchAny(Vec), - #[display(fmt = "not in {_0:?}")] + #[display("not in {_0:?}")] MatchNone(Vec), - #[display(fmt = "contains {_0:?}")] + #[display("contains {_0:?}")] Contains(Vec), - #[display(fmt = "matches {_0}")] + #[display("matches {_0}")] RegexMatch(RegexMatcher), } @@ -108,11 +108,11 @@ impl EthOp { /// List of supported IP operations #[derive(Debug, Clone, PartialEq, Eq, derive_more::Display)] pub enum IpOp { - #[display(fmt = "{op} {val}")] + #[display("{op} {val}")] Compare { op: CmpOp, val: IpNet }, - #[display(fmt = "in {_0:?}")] + #[display("in {_0:?}")] MatchAny(Vec), - 
#[display(fmt = "not in {_0:?}")] + #[display("not in {_0:?}")] MatchNone(Vec), } @@ -151,11 +151,11 @@ impl IpOp { /// List of supported Port operations #[derive(Debug, Clone, PartialEq, Eq, derive_more::Display)] pub enum ValOp { - #[display(fmt = "{op} {val}")] + #[display("{op} {val}")] Compare { op: CmpOp, val: u32 }, - #[display(fmt = "in {_0:?}")] + #[display("in {_0:?}")] MatchAny(Vec), - #[display(fmt = "not in {_0:?}")] + #[display("not in {_0:?}")] MatchNone(Vec), } @@ -194,9 +194,9 @@ impl ValOp { /// List of supported payload operations #[derive(Debug, Clone, PartialEq, Eq, derive_more::Display)] pub enum PayloadOp { - #[display(fmt = "contains {_0:?}")] + #[display("contains {_0:?}")] Contains(Vec), - #[display(fmt = "matches {_0}")] + #[display("matches {_0}")] RegexMatch(RegexMatcher), } @@ -222,7 +222,7 @@ impl PayloadOp { /// List of supported Port operations #[derive(Debug, Clone, PartialEq, Eq, derive_more::Display)] pub enum PayloadLenOp { - #[display(fmt = "{op} {val}")] + #[display("{op} {val}")] Compare { op: CmpOp, val: u32 }, } @@ -252,49 +252,52 @@ impl PayloadLenOp { #[derive(Debug, Clone, PartialEq, Eq, derive_more::Display)] pub enum Clause { /// Is Tcp - #[display(fmt = "tcp")] + #[display("tcp")] IsTcp, /// Is udp - #[display(fmt = "udp")] + #[display("udp")] IsUdp, /// Is vlan - #[display(fmt = "vlan")] + #[display("vlan")] IsVlan, + /// Match ARP + #[display("arp")] + IsArp, /// Match any of the vlans - #[display(fmt = "vlan.id {_0}")] + #[display("vlan.id {_0}")] VlanId(ValOp), /// Match any of destination ports - #[display(fmt = "dstport {_0}")] + #[display("dstport {_0}")] PortDst(ValOp), /// Match any of source ports - #[display(fmt = "srcport {_0}")] + #[display("srcport {_0}")] PortSrc(ValOp), /// Match any of either the source or destination ports - #[display(fmt = "port {_0}")] + #[display("port {_0}")] Port(ValOp), /// Ethernet destination match - #[display(fmt = "eth.dst {_0}")] + #[display("eth.dst {_0}")] EthDst(EthOp), 
/// Ethernet source match - #[display(fmt = "eth.src {_0}")] + #[display("eth.src {_0}")] EthSrc(EthOp), /// Ethernet either source or destination match - #[display(fmt = "eth {_0}")] + #[display("eth {_0}")] EthAddr(EthOp), /// Destination IP match - #[display(fmt = "ip.dst {_0}")] + #[display("ip.dst {_0}")] IpDst(IpOp), /// Source IP match - #[display(fmt = "ip.src {_0}")] + #[display("ip.src {_0}")] IpSrc(IpOp), /// Either source or destination IP - #[display(fmt = "ip {_0}")] + #[display("ip {_0}")] IpAddr(IpOp), /// Match payload - #[display(fmt = "payload {_0}")] + #[display("payload {_0}")] Payload(PayloadOp), /// Match payload length - #[display(fmt = "payload.len {_0}")] + #[display("payload.len {_0}")] PayloadLen(PayloadLenOp), } @@ -305,6 +308,7 @@ impl Clause { Clause::IsTcp => matcher.is_tcp.unwrap_or_default(), Clause::IsUdp => matcher.is_udp.unwrap_or_default(), Clause::IsVlan => matcher.is_vlan.unwrap_or_default(), + Clause::IsArp => matcher.is_arp.unwrap_or_default(), Clause::VlanId(vlan_op) => matcher .vlan .map(|v| vlan_op.is_match(v)) @@ -477,6 +481,7 @@ impl Expression { is_tcp: None, is_udp: None, is_vlan: None, + is_arp: None, src_eth: None, dst_eth: None, src_ip: None, @@ -507,6 +512,7 @@ pub struct Matcher<'e, 'p> { is_tcp: Option, is_udp: Option, is_vlan: Option, + is_arp: Option, src_eth: Option, dst_eth: Option, src_ip: Option, @@ -517,7 +523,7 @@ pub struct Matcher<'e, 'p> { payload: Option<&'p [u8]>, } -impl<'e, 'p> Matcher<'e, 'p> { +impl<'p> Matcher<'_, 'p> { /// Whether the packet has tcp data pub fn tcp(mut self, val: bool) -> Self { self.is_tcp = Some(val); @@ -537,6 +543,12 @@ impl<'e, 'p> Matcher<'e, 'p> { self } + /// Whether the packet has arp data + pub fn arp(mut self, val: bool) -> Self { + self.is_arp = Some(val); + self + } + /// The source ethernet address pub fn src_eth(mut self, val: MacAddr) -> Self { self.src_eth = Some(val); @@ -805,6 +817,32 @@ mod tests { assert!(res); } + #[test] + fn 
test_single_clause_arp_expression() { + init_test_logging(); + + for (exp, is_arp, expected) in [ + (Expression::from(Clause::IsArp), true, true), + (Expression::from(Clause::IsArp), false, false), + (Expression::not(Clause::IsArp), true, false), + (Expression::not(Clause::IsArp), false, true), + ] { + info!("Evaluating expression \"{exp}\""); + let res = exp.matcher().arp(is_arp).is_match(); + assert_eq!(res, expected); + } + } + + #[test] + fn test_complex_not_arp_expression() { + init_test_logging(); + let expression = + Expression::not(Clause::IsArp).and(Clause::Port(ValOp::match_any(vec![80, 443]))); + info!("Evaluating expression \"{expression}\""); + let res = expression.matcher().arp(false).srcport(80).is_match(); + assert!(res); + } + #[test] fn test_single_clause_vlan_expressions() { init_test_logging(); diff --git a/src/lexer.rs b/src/lexer.rs index 2e97da0..912cace 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -7,112 +7,115 @@ use derive_more::Constructor; #[derive(Debug, Clone, Copy, PartialEq, Eq, derive_more::Display)] pub(crate) enum TokenKind { /// Literal `tcp` - #[display(fmt = "tcp")] + #[display("tcp")] LitTcp, /// Literal `udp` - #[display(fmt = "udp")] + #[display("udp")] LitUdp, /// Literal `vlan` - #[display(fmt = "vlan")] + #[display("vlan")] LitVlan, /// Literal `eth.addr` - #[display(fmt = "eth.addr")] + #[display("eth.addr")] LitEthAddr, /// Literal `eth.dst` - #[display(fmt = "eth.dst")] + #[display("eth.dst")] LitEthDst, /// Literal `eth.src` - #[display(fmt = "eth.src")] + #[display("eth.src")] LitEthSrc, /// Literal `ip.addr` - #[display(fmt = "ip.addr")] + #[display("ip.addr")] LitIpAddr, /// Literal `ip.dst` - #[display(fmt = "ip.dst")] + #[display("ip.dst")] LitIpDst, /// Literal `ip.src` - #[display(fmt = "ip.src")] + #[display("ip.src")] LitIpSrc, /// Literal `vlan.id` - #[display(fmt = "vlan.id")] + #[display("vlan.id")] LitVlanId, /// Literal `port` - #[display(fmt = "port")] + #[display("port")] LitPort, /// Literal 
`dstport` - #[display(fmt = "dstport")] + #[display("dstport")] LitPortDst, /// Literal `srcport` - #[display(fmt = "srcport")] + #[display("srcport")] LitPortSrc, /// Literal `payload` - #[display(fmt = "payload")] + #[display("payload")] LitPayload, /// Literal `payload.len` - #[display(fmt = "payload.len")] + #[display("payload.len")] LitPayloadLen, + /// Literal `arp` + #[display("arp")] + LitArp, /// `,` - #[display(fmt = ",")] + #[display(",")] Comma, /// `not` or `!` - #[display(fmt = "not")] + #[display("not")] Not, /// `or` or `||` - #[display(fmt = "or")] + #[display("or")] Or, /// `and` or `&&` - #[display(fmt = "and")] + #[display("and")] And, /// `in` - #[display(fmt = "in")] + #[display("in")] In, /// `contains` - #[display(fmt = "contains")] + #[display("contains")] Contains, /// `~` or `matches` for regex matching - #[display(fmt = "matches")] + #[display("matches")] RegexMatch, /// `==` or `eq` - #[display(fmt = "==")] + #[display("==")] Equal, /// `!=` or `ne` - #[display(fmt = "!=")] + #[display("!=")] NotEqual, /// `<` or `lt` - #[display(fmt = "<")] + #[display("<")] LessThan, /// `<=` or `le` - #[display(fmt = "<=")] + #[display("<=")] LessEqual, /// `>` or `gt` - #[display(fmt = ">")] + #[display(">")] GreaterThan, /// `>=` or `ge` - #[display(fmt = ">=")] + #[display(">=")] GreaterEqual, /// `(` - #[display(fmt = "(")] + #[display("(")] OpenParen, /// `)` - #[display(fmt = ")")] + #[display(")")] CloseParen, /// `{` - #[display(fmt = "{{")] + #[display("{{")] OpenBrace, /// `}` - #[display(fmt = "}}")] + #[display("}}")] CloseBrace, /// A value - #[display(fmt = "a value")] + #[display("a value")] Value, /// A quoted value, either with "" or '' - #[display(fmt = "a quoted value")] + #[display("a quoted value")] QuotedValue, /// For good errors, it's better to lex everything and handle failure at parsing. 
- #[display(fmt = "an error")] + #[display("an error")] Error, /// End of input - #[display(fmt = "end of input")] + #[display("end of input")] EoF, } @@ -208,6 +211,7 @@ impl<'a> Lexer<'a> { "srcport" => TokenKind::LitPortSrc, "payload" => TokenKind::LitPayload, "payload.len" => TokenKind::LitPayloadLen, + "arp" => TokenKind::LitArp, "le" => TokenKind::LessEqual, "lt" => TokenKind::LessThan, "ge" => TokenKind::GreaterEqual, @@ -400,8 +404,10 @@ mod tests { use TokenKind::*; init_test_logging(); - let inputs = ["tcp", "udp", "vlan"]; - let expected = &[[(LitTcp, "tcp")], [(LitUdp, "udp")], [(LitVlan, "vlan")]]; + let inputs = ["tcp", "udp", "vlan", "arp"]; + let expected = &[[(LitTcp, "tcp")], [(LitUdp, "udp")], [(LitVlan, "vlan")], [ + (LitArp, "arp"), + ]]; for (input, expected) in inputs.into_iter().zip(expected) { compare_input_tokens(input, expected); } diff --git a/src/lib.rs b/src/lib.rs index e9f2e2b..368d841 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,8 +11,8 @@ mod test_utils; mod value_parsers; pub use driver::{ - parse, ParseError, + parse, }; pub use expression::{ Expression, diff --git a/src/parser.rs b/src/parser.rs index 8d5321f..a89dd10 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,4 +1,5 @@ use crate::{ + Expression, driver::{ ErrorKind, Expected, @@ -25,7 +26,6 @@ use crate::{ parse_regex, parse_u32, }, - Expression, }; use bstr::BStr; use std::vec::IntoIter; @@ -362,6 +362,10 @@ impl<'a> Parser<'a> { self.advance(); Clause::IsVlan } + TokenKind::LitArp => { + self.advance(); + Clause::IsArp + } TokenKind::LitEthAddr => { self.advance(); self.parse_ethernet_operations().map(Clause::EthAddr)? 
@@ -482,6 +486,7 @@ mod tests { Parser, }; use crate::{ + Expression, expression::{ Clause, CmpOp, @@ -499,7 +504,6 @@ mod tests { }, mac_addr::MacAddr, test_utils::init_test_logging, - Expression, }; use ipnet::IpNet; use regex::bytes::Regex; @@ -819,8 +823,8 @@ mod tests { fn test_parse_single_term() { init_test_logging(); - let inputs = ["tcp", "udp", "vlan"]; - let expected = [Clause::IsTcp, Clause::IsUdp, Clause::IsVlan]; + let inputs = ["tcp", "udp", "vlan", "arp"]; + let expected = [Clause::IsTcp, Clause::IsUdp, Clause::IsVlan, Clause::IsArp]; // Validate we have an expected result for every input assert_eq!(inputs.len(), expected.len());