diff --git a/agent/src/flow_generator/protocol_logs/sql/redis-commands b/agent/src/flow_generator/protocol_logs/sql/redis-commands new file mode 100644 index 00000000000..2e4c497b859 --- /dev/null +++ b/agent/src/flow_generator/protocol_logs/sql/redis-commands @@ -0,0 +1,468 @@ +ACL CAT +ACL DELUSER +ACL DRYRUN +ACL GENPASS +ACL GETUSER +ACL LIST +ACL LOAD +ACL LOG +ACL SAVE +ACL SETUSER +ACL USERS +ACL WHOAMI +APPEND +ASKING +AUTH +BF.ADD +BF.CARD +BF.EXISTS +BF.INFO +BF.INSERT +BF.LOADCHUNK +BF.MADD +BF.MEXISTS +BF.RESERVE +BF.SCANDUMP +BGREWRITEAOF +BGSAVE +BITCOUNT +BITFIELD +BITFIELD_RO +BITOP +BITPOS +BLMOVE +BLMPOP +BLPOP +BRPOP +BRPOPLPUSH +BZMPOP +BZPOPMAX +BZPOPMIN +CF.ADD +CF.ADDNX +CF.COUNT +CF.DEL +CF.EXISTS +CF.INFO +CF.INSERT +CF.INSERTNX +CF.LOADCHUNK +CF.MEXISTS +CF.RESERVE +CF.SCANDUMP +CLIENT CACHING +CLIENT GETNAME +CLIENT GETREDIR +CLIENT ID +CLIENT INFO +CLIENT KILL +CLIENT LIST +CLIENT NO-EVICT +CLIENT NO-TOUCH +CLIENT PAUSE +CLIENT REPLY +CLIENT SETINFO +CLIENT SETNAME +CLIENT TRACKING +CLIENT TRACKINGINFO +CLIENT UNBLOCK +CLIENT UNPAUSE +CLUSTER ADDSLOTS +CLUSTER ADDSLOTSRANGE +CLUSTER BUMPEPOCH +CLUSTER COUNT-FAILURE-REPORTS +CLUSTER COUNTKEYSINSLOT +CLUSTER DELSLOTS +CLUSTER DELSLOTSRANGE +CLUSTER FAILOVER +CLUSTER FLUSHSLOTS +CLUSTER FORGET +CLUSTER GETKEYSINSLOT +CLUSTER INFO +CLUSTER KEYSLOT +CLUSTER LINKS +CLUSTER MEET +CLUSTER MYID +CLUSTER MYSHARDID +CLUSTER NODES +CLUSTER REPLICAS +CLUSTER REPLICATE +CLUSTER RESET +CLUSTER SAVECONFIG +CLUSTER SET-CONFIG-EPOCH +CLUSTER SETSLOT +CLUSTER SHARDS +CLUSTER SLAVES +CLUSTER SLOTS +CMS.INCRBY +CMS.INFO +CMS.INITBYDIM +CMS.INITBYPROB +CMS.MERGE +CMS.QUERY +COMMAND +COMMAND COUNT +COMMAND DOCS +COMMAND GETKEYS +COMMAND GETKEYSANDFLAGS +COMMAND INFO +COMMAND LIST +CONFIG GET +CONFIG RESETSTAT +CONFIG REWRITE +CONFIG SET +COPY +DBSIZE +DECR +DECRBY +DEL +DISCARD +DUMP +ECHO +EVAL +EVAL_RO +EVALSHA +EVALSHA_RO +EXEC +EXISTS +EXPIRE +EXPIREAT +EXPIRETIME +FAILOVER +FCALL +FCALL_RO +FLUSHALL +FLUSHDB +FT._LIST +FT.AGGREGATE +FT.ALIASADD +FT.ALIASDEL +FT.ALIASUPDATE +FT.ALTER +FT.CONFIG GET +FT.CONFIG SET +FT.CREATE +FT.CURSOR DEL +FT.CURSOR READ +FT.DICTADD +FT.DICTDEL +FT.DICTDUMP +FT.DROPINDEX +FT.EXPLAIN +FT.EXPLAINCLI +FT.INFO +FT.PROFILE +FT.SEARCH +FT.SPELLCHECK +FT.SUGADD +FT.SUGDEL +FT.SUGGET +FT.SUGLEN +FT.SYNDUMP +FT.SYNUPDATE +FT.TAGVALS +FUNCTION DELETE +FUNCTION DUMP +FUNCTION FLUSH +FUNCTION KILL +FUNCTION LIST +FUNCTION LOAD +FUNCTION RESTORE +FUNCTION STATS +GEOADD +GEODIST +GEOHASH +GEOPOS +GEORADIUS +GEORADIUS_RO +GEORADIUSBYMEMBER +GEORADIUSBYMEMBER_RO +GEOSEARCH +GEOSEARCHSTORE +GET +GETBIT +GETDEL +GETEX +GETRANGE +GETSET +HDEL +HELLO +HEXISTS +HEXPIRE +HEXPIREAT +HEXPIRETIME +HGET +HGETALL +HINCRBY +HINCRBYFLOAT +HKEYS +HLEN +HMGET +HMSET +HPERSIST +HPEXPIRE +HPEXPIREAT +HPEXPIRETIME +HPTTL +HRANDFIELD +HSCAN +HSET +HSETNX +HSTRLEN +HTTL +HVALS +INCR +INCRBY +INCRBYFLOAT +INFO +JSON.ARRAPPEND +JSON.ARRINDEX +JSON.ARRINSERT +JSON.ARRLEN +JSON.ARRPOP +JSON.ARRTRIM +JSON.CLEAR +JSON.DEBUG +JSON.DEBUG MEMORY +JSON.DEL +JSON.FORGET +JSON.GET +JSON.MERGE +JSON.MGET +JSON.MSET +JSON.NUMINCRBY +JSON.NUMMULTBY +JSON.OBJKEYS +JSON.OBJLEN +JSON.RESP +JSON.SET +JSON.STRAPPEND +JSON.STRLEN +JSON.TOGGLE +JSON.TYPE +KEYS +LASTSAVE +LATENCY DOCTOR +LATENCY GRAPH +LATENCY HISTOGRAM +LATENCY HISTORY +LATENCY LATEST +LATENCY RESET +LCS +LINDEX +LINSERT +LLEN +LMOVE +LMPOP +LOLWUT +LPOP +LPOS +LPUSH +LPUSHX +LRANGE +LREM +LSET +LTRIM +MEMORY DOCTOR +MEMORY MALLOC-STATS +MEMORY PURGE +MEMORY STATS +MEMORY USAGE +MGET +MIGRATE +MODULE LIST +MODULE LOAD +MODULE LOADEX +MODULE UNLOAD +MONITOR +MOVE +MSET +MSETNX +MULTI +OBJECT ENCODING +OBJECT FREQ +OBJECT IDLETIME +OBJECT REFCOUNT +PERSIST +PEXPIRE +PEXPIREAT +PEXPIRETIME +PFADD +PFCOUNT +PFDEBUG +PFMERGE +PFSELFTEST +PING +PSETEX +PSUBSCRIBE +PSYNC +PTTL +PUBLISH +PUBSUB CHANNELS +PUBSUB NUMPAT +PUBSUB NUMSUB +PUBSUB SHARDCHANNELS +PUBSUB SHARDNUMSUB +PUNSUBSCRIBE +QUIT +RANDOMKEY +READONLY +READWRITE +RENAME +RENAMENX +REPLCONF +REPLICAOF +RESET +RESTORE +RESTORE-ASKING +ROLE +RPOP +RPOPLPUSH +RPUSH +RPUSHX +SADD +SAVE +SCAN +SCARD +SCRIPT DEBUG +SCRIPT EXISTS +SCRIPT FLUSH +SCRIPT KILL +SCRIPT LOAD +SDIFF +SDIFFSTORE +SELECT +SET +SETBIT +SETEX +SETNX +SETRANGE +SHUTDOWN +SINTER +SINTERCARD +SINTERSTORE +SISMEMBER +SLAVEOF +SLOWLOG GET +SLOWLOG LEN +SLOWLOG RESET +SMEMBERS +SMISMEMBER +SMOVE +SORT +SORT_RO +SPOP +SPUBLISH +SRANDMEMBER +SREM +SSCAN +SSUBSCRIBE +STRLEN +SUBSCRIBE +SUBSTR +SUNION +SUNIONSTORE +SUNSUBSCRIBE +SWAPDB +SYNC +TDIGEST.ADD +TDIGEST.BYRANK +TDIGEST.BYREVRANK +TDIGEST.CDF +TDIGEST.CREATE +TDIGEST.INFO +TDIGEST.MAX +TDIGEST.MERGE +TDIGEST.MIN +TDIGEST.QUANTILE +TDIGEST.RANK +TDIGEST.RESET +TDIGEST.REVRANK +TDIGEST.TRIMMED_MEAN +TIME +TOPK.ADD +TOPK.COUNT +TOPK.INCRBY +TOPK.INFO +TOPK.LIST +TOPK.QUERY +TOPK.RESERVE +TOUCH +TS.ADD +TS.ALTER +TS.CREATE +TS.CREATERULE +TS.DECRBY +TS.DEL +TS.DELETERULE +TS.GET +TS.INCRBY +TS.INFO +TS.MADD +TS.MGET +TS.MRANGE +TS.MREVRANGE +TS.QUERYINDEX +TS.RANGE +TS.REVRANGE +TTL +TYPE +UNLINK +UNSUBSCRIBE +UNWATCH +WAIT +WAITAOF +WATCH +XACK +XADD +XAUTOCLAIM +XCLAIM +XDEL +XGROUP CREATE +XGROUP CREATECONSUMER +XGROUP DELCONSUMER +XGROUP DESTROY +XGROUP SETID +XINFO CONSUMERS +XINFO GROUPS +XINFO STREAM +XLEN +XPENDING +XRANGE +XREAD +XREADGROUP +XREVRANGE +XSETID +XTRIM +ZADD +ZCARD +ZCOUNT +ZDIFF +ZDIFFSTORE +ZINCRBY +ZINTER +ZINTERCARD +ZINTERSTORE +ZLEXCOUNT +ZMPOP +ZMSCORE +ZPOPMAX +ZPOPMIN +ZRANDMEMBER +ZRANGE +ZRANGEBYLEX +ZRANGEBYSCORE +ZRANGESTORE +ZRANK +ZREM +ZREMRANGEBYLEX +ZREMRANGEBYRANK +ZREMRANGEBYSCORE +ZREVRANGE +ZREVRANGEBYLEX +ZREVRANGEBYSCORE +ZREVRANK +ZSCAN +ZSCORE +ZUNION +ZUNIONSTORE diff --git a/agent/src/flow_generator/protocol_logs/sql/redis.rs b/agent/src/flow_generator/protocol_logs/sql/redis.rs index 60fd70bdcd1..eeaf628ac72 100644 --- a/agent/src/flow_generator/protocol_logs/sql/redis.rs +++ b/agent/src/flow_generator/protocol_logs/sql/redis.rs @@ -14,7 +14,7 @@ * limitations under the License. */ -use std::{fmt, str}; +use std::{cell::OnceCell, collections::HashMap, fmt, str}; use serde::{Serialize, Serializer}; @@ -626,6 +626,66 @@ mod stringifier { } } +struct Command { + cmd: String, + sub: Vec, +} + +thread_local! { + static ALL_COMMANDS: OnceCell> = OnceCell::new(); + static MAX_COMMAND_LENGTH: OnceCell = OnceCell::new(); +} + +fn max_command_length() -> usize { + MAX_COMMAND_LENGTH.with(|cell| { + let len = cell.get_or_init(|| { + ALL_COMMANDS.with(|cell| { + let cmds = cell.get_or_init(all_commands); + cmds.iter().map(|cmd| cmd.cmd.len()).max().unwrap() + }) + }); + *len + }) +} + +fn all_commands() -> Vec { + let mut command_map: HashMap<&str, Vec> = HashMap::new(); + for line in ALL_COMMNADS_STR.lines() { + if line.is_empty() { + continue; + } + let mut keys = line.split_whitespace(); + let key0 = keys.next().unwrap(); + let key1 = keys.next().unwrap_or_default(); + command_map.entry(key0).or_default().push(key1.to_owned()); + } + let mut commands = command_map + .into_iter() + .map(|(cmd, mut sub)| { + // remove `sub` for commands without sub commands to save memory + // commands (for example, JSON.DEBUG) that can have zero or one more sub commands + // can also be removed because validate the second word is pointless + if sub.is_empty() || sub.iter().any(|s| s.is_empty()) { + Command { + cmd: cmd.to_owned(), + sub: vec![], + } + } else { + sub.sort_unstable(); + Command { + cmd: cmd.to_owned(), + sub, + } + } + }) + .collect::>(); + commands.sort_unstable_by(|k1, k2| k1.cmd.cmp(&k2.cmd)); + commands +} + +// The list is from https://redis.io/docs/latest/commands/ +const ALL_COMMNADS_STR: &str = include_str!("redis-commands"); + struct CommandLine<'a> { payload: &'a [u8], cmd_upper: String, @@ -633,8 +693,6 @@ struct CommandLine<'a> { } impl<'a> CommandLine<'a> { - const MAX_COMMAND_LENGTH: usize = 17; - fn new(payload: &'a [u8]) -> Result { if payload.len() < "*0\r\n".len() || payload[0] != b'*' { return Err(Error::RedisLogParseFailed); @@ -642,14 +700,22 @@ impl<'a> CommandLine<'a> { let (payload, length) = stringifier::read_length(&payload[1..])?; - let mut cmd_upper = String::new(); // read command let (mut payload_iter, command) = Self::decode_bulk_string(payload)?; - if command.len() <= Self::MAX_COMMAND_LENGTH && command.is_ascii() { - // SAFTY: checked ascii string - unsafe { - cmd_upper = str::from_utf8_unchecked(command).to_ascii_uppercase(); - } + if command.len() > max_command_length() { + return Err(Error::RedisLogParseFailed); + } + let Ok(cmd_upper) = str::from_utf8(command).map(|s| s.to_ascii_uppercase()) else { + return Err(Error::RedisLogParseFailed); + }; + + let valid = if length > 1 { + Self::check_command(&cmd_upper, Some(payload_iter)) + } else { + Self::check_command(&cmd_upper, None) + }; + if !valid { + return Err(Error::RedisLogParseFailed); } // validate rest of the buffer @@ -667,6 +733,30 @@ impl<'a> CommandLine<'a> { }) } + fn check_command(cmd_upper: &str, next_cmds: Option<&[u8]>) -> bool { + ALL_COMMANDS.with(|cell| { + let cmds = cell.get_or_init(all_commands); + match cmds.binary_search_by_key(&cmd_upper, |cmd| &cmd.cmd) { + Ok(id) if cmds[id].sub.is_empty() => true, + Ok(id) => { + if let Some(next) = next_cmds { + let Ok((_, next_cmd)) = Self::decode_bulk_string(next) else { + return false; + }; + let Ok(next_cmd) = str::from_utf8(next_cmd) else { + return false; + }; + let next_cmd_upper = next_cmd.to_ascii_uppercase(); + cmds[id].sub.binary_search(&next_cmd_upper).is_ok() + } else { + false + } + } + Err(_) => false, + } + }) + } + fn decode_bulk_string(payload: &[u8]) -> Result<(&[u8], &[u8])> { if payload.len() < "$0\r\n".len() || payload[0] != b'$' { return Err(Error::RedisLogParseFailed); @@ -1102,6 +1192,35 @@ mod tests { } } + #[test] + fn check_command() { + // single word command + assert!(CommandLine::check_command("SET", None)); + assert!(CommandLine::check_command("SET", Some(b"$3\r\nkey\r\n"))); + + // multi word command + assert!(CommandLine::check_command("ACL", Some(b"$4\r\nLIST\r\n"))); + assert!(CommandLine::check_command( + "ACL", + Some(b"$7\r\nGENPASS\r\n") + )); + assert!(!CommandLine::check_command( + "ACL", + Some(b"$7\r\nINVALID\r\n") + )); + + // single or multi + assert!(CommandLine::check_command("JSON.DEBUG", None)); + assert!(CommandLine::check_command( + "JSON.DEBUG", + Some(b"$6\r\nMEMORY\r\n") + )); + assert!(CommandLine::check_command( + "JSON.DEBUG", + Some(b"$14\r\nSOMETHING_ELSE\r\n") + )); + } + #[test] fn check_perf() { let expected = vec![ @@ -1225,7 +1344,7 @@ mod tests { ("BITFIELD key GET type offset INCRBY type", "BITFIELD key GET type offset INCRBY type"), ("BITFIELD key SET type offset", "BITFIELD key SET type offset"), ("CONFIG SET parameter value", "CONFIG SET parameter ?"), - ("CONFIG foo bar baz", "CONFIG foo bar baz"), + ("CONFIG GET foo bar baz", "CONFIG GET foo bar baz"), ("GEOADD key longitude latitude member longitude latitude member longitude latitude member", "GEOADD key longitude latitude ? longitude latitude ? longitude latitude ?"), ("GEOADD key longitude latitude member longitude latitude member", "GEOADD key longitude latitude ? longitude latitude ?"), ("GEOADD key longitude latitude member", "GEOADD key longitude latitude ?"), @@ -1275,14 +1394,14 @@ mod tests { ("ZADD key XX INCR score member score member", "ZADD key XX INCR score ? score ?"), ("ZADD key XX INCR score member", "ZADD key XX INCR score ?"), ("ZADD key XX INCR score", "ZADD key XX INCR score"), - ("CONFIG command SET k v", "CONFIG command SET k ?"), ("SET *😊®© ❤️", "SET *😊®© ?"), - ("SET😊 ❤️*😊®© ❤️", "SET😊 ❤️*😊®© ❤️"), ("ZADD key 😊 member score 😊", "ZADD key 😊 ? score ?"), ]; for (input, expected) in testcases.iter() { let redis_str = encode_redis_command(input); - let cmdline = CommandLine::new(&redis_str).unwrap(); + let Ok(cmdline) = CommandLine::new(&redis_str) else { + panic!("parse cmdline failed at: {input}"); + }; let output = cmdline.stringify(true); assert_eq!( str::from_utf8(output.as_slice()).unwrap(),