Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(transaction): data type prediction for lazy predictions #1194

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
Prev Previous commit
Next Next commit
Implement rule metadata filter
  • Loading branch information
piyushroshan committed Nov 7, 2024
commit 225d1daf3943b144edd94317ac93e7387747301e
139 changes: 121 additions & 18 deletions experimental/types/value_metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// SPDX-License-Identifier: Apache-2.0
package types

import "unicode"
import (
"unicode"
)

// DataMetadata identifies one kind of metadata that a value can have.
// Values of this type are obtained from their string names through
// NewValueMetadata and are used as keys when recording evaluation results.
type DataMetadata int
Expand All @@ -29,6 +31,10 @@ const (
// NewValueMetadata returns a new ValueMetadata from a string.
func NewValueMetadata(metadata string) (DataMetadata, bool) {
switch metadata {
case "numeric":
return ValueMetadataNumeric, true
case "boolean":
return ValueMetadataBoolean, true
case "alphanumeric":
return ValueMetadataAlphanumeric, true
case "ascii":
Expand All @@ -39,10 +45,6 @@ func NewValueMetadata(metadata string) (DataMetadata, bool) {
return ValueMetadataURI, true
case "domain":
return ValueMetadataDomain, true
case "numeric":
return ValueMetadataNumeric, true
case "boolean":
return ValueMetadataBoolean, true
case "unicode":
return ValueMetadataUnicode, true
}
Expand All @@ -54,23 +56,34 @@ type DataMetadataList struct {
metadata map[DataMetadata]bool
}

func (v *DataMetadataList) Test(data string, metadataType DataMetadata) bool {
result, ok := v.metadata[metadataType]
if !ok {
// we do the analysis only once
switch metadataType {
case ValueMetadataAlphanumeric:
return v.testAlphanumeric(data)
default:
// this should not happen
return false
// Evaluate classifies data against every implemented metadata type and caches
// each outcome in v, so that the Test* accessors and Test can answer from the
// cache.
//
// The analysis runs only once per list: when results are already cached the
// call is a no-op, even if data differs from the value analysed first.
// URI, domain and unicode detection are not implemented yet, so no result is
// recorded for them and their lookups report false.
func (v *DataMetadataList) Evaluate(data string) {
	if v.metadata != nil {
		// Already analysed; keep the cached results.
		return
	}
	v.metadata = make(map[DataMetadata]bool, 5)

	// Each evaluator stores its own result in v.metadata. They must be called
	// directly: ranging over the freshly created (empty) map would never
	// execute the loop body and would leave every classification unset.
	v.evaluateNumeric(data)
	v.evaluateBoolean(data)
	v.evaluateAlphanumeric(data)
	v.evaluateAscii(data)
	v.evaluateBase64(data)
}

func (v *DataMetadataList) testAlphanumeric(data string) bool {
func (v *DataMetadataList) evaluateAlphanumeric(data string) bool {
res := true
for _, c := range data {
if !unicode.IsLetter(c) && !unicode.IsNumber(c) {
Expand All @@ -81,3 +94,93 @@ func (v *DataMetadataList) testAlphanumeric(data string) bool {
v.metadata[ValueMetadataAlphanumeric] = res
return res
}

// evaluateAscii reports whether every byte of data is at most
// unicode.MaxASCII, stores that result under ValueMetadataAscii and returns
// it. An empty string is reported as ASCII.
func (v *DataMetadataList) evaluateAscii(data string) bool {
	// Advance past the leading run of ASCII bytes; reaching the end means
	// no byte exceeded the ASCII range.
	pos := 0
	for pos < len(data) && data[pos] <= unicode.MaxASCII {
		pos++
	}
	onlyASCII := pos == len(data)
	v.metadata[ValueMetadataAscii] = onlyASCII
	return onlyASCII
}

// isBase64 reports whether c is one of the characters A-Z, a-z, 0-9, '+' or
// '/'. The padding character '=' is not accepted.
func isBase64(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z':
		return true
	case 'a' <= c && c <= 'z':
		return true
	case '0' <= c && c <= '9':
		return true
	case c == '+', c == '/':
		return true
	}
	return false
}

// evaluateBase64 reports whether every byte of data is accepted by isBase64,
// stores that result under ValueMetadataBase64 and returns it. An empty
// string is reported as base64.
func (v *DataMetadataList) evaluateBase64(data string) bool {
	valid := true
	for _, b := range []byte(data) {
		// Stop at the first byte outside the accepted alphabet.
		if valid = isBase64(b); !valid {
			break
		}
	}
	v.metadata[ValueMetadataBase64] = valid
	return valid
}

// evaluateNumeric reports whether every rune of data satisfies
// unicode.IsNumber, stores that result under ValueMetadataNumeric and returns
// it. An empty string is reported as numeric.
func (v *DataMetadataList) evaluateNumeric(data string) bool {
	allNumbers := func() bool {
		for _, r := range data {
			if unicode.IsNumber(r) {
				continue
			}
			return false
		}
		return true
	}()
	v.metadata[ValueMetadataNumeric] = allNumbers
	return allNumbers
}

// evaluateBoolean reports whether data is exactly "true" or "false"
// (case-sensitive), stores that result under ValueMetadataBoolean and
// returns it. Any other input, including the empty string, is not boolean.
func (v *DataMetadataList) evaluateBoolean(data string) bool {
	// The result must start from the comparison itself; defaulting it to true
	// would classify every value as boolean.
	res := data == "true" || data == "false"
	v.metadata[ValueMetadataBoolean] = res
	return res
}

// TestNumeric returns the result recorded under ValueMetadataNumeric.
// It returns false when no such result has been recorded.
func (v *DataMetadataList) TestNumeric() bool {
	isNumeric, recorded := v.metadata[ValueMetadataNumeric]
	return recorded && isNumeric
}

// TestBoolean returns the result recorded under ValueMetadataBoolean.
// It returns false when no such result has been recorded.
func (v *DataMetadataList) TestBoolean() bool {
	isBoolean, recorded := v.metadata[ValueMetadataBoolean]
	return recorded && isBoolean
}

// TestAlphanumeric returns the result recorded under
// ValueMetadataAlphanumeric. It returns false when no such result has been
// recorded.
func (v *DataMetadataList) TestAlphanumeric() bool {
	isAlphanumeric, recorded := v.metadata[ValueMetadataAlphanumeric]
	return recorded && isAlphanumeric
}

// TestAscii returns the result recorded under ValueMetadataAscii.
// It returns false when no such result has been recorded.
func (v *DataMetadataList) TestAscii() bool {
	isASCII, recorded := v.metadata[ValueMetadataAscii]
	return recorded && isASCII
}

// TestBase64 returns the result recorded under ValueMetadataBase64.
// It returns false when no such result has been recorded.
func (v *DataMetadataList) TestBase64() bool {
	isB64, recorded := v.metadata[ValueMetadataBase64]
	return recorded && isB64
}

// TestURI returns the result recorded under ValueMetadataURI.
// It returns false when no such result has been recorded.
func (v *DataMetadataList) TestURI() bool {
	isURI, recorded := v.metadata[ValueMetadataURI]
	return recorded && isURI
}

// TestDomain returns the result recorded under ValueMetadataDomain.
// It returns false when no such result has been recorded.
func (v *DataMetadataList) TestDomain() bool {
	isDomain, recorded := v.metadata[ValueMetadataDomain]
	return recorded && isDomain
}

// TestUnicode returns the result recorded under ValueMetadataUnicode.
// It returns false when no such result has been recorded.
func (v *DataMetadataList) TestUnicode() bool {
	isUnicode, recorded := v.metadata[ValueMetadataUnicode]
	return recorded && isUnicode
}

// Test reports whether every metadata type in metadataTypes has a recorded
// result of true. A type with no recorded result counts as false, and an
// empty metadataTypes list yields true.
func (v *DataMetadataList) Test(metadataTypes []DataMetadata) bool {
	// Walk the leading run of satisfied types; all are satisfied only if the
	// walk reaches the end of the list.
	idx := 0
	for idx < len(metadataTypes) && v.metadata[metadataTypes[idx]] {
		idx++
	}
	return idx == len(metadataTypes)
}
13 changes: 13 additions & 0 deletions internal/actions/tag.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
package actions

import (
"fmt"
"strings"

"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes"
"github.com/corazawaf/coraza/v3/internal/corazawaf"
)
Expand All @@ -30,6 +33,16 @@ func (a *tagFn) Init(r plugintypes.RuleMetadata, data string) error {
return ErrMissingArguments
}
r.(*corazawaf.Rule).Tags_ = append(r.(*corazawaf.Rule).Tags_, data)
if strings.HasPrefix(data, "metadatafilter/") {
filters_string := strings.Split(data, "/")
filters := strings.Split(filters_string[1], ",")
for _, filter := range filters {
ok := r.(*corazawaf.Rule).AddAllowedMetadata(filter)
if ok != nil {
return fmt.Errorf("invalid metadata filter: %s", filter)
}
}
}
return nil
}

Expand Down
9 changes: 7 additions & 2 deletions internal/corazarules/rule_match.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"fmt"
"strconv"
"strings"
"unsafe"

experimentalTypes "github.com/corazawaf/coraza/v3/experimental/types"
"github.com/corazawaf/coraza/v3/types"
Expand Down Expand Up @@ -64,6 +63,8 @@ func (m *MatchData) ChainLevel() int {
}

// Metadata calls Evaluate on the match's metadata list with the matched
// value and then returns that list by value.
func (m *MatchData) Metadata() experimentalTypes.DataMetadataList {
	list := &m.Metadata_
	// Evaluate is invoked on every call; whether it re-analyses the value is
	// decided inside Evaluate.
	list.Evaluate(m.Value_)
	return *list
}

Expand Down Expand Up @@ -152,7 +153,11 @@ func (mr *MatchedRule) ClientIPAddress() string {
}

// MatchedDatas returns the rule's matched data as a []types.MatchData.
//
// Elements are copied one by one into a new slice instead of reinterpreting
// mr.MatchedDatas_ through unsafe.Pointer. It returns nil when there is no
// matched data.
func (mr *MatchedRule) MatchedDatas() []types.MatchData {
	if len(mr.MatchedDatas_) == 0 {
		return nil
	}
	// The final length is known, so allocate the backing array once.
	matchedDatas := make([]types.MatchData, 0, len(mr.MatchedDatas_))
	for _, md := range mr.MatchedDatas_ {
		matchedDatas = append(matchedDatas, md)
	}
	return matchedDatas
}

func (mr *MatchedRule) MatchedDatasExperimental_() []experimentalTypes.MatchData {
Expand Down
23 changes: 22 additions & 1 deletion internal/corazawaf/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ type Rule struct {
// chainedRules containing rules with just PhaseUnknown variables, may potentially
// be anticipated. This boolean ensures that it happens
withPhaseUnknownVariable bool

allowedMetadatas []experimentalTypes.DataMetadata
}

func (r *Rule) ParentID() int {
Expand All @@ -157,6 +159,10 @@ func (r *Rule) Status() int {
return r.DisruptiveStatus
}

// AllowedMetadatas returns the metadata types stored on the rule, which
// AddAllowedMetadata appends to. The rule's own slice is returned, not a copy.
func (r *Rule) AllowedMetadatas() []experimentalTypes.DataMetadata {
	allowed := r.allowedMetadatas
	return allowed
}

const chainLevelZero = 0

// Evaluate will evaluate the current rule for the indicated transaction
Expand Down Expand Up @@ -242,8 +248,14 @@ func (r *Rule) doEvaluate(logger debuglog.Logger, phase types.RulePhase, tx *Tra
vLog = logger.With(debuglog.Str("variable", v.Variable.Name()))
}
vLog.Debug().Msg("Expanding arguments for rule")

allowedMetadatas := r.AllowedMetadatas()
for i, arg := range values {
if len(allowedMetadatas) > 0 {
argDataMetadataList := arg.Metadata()
if argDataMetadataList.Test(allowedMetadatas) {
continue
}
}
args, errs := r.transformArg(arg, i, cache)
if len(errs) > 0 {
vWarnLog := vLog.Warn()
Expand Down Expand Up @@ -609,6 +621,15 @@ func (r *Rule) AddTransformation(name string, t plugintypes.Transformation) erro
return nil
}

// AddAllowedMetadata resolves metadataName through
// experimentalTypes.NewValueMetadata and appends the resulting metadata type
// to the rule's allowed metadata. It returns an error, leaving the rule
// unchanged, when the name is not recognized.
func (r *Rule) AddAllowedMetadata(metadataName string) error {
	if parsed, known := experimentalTypes.NewValueMetadata(metadataName); known {
		r.allowedMetadatas = append(r.allowedMetadatas, parsed)
		return nil
	}
	return fmt.Errorf("invalid metadata %q not found", metadataName)
}

// ClearTransformations clears all the transformations
// it is mostly used by the "none" transformation
func (r *Rule) ClearTransformations() {
Expand Down
11 changes: 7 additions & 4 deletions internal/corazawaf/rule_multiphase.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,15 +318,18 @@ func isMultiphaseDoubleEvaluation(tx *Transaction, phase types.RulePhase, r *Rul
*collectiveMatchedValues = append(*collectiveMatchedValues, mr)

for _, matchedRule := range tx.matchedRules {
matchedData := matchedRule.MatchedDatas()
matchedDataExp := *(*[]experimentalTypes.MatchData)(unsafe.Pointer(&matchedData))
if matchedRule.Rule().ID() == r.ParentID_ && matchedChainDepth(matchedDataExp) == matchedChainDepth(*collectiveMatchedValues) {
matchedDatas := matchedRule.MatchedDatas()
var matchedDatasExp []experimentalTypes.MatchData
for _, v := range matchedDatas {
matchedDatasExp = append(matchedDatasExp, *(*experimentalTypes.MatchData)(unsafe.Pointer(&v)))
}
if matchedRule.Rule().ID() == r.ParentID_ && matchedChainDepth(matchedDatasExp) == matchedChainDepth(*collectiveMatchedValues) {
// This might be a double match, let's generate the chains that already matched and the one that just matched
// let's see if all the latter already matched.

// generateChainMatches generates matched chains based on the matchedValues and populates matchedChains and collectiveMatchedChains variables
var matchedChains, collectiveMatchedChains [][]experimentalTypes.MatchData
generateChainMatches(tx, matchedDataExp, 0, nil, &matchedChains)
generateChainMatches(tx, matchedDatasExp, 0, nil, &matchedChains)
generateChainMatches(tx, *collectiveMatchedValues, 0, nil, &collectiveMatchedChains)

// Check if a newly matched chain (part of collectiveMatchedChain) already matched
Expand Down