From 3360f150b5056b350b176a524322a4be6b7b66f4 Mon Sep 17 00:00:00 2001 From: solnicki Date: Wed, 20 Nov 2024 10:15:38 +0100 Subject: [PATCH] implement regexp scan and data generation --- go.mod | 3 ++ go.sum | 4 ++ internal/anonymizer/anonymizer.go | 65 +++++++++++++++++++++----- internal/anonymizer/anonymizer_test.go | 17 ++++++- internal/generator/generator.go | 27 +++++++++++ internal/lookup/lookup.go | 23 +++++++++ internal/proof/proof.go | 32 +++++++------ internal/proof/proof_test.go | 23 ++++----- tests/integration_test.go | 15 ++++-- 9 files changed, 164 insertions(+), 45 deletions(-) create mode 100644 internal/generator/generator.go create mode 100644 internal/lookup/lookup.go diff --git a/go.mod b/go.mod index 662393f..fc7b61b 100644 --- a/go.mod +++ b/go.mod @@ -4,8 +4,11 @@ go 1.22.5 require github.com/stretchr/testify v1.9.0 +require golang.org/x/text v0.16.0 // indirect + require ( github.com/davecgh/go-spew v1.1.1 // indirect + github.com/go-faker/faker/v4 v4.5.0 github.com/pmezard/go-difflib v1.0.0 // indirect golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7 gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index fd1583e..85fb57e 100644 --- a/go.sum +++ b/go.sum @@ -1,11 +1,15 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-faker/faker/v4 v4.5.0 h1:ARzAY2XoOL9tOUK+KSecUQzyXQsUaZHefjyF8x6YFHc= +github.com/go-faker/faker/v4 v4.5.0/go.mod h1:p3oq1GRjG2PZ7yqeFFfQI20Xm61DoBDlCA8RiSyZ48M= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7 
h1:wDLEX9a7YQoKdKNQt88rtydkqDxeGaBUTnIYc3iG/mA= golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/internal/anonymizer/anonymizer.go b/internal/anonymizer/anonymizer.go index f095155..aadb22c 100644 --- a/internal/anonymizer/anonymizer.go +++ b/internal/anonymizer/anonymizer.go @@ -3,19 +3,25 @@ package anonymizer import ( "fmt" "log/slog" + "regexp" "strings" "github.com/logmanager-oss/logveil/internal/config" + "github.com/logmanager-oss/logveil/internal/generator" "github.com/logmanager-oss/logveil/internal/loader" + "github.com/logmanager-oss/logveil/internal/lookup" "github.com/logmanager-oss/logveil/internal/proof" "golang.org/x/exp/rand" ) // Anonymizer represents an object responsible for anonymizing indivisual log lines feed to it. It contains anonymization data which will be used to anonymize input and a random number generator funtion used to select values from anonymization data. 
type Anonymizer struct { - anonData map[string][]string - randFunc func(int) int - proofWriter *proof.ProofWriter + anonData map[string][]string + randFunc func(int) int + proofWriter *proof.ProofWriter + lookup *lookup.Lookup + generator *generator.Generator + replacementMap map[string]string } func CreateAnonymizer(config *config.Config, proofWriter *proof.ProofWriter) (*Anonymizer, error) { @@ -28,12 +34,33 @@ func CreateAnonymizer(config *config.Config, proofWriter *proof.ProofWriter) (*A anonData: anonymizingData, randFunc: rand.Intn, proofWriter: proofWriter, + lookup: lookup.New(), + generator: &generator.Generator{}, }, nil } func (an *Anonymizer) Anonymize(logLine map[string]string) string { - defer an.proofWriter.Flush() + an.replacementMap = make(map[string]string) + an.loadAndReplace(logLine) + an.generateAndReplace(logLine["raw"], an.lookup.ValidIpv4, an.generator.GenerateRandomIPv4()) + an.generateAndReplace(logLine["raw"], an.lookup.ValidIpv6, an.generator.GenerateRandomIPv6()) + an.generateAndReplace(logLine["raw"], an.lookup.ValidMac, an.generator.GenerateRandomMac()) + an.generateAndReplace(logLine["raw"], an.lookup.ValidEmail, an.generator.GenerateRandomEmail()) + an.generateAndReplace(logLine["raw"], an.lookup.ValidUrl, an.generator.GenerateRandomUrl()) + + an.proofWriter.Write(an.replacementMap) + an.proofWriter.Flush() + + return an.replace(logLine["raw"]) +} + +// SetRandFunc sets the function used by Anonymize() to select values from anonymization data at random +func (an *Anonymizer) SetRandFunc(randFunc func(int) int) { + an.randFunc = randFunc +} + +func (an *Anonymizer) loadAndReplace(logLine map[string]string) { for field, value := range logLine { if field == "raw" { continue @@ -43,21 +70,35 @@ func (an *Anonymizer) Anonymize(logLine map[string]string) string { continue } + if _, ok := an.replacementMap[value]; ok { + continue + } + if anonValues, exists := an.anonData[field]; exists { newAnonValue := 
anonValues[an.randFunc(len(anonValues))] - - an.proofWriter.Write(value, newAnonValue) + an.replacementMap[value] = newAnonValue slog.Debug(fmt.Sprintf("Replacing the values for field %s. From %s to %s.\n", field, value, newAnonValue)) - - logLine["raw"] = strings.Replace(logLine["raw"], value, newAnonValue, -1) } } +} + +func (an *Anonymizer) generateAndReplace(rawLog string, regexp *regexp.Regexp, generatedData string) { + values := regexp.FindAllString(rawLog, -1) + + for _, value := range values { + if _, ok := an.replacementMap[value]; ok { + continue + } - return logLine["raw"] + an.replacementMap[value] = generatedData + } } -// SetRandFunc sets the function used by Anonymize() to select values from anonymization data at random -func (an *Anonymizer) SetRandFunc(randFunc func(int) int) { - an.randFunc = randFunc +func (an *Anonymizer) replace(rawLog string) string { + for oldValue, newValue := range an.replacementMap { + rawLog = strings.ReplaceAll(rawLog, oldValue, newValue) + } + + return rawLog } diff --git a/internal/anonymizer/anonymizer_test.go b/internal/anonymizer/anonymizer_test.go index 16f4a60..30efd6d 100644 --- a/internal/anonymizer/anonymizer_test.go +++ b/internal/anonymizer/anonymizer_test.go @@ -1,8 +1,10 @@ package anonymizer import ( + "math/rand" "testing" + "github.com/go-faker/faker/v4" "github.com/logmanager-oss/logveil/internal/config" "github.com/logmanager-oss/logveil/internal/proof" "github.com/stretchr/testify/assert" @@ -18,8 +20,18 @@ func TestAnonimizer_AnonymizeData(t *testing.T) { { name: "Test AnonymizeData", anonymizingDataDir: "../../tests/data/anonymization_data", - input: map[string]string{"@timestamp": "2024-06-05T14:59:27.000+00:00", "src_ip": "10.10.10.1", "username": "miloslav.illes", "organization": "Microsoft", "raw": "2024-06-05T14:59:27.000+00:00, 10.10.10.1, miloslav.illes, Microsoft"}, - expectedOutput: "2024-06-05T14:59:27.000+00:00, 10.20.0.53, ladislav.dosek, Apple", + input: map[string]string{ + 
"@timestamp": "2024-06-05T14:59:27.000+00:00", + "src_ip": "10.10.10.1", + "src_ipv6": "7f1d:64ed:536a:1fd7:fe8e:cc29:9df4:7911", + "mac": "71:e5:41:18:cb:3e", + "email": "test@test.com", + "url": "https://www.testurl.com", + "username": "miloslav.illes", + "organization": "Microsoft", + "raw": "2024-06-05T14:59:27.000+00:00, 10.10.10.1, 7f1d:64ed:536a:1fd7:fe8e:cc29:9df4:7911, miloslav.illes, Microsoft, 71:e5:41:18:cb:3e, test@test.com, https://www.testurl.com", + }, + expectedOutput: "2024-06-05T14:59:27.000+00:00, 10.20.0.53, 8186:39ac:48a4:c6af:a2f1:581a:8b95:25e2, ladislav.dosek, Apple, 0f:da:68:92:7f:2b, QHtPwsw@RJSkoHl.top, http://soqovkq.com/NfkcUjG.php", }, } @@ -31,6 +43,7 @@ func TestAnonimizer_AnonymizeData(t *testing.T) { } // Disabling randomization so we know which values to expect anonymizer.SetRandFunc(func(int) int { return 1 }) + faker.SetRandomSource(rand.NewSource(1)) output := anonymizer.Anonymize(tt.input) assert.Equal(t, tt.expectedOutput, output) diff --git a/internal/generator/generator.go b/internal/generator/generator.go new file mode 100644 index 0000000..12c1a8f --- /dev/null +++ b/internal/generator/generator.go @@ -0,0 +1,27 @@ +package generator + +import ( + "github.com/go-faker/faker/v4" +) + +type Generator struct{} + +func (g *Generator) GenerateRandomIPv4() string { + return faker.IPv4() +} + +func (g *Generator) GenerateRandomIPv6() string { + return faker.IPv6() +} + +func (g *Generator) GenerateRandomMac() string { + return faker.MacAddress() +} + +func (g *Generator) GenerateRandomEmail() string { + return faker.Email() +} + +func (g *Generator) GenerateRandomUrl() string { + return faker.URL() +} diff --git a/internal/lookup/lookup.go b/internal/lookup/lookup.go new file mode 100644 index 0000000..bc2c44b --- /dev/null +++ b/internal/lookup/lookup.go @@ -0,0 +1,23 @@ +package lookup + +import ( + "regexp" +) + +type Lookup struct { + ValidIpv4 *regexp.Regexp + ValidIpv6 *regexp.Regexp + ValidMac *regexp.Regexp + ValidEmail 
*regexp.Regexp + ValidUrl *regexp.Regexp +} + +func New() *Lookup { + return &Lookup{ + ValidIpv4: regexp.MustCompile(`((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.?\b){4}`), + ValidIpv6: regexp.MustCompile(`(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))`), + ValidMac: regexp.MustCompile(`([0-9A-Fa-f]{2}[:-]){5}([0-9A-Fa-f]{2})`), + ValidEmail: regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*"), + ValidUrl: regexp.MustCompile(`https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)`), + } +} diff --git a/internal/proof/proof.go b/internal/proof/proof.go index 1ab7539..461d15c 100644 --- a/internal/proof/proof.go +++ b/internal/proof/proof.go @@ -35,27 +35,29 @@ func CreateProofWriter(config *config.Config, openFiles *files.FilesHandler) (*P return &ProofWriter{IsEnabled: false}, nil } -func (p *ProofWriter) Write(originalValue string, maskedValue string) { +func (p *ProofWriter) Write(replacementMap map[string]string) { if !p.IsEnabled { return } - proof := struct { - OriginalValue string `json:"original"` - MaskedValue string `json:"new"` - }{ - OriginalValue: originalValue, - MaskedValue: maskedValue, - } + for originalValue, newValue := range replacementMap { + proof := struct { + OriginalValue string `json:"original"` + 
NewValue string `json:"new"` + }{ + OriginalValue: originalValue, + NewValue: newValue, + } - bytes, err := json.Marshal(proof) - if err != nil { - slog.Error("marshalling anonymisation proof", "error", err) - } + bytes, err := json.Marshal(proof) + if err != nil { + slog.Error("marshalling anonymisation proof", "error", err) + } - _, err = fmt.Fprintf(p.writer, "%s\n", bytes) - if err != nil { - slog.Error("writing anonymisation proof", "error", err) + _, err = fmt.Fprintf(p.writer, "%s\n", bytes) + if err != nil { + slog.Error("writing anonymisation proof", "error", err) + } } } diff --git a/internal/proof/proof_test.go b/internal/proof/proof_test.go index cd226bb..c2aaf42 100644 --- a/internal/proof/proof_test.go +++ b/internal/proof/proof_test.go @@ -15,22 +15,23 @@ func TestProof_Write(t *testing.T) { tests := []struct { name string isProofWriter bool - originalValue string - maskedValue string + replacementMap map[string]string expectedOutput string }{ { - name: "Test case 1: write proof", - isProofWriter: true, - originalValue: "test", - maskedValue: "masked", + name: "Test case 1: write proof", + isProofWriter: true, + replacementMap: map[string]string{ + "test": "masked", + }, expectedOutput: "{\"original\":\"test\",\"new\":\"masked\"}\n", }, { - name: "Test case 2: proof writer disabled", - isProofWriter: false, - originalValue: "test", - maskedValue: "masked", + name: "Test case 2: proof writer disabled", + isProofWriter: false, + replacementMap: map[string]string{ + "test": "masked", + }, expectedOutput: "", }, } @@ -44,7 +45,7 @@ func TestProof_Write(t *testing.T) { t.Fatal(err) } - p.Write(tt.originalValue, tt.maskedValue) + p.Write(tt.replacementMap) p.Flush() file, err := os.OpenFile("proof.json", os.O_RDWR|os.O_CREATE, 0644) diff --git a/tests/integration_test.go b/tests/integration_test.go index 9aebe5b..c41d424 100644 --- a/tests/integration_test.go +++ b/tests/integration_test.go @@ -4,9 +4,11 @@ import ( "bufio" "bytes" "encoding/json" + 
"math/rand" "os" "testing" + "github.com/go-faker/faker/v4" "github.com/logmanager-oss/logveil/cmd/logveil" "github.com/logmanager-oss/logveil/internal/anonymizer" "github.com/logmanager-oss/logveil/internal/config" @@ -24,19 +26,21 @@ func TestLogVeil_IntegrationTest(t *testing.T) { expectedProof []map[string]interface{} }{ { - name: "Test Test LM Backup Anonymizer", + name: "Test LM Backup Anonymizer", config: &config.Config{ AnonymizationDataPath: "data/anonymization_data", InputPath: "data/lm_backup_test_input.gz", IsLmExport: false, IsProofWriter: true, }, - expectedOutput: "<189>date=2024-11-06 time=12:29:25 devname=\"LM-FW-70F-Praha\" devid=\"FGT70FTK22012016\" eventtime=1730892565525108329 tz=\"+0100\" logid=\"0000000013\" type=\"traffic\" subtype=\"forward\" level=\"notice\" vd=\"root\" srcip=10.20.0.53 srcport=57158 srcintf=\"lan1\" srcintfrole=\"wan\" dstip=227.51.221.89 dstport=80 dstintf=\"lan1\" dstintfrole=\"lan\" srccountry=\"China\" dstcountry=\"Czech Republic\" sessionid=179455916 proto=6 action=\"client-rst\" policyid=9 policytype=\"policy\" poluuid=\"d8ccb3e4-74d4-51ef-69a3-73b41f46df74\" policyname=\"Gitlab web from all\" service=\"HTTP\" trandisp=\"noop\" duration=6 sentbyte=80 rcvdbyte=44 sentpkt=2 rcvdpkt=1 appcat=\"unscanned\" srchwvendor=\"H3C\" devtype=\"Router\" mastersrcmac=\"00:23:89:39:a4:ef\" srcmac=\"00:23:89:39:a4:ef\" srcserver=0 dsthwvendor=\"H3C\" dstdevtype=\"Router\" masterdstmac=\"00:23:89:39:a4:fa\" dstmac=\"00:23:89:39:a4:fa\" dstserver=0\n", + expectedOutput: "<189>date=2024-11-06 time=12:29:25 devname=\"LM-FW-70F-Praha\" devid=\"FGT70FTK22012016\" eventtime=1730892565525108329 tz=\"+0100\" logid=\"0000000013\" type=\"traffic\" subtype=\"forward\" level=\"notice\" vd=\"root\" srcip=10.20.0.53 srcport=57158 srcintf=\"lan1\" srcintfrole=\"wan\" dstip=227.51.221.89 dstport=80 dstintf=\"lan1\" dstintfrole=\"lan\" srccountry=\"China\" dstcountry=\"Czech Republic\" sessionid=179455916 proto=6 action=\"client-rst\" policyid=9 
policytype=\"policy\" poluuid=\"d8ccb3e4-74d4-51ef-69a3-73b41f46df74\" policyname=\"Gitlab web from all\" service=\"HTTP\" trandisp=\"noop\" duration=6 sentbyte=80 rcvdbyte=44 sentpkt=2 rcvdpkt=1 appcat=\"unscanned\" srchwvendor=\"H3C\" devtype=\"Router\" mastersrcmac=\"0f:da:68:92:7f:2b\" srcmac=\"0f:da:68:92:7f:2b\" srcserver=0 dsthwvendor=\"H3C\" dstdevtype=\"Router\" masterdstmac=\"0f:da:68:92:7f:2b\" dstmac=\"0f:da:68:92:7f:2b\" dstserver=0\n", expectedProof: []map[string]interface{}{ {"original": "dev-uplink", "new": "lan1"}, - {"original": "95.80.197.108", "new": "227.51.221.89"}, - {"original": "27.221.126.209", "new": "10.20.0.53"}, {"original": "wan1-lm", "new": "lan1"}, + {"original": "00:23:89:39:a4:ef", "new": "0f:da:68:92:7f:2b"}, + {"original": "00:23:89:39:a4:fa", "new": "0f:da:68:92:7f:2b"}, + {"original": "27.221.126.209", "new": "10.20.0.53"}, + {"original": "95.80.197.108", "new": "227.51.221.89"}, }, }, { @@ -49,9 +53,9 @@ func TestLogVeil_IntegrationTest(t *testing.T) { }, expectedOutput: "{\"@timestamp\": \"2024-06-05T14:59:27.000+00:00\", \"msg.src_ip\":\"10.20.0.53\", \"username\":\"ladislav.dosek\", \"organization\":\"Apple\"}\n", expectedProof: []map[string]interface{}{ - {"original": "89.239.31.49", "new": "10.20.0.53"}, {"original": "test.user@test.cz", "new": "ladislav.dosek"}, {"original": "TESTuser.test.com", "new": "Apple"}, + {"original": "89.239.31.49", "new": "10.20.0.53"}, }, }, } @@ -80,6 +84,7 @@ func TestLogVeil_IntegrationTest(t *testing.T) { } // Disabling randomization so we know which values to expect anonymizer.SetRandFunc(func(int) int { return 1 }) + faker.SetRandomSource(rand.NewSource(1)) err = logveil.RunAnonymizationLoop(inputReader, outputWriter, anonymizer) if err != nil {