diff --git a/internal/anonymizer/anonymizer.go b/internal/anonymizer/anonymizer.go index d808429..0831738 100644 --- a/internal/anonymizer/anonymizer.go +++ b/internal/anonymizer/anonymizer.go @@ -6,12 +6,13 @@ import ( "maps" "regexp" + "math/rand/v2" + "github.com/logmanager-oss/logveil/internal/config" "github.com/logmanager-oss/logveil/internal/generator" "github.com/logmanager-oss/logveil/internal/loader" "github.com/logmanager-oss/logveil/internal/lookup" "github.com/logmanager-oss/logveil/internal/proof" - "golang.org/x/exp/rand" ) // Anonymizer represents an object responsible for anonymizing indivisual log lines feed to it. It contains anonymization data which will be used to anonymize input and a random number generator funtion used to select values from anonymization data. @@ -39,7 +40,7 @@ func CreateAnonymizer(config *config.Config, proofWriter *proof.ProofWriter) (*A return &Anonymizer{ anonymizationData: anonymizationData, replacementMap: customReplacementMap, - randFunc: rand.Intn, + randFunc: rand.IntN, proofWriter: proofWriter, lookup: lookup.New(), generator: &generator.Generator{}, @@ -64,11 +65,6 @@ func (an *Anonymizer) Anonymize(logLine map[string]string) string { return an.replace(logLineRaw, replacementMap) } -// SetRandFunc sets the function used by Anonymize() to select values from anonymization data at random -func (an *Anonymizer) SetRandFunc(randFunc func(int) int) { - an.randFunc = randFunc -} - func (an *Anonymizer) loadAndReplace(logLine map[string]string, replacementMap map[string]string) map[string]string { for field, value := range logLine { if field == "raw" { diff --git a/internal/anonymizer/anonymizer_test.go b/internal/anonymizer/anonymizer_test.go index e062480..f18e0dc 100644 --- a/internal/anonymizer/anonymizer_test.go +++ b/internal/anonymizer/anonymizer_test.go @@ -1,13 +1,15 @@ package anonymizer import ( - "math/rand" + "net" + "net/mail" + "net/url" + "slices" + "strings" "testing" - "github.com/go-faker/faker/v4" "github.com/logmanager-oss/logveil/internal/config" "github.com/logmanager-oss/logveil/internal/proof" - "github.com/stretchr/testify/assert" ) func TestAnonimizer_AnonymizeData(t *testing.T) { @@ -34,25 +36,6 @@ func TestAnonimizer_AnonymizeData(t *testing.T) { "custom:": "replacement_test", "raw": "2024-06-05T14:59:27.000+00:00, 10.10.10.1, 7f1d:64ed:536a:1fd7:fe8e:cc29:9df4:7911, miloslav.illes, Microsoft, 71:e5:41:18:cb:3e, test@test.com, https://www.testurl.com, replace_this", }, - expectedOutput: "2024-06-05T14:59:27.000+00:00, 10.20.0.53, 8186:39ac:48a4:c6af:a2f1:581a:8b95:25e2, ladislav.dosek, Apple, 0f:da:68:92:7f:2b, QHtPwsw@RJSkoHl.top, http://soqovkq.com/NfkcUjG.php, with_that", - }, - { - name: "Test AnonymizeData - with persisten replacement map", - anonymizationDataDir: "../../tests/data/anonymization_data", - customAnonymizationMappingPath: "../../tests/data/custom_mappings.txt", - input: map[string]string{ - "@timestamp": "2024-06-05T14:59:27.000+00:00", - "src_ip": "10.10.10.1", - "src_ipv6": "7f1d:64ed:536a:1fd7:fe8e:cc29:9df4:7911", - "mac": "71:e5:41:18:cb:3e", - "email": "atest@atest.com", - "url": "https://www.testurl.com", - "username": "miloslav.illes", - "organization": "Microsoft", - "custom:": "replacement_test", - "raw": "2024-06-05T14:59:27.000+00:00, 10.10.10.1, 7f1d:64ed:536a:1fd7:fe8e:cc29:9df4:7911, miloslav.illes, Microsoft, 71:e5:41:18:cb:3e, test@test.com, https://www.testurl.com, replace_this", - }, - expectedOutput: "2024-06-05T14:59:27.000+00:00, 10.20.0.53, 8186:39ac:48a4:c6af:a2f1:581a:8b95:25e2, ladislav.dosek, Apple, 0f:da:68:92:7f:2b, QHtPwsw@RJSkoHl.top, http://soqovkq.com/NfkcUjG.php, with_that", }, } @@ -68,12 +51,56 @@ func TestAnonimizer_AnonymizeData(t *testing.T) { if err != nil { t.Fatal(err) } - // Disabling randomization so we know which values to expect - anonymizer.SetRandFunc(func(int) int { return 1 }) - faker.SetRandomSource(rand.NewSource(1)) output := anonymizer.Anonymize(tt.input) - assert.Equal(t, tt.expectedOutput, output) + // Verify each part of the output individually - Is generated value valid in terms of its type and not the same as input? + parts := strings.Split(output, ", ") + + ipv4 := net.ParseIP(parts[1]) + if ipv4 == nil || ipv4.String() == tt.input["src_ip"] { + t.Fatalf("invalid IPv4 generated or it didn't got replaced at all: %s", parts[1]) + } + + ipv6 := net.ParseIP(parts[2]) + if ipv6 == nil || ipv6.String() == tt.input["src_ip"] { + t.Fatalf("invalid IPv6 generated or it didn't got replaced at all: %s", parts[2]) + } + + if !slices.Contains(anonymizer.anonymizationData["username"], parts[3]) || parts[3] == tt.input["username"] { + t.Fatalf("invalid username or it didn't got replaced at all: %s", parts[3]) + } + + if !slices.Contains(anonymizer.anonymizationData["organization"], parts[4]) || parts[4] == tt.input["organization"] { + t.Fatalf("invalid organization or it didn't got replaced at all: %s", parts[4]) + } + + mac, err := net.ParseMAC(parts[5]) + if err != nil { + t.Fatalf("invalid MAC generated: %s", parts[5]) + } + if mac.String() == tt.input["mac"] { + t.Fatalf("MAC not replaced at all") + } + + email, err := mail.ParseAddress(parts[6]) + if err != nil { + t.Fatalf("invalid email generated: %s", parts[6]) + } + if email.Address == tt.input["email"] { + t.Fatalf("email not replaced at all") + } + + url, err := url.ParseRequestURI(parts[7]) + if err != nil { + t.Fatalf("invalid url generated: %s", parts[7]) + } + if url.String() == tt.input["url"] { + t.Fatalf("url not replaced at all") + } + + if parts[8] != anonymizer.replacementMap["replace_this"] { + t.Fatalf("custom replacement didn't work") + } }) } } diff --git a/tests/data/anonymization_data/organization b/tests/data/anonymization_data/organization index 084560a..ffafa76 100644 --- a/tests/data/anonymization_data/organization +++ b/tests/data/anonymization_data/organization @@ -1,4 +1,5 @@ -Microsoft +Google Apple H&P IBM +Cisco diff --git a/tests/integration_test.go b/tests/integration_test.go deleted file mode 100644 index 38d9c02..0000000 --- a/tests/integration_test.go +++ /dev/null @@ -1,139 +0,0 @@ -package testing - -import ( - "bufio" - "bytes" - "encoding/json" - "math/rand" - "os" - "testing" - - "github.com/go-faker/faker/v4" - "github.com/logmanager-oss/logveil/cmd/logveil" - "github.com/logmanager-oss/logveil/internal/anonymizer" - "github.com/logmanager-oss/logveil/internal/config" - "github.com/logmanager-oss/logveil/internal/files" - "github.com/logmanager-oss/logveil/internal/proof" - "github.com/logmanager-oss/logveil/internal/reader" - "github.com/stretchr/testify/assert" -) - -func TestLogVeil_IntegrationTest(t *testing.T) { - tests := []struct { - name string - config *config.Config - expectedOutput string - expectedProof []map[string]interface{} - }{ - { - name: "Test LM Backup Anonymizer", - config: &config.Config{ - AnonymizationDataPath: "data/anonymization_data", - CustomReplacementMapPath: "data/custom_mappings.txt", - InputPath: "data/lm_backup_test_input.gz", - IsLmExport: false, - IsProofWriter: true, - }, - expectedOutput: "<189>date=2024-11-06 time=12:29:25 devname=\"LM-FW-70F-Praha\" devid=\"FGT70FTK22012016\" eventtime=1730892565525108329 tz=\"+0100\" logid=\"0000000013\" type=\"traffic\" subtype=\"forward\" level=\"notice\" vd=\"root\" srcip=10.20.0.53 srcport=57158 srcintf=\"lan1\" srcintfrole=\"wan\" dstip=227.51.221.89 dstport=80 dstintf=\"lan1\" dstintfrole=\"lan\" srccountry=\"China\" dstcountry=\"Czech Republic\" sessionid=179455916 proto=6 action=\"client-rst\" policyid=9 policytype=\"policy\" poluuid=\"d8ccb3e4-74d4-51ef-69a3-73b41f46df74\" policyname=\"Gitlab web from all\" service=\"HTTP\" trandisp=\"noop\" duration=6 sentbyte=80 rcvdbyte=44 sentpkt=2 rcvdpkt=1 appcat=\"unscanned\" srchwvendor=\"H3C\" devtype=\"Router\" mastersrcmac=\"0f:da:68:92:7f:2b\" srcmac=\"0f:da:68:92:7f:2b\" srcserver=0 dsthwvendor=\"H3C\" dstdevtype=\"Router\" masterdstmac=\"0f:da:68:92:7f:2b\" dstmac=\"0f:da:68:92:7f:2b\" dstserver=0\n", - expectedProof: []map[string]interface{}{ - {"original": "dev-uplink", "new": "lan1"}, - {"original": "00:23:89:39:a4:fa", "new": "0f:da:68:92:7f:2b"}, - {"original": "27.221.126.209", "new": "10.20.0.53"}, - {"original": "wan1-lm", "new": "lan1"}, - {"original": "00:23:89:39:a4:ef", "new": "0f:da:68:92:7f:2b"}, - {"original": "95.80.197.108", "new": "227.51.221.89"}, - }, - }, - { - name: "Test LM Export Anonymizer", - config: &config.Config{ - AnonymizationDataPath: "data/anonymization_data", - CustomReplacementMapPath: "data/custom_mappings.txt", - InputPath: "data/lm_export_test_input.csv", - IsLmExport: true, - IsProofWriter: true, - }, - expectedOutput: "{\"@timestamp\": \"2024-06-05T14:59:27.000+00:00\", \"msg.src_ip\":\"10.20.0.53\", \"username\":\"ladislav.dosek\", \"organization\":\"Apple\", \"replacement_test\":\"with_that\"}\n", - expectedProof: []map[string]interface{}{ - {"original": "replace_this", "new": "with_that"}, - {"original": "89.239.31.49", "new": "10.20.0.53"}, - {"original": "test.user@test.cz", "new": "ladislav.dosek"}, - {"original": "TESTuser.test.com", "new": "Apple"}, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - filesHandler := &files.FilesHandler{} - defer filesHandler.Close() - - inputReader, err := reader.CreateInputReader(tt.config, filesHandler) - if err != nil { - t.Fatal(err) - } - - var output bytes.Buffer - outputWriter := bufio.NewWriter(&output) - - proofWriter, err := proof.CreateProofWriter(tt.config, filesHandler) - if err != nil { - t.Fatal(err) - } - - anonymizer, err := anonymizer.CreateAnonymizer(tt.config, proofWriter) - if err != nil { - t.Fatal(err) - } - // Disabling randomization so we know which values to expect - anonymizer.SetRandFunc(func(int) int { return 1 }) - faker.SetRandomSource(rand.NewSource(1)) - - err = logveil.RunAnonymizationLoop(inputReader, outputWriter, anonymizer) - if err != nil { - t.Fatal(err) - } - - assert.Equal(t, tt.expectedOutput, output.String()) - - proofWriter.Flush() - actualProof, err := unpackProofOutput() - if err != nil { - t.Fatal(err) - } - - assert.ElementsMatch(t, tt.expectedProof, actualProof) - - err = os.Remove(proof.ProofFilename) - if err != nil { - t.Fatal(err) - } - }) - } -} - -func unpackProofOutput() ([]map[string]interface{}, error) { - outputFile, err := os.OpenFile(proof.ProofFilename, os.O_RDWR|os.O_CREATE, 0644) - if err != nil { - return nil, err - } - - var output []map[string]interface{} - scanner := bufio.NewScanner(outputFile) - for scanner.Scan() { - var unpackedLine map[string]interface{} - line := scanner.Bytes() - err := json.Unmarshal(line, &unpackedLine) - if err != nil { - return nil, err - } - output = append(output, unpackedLine) - } - - err = scanner.Err() - if err != nil { - return nil, err - } - - return output, nil -}