From 3641ebdd6ca5721bd80e5d9e0b6f3b74c5ac5981 Mon Sep 17 00:00:00 2001 From: solnicki Date: Wed, 20 Nov 2024 10:15:38 +0100 Subject: [PATCH] implement regexp scan and data generation --- README.md | 30 +++++--- go.mod | 3 + go.sum | 4 ++ internal/anonymizer/anonymizer.go | 67 ++++++++++++++---- internal/anonymizer/anonymizer_test.go | 17 ++++- internal/generator/generator.go | 27 +++++++ internal/lookup/constants.go | 9 +++ internal/lookup/lookup.go | 23 ++++++ internal/proof/proof.go | 32 +++++---- internal/proof/proof_test.go | 23 +++--- internal/reader/backup.go | 10 +++ internal/reader/backup_test.go | 23 +++++- internal/reader/export.go | 10 +++ internal/reader/export_test.go | 25 +++++++ tests/data/lm_backup_test_input_raw_empty.gz | Bin 0 -> 1023 bytes .../data/lm_backup_test_input_raw_missing.gz | Bin 0 -> 993 bytes tests/data/lm_export_test_input_raw_empty.csv | 2 + .../data/lm_export_test_input_raw_missing.csv | 2 + tests/integration_test.go | 15 ++-- 19 files changed, 268 insertions(+), 54 deletions(-) create mode 100644 internal/generator/generator.go create mode 100644 internal/lookup/constants.go create mode 100644 internal/lookup/lookup.go create mode 100644 tests/data/lm_backup_test_input_raw_empty.gz create mode 100644 tests/data/lm_backup_test_input_raw_missing.gz create mode 100644 tests/data/lm_export_test_input_raw_empty.csv create mode 100644 tests/data/lm_export_test_input_raw_missing.csv diff --git a/README.md b/README.md index ede46da..e633da5 100644 --- a/README.md +++ b/README.md @@ -63,10 +63,12 @@ Usage of ./logveil: ### How it works +**This is only a simplified example and does not match 1:1 with how anonymization is actually implemented** + Consider below log line. It is formatted in a common `key:value` format. 
``` -{"@timestamp": "2024-06-05T14:59:27.000+00:00", "src_ip":"89.239.31.49", "username":"test.user@test.cz", "organization":"TESTuser.test.com"} +{"@timestamp": "2024-06-05T14:59:27.000+00:00", "src_ip":"89.239.31.49", "username":"test.user@test.cz", "organization":"TESTuser.test.com", "mac": "71:e5:41:18:cb:3e"} ``` First, LogVeil will load anonymization data from supplied directory (`-d example_anon_data/`). Each file in that folder should be named according to the values it will be masking. For example, lets assume we have following directory structure: @@ -80,32 +82,44 @@ Next, LogVeil will go over each log line in supplied input and extract `key:valu 2. `"src_ip":"89.239.31.49"` 3. `"username":"test.user@test.cz"` 4. `"organization":"TESTuser.test.com"` +5. `"mac": "71:e5:41:18:cb:3e"` Then, LogVeil will try to match extracted pairs to anonymization data it loaded in previous step. Two paris should be matched: -1. `"username":"test.user@test.cz"` with `username.txt` -2. `"organization":"TESTuser.test.com"` with `organization.txt` +1. `"src_ip":"89.239.31.49"` with `src_ip.txt` +2. `"username":"test.user@test.cz"` with `username.txt` +3. `"organization":"TESTuser.test.com"` with `organization.txt` + +And one pair should be matched by regular expression scanning: -Now LogVeil will grab a random values from files which filenames matched with keys and replace original values with them. Outcome should look like this: +1. `"mac": "71:e5:41:18:cb:3e"` -1. `"username":"ladislav.dosek"` -2. `"organization":"Apple"` +Now LogVeil will randomly pick values from the files whose names matched the keys, generate a new value for the `mac` key, and create a replacement map in the format `"original_value":"new_value"`: -And thats it. Now anonymized log can be written to output along with anonymization proof: +1. `"89.239.31.49":"10.20.0.53"` +2. `"test.user@test.cz":"ladislav.dosek"` +3. `"TESTuser.test.com":"Apple"` +4. 
`"71:e5:41:18:cb:3e": "0f:da:68:92:7f:2b"` + +Now each element from the above list will be iterated over and compared against the log line. Whenever `original_value` is found, it will be replaced with `new_value`. Outcome should look like this: ``` -{"@timestamp": "2024-06-05T14:59:27.000+00:00", "src_ip":"89.239.31.49", "username":"ladislav.dosek", "organization":"Apple"} +{"@timestamp": "2024-06-05T14:59:27.000+00:00", "src_ip":"10.20.0.53", "username":"ladislav.dosek", "organization":"Apple", "mac": "0f:da:68:92:7f:2b"} ``` ``` +{"original": "89.239.31.49", "new": "10.20.0.53"} "{"original":"test.user@test.cz","new":"ladislav.dosek"}" "{"original":"TESTuser.test.com","new":"Apple"}" +{"original": "71:e5:41:18:cb:3e", "new": "0f:da:68:92:7f:2b"} ``` ### Anonymization data Each `key:value` pair which you want to anonymize data must have its equivalent in anonymization data folder. +If anonymization data does not exist for any given `key:value` pair then LogVeil will attempt to use regular expressions to match and replace common values such as IPv4, IPv6 and MAC addresses, email addresses and URLs. + For example, if you want to anonymize values in `organization` and `username` keys, you need to have two files of the same name in anonymization folder containing some random data. 
### Output diff --git a/go.mod b/go.mod index 662393f..fc7b61b 100644 --- a/go.mod +++ b/go.mod @@ -4,8 +4,11 @@ go 1.22.5 require github.com/stretchr/testify v1.9.0 +require golang.org/x/text v0.16.0 // indirect + require ( github.com/davecgh/go-spew v1.1.1 // indirect + github.com/go-faker/faker/v4 v4.5.0 github.com/pmezard/go-difflib v1.0.0 // indirect golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7 gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index fd1583e..85fb57e 100644 --- a/go.sum +++ b/go.sum @@ -1,11 +1,15 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-faker/faker/v4 v4.5.0 h1:ARzAY2XoOL9tOUK+KSecUQzyXQsUaZHefjyF8x6YFHc= +github.com/go-faker/faker/v4 v4.5.0/go.mod h1:p3oq1GRjG2PZ7yqeFFfQI20Xm61DoBDlCA8RiSyZ48M= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7 h1:wDLEX9a7YQoKdKNQt88rtydkqDxeGaBUTnIYc3iG/mA= golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/internal/anonymizer/anonymizer.go b/internal/anonymizer/anonymizer.go index f095155..98622bf 
100644 --- a/internal/anonymizer/anonymizer.go +++ b/internal/anonymizer/anonymizer.go @@ -3,19 +3,25 @@ package anonymizer import ( "fmt" "log/slog" + "regexp" "strings" "github.com/logmanager-oss/logveil/internal/config" + "github.com/logmanager-oss/logveil/internal/generator" "github.com/logmanager-oss/logveil/internal/loader" + "github.com/logmanager-oss/logveil/internal/lookup" "github.com/logmanager-oss/logveil/internal/proof" "golang.org/x/exp/rand" ) // Anonymizer represents an object responsible for anonymizing indivisual log lines feed to it. It contains anonymization data which will be used to anonymize input and a random number generator funtion used to select values from anonymization data. type Anonymizer struct { - anonData map[string][]string - randFunc func(int) int - proofWriter *proof.ProofWriter + anonData map[string][]string + randFunc func(int) int + proofWriter *proof.ProofWriter + lookup *lookup.Lookup + generator *generator.Generator + replacementMap map[string]string } func CreateAnonymizer(config *config.Config, proofWriter *proof.ProofWriter) (*Anonymizer, error) { @@ -28,12 +34,35 @@ func CreateAnonymizer(config *config.Config, proofWriter *proof.ProofWriter) (*A anonData: anonymizingData, randFunc: rand.Intn, proofWriter: proofWriter, + lookup: lookup.New(), + generator: &generator.Generator{}, }, nil } func (an *Anonymizer) Anonymize(logLine map[string]string) string { - defer an.proofWriter.Flush() + an.replacementMap = make(map[string]string) + an.loadAndReplace(logLine) + + logLineRaw := logLine["raw"] + an.generateAndReplace(logLineRaw, an.lookup.ValidIpv4, an.generator.GenerateRandomIPv4()) + an.generateAndReplace(logLineRaw, an.lookup.ValidIpv6, an.generator.GenerateRandomIPv6()) + an.generateAndReplace(logLineRaw, an.lookup.ValidMac, an.generator.GenerateRandomMac()) + an.generateAndReplace(logLineRaw, an.lookup.ValidEmail, an.generator.GenerateRandomEmail()) + an.generateAndReplace(logLineRaw, an.lookup.ValidUrl, 
an.generator.GenerateRandomUrl()) + + an.proofWriter.Write(an.replacementMap) + an.proofWriter.Flush() + + return an.replace(logLineRaw) +} + +// SetRandFunc sets the function used by Anonymize() to select values from anonymization data at random +func (an *Anonymizer) SetRandFunc(randFunc func(int) int) { + an.randFunc = randFunc +} + +func (an *Anonymizer) loadAndReplace(logLine map[string]string) { for field, value := range logLine { if field == "raw" { continue @@ -43,21 +72,35 @@ func (an *Anonymizer) Anonymize(logLine map[string]string) string { continue } + if _, ok := an.replacementMap[value]; ok { + continue + } + if anonValues, exists := an.anonData[field]; exists { newAnonValue := anonValues[an.randFunc(len(anonValues))] - - an.proofWriter.Write(value, newAnonValue) + an.replacementMap[value] = newAnonValue slog.Debug(fmt.Sprintf("Replacing the values for field %s. From %s to %s.\n", field, value, newAnonValue)) - - logLine["raw"] = strings.Replace(logLine["raw"], value, newAnonValue, -1) } } +} + +func (an *Anonymizer) generateAndReplace(rawLog string, regexp *regexp.Regexp, generatedData string) { + values := regexp.FindAllString(rawLog, -1) + + for _, value := range values { + if _, ok := an.replacementMap[value]; ok { + continue + } - return logLine["raw"] + an.replacementMap[value] = generatedData + } } -// SetRandFunc sets the function used by Anonymize() to select values from anonymization data at random -func (an *Anonymizer) SetRandFunc(randFunc func(int) int) { - an.randFunc = randFunc +func (an *Anonymizer) replace(rawLog string) string { + for oldValue, newValue := range an.replacementMap { + rawLog = strings.ReplaceAll(rawLog, oldValue, newValue) + } + + return rawLog } diff --git a/internal/anonymizer/anonymizer_test.go b/internal/anonymizer/anonymizer_test.go index 16f4a60..30efd6d 100644 --- a/internal/anonymizer/anonymizer_test.go +++ b/internal/anonymizer/anonymizer_test.go @@ -1,8 +1,10 @@ package anonymizer import ( + "math/rand" 
"testing" + "github.com/go-faker/faker/v4" "github.com/logmanager-oss/logveil/internal/config" "github.com/logmanager-oss/logveil/internal/proof" "github.com/stretchr/testify/assert" @@ -18,8 +20,18 @@ func TestAnonimizer_AnonymizeData(t *testing.T) { { name: "Test AnonymizeData", anonymizingDataDir: "../../tests/data/anonymization_data", - input: map[string]string{"@timestamp": "2024-06-05T14:59:27.000+00:00", "src_ip": "10.10.10.1", "username": "miloslav.illes", "organization": "Microsoft", "raw": "2024-06-05T14:59:27.000+00:00, 10.10.10.1, miloslav.illes, Microsoft"}, - expectedOutput: "2024-06-05T14:59:27.000+00:00, 10.20.0.53, ladislav.dosek, Apple", + input: map[string]string{ + "@timestamp": "2024-06-05T14:59:27.000+00:00", + "src_ip": "10.10.10.1", + "src_ipv6": "7f1d:64ed:536a:1fd7:fe8e:cc29:9df4:7911", + "mac": "71:e5:41:18:cb:3e", + "email": "test@test.com", + "url": "https://www.testurl.com", + "username": "miloslav.illes", + "organization": "Microsoft", + "raw": "2024-06-05T14:59:27.000+00:00, 10.10.10.1, 7f1d:64ed:536a:1fd7:fe8e:cc29:9df4:7911, miloslav.illes, Microsoft, 71:e5:41:18:cb:3e, test@test.com, https://www.testurl.com", + }, + expectedOutput: "2024-06-05T14:59:27.000+00:00, 10.20.0.53, 8186:39ac:48a4:c6af:a2f1:581a:8b95:25e2, ladislav.dosek, Apple, 0f:da:68:92:7f:2b, QHtPwsw@RJSkoHl.top, http://soqovkq.com/NfkcUjG.php", }, } @@ -31,6 +43,7 @@ func TestAnonimizer_AnonymizeData(t *testing.T) { } // Disabling randomization so we know which values to expect anonymizer.SetRandFunc(func(int) int { return 1 }) + faker.SetRandomSource(rand.NewSource(1)) output := anonymizer.Anonymize(tt.input) assert.Equal(t, tt.expectedOutput, output) diff --git a/internal/generator/generator.go b/internal/generator/generator.go new file mode 100644 index 0000000..12c1a8f --- /dev/null +++ b/internal/generator/generator.go @@ -0,0 +1,27 @@ +package generator + +import ( + "github.com/go-faker/faker/v4" +) + +type Generator struct{} + +func (g *Generator) 
GenerateRandomIPv4() string { + return faker.IPv4() +} + +func (g *Generator) GenerateRandomIPv6() string { + return faker.IPv6() +} + +func (g *Generator) GenerateRandomMac() string { + return faker.MacAddress() +} + +func (g *Generator) GenerateRandomEmail() string { + return faker.Email() +} + +func (g *Generator) GenerateRandomUrl() string { + return faker.URL() +} diff --git a/internal/lookup/constants.go b/internal/lookup/constants.go new file mode 100644 index 0000000..9b130ac --- /dev/null +++ b/internal/lookup/constants.go @@ -0,0 +1,9 @@ +package lookup + +const ( + Ipv4Pattern = "((25[0-5]|(2[0-4]|1\\d|[1-9]|)\\d)\\.?\b){4}" + Ipv6Pattern = "(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))" + MacPattern = "([0-9A-Fa-f]{2}[:-]){5}([0-9A-Fa-f]{2})" + EmailPattern = "[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*" + UrlPattern = "https?:\\/\\/(www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b([-a-zA-Z0-9()@:%_\\+.~#?&//=]*)" +) diff --git a/internal/lookup/lookup.go b/internal/lookup/lookup.go new file mode 100644 index 0000000..a24164d --- /dev/null +++ b/internal/lookup/lookup.go @@ -0,0 +1,23 @@ +package lookup + +import ( + "regexp" +) + +type Lookup struct { + ValidIpv4 *regexp.Regexp + ValidIpv6 *regexp.Regexp + ValidMac *regexp.Regexp + ValidEmail 
*regexp.Regexp + ValidUrl *regexp.Regexp +} + +func New() *Lookup { + return &Lookup{ + ValidIpv4: regexp.MustCompile(Ipv4Pattern), + ValidIpv6: regexp.MustCompile(Ipv6Pattern), + ValidMac: regexp.MustCompile(MacPattern), + ValidEmail: regexp.MustCompile(EmailPattern), + ValidUrl: regexp.MustCompile(UrlPattern), + } +} diff --git a/internal/proof/proof.go b/internal/proof/proof.go index 1ab7539..461d15c 100644 --- a/internal/proof/proof.go +++ b/internal/proof/proof.go @@ -35,27 +35,29 @@ func CreateProofWriter(config *config.Config, openFiles *files.FilesHandler) (*P return &ProofWriter{IsEnabled: false}, nil } -func (p *ProofWriter) Write(originalValue string, maskedValue string) { +func (p *ProofWriter) Write(replacementMap map[string]string) { if !p.IsEnabled { return } - proof := struct { - OriginalValue string `json:"original"` - MaskedValue string `json:"new"` - }{ - OriginalValue: originalValue, - MaskedValue: maskedValue, - } + for originalValue, newValue := range replacementMap { + proof := struct { + OriginalValue string `json:"original"` + NewValue string `json:"new"` + }{ + OriginalValue: originalValue, + NewValue: newValue, + } - bytes, err := json.Marshal(proof) - if err != nil { - slog.Error("marshalling anonymisation proof", "error", err) - } + bytes, err := json.Marshal(proof) + if err != nil { + slog.Error("marshalling anonymisation proof", "error", err) + } - _, err = fmt.Fprintf(p.writer, "%s\n", bytes) - if err != nil { - slog.Error("writing anonymisation proof", "error", err) + _, err = fmt.Fprintf(p.writer, "%s\n", bytes) + if err != nil { + slog.Error("writing anonymisation proof", "error", err) + } } } diff --git a/internal/proof/proof_test.go b/internal/proof/proof_test.go index cd226bb..c2aaf42 100644 --- a/internal/proof/proof_test.go +++ b/internal/proof/proof_test.go @@ -15,22 +15,23 @@ func TestProof_Write(t *testing.T) { tests := []struct { name string isProofWriter bool - originalValue string - maskedValue string + replacementMap 
map[string]string expectedOutput string }{ { - name: "Test case 1: write proof", - isProofWriter: true, - originalValue: "test", - maskedValue: "masked", + name: "Test case 1: write proof", + isProofWriter: true, + replacementMap: map[string]string{ + "test": "masked", + }, expectedOutput: "{\"original\":\"test\",\"new\":\"masked\"}\n", }, { - name: "Test case 2: proof writer disabled", - isProofWriter: false, - originalValue: "test", - maskedValue: "masked", + name: "Test case 2: proof writer disabled", + isProofWriter: false, + replacementMap: map[string]string{ + "test": "masked", + }, expectedOutput: "", }, } @@ -44,7 +45,7 @@ func TestProof_Write(t *testing.T) { t.Fatal(err) } - p.Write(tt.originalValue, tt.maskedValue) + p.Write(tt.replacementMap) p.Flush() file, err := os.OpenFile("proof.json", os.O_RDWR|os.O_CREATE, 0644) diff --git a/internal/reader/backup.go b/internal/reader/backup.go index 66078b4..45b01b1 100644 --- a/internal/reader/backup.go +++ b/internal/reader/backup.go @@ -4,11 +4,14 @@ import ( "bufio" "compress/gzip" "encoding/json" + "errors" "fmt" "io" "os" ) +var syntaxError *json.SyntaxError + // LmBackup represents log line in LM Backup format type LmBackup struct { Source LmLog `json:"_source"` @@ -54,9 +57,16 @@ func (r *LmBackupReader) ReadLine() (map[string]string, error) { lmBackup := &LmBackup{} err := json.Unmarshal(line, &lmBackup) if err != nil { + if errors.As(err, &syntaxError) { + return nil, fmt.Errorf("Malformed lm backup file: %v", err) + } return nil, err } + if lmBackup.Source.Raw == "" { + return nil, fmt.Errorf("Malformed lm backup file - raw field cannot be empty") + } + // Convert map[string]interface{} to map[string]string as requred by anonymizer logLine := make(map[string]string) for key, value := range lmBackup.Source.Msg { diff --git a/internal/reader/backup_test.go b/internal/reader/backup_test.go index 32f8869..d760b97 100644 --- a/internal/reader/backup_test.go +++ b/internal/reader/backup_test.go @@ -2,6 +2,7 
@@ package reader import ( "errors" + "fmt" "io" "os" "testing" @@ -14,12 +15,28 @@ func TestLmBackup(t *testing.T) { name string inputFilename string expectedOutput map[string]string + wantErr bool + expectedErr error }{ { - name: "Test Test LM Backup Anonymizer", + name: "Test LM Backup Anonymizer", inputFilename: "../../tests/data/lm_backup_test_input.gz", expectedOutput: map[string]string{"appcat": "unscanned", "device_id": "FGT70FTK22012016", "device_name": "LM-FW-70F-Praha", "dst_iface": "dev-uplink", "dst_ip": "95.80.197.108", "dst_ip@ip": "map[as_number:29208 as_organization:Quantcom, a.s. city:Unknown country_code:CZ country_name:Czechia is_link_local:false is_multicast:false is_reserved:false ptr:95.80.197.108 value:95.80.197.108 version:4]", "dst_port": "80", "dst_port@int": "map[value:80]", "duration": "6.0", "duration@float": "map[value:6]", "policy_id": "9", "protocol": "TCP", "raw": "<189>date=2024-11-06 time=12:29:25 devname=\"LM-FW-70F-Praha\" devid=\"FGT70FTK22012016\" eventtime=1730892565525108329 tz=\"+0100\" logid=\"0000000013\" type=\"traffic\" subtype=\"forward\" level=\"notice\" vd=\"root\" srcip=27.221.126.209 srcport=57158 srcintf=\"wan1-lm\" srcintfrole=\"wan\" dstip=95.80.197.108 dstport=80 dstintf=\"dev-uplink\" dstintfrole=\"lan\" srccountry=\"China\" dstcountry=\"Czech Republic\" sessionid=179455916 proto=6 action=\"client-rst\" policyid=9 policytype=\"policy\" poluuid=\"d8ccb3e4-74d4-51ef-69a3-73b41f46df74\" policyname=\"Gitlab web from all\" service=\"HTTP\" trandisp=\"noop\" duration=6 sentbyte=80 rcvdbyte=44 sentpkt=2 rcvdpkt=1 appcat=\"unscanned\" srchwvendor=\"H3C\" devtype=\"Router\" mastersrcmac=\"00:23:89:39:a4:ef\" srcmac=\"00:23:89:39:a4:ef\" srcserver=0 dsthwvendor=\"H3C\" dstdevtype=\"Router\" masterdstmac=\"00:23:89:39:a4:fa\" dstmac=\"00:23:89:39:a4:fa\" dstserver=0", "rcvd_byte": "44", "rcvd_byte@int": "map[value:44]", "rcvd_pkt": "1", "rcvd_pkt@int": "map[value:1]", "sent_byte": "80", "sent_byte@int": "map[value:80]", 
"sent_pkt": "2", "sent_pkt@int": "map[value:2]", "service": "HTTP", "src_iface": "wan1-lm", "src_ip": "27.221.126.209", "src_ip@ip": "map[as_number:4837 as_organization:CHINA UNICOM China169 Backbone city:Unknown country_code:CN country_name:China is_link_local:false is_multicast:false is_reserved:false ptr:27.221.126.209 value:27.221.126.209 version:4]", "src_port": "57158", "src_port@int": "map[value:57158]", "status": "client-rst", "subtype": "forward", "type": "traffic", "vd": "root"}, }, + { + name: "Test LM Backup Anonymizer - RAW missing", + inputFilename: "../../tests/data/lm_backup_test_input_raw_missing.gz", + expectedOutput: map[string]string{}, + wantErr: true, + expectedErr: fmt.Errorf("Malformed lm backup file: unexpected end of JSON input"), + }, + { + name: "Test LM Backup Anonymizer - RAW empty", + inputFilename: "../../tests/data/lm_backup_test_input_raw_empty.gz", + expectedOutput: map[string]string{}, + wantErr: true, + expectedErr: fmt.Errorf("Malformed lm backup file - raw field cannot be empty"), + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -40,6 +57,10 @@ func TestLmBackup(t *testing.T) { if errors.Is(err, io.EOF) { break } + if tt.wantErr { + assert.Equal(t, tt.expectedErr, err) + return + } t.Fatal(err) } diff --git a/internal/reader/export.go b/internal/reader/export.go index b9619cf..411ec28 100644 --- a/internal/reader/export.go +++ b/internal/reader/export.go @@ -2,7 +2,9 @@ package reader import ( "encoding/csv" + "fmt" "os" + "slices" "strings" ) @@ -22,6 +24,10 @@ func NewLmExportReader(input *os.File) (*LmExportReader, error) { return nil, err } + if !slices.Contains(fieldNames, "raw") { + return nil, fmt.Errorf("Malformed lm export file - RAW field is missing") + } + // Trimming prefix from field names for i, fieldName := range fieldNames { fieldNames[i] = strings.TrimPrefix(fieldName, "msg.") @@ -45,6 +51,10 @@ func (r *LmExportReader) ReadLine() (map[string]string, error) { logLine[r.fieldNames[i]] = 
val } + if logLine["raw"] == "" { + return nil, fmt.Errorf("Malformed lm export file - RAW field cannot be empty") + } + return logLine, nil } diff --git a/internal/reader/export_test.go b/internal/reader/export_test.go index 05c32a5..91098b6 100644 --- a/internal/reader/export_test.go +++ b/internal/reader/export_test.go @@ -2,6 +2,7 @@ package reader import ( "errors" + "fmt" "io" "os" "testing" @@ -15,12 +16,28 @@ func TestLmExport(t *testing.T) { inputFilename string outputFilename string expectedOutput map[string]string + wantErr bool + expectedErr error }{ { name: "Test LM Export Anonymizer", inputFilename: "../../tests/data/lm_export_test_input.csv", expectedOutput: map[string]string{"@timestamp": "2024-06-05T14:59:27.000+00:00", "src_ip": "89.239.31.49", "username": "test.user@test.cz", "organization": "TESTuser.test.com", "raw": "{\"@timestamp\": \"2024-06-05T14:59:27.000+00:00\", \"msg.src_ip\":\"89.239.31.49\", \"username\":\"test.user@test.cz\", \"organization\":\"TESTuser.test.com\"}"}, }, + { + name: "Test LM Export Anonymizer - RAW missing", + inputFilename: "../../tests/data/lm_export_test_input_raw_missing.csv", + expectedOutput: map[string]string{}, + wantErr: true, + expectedErr: fmt.Errorf("Malformed lm export file - RAW field is missing"), + }, + { + name: "Test LM Export Anonymizer - RAW empty", + inputFilename: "../../tests/data/lm_export_test_input_raw_empty.csv", + expectedOutput: map[string]string{}, + wantErr: true, + expectedErr: fmt.Errorf("Malformed lm export file - RAW field cannot be empty"), + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -32,6 +49,10 @@ func TestLmExport(t *testing.T) { inputReader, err := NewLmExportReader(inputFile) if err != nil { + if tt.wantErr { + assert.Equal(t, tt.expectedErr, err) + return + } t.Fatal(err) } @@ -41,6 +62,10 @@ func TestLmExport(t *testing.T) { if errors.Is(err, io.EOF) { break } + if tt.wantErr { + assert.Equal(t, tt.expectedErr, err) + return + } t.Fatal(err) } 
diff --git a/tests/data/lm_backup_test_input_raw_empty.gz b/tests/data/lm_backup_test_input_raw_empty.gz new file mode 100644 index 0000000000000000000000000000000000000000..896877aa7839cc98bee3713d8cd1475b6368017f GIT binary patch literal 1023 zcmVM?JbYF5|cVA^~aCCV9y;f~c zBR3HKD`>wq-rCN-=2JLM^iEd-t-_rILXqS31|J)HWqX0#DgV7Qn}nn!Qq>P_6d^n> z_A}4SJn%D`X;Z1!XcRR|0$)yf5;GRlEE+_!uxb^!YU_eth@eQLgF=U{z%Z_ixq4O z2u{?smSFJRd|%Xya242|G1*xg(C_3-w`1Mn? zq5ooXVB12yc+>^~N*z{kyfsh8z8HK>Ij^mWM#&)3e%5I7G;6FBO*E=S<5h26c1_Sy z_;6@+$}9J*syAC3+$nUz&rG+RD|9BsFyEN0TL`0nh|oVCU3~gHIX}Cde7?B8I_tr* ztUUW5K#q-(K2aJ0?_D=kzRO)qY+0YQY$!%;R`nLwP!X>V<#ofAbA1Wpy0PLAPPX1VAv}v|pKiizz6$CT_BZDaypzw>spuqGA2@M_lil6z z(|cO@dhfyx?Cw2|Wb3^PA{q9nZ6#I}b;e6!3^W^pmMRF$6OBTq?;k)${#m#<&)&2v zhy_r6zt>H$oufw7+Nl@OG||H*XpGF?M?;$OtV$FqDd!}~N>1iUUXr@V%0$*I8x}a+ zNixinxe%lPbt8!?84(Gq$UGIi$kLK3fnoVj@j07I!pjV9Q%Oon$RuT&%T(s6EQ(*?9$_3h*%&S9?(N0-31>t=(! zN0;}z0pK?_V*@VmAbPYuoXQ7fSU9oVb=fG?fl9-$%W%kg+c%BiG&^DZR&{Pa-u#*} z2Au+c@Tv)$=4LGCHA^`u<&eX6sG0~-Ky!J*YnoDu+J*}#ULZQ^f0^03_VD~D9Yo_d t_WQ-urtQM+zYoCU`@cs$gnO0`X;JbtOJVB**_{{sFf=1tHD008b-0d)WX literal 0 HcmV?d00001 diff --git a/tests/data/lm_backup_test_input_raw_missing.gz b/tests/data/lm_backup_test_input_raw_missing.gz new file mode 100644 index 0000000000000000000000000000000000000000..a731b5f4dd72231df15c3a94bda0f3d3a1392774 GIT binary patch literal 993 zcmV<710MVziwFop06%8{18i+yVqs%zb#PyFWpi|2X>M?JbYF5|cVBI3b8~5KX8^ra zZExE)5dJHHzRuK0TCe_;G)2~;i<1pb3nT~(nxY&wGAU4$6Swex-<@Q~UMyhPhs_WS z@SZ#J?zy|?XE4{MP_Mxxs8$3%PI(kE7Sbdb1#`ct6+oqRUN41Lq|#m?pbIcx++TmV z{W4{*-@blNUvIwN|M~qI!A4pKVWzDrJlWPsIQ|UIyUl!X!eD3%HYEa zb_IAR%2G=(H0|P0)QfNh*wz^Ato4XHIoI{MuJQ4;s9JR8X~;PX8Ban^bMTV3HQsIJ z(iVtcPLFK6R4<>j!33rD8#vyXXJcOszNTEGtqCU4DA3Kk(&l+ySt+VuQi`fkopse# zUQ5yVW1~}za%)w**=p~GvJ=hRw5x?eFv`YhZ?bMFjQ$~f_jqvm>GSmB>~8w`^5*)i zOO_@1*#{w?7uKj?yQgC40Yqw;iF0lI5l?Z-;%s|>A+$5?X)OiLLGU6BgHglgHU5AK?0jl27{oVf%f0$7J)I?``kc{-YJfOS>AKiLeQg-M;pL3lSI~ 
zH>2AF24NZ96S$)lHi5J?76?G?thdramBH-t78~G&ZyPwuN-N`uYjE0YTWPuJx2J0? zRIIz(KFwwrRM@I8G`xhwr{&q3)ggWe4_W-K=nqdF5iIeeq`Wp|HEQ?fmQBbMb%Zi)HqVjrm5_**A8sQ ziBPaSgS7!!8izcNPL<-$oGQgZ82)>uILYr=D(Ik0OmAjqS2uUlk2r^&I-Fbr+peoV z_b$7%+YLy*w;Ahkfk(lUZTwI?D8tf;)xOI{p$-%phF$t&*6Xfm1gF^v)9h5|?&IyR zF=NmvNDy8%zHe^AQeLu{lU$BDY=date=2024-11-06 time=12:29:25 devname=\"LM-FW-70F-Praha\" devid=\"FGT70FTK22012016\" eventtime=1730892565525108329 tz=\"+0100\" logid=\"0000000013\" type=\"traffic\" subtype=\"forward\" level=\"notice\" vd=\"root\" srcip=10.20.0.53 srcport=57158 srcintf=\"lan1\" srcintfrole=\"wan\" dstip=227.51.221.89 dstport=80 dstintf=\"lan1\" dstintfrole=\"lan\" srccountry=\"China\" dstcountry=\"Czech Republic\" sessionid=179455916 proto=6 action=\"client-rst\" policyid=9 policytype=\"policy\" poluuid=\"d8ccb3e4-74d4-51ef-69a3-73b41f46df74\" policyname=\"Gitlab web from all\" service=\"HTTP\" trandisp=\"noop\" duration=6 sentbyte=80 rcvdbyte=44 sentpkt=2 rcvdpkt=1 appcat=\"unscanned\" srchwvendor=\"H3C\" devtype=\"Router\" mastersrcmac=\"00:23:89:39:a4:ef\" srcmac=\"00:23:89:39:a4:ef\" srcserver=0 dsthwvendor=\"H3C\" dstdevtype=\"Router\" masterdstmac=\"00:23:89:39:a4:fa\" dstmac=\"00:23:89:39:a4:fa\" dstserver=0\n", + expectedOutput: "<189>date=2024-11-06 time=12:29:25 devname=\"LM-FW-70F-Praha\" devid=\"FGT70FTK22012016\" eventtime=1730892565525108329 tz=\"+0100\" logid=\"0000000013\" type=\"traffic\" subtype=\"forward\" level=\"notice\" vd=\"root\" srcip=10.20.0.53 srcport=57158 srcintf=\"lan1\" srcintfrole=\"wan\" dstip=227.51.221.89 dstport=80 dstintf=\"lan1\" dstintfrole=\"lan\" srccountry=\"China\" dstcountry=\"Czech Republic\" sessionid=179455916 proto=6 action=\"client-rst\" policyid=9 policytype=\"policy\" poluuid=\"d8ccb3e4-74d4-51ef-69a3-73b41f46df74\" policyname=\"Gitlab web from all\" service=\"HTTP\" trandisp=\"noop\" duration=6 sentbyte=80 rcvdbyte=44 sentpkt=2 rcvdpkt=1 appcat=\"unscanned\" srchwvendor=\"H3C\" devtype=\"Router\" 
mastersrcmac=\"0f:da:68:92:7f:2b\" srcmac=\"0f:da:68:92:7f:2b\" srcserver=0 dsthwvendor=\"H3C\" dstdevtype=\"Router\" masterdstmac=\"0f:da:68:92:7f:2b\" dstmac=\"0f:da:68:92:7f:2b\" dstserver=0\n", expectedProof: []map[string]interface{}{ {"original": "dev-uplink", "new": "lan1"}, - {"original": "95.80.197.108", "new": "227.51.221.89"}, - {"original": "27.221.126.209", "new": "10.20.0.53"}, {"original": "wan1-lm", "new": "lan1"}, + {"original": "00:23:89:39:a4:ef", "new": "0f:da:68:92:7f:2b"}, + {"original": "00:23:89:39:a4:fa", "new": "0f:da:68:92:7f:2b"}, + {"original": "27.221.126.209", "new": "10.20.0.53"}, + {"original": "95.80.197.108", "new": "227.51.221.89"}, }, }, { @@ -49,9 +53,9 @@ func TestLogVeil_IntegrationTest(t *testing.T) { }, expectedOutput: "{\"@timestamp\": \"2024-06-05T14:59:27.000+00:00\", \"msg.src_ip\":\"10.20.0.53\", \"username\":\"ladislav.dosek\", \"organization\":\"Apple\"}\n", expectedProof: []map[string]interface{}{ - {"original": "89.239.31.49", "new": "10.20.0.53"}, {"original": "test.user@test.cz", "new": "ladislav.dosek"}, {"original": "TESTuser.test.com", "new": "Apple"}, + {"original": "89.239.31.49", "new": "10.20.0.53"}, }, }, } @@ -80,6 +84,7 @@ func TestLogVeil_IntegrationTest(t *testing.T) { } // Disabling randomization so we know which values to expect anonymizer.SetRandFunc(func(int) int { return 1 }) + faker.SetRandomSource(rand.NewSource(1)) err = logveil.RunAnonymizationLoop(inputReader, outputWriter, anonymizer) if err != nil {