Skip to content

Commit

Permalink
implement regexp scan and data generation
Browse files Browse the repository at this point in the history
  • Loading branch information
solnicki committed Nov 20, 2024
1 parent a4d752c commit 3360f15
Show file tree
Hide file tree
Showing 9 changed files with 164 additions and 45 deletions.
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@ go 1.22.5

require github.com/stretchr/testify v1.9.0

require golang.org/x/text v0.16.0 // indirect

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-faker/faker/v4 v4.5.0
github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7
gopkg.in/yaml.v3 v3.0.1 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-faker/faker/v4 v4.5.0 h1:ARzAY2XoOL9tOUK+KSecUQzyXQsUaZHefjyF8x6YFHc=
github.com/go-faker/faker/v4 v4.5.0/go.mod h1:p3oq1GRjG2PZ7yqeFFfQI20Xm61DoBDlCA8RiSyZ48M=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7 h1:wDLEX9a7YQoKdKNQt88rtydkqDxeGaBUTnIYc3iG/mA=
golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
Expand Down
65 changes: 53 additions & 12 deletions internal/anonymizer/anonymizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,25 @@ package anonymizer
import (
	"fmt"
	"log/slog"
	"regexp"
	"sort"
	"strings"

	"github.com/logmanager-oss/logveil/internal/config"
	"github.com/logmanager-oss/logveil/internal/generator"
	"github.com/logmanager-oss/logveil/internal/loader"
	"github.com/logmanager-oss/logveil/internal/lookup"
	"github.com/logmanager-oss/logveil/internal/proof"
	"golang.org/x/exp/rand"
)

// Anonymizer represents an object responsible for anonymizing individual log lines fed to it. It contains anonymization data which will be used to anonymize input and a random number generator function used to select values from anonymization data.
type Anonymizer struct {
anonData map[string][]string
randFunc func(int) int
proofWriter *proof.ProofWriter
// anonData maps a field name to the candidate replacement values for that field.
anonData map[string][]string
// randFunc is called with the number of candidates and returns the index of the one to use.
randFunc func(int) int
// proofWriter receives the original-to-replacement pairs recorded for each line.
proofWriter *proof.ProofWriter
// lookup holds the compiled patterns that are run against the raw log text.
lookup *lookup.Lookup
// generator supplies the generated values assigned to pattern matches.
generator *generator.Generator
// replacementMap maps each original value to its replacement; Anonymize re-creates it on every call.
replacementMap map[string]string
}

func CreateAnonymizer(config *config.Config, proofWriter *proof.ProofWriter) (*Anonymizer, error) {
Expand All @@ -28,12 +34,33 @@ func CreateAnonymizer(config *config.Config, proofWriter *proof.ProofWriter) (*A
anonData: anonymizingData,
randFunc: rand.Intn,
proofWriter: proofWriter,
lookup: lookup.New(),
generator: &generator.Generator{},
}, nil
}

// Anonymize collects replacements for a single log line and returns the
// line's "raw" text with those replacements applied.
//
// logLine holds the parsed fields of the line plus the full text under the
// "raw" key. Replacements are gathered into an.replacementMap in two stages:
// loadAndReplace handles the parsed fields, then each generateAndReplace call
// scans the raw text with one lookup pattern. The collected map is handed to
// the proof writer before the substituted text is returned.
func (an *Anonymizer) Anonymize(logLine map[string]string) string {
// NOTE(review): Flush is also called explicitly before the return below; one of the two calls looks redundant — confirm.
defer an.proofWriter.Flush()
// Start from an empty map so replacements chosen for a previous line are not carried over.
an.replacementMap = make(map[string]string)

an.loadAndReplace(logLine)
// NOTE(review): each call below generates a single value up front and
// generateAndReplace assigns it to every new match of that pattern, so two
// different MAC (or IP, e-mail, URL) values in one line end up with the same
// replacement — confirm this is intended.
an.generateAndReplace(logLine["raw"], an.lookup.ValidIpv4, an.generator.GenerateRandomIPv4())
an.generateAndReplace(logLine["raw"], an.lookup.ValidIpv6, an.generator.GenerateRandomIPv6())
an.generateAndReplace(logLine["raw"], an.lookup.ValidMac, an.generator.GenerateRandomMac())
an.generateAndReplace(logLine["raw"], an.lookup.ValidEmail, an.generator.GenerateRandomEmail())
an.generateAndReplace(logLine["raw"], an.lookup.ValidUrl, an.generator.GenerateRandomUrl())

// Record every original/replacement pair of this line in the proof output.
an.proofWriter.Write(an.replacementMap)
an.proofWriter.Flush()

return an.replace(logLine["raw"])
}

// SetRandFunc sets the function used by Anonymize() to select values from anonymization data at random.
//
// randFunc is called with the number of candidate values available for a
// field and its result is used directly as the index into that list, so it
// must return a value in [0, n). Tests replace it with a constant function to
// make the selection predictable.
func (an *Anonymizer) SetRandFunc(randFunc func(int) int) {
an.randFunc = randFunc
}

func (an *Anonymizer) loadAndReplace(logLine map[string]string) {
for field, value := range logLine {
if field == "raw" {
continue
Expand All @@ -43,21 +70,35 @@ func (an *Anonymizer) Anonymize(logLine map[string]string) string {
continue
}

if _, ok := an.replacementMap[value]; ok {
continue
}

if anonValues, exists := an.anonData[field]; exists {
newAnonValue := anonValues[an.randFunc(len(anonValues))]

an.proofWriter.Write(value, newAnonValue)
an.replacementMap[value] = newAnonValue

slog.Debug(fmt.Sprintf("Replacing the values for field %s. From %s to %s.\n", field, value, newAnonValue))

logLine["raw"] = strings.Replace(logLine["raw"], value, newAnonValue, -1)
}
}
}

// generateAndReplace records generatedData as the replacement for every
// substring of rawLog matched by pattern.
//
// Matches that already have an entry in an.replacementMap (for example one
// chosen earlier from the anonymization data) keep their existing
// replacement. All newly seen matches share the single generatedData value.
// rawLog itself is not modified here; the substitution is performed later by
// replace.
func (an *Anonymizer) generateAndReplace(rawLog string, pattern *regexp.Regexp, generatedData string) {
	for _, match := range pattern.FindAllString(rawLog, -1) {
		if _, known := an.replacementMap[match]; known {
			continue
		}

		an.replacementMap[match] = generatedData
	}
}

// SetRandFunc sets the function used by Anonymize() to select values from anonymization data at random
func (an *Anonymizer) SetRandFunc(randFunc func(int) int) {
an.randFunc = randFunc
// replace returns rawLog with every original value recorded in
// an.replacementMap substituted by its replacement.
//
// All substitutions happen in one left-to-right pass, so text inserted as a
// replacement is never rewritten by another entry. Keys are tried longest
// first (ties broken lexicographically): when one original value is a prefix
// of another (e.g. "10.10.10.1" and "10.10.10.10") the longer one wins, and
// the result no longer depends on Go's randomized map iteration order.
// Empty keys are skipped because an empty search string would match at every
// position of the input.
func (an *Anonymizer) replace(rawLog string) string {
	originals := make([]string, 0, len(an.replacementMap))
	for original := range an.replacementMap {
		if original != "" {
			originals = append(originals, original)
		}
	}
	if len(originals) == 0 {
		return rawLog
	}

	sort.Slice(originals, func(i, j int) bool {
		if len(originals[i]) != len(originals[j]) {
			return len(originals[i]) > len(originals[j])
		}
		return originals[i] < originals[j]
	})

	// strings.NewReplacer gives priority to earlier pairs, so the sorted
	// order above becomes the match priority.
	pairs := make([]string, 0, 2*len(originals))
	for _, original := range originals {
		pairs = append(pairs, original, an.replacementMap[original])
	}

	return strings.NewReplacer(pairs...).Replace(rawLog)
}
17 changes: 15 additions & 2 deletions internal/anonymizer/anonymizer_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package anonymizer

import (
"math/rand"
"testing"

"github.com/go-faker/faker/v4"
"github.com/logmanager-oss/logveil/internal/config"
"github.com/logmanager-oss/logveil/internal/proof"
"github.com/stretchr/testify/assert"
Expand All @@ -18,8 +20,18 @@ func TestAnonimizer_AnonymizeData(t *testing.T) {
{
name: "Test AnonymizeData",
anonymizingDataDir: "../../tests/data/anonymization_data",
input: map[string]string{"@timestamp": "2024-06-05T14:59:27.000+00:00", "src_ip": "10.10.10.1", "username": "miloslav.illes", "organization": "Microsoft", "raw": "2024-06-05T14:59:27.000+00:00, 10.10.10.1, miloslav.illes, Microsoft"},
expectedOutput: "2024-06-05T14:59:27.000+00:00, 10.20.0.53, ladislav.dosek, Apple",
input: map[string]string{
"@timestamp": "2024-06-05T14:59:27.000+00:00",
"src_ip": "10.10.10.1",
"src_ipv6": "7f1d:64ed:536a:1fd7:fe8e:cc29:9df4:7911",
"mac": "71:e5:41:18:cb:3e",
"email": "[email protected]",
"url": "https://www.testurl.com",
"username": "miloslav.illes",
"organization": "Microsoft",
"raw": "2024-06-05T14:59:27.000+00:00, 10.10.10.1, 7f1d:64ed:536a:1fd7:fe8e:cc29:9df4:7911, miloslav.illes, Microsoft, 71:e5:41:18:cb:3e, [email protected], https://www.testurl.com",
},
expectedOutput: "2024-06-05T14:59:27.000+00:00, 10.20.0.53, 8186:39ac:48a4:c6af:a2f1:581a:8b95:25e2, ladislav.dosek, Apple, 0f:da:68:92:7f:2b, [email protected], http://soqovkq.com/NfkcUjG.php",
},
}

Expand All @@ -31,6 +43,7 @@ func TestAnonimizer_AnonymizeData(t *testing.T) {
}
// Disabling randomization so we know which values to expect
anonymizer.SetRandFunc(func(int) int { return 1 })
faker.SetRandomSource(rand.NewSource(1))
output := anonymizer.Anonymize(tt.input)

assert.Equal(t, tt.expectedOutput, output)
Expand Down
27 changes: 27 additions & 0 deletions internal/generator/generator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package generator

import (
"github.com/go-faker/faker/v4"
)

// Generator produces replacement values by delegating to the go-faker
// library. It has no fields, so its zero value is ready to use.
type Generator struct{}

// GenerateRandomIPv4 returns the string produced by faker.IPv4().
// NOTE(review): presumably a newly generated IPv4 address in text form — confirm against the faker documentation.
func (g *Generator) GenerateRandomIPv4() string {
return faker.IPv4()
}

// GenerateRandomIPv6 returns the string produced by faker.IPv6().
// NOTE(review): presumably a newly generated IPv6 address in text form — confirm against the faker documentation.
func (g *Generator) GenerateRandomIPv6() string {
return faker.IPv6()
}

// GenerateRandomMac returns the string produced by faker.MacAddress().
// NOTE(review): presumably a newly generated MAC address in text form — confirm against the faker documentation.
func (g *Generator) GenerateRandomMac() string {
return faker.MacAddress()
}

// GenerateRandomEmail returns the string produced by faker.Email().
// NOTE(review): presumably a newly generated e-mail address — confirm against the faker documentation.
func (g *Generator) GenerateRandomEmail() string {
return faker.Email()
}

// GenerateRandomUrl returns the string produced by faker.URL().
// NOTE(review): presumably a newly generated URL — confirm against the faker documentation.
func (g *Generator) GenerateRandomUrl() string {
return faker.URL()
}
23 changes: 23 additions & 0 deletions internal/lookup/lookup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package lookup

import (
"regexp"
)

// Lookup bundles the compiled regular expressions that are run against raw
// log text to find values that have to be replaced.
type Lookup struct {
	ValidIpv4  *regexp.Regexp // pattern for IPv4 addresses
	ValidIpv6  *regexp.Regexp // pattern for IPv6 addresses
	ValidMac   *regexp.Regexp // pattern for MAC addresses separated by ':' or '-'
	ValidEmail *regexp.Regexp // pattern for e-mail addresses
	ValidUrl   *regexp.Regexp // pattern for http and https URLs
}

// New compiles all lookup patterns and returns them in a fresh Lookup.
// The patterns are fixed literals, so compilation is done with
// regexp.MustCompile, which panics on an invalid pattern.
func New() *Lookup {
	const (
		ipv4Pattern = `((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.?\b){4}`
		ipv6Pattern = `(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))`
		macPattern  = `([0-9A-Fa-f]{2}[:-]){5}([0-9A-Fa-f]{2})`
		// Written as an interpreted string because the pattern contains a backtick.
		emailPattern = "[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*"
		urlPattern   = `https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)`
	)

	patterns := new(Lookup)
	patterns.ValidIpv4 = regexp.MustCompile(ipv4Pattern)
	patterns.ValidIpv6 = regexp.MustCompile(ipv6Pattern)
	patterns.ValidMac = regexp.MustCompile(macPattern)
	patterns.ValidEmail = regexp.MustCompile(emailPattern)
	patterns.ValidUrl = regexp.MustCompile(urlPattern)

	return patterns
}
32 changes: 17 additions & 15 deletions internal/proof/proof.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,27 +35,29 @@ func CreateProofWriter(config *config.Config, openFiles *files.FilesHandler) (*P
return &ProofWriter{IsEnabled: false}, nil
}

func (p *ProofWriter) Write(originalValue string, maskedValue string) {
func (p *ProofWriter) Write(replacementMap map[string]string) {
if !p.IsEnabled {
return
}

proof := struct {
OriginalValue string `json:"original"`
MaskedValue string `json:"new"`
}{
OriginalValue: originalValue,
MaskedValue: maskedValue,
}
for originalValue, newValue := range replacementMap {
proof := struct {
OriginalValue string `json:"original"`
NewValue string `json:"new"`
}{
OriginalValue: originalValue,
NewValue: newValue,
}

bytes, err := json.Marshal(proof)
if err != nil {
slog.Error("marshalling anonymisation proof", "error", err)
}
bytes, err := json.Marshal(proof)
if err != nil {
slog.Error("marshalling anonymisation proof", "error", err)
}

_, err = fmt.Fprintf(p.writer, "%s\n", bytes)
if err != nil {
slog.Error("writing anonymisation proof", "error", err)
_, err = fmt.Fprintf(p.writer, "%s\n", bytes)
if err != nil {
slog.Error("writing anonymisation proof", "error", err)
}
}
}

Expand Down
23 changes: 12 additions & 11 deletions internal/proof/proof_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,23 @@ func TestProof_Write(t *testing.T) {
tests := []struct {
name string
isProofWriter bool
originalValue string
maskedValue string
replacementMap map[string]string
expectedOutput string
}{
{
name: "Test case 1: write proof",
isProofWriter: true,
originalValue: "test",
maskedValue: "masked",
name: "Test case 1: write proof",
isProofWriter: true,
replacementMap: map[string]string{
"test": "masked",
},
expectedOutput: "{\"original\":\"test\",\"new\":\"masked\"}\n",
},
{
name: "Test case 2: proof writer disabled",
isProofWriter: false,
originalValue: "test",
maskedValue: "masked",
name: "Test case 2: proof writer disabled",
isProofWriter: false,
replacementMap: map[string]string{
"test": "masked",
},
expectedOutput: "",
},
}
Expand All @@ -44,7 +45,7 @@ func TestProof_Write(t *testing.T) {
t.Fatal(err)
}

p.Write(tt.originalValue, tt.maskedValue)
p.Write(tt.replacementMap)
p.Flush()

file, err := os.OpenFile("proof.json", os.O_RDWR|os.O_CREATE, 0644)
Expand Down
15 changes: 10 additions & 5 deletions tests/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ import (
"bufio"
"bytes"
"encoding/json"
"math/rand"
"os"
"testing"

"github.com/go-faker/faker/v4"
"github.com/logmanager-oss/logveil/cmd/logveil"
"github.com/logmanager-oss/logveil/internal/anonymizer"
"github.com/logmanager-oss/logveil/internal/config"
Expand All @@ -24,19 +26,21 @@ func TestLogVeil_IntegrationTest(t *testing.T) {
expectedProof []map[string]interface{}
}{
{
name: "Test Test LM Backup Anonymizer",
name: "Test LM Backup Anonymizer",
config: &config.Config{
AnonymizationDataPath: "data/anonymization_data",
InputPath: "data/lm_backup_test_input.gz",
IsLmExport: false,
IsProofWriter: true,
},
expectedOutput: "<189>date=2024-11-06 time=12:29:25 devname=\"LM-FW-70F-Praha\" devid=\"FGT70FTK22012016\" eventtime=1730892565525108329 tz=\"+0100\" logid=\"0000000013\" type=\"traffic\" subtype=\"forward\" level=\"notice\" vd=\"root\" srcip=10.20.0.53 srcport=57158 srcintf=\"lan1\" srcintfrole=\"wan\" dstip=227.51.221.89 dstport=80 dstintf=\"lan1\" dstintfrole=\"lan\" srccountry=\"China\" dstcountry=\"Czech Republic\" sessionid=179455916 proto=6 action=\"client-rst\" policyid=9 policytype=\"policy\" poluuid=\"d8ccb3e4-74d4-51ef-69a3-73b41f46df74\" policyname=\"Gitlab web from all\" service=\"HTTP\" trandisp=\"noop\" duration=6 sentbyte=80 rcvdbyte=44 sentpkt=2 rcvdpkt=1 appcat=\"unscanned\" srchwvendor=\"H3C\" devtype=\"Router\" mastersrcmac=\"00:23:89:39:a4:ef\" srcmac=\"00:23:89:39:a4:ef\" srcserver=0 dsthwvendor=\"H3C\" dstdevtype=\"Router\" masterdstmac=\"00:23:89:39:a4:fa\" dstmac=\"00:23:89:39:a4:fa\" dstserver=0\n",
expectedOutput: "<189>date=2024-11-06 time=12:29:25 devname=\"LM-FW-70F-Praha\" devid=\"FGT70FTK22012016\" eventtime=1730892565525108329 tz=\"+0100\" logid=\"0000000013\" type=\"traffic\" subtype=\"forward\" level=\"notice\" vd=\"root\" srcip=10.20.0.53 srcport=57158 srcintf=\"lan1\" srcintfrole=\"wan\" dstip=227.51.221.89 dstport=80 dstintf=\"lan1\" dstintfrole=\"lan\" srccountry=\"China\" dstcountry=\"Czech Republic\" sessionid=179455916 proto=6 action=\"client-rst\" policyid=9 policytype=\"policy\" poluuid=\"d8ccb3e4-74d4-51ef-69a3-73b41f46df74\" policyname=\"Gitlab web from all\" service=\"HTTP\" trandisp=\"noop\" duration=6 sentbyte=80 rcvdbyte=44 sentpkt=2 rcvdpkt=1 appcat=\"unscanned\" srchwvendor=\"H3C\" devtype=\"Router\" mastersrcmac=\"0f:da:68:92:7f:2b\" srcmac=\"0f:da:68:92:7f:2b\" srcserver=0 dsthwvendor=\"H3C\" dstdevtype=\"Router\" masterdstmac=\"0f:da:68:92:7f:2b\" dstmac=\"0f:da:68:92:7f:2b\" dstserver=0\n",
expectedProof: []map[string]interface{}{
{"original": "dev-uplink", "new": "lan1"},
{"original": "95.80.197.108", "new": "227.51.221.89"},
{"original": "27.221.126.209", "new": "10.20.0.53"},
{"original": "wan1-lm", "new": "lan1"},
{"original": "00:23:89:39:a4:ef", "new": "0f:da:68:92:7f:2b"},
{"original": "00:23:89:39:a4:fa", "new": "0f:da:68:92:7f:2b"},
{"original": "27.221.126.209", "new": "10.20.0.53"},
{"original": "95.80.197.108", "new": "227.51.221.89"},
},
},
{
Expand All @@ -49,9 +53,9 @@ func TestLogVeil_IntegrationTest(t *testing.T) {
},
expectedOutput: "{\"@timestamp\": \"2024-06-05T14:59:27.000+00:00\", \"msg.src_ip\":\"10.20.0.53\", \"username\":\"ladislav.dosek\", \"organization\":\"Apple\"}\n",
expectedProof: []map[string]interface{}{
{"original": "89.239.31.49", "new": "10.20.0.53"},
{"original": "[email protected]", "new": "ladislav.dosek"},
{"original": "TESTuser.test.com", "new": "Apple"},
{"original": "89.239.31.49", "new": "10.20.0.53"},
},
},
}
Expand Down Expand Up @@ -80,6 +84,7 @@ func TestLogVeil_IntegrationTest(t *testing.T) {
}
// Disabling randomization so we know which values to expect
anonymizer.SetRandFunc(func(int) int { return 1 })
faker.SetRandomSource(rand.NewSource(1))

err = logveil.RunAnonymizationLoop(inputReader, outputWriter, anonymizer)
if err != nil {
Expand Down

0 comments on commit 3360f15

Please sign in to comment.