Skip to content

Commit

Permalink
Merge pull request #14 from logmanager-oss/implement-per-session-repl…
Browse files Browse the repository at this point in the history
…acement-map

implement persistent (per session) replacement map
  • Loading branch information
tender-barbarian authored Nov 25, 2024
2 parents d0c6170 + c854660 commit 2045124
Show file tree
Hide file tree
Showing 10 changed files with 119 additions and 74 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ Usage of ./logveil:
Change input file type to LM export (default: LM Backup)
-p
Disable proof writer (default: Enabled)
-r
Disable persistent (per session) replacement map (default: Enabled)
-h
Help for logveil
```
Expand Down Expand Up @@ -160,6 +162,10 @@ And anonymization proof:
{"original": "71:e5:41:18:cb:3e", "new": "0f:da:68:92:7f:2b"},
```

## Replacement map and possible memory issues

LogVeil keeps a replacement map in memory for each run (per session) to make sure each unique value gets the same anonymized value every time it is encountered. Depending on the size of the input data, this replacement map can grow quite large, potentially (though unlikely) even exhausting available memory. If you encounter a memory issue, use the `-r` flag to disable the persistent replacement map.

## Release

Go to: https://github.com/logmanager-oss/logveil/releases to grab the latest version of LogVeil. It is available for Windows, Linux and macOS (x86_64/Arm64).
Expand Down
72 changes: 39 additions & 33 deletions internal/anonymizer/anonymizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package anonymizer
import (
"fmt"
"log/slog"
"maps"
"regexp"

"github.com/logmanager-oss/logveil/internal/config"
Expand All @@ -15,19 +16,19 @@ import (

// Anonymizer represents an object responsible for anonymizing individual log lines fed to it. It contains anonymization data which will be used to anonymize input and a random number generator function used to select values from anonymization data.
type Anonymizer struct {
anonymizationData map[string][]string
customAnonymizationMapping map[string]string
randFunc func(int) int
proofWriter *proof.ProofWriter
lookup *lookup.Lookup
generator *generator.Generator
replacementMap map[string]string
anonymizationData map[string][]string
replacementMap map[string]string
randFunc func(int) int
proofWriter *proof.ProofWriter
lookup *lookup.Lookup
generator *generator.Generator
isPersistReplacementMap bool
}

func CreateAnonymizer(config *config.Config, proofWriter *proof.ProofWriter) (*Anonymizer, error) {
customAnonymizationMapping, err := loader.LoadCustomAnonymizationMapping(config.CustomAnonymizationMappingPath)
customReplacementMap, err := loader.LoadCustomReplacementMap(config.CustomReplacementMapPath)
if err != nil {
return nil, fmt.Errorf("loading custom anonymization mappings from path %s: %v", config.CustomAnonymizationMappingPath, err)
return nil, fmt.Errorf("loading custom replacement map from path %s: %v", config.CustomReplacementMapPath, err)
}

anonymizationData, err := loader.LoadAnonymizationData(config.AnonymizationDataPath)
Expand All @@ -36,38 +37,39 @@ func CreateAnonymizer(config *config.Config, proofWriter *proof.ProofWriter) (*A
}

return &Anonymizer{
anonymizationData: anonymizationData,
customAnonymizationMapping: customAnonymizationMapping,
randFunc: rand.Intn,
proofWriter: proofWriter,
lookup: lookup.New(),
generator: &generator.Generator{},
anonymizationData: anonymizationData,
replacementMap: customReplacementMap,
randFunc: rand.Intn,
proofWriter: proofWriter,
lookup: lookup.New(),
generator: &generator.Generator{},
isPersistReplacementMap: config.IsPersistReplacementMap,
}, nil
}

func (an *Anonymizer) Anonymize(logLine map[string]string) string {
an.replacementMap = an.customAnonymizationMapping

an.loadAndReplace(logLine)
replacementMap := an.loadAndReplace(logLine, an.replacementMap)

logLineRaw := logLine["raw"]
an.generateAndReplace(logLineRaw, an.lookup.ValidIpv4, an.generator.GenerateRandomIPv4())
an.generateAndReplace(logLineRaw, an.lookup.ValidIpv6, an.generator.GenerateRandomIPv6())
an.generateAndReplace(logLineRaw, an.lookup.ValidMac, an.generator.GenerateRandomMac())
an.generateAndReplace(logLineRaw, an.lookup.ValidEmail, an.generator.GenerateRandomEmail())
an.generateAndReplace(logLineRaw, an.lookup.ValidUrl, an.generator.GenerateRandomUrl())

an.proofWriter.Flush()
replacementMap = an.generateAndReplace(logLineRaw, replacementMap, an.lookup.ValidIpv4, an.generator.GenerateRandomIPv4())
replacementMap = an.generateAndReplace(logLineRaw, replacementMap, an.lookup.ValidIpv6, an.generator.GenerateRandomIPv6())
replacementMap = an.generateAndReplace(logLineRaw, replacementMap, an.lookup.ValidMac, an.generator.GenerateRandomMac())
replacementMap = an.generateAndReplace(logLineRaw, replacementMap, an.lookup.ValidEmail, an.generator.GenerateRandomEmail())
replacementMap = an.generateAndReplace(logLineRaw, replacementMap, an.lookup.ValidUrl, an.generator.GenerateRandomUrl())

if an.isPersistReplacementMap {
maps.Copy(an.replacementMap, replacementMap)
}

return an.replace(logLineRaw)
return an.replace(logLineRaw, replacementMap)
}

// SetRandFunc sets the function used by Anonymize() to select values from anonymization data at random
func (an *Anonymizer) SetRandFunc(randFunc func(int) int) {
an.randFunc = randFunc
}

func (an *Anonymizer) loadAndReplace(logLine map[string]string) {
func (an *Anonymizer) loadAndReplace(logLine map[string]string, replacementMap map[string]string) map[string]string {
for field, value := range logLine {
if field == "raw" {
continue
Expand All @@ -77,33 +79,37 @@ func (an *Anonymizer) loadAndReplace(logLine map[string]string) {
continue
}

if _, ok := an.replacementMap[value]; ok {
if _, ok := replacementMap[value]; ok {
continue
}

if anonValues, exists := an.anonymizationData[field]; exists {
newAnonValue := anonValues[an.randFunc(len(anonValues))]
an.replacementMap[value] = newAnonValue
replacementMap[value] = newAnonValue

slog.Debug(fmt.Sprintf("Replacing the values for field %s. From %s to %s.\n", field, value, newAnonValue))
}
}

return replacementMap
}

func (an *Anonymizer) generateAndReplace(rawLog string, regexp *regexp.Regexp, generatedData string) {
func (an *Anonymizer) generateAndReplace(rawLog string, replacementMap map[string]string, regexp *regexp.Regexp, generatedData string) map[string]string {
values := regexp.FindAllString(rawLog, -1)

for _, value := range values {
if _, ok := an.replacementMap[value]; ok {
continue
}

an.replacementMap[value] = generatedData
replacementMap[value] = generatedData
}

return replacementMap
}

func (an *Anonymizer) replace(rawLog string) string {
for originalValue, newValue := range an.replacementMap {
func (an *Anonymizer) replace(rawLog string, replacementMap map[string]string) string {
for originalValue, newValue := range replacementMap {
// Added word boundary to avoid matching a value within a longer word. For example "test" in "testing".
r := regexp.MustCompile(fmt.Sprintf(`\b%s\b`, originalValue))

Expand Down
26 changes: 25 additions & 1 deletion internal/anonymizer/anonymizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,35 @@ func TestAnonimizer_AnonymizeData(t *testing.T) {
},
expectedOutput: "2024-06-05T14:59:27.000+00:00, 10.20.0.53, 8186:39ac:48a4:c6af:a2f1:581a:8b95:25e2, ladislav.dosek, Apple, 0f:da:68:92:7f:2b, [email protected], http://soqovkq.com/NfkcUjG.php, with_that",
},
{
name: "Test AnonymizeData - with persisten replacement map",
anonymizationDataDir: "../../tests/data/anonymization_data",
customAnonymizationMappingPath: "../../tests/data/custom_mappings.txt",
input: map[string]string{
"@timestamp": "2024-06-05T14:59:27.000+00:00",
"src_ip": "10.10.10.1",
"src_ipv6": "7f1d:64ed:536a:1fd7:fe8e:cc29:9df4:7911",
"mac": "71:e5:41:18:cb:3e",
"email": "[email protected]",
"url": "https://www.testurl.com",
"username": "miloslav.illes",
"organization": "Microsoft",
"custom:": "replacement_test",
"raw": "2024-06-05T14:59:27.000+00:00, 10.10.10.1, 7f1d:64ed:536a:1fd7:fe8e:cc29:9df4:7911, miloslav.illes, Microsoft, 71:e5:41:18:cb:3e, [email protected], https://www.testurl.com, replace_this",
},
expectedOutput: "2024-06-05T14:59:27.000+00:00, 10.20.0.53, 8186:39ac:48a4:c6af:a2f1:581a:8b95:25e2, ladislav.dosek, Apple, 0f:da:68:92:7f:2b, [email protected], http://soqovkq.com/NfkcUjG.php, with_that",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
anonymizer, err := CreateAnonymizer(&config.Config{AnonymizationDataPath: tt.anonymizationDataDir, CustomAnonymizationMappingPath: tt.customAnonymizationMappingPath}, &proof.ProofWriter{IsEnabled: false})
anonymizer, err := CreateAnonymizer(
&config.Config{
AnonymizationDataPath: tt.anonymizationDataDir,
CustomReplacementMapPath: tt.customAnonymizationMappingPath,
},
&proof.ProofWriter{IsEnabled: false},
)
if err != nil {
t.Fatal(err)
}
Expand Down
20 changes: 11 additions & 9 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ import (

// Config represents user supplied program input
type Config struct {
AnonymizationDataPath string
InputPath string
OutputPath string
CustomAnonymizationMappingPath string
IsVerbose bool
IsLmExport bool
IsProofWriter bool
AnonymizationDataPath string
InputPath string
OutputPath string
CustomReplacementMapPath string
IsVerbose bool
IsLmExport bool
IsProofWriter bool
IsPersistReplacementMap bool
}

// LoadAndValidate loads values from user supplied input into Config struct and validates them
Expand All @@ -23,13 +24,14 @@ func (c *Config) LoadAndValidate() {

flag.Func("i", "Path to input file containing logs to be anonymized", validateInput(c.InputPath))

flag.Func("c", "Path to input file containing custom anonymization mappings", validateInput(c.CustomAnonymizationMappingPath))
flag.Func("c", "Path to input file containing custom anonymization mappings", validateInput(c.CustomReplacementMapPath))

flag.Func("o", "Path to output file (default: Stdout)", validateOutput(c.OutputPath))

flag.BoolVar(&c.IsVerbose, "v", false, "Enable verbose logging (default: Disabled)")
flag.BoolVar(&c.IsLmExport, "e", false, "Change input file type to LM export (default: LM Backup)")
flag.BoolVar(&c.IsProofWriter, "p", true, "Disable proof wrtier (default: Enabled)")
flag.BoolVar(&c.IsProofWriter, "p", true, "Disable proof writer (default: Enabled)")
flag.BoolVar(&c.IsPersistReplacementMap, "r", true, "Disable persistent (per session) replacement map (default: Enabled)")

flag.Parse()

Expand Down
8 changes: 4 additions & 4 deletions internal/loader/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ import (
"strings"
)

func LoadCustomAnonymizationMapping(path string) (map[string]string, error) {
customMapping := make(map[string]string)
func LoadCustomReplacementMap(path string) (map[string]string, error) {
customReplacementMap := make(map[string]string)

file, err := os.OpenFile(path, os.O_RDONLY, os.ModePerm)
if err != nil {
Expand All @@ -30,14 +30,14 @@ func LoadCustomAnonymizationMapping(path string) (map[string]string, error) {
originalValue := values[0]
newValue := values[1]

customMapping[originalValue] = newValue
customReplacementMap[originalValue] = newValue
}

if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("error reading custom anonymization mapping: %w", err)
}

return customMapping, nil
return customReplacementMap, nil
}

// LoadAnonymizationData() loads anonymization data from given directory and returns it in a map format of: [filename][]values. Anonymization data is needed for the purposes of masking original values.
Expand Down
16 changes: 8 additions & 8 deletions internal/loader/loader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,21 @@ import (
"github.com/stretchr/testify/assert"
)

func TestAnonimizer_LoadCustomAnonymizationMapping(t *testing.T) {
func TestAnonimizer_LoadCustomReplacementMap(t *testing.T) {
tests := []struct {
name string
customAnonymizationMappingPath string
expectedMapping map[string]string
name string
customReplacementMapPath string
expectedMapping map[string]string
}{
{
name: "Test Loading Custom Anonymization Mapping",
customAnonymizationMappingPath: "../../tests/data/custom_mappings.txt",
expectedMapping: map[string]string{"replace_this": "with_that", "test123": "test1234", "test_custom_replacement": "test_custom_replacement123"},
name: "Test Loading Custom Anonymization Mapping",
customReplacementMapPath: "../../tests/data/custom_mappings.txt",
expectedMapping: map[string]string{"replace_this": "with_that", "test123": "test1234", "test_custom_replacement": "test_custom_replacement123"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
mapping, err := LoadCustomAnonymizationMapping(tt.customAnonymizationMappingPath)
mapping, err := LoadCustomReplacementMap(tt.customReplacementMapPath)
if err != nil {
t.Fatal(err)
}
Expand Down
5 changes: 5 additions & 0 deletions internal/proof/constants.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package proof

const (
// ProofFilename is the name of the file that CreateProofWriter creates (via os.Create) when the proof writer is enabled.
ProofFilename = "proof.ndjson"
)
12 changes: 7 additions & 5 deletions internal/proof/proof.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ import (
"github.com/logmanager-oss/logveil/internal/files"
)

// Proof is a single anonymization proof record pairing an original value with the value that replaced it.
// The struct tags map the fields to the JSON keys "original" and "new".
type Proof struct {
OriginalValue string `json:"original"`
NewValue string `json:"new"`
}

type ProofWriter struct {
IsEnabled bool
writer *bufio.Writer
Expand All @@ -19,7 +24,7 @@ type ProofWriter struct {

func CreateProofWriter(config *config.Config, openFiles *files.FilesHandler) (*ProofWriter, error) {
if config.IsProofWriter {
file, err := os.Create("proof.json")
file, err := os.Create(ProofFilename)
if err != nil {
return nil, fmt.Errorf("creating/opening proof file: %v", err)
}
Expand All @@ -40,10 +45,7 @@ func (p *ProofWriter) Write(originalValue string, newValue string) {
return
}

proof := struct {
OriginalValue string `json:"original"`
NewValue string `json:"new"`
}{
proof := &Proof{
OriginalValue: originalValue,
NewValue: newValue,
}
Expand Down
4 changes: 2 additions & 2 deletions internal/proof/proof_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func TestProof_Write(t *testing.T) {
p.Write(tt.originalValue, tt.newValue)
p.Flush()

file, err := os.OpenFile("proof.json", os.O_RDWR|os.O_CREATE, 0644)
file, err := os.OpenFile(ProofFilename, os.O_RDWR|os.O_CREATE, 0644)
if err != nil {
t.Fatal(err)
}
Expand All @@ -62,7 +62,7 @@ func TestProof_Write(t *testing.T) {

assert.Equal(t, tt.expectedOutput, buf.String())

os.Remove("proof.json")
os.Remove(ProofFilename)
})
}
}
Loading

0 comments on commit 2045124

Please sign in to comment.