diff --git a/cmd/logveil/logveil.go b/cmd/logveil/logveil.go new file mode 100644 index 0000000..aa6ec67 --- /dev/null +++ b/cmd/logveil/logveil.go @@ -0,0 +1,67 @@ +package logveil + +import ( + "bufio" + "log/slog" + "os" + + "github.com/logmanager-oss/logveil/internal/anonymizer" + "github.com/logmanager-oss/logveil/internal/flags" + "github.com/logmanager-oss/logveil/internal/loader" + "github.com/logmanager-oss/logveil/internal/proof" + "github.com/logmanager-oss/logveil/internal/runner" +) + +func Run() { + slog.Info("Anonymization process started...") + + anonymizingDataDir, inputPath, outputPath, isVerbose, isLmExport, isProofWriter := flags.LoadAndValidate() + + if isVerbose { + slog.SetLogLoggerLevel(slog.LevelDebug) + } + + inputReader, err := os.Open(inputPath) + if err != nil { + return + } + defer inputReader.Close() + + var outputFile *os.File + if outputPath != "" { + outputFile, err := os.Create(outputPath) + if err != nil { + return + } + defer outputFile.Close() + } else { + outputFile = os.Stdout + } + + outputWriter := bufio.NewWriter(outputFile) + defer outputWriter.Flush() + + anonymizingData, err := loader.Load(anonymizingDataDir) + if err != nil { + slog.Error("loading anonymizing data from dir %s: %v", anonymizingDataDir, err) + return + } + proofWriter := proof.New(isProofWriter) + anonymizer := anonymizer.New(anonymizingData, proofWriter) + + if isLmExport { + err := runner.AnonymizeLmExport(inputReader, outputWriter, anonymizer) + if err != nil { + slog.Error("reading lm export input file %s: %v", inputReader.Name(), err) + return + } + } else { + err := runner.AnonymizeLmBackup(inputReader, outputWriter, anonymizer) + if err != nil { + slog.Error("reading lm backup input file %s: %v", inputReader.Name(), err) + return + } + } + + slog.Info("All done. Exiting...") +} diff --git a/cmd/main.go b/cmd/main.go index 9f0cf5d..f1c1e3b 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -1,7 +1,7 @@ package main -import "github.com/logmanager-oss/logveil/internal/anonymizer" +import "github.com/logmanager-oss/logveil/cmd/logveil" func main() { - anonymizer.Run() + logveil.Run() } diff --git a/examples/.DS_Store b/examples/.DS_Store new file mode 100644 index 0000000..50f486d Binary files /dev/null and b/examples/.DS_Store differ diff --git a/examples/anon_data/msg.dst_iface b/examples/anon_data/dst_iface similarity index 100% rename from examples/anon_data/msg.dst_iface rename to examples/anon_data/dst_iface diff --git a/examples/anon_data/msg.dst_ip b/examples/anon_data/dst_ip similarity index 100% rename from examples/anon_data/msg.dst_ip rename to examples/anon_data/dst_ip diff --git a/examples/anon_data/msg.ip b/examples/anon_data/ip similarity index 100% rename from examples/anon_data/msg.ip rename to examples/anon_data/ip diff --git a/examples/anon_data/msg.name b/examples/anon_data/name similarity index 100% rename from examples/anon_data/msg.name rename to examples/anon_data/name diff --git a/examples/anon_data/msg.organization b/examples/anon_data/organization similarity index 100% rename from examples/anon_data/msg.organization rename to examples/anon_data/organization diff --git a/examples/anon_data/source-names-cz.txt b/examples/anon_data/source-names-cz.txt deleted file mode 100644 index aee4697..0000000 --- a/examples/anon_data/source-names-cz.txt +++ /dev/null @@ -1,601 +0,0 @@ -Miloslav Illéš -Ladislav Došek -Kateřina Janečková -Josef Varga -Miroslav Škamrala -Jiří Filip -Patrik Topič -Petr Fabiánek -Andrea Randýsková -Jaroslava Strnadová -Alena Čuříková -Jan Berger -Anna Ellingerová -Eliška Halajová -Kamila Tomešová -Vojtěch Le Nguyen -Šárka Haňáková -Lenka Ouvínová -Antonín Miksa -Marie Scheidelová -Bohuslava Hlaváčová -Stanislava Janáková -Simona Šíchová -Jitka Konečníková -Jiřina Pečenková -Bronislav Machů -Zdeňka Hofmanová -Veronika Mandátová -Jaroslav Bukovjan -Irena Dobrovolná -Jana Schmidtová -František Tomeček -Pavel Voženílek -Günter Andreas Mlejnecký -Roman Miler -Jindřich Grác -Liana Hutěčková -Helena Danihelková -Hana Rybová -Martin Palička -David Černý -Ludvík Pospíšil -Nela Ryšavá -Věra Tichá -Eva Lukášová -Václav Krčílek -Miloš Večeřa -Magdalena Rašmanová -Vladimíra Kůrková -Danuše Žatecká -Tomáš Matlocha -Vladimír Pelikán -Marcela Pelikánová -Zdeněk Bartal -Ludmila Žůrková -Milan Burián -Jarmila Hamplová -Michal Vrba -Lucie Hladíková -Daniel Pachovský -Ruslan Filip Ondryáš -Lukáš Sedlecký -Petra Šrámková -Olga Ludvíková -Monika Sýkorová -Kristina Viznarová -Karel Zadražil -Soňa Kašpárková -Karolína Nguyen Thi -Stanislav Nový -Robert Zedníček -Juraj Žufan -Ctibor Znišťal -Blahoslav Sedlák -Jaromír Buchta -Libor Dušek -Michaela Baršová -Song Hui Petržilková -Denis Václavík -Vlasta Kalousová -Zdena Zvonařová -Marek Srnec -Daša Canalová -Štěpán Štrouchal -Ondřej Černík -Dana Feslová -Daniela Rodáková -Matěj Pinkas -Iva Vávrová -Markéta Ernstová -Božena Kratochvílová -Viking Kašpar -Hildegarda Kuchařová -Nicole Novotná -Filip Rigó -Adéla Cabáková -Gabriela Slováčková -Ivana Staňková -Jakub Líbal -Zuzana Folwarczná -Jan Kadlec -Stanislav Bužek -Václav Čajan -Jiří Batelka -Jaroslav Klabeček -Hana Sýkorová -Miriam Vostárková -Michal Gargula -Šárka Šmídová -Robert Jiříček -Dominik Matyáš -Pavel Novák -Vladimír Ostrovka -František Parzyk -Františka Karbanová -Josef Kakara -Daniel Veselka -Zuzana Myslivcová -Anna Kapešová -Roman Krasnyanyk -Vlasta Čechová -Marie Hájková -Jorga Bradlerová -Eva Čejková -Dáša Bodáková -Alena Navrátilová -Renata Macigová -Bohuslav Musil -Jaroslava Ochodková -Ivana Janačová -Tomáš Ognar -Jana Velechovská -Petr Kořínek -Pavol Přichystal -Martina Pytlíková -Lucie Louvarová -Ludmila Vošická -Zdeněk Lokaj -Milan Kokta -Libuše Králová -Miloslav Novotný -Lenka Kranerová -Lukáš Šopík -Miloslava Pešková -Martin Benda -Jiřina Strnadová -Miroslav Bakalár -Ilona Pálková -Ladislav Fabík -Jolana Kohoutová -Radek Ivan -Klára Babáková -Eduardo Luis Fuksa -Věra Řezníčková -Karel Smolík -David Záruba -Markéta Machníková -Marcela Švecová -Filip Ramzer -Veronika Kleinová -Antonín Slavík -Leopold Groulík -Jarmila Hladíková -Radimír Kos -Ivan Kostka -Hynek Šulc -Rudolf Paulus -Miroslava Chmelíčková -Marek Jiruš -Gabriela Jágerová -Anežka Jaklová -Petra Samková -Jerett Josef Špalek -Terezia Zapletalová -Zdeňka Barlová -Alenka Kratochvílová -Milena Bradáčová -Jakub Charvát -Dana Filipová -Barbora Pluhařová -Drahomíra Divišová -Kateřina Plačková -Tuan Berka -Helena Sehnalíková -Ondřej Lakatoš -Irena Holišová -Luděk Dorazil -Dušan Kučera -Peter Popelář -Matěj Horáček -Daniela Bílková -Alessio Antonio Hašek -Iveta Vrabcová -Aurelie Stehlíková -Ingrid Otáhalová -Jitka Sněhotová -Lucia Korbelová -Alžběta Řechtáčková -Nicola Šrámková -Aneta Jeřábková -Jana Jílková -Pavel Adamíček -Michaela Stará -Zdeňka Jandová -Miroslav Venzara -Marie Kletečková -Veronika Bednáriková -Jan Vršecký -Eva Mužíková -Martin Kadidlo -Linda Bártíková -Jiří Závadský -Ondřej Macek -Věra Holečková -Libuše Šustrová -Milan Baran -Josef Jareš -Jaroslav Ježek -Petr Řičař -František Cíla -Blanka Josková -Miroslava Černá -Kristýna Štolcová -Stanislav Rod -Simona Kloudová -Petra Mezlová -Jarmila Muczková -Hana Vozňáková -Jiřina Domorádová -Kateřina Kanderová -Radek Jech -Ferdinand Krčmař -Václav Hejdušek -Vratislav Kerner -Tomáš Kment -Michael Šmídl -Jaroslava Štěchová -Lucie Veselá -Renata Dytrychová -Jakub Wagner -Dan Neumann -Zdeněk Kučera -Anna Gáborová -Pavlína Křížková -Daniel Čermák -Marta Hammerschmiedová -Tadeáš Pavlík -Lenka Dvořáková -Vítězslav Rambousek -Tereza Hadvičáková -Karel Friedrich -Ivana Šandová -Dominik Vandlíček -Růžena Hájková -Nela Srněnská -Gabriela Dragounová -Ladislav Braun -Michala Halíková -Matěj Šerák -Zuzana Ligocká -Miloslav Jelen -Michal Němec -Kornelie Seibertová -Alena Hanzlová -Adéla Koláčková -Jan Joshua Kopčil -Filip Bárek -Ung Odehnal -Sabina Kopáčová -Božena Šestáková -Vlastislav Nguyen Minh -Klára Vičarová -Martina Sojková -Anežka Třísková -Lea Hanušová -Pavla Vénosová -Oldřich Králíček -Ludmila Vrzalová -Ivan Toman -Miroslaw Zbigniew Kratochvíl -Vladimír Rubina -Olga Závrská -Marcela Pražáková -Helena Janoušková -Vlasta Hrdličková -David Tesárek -Jaromír Pícha -Jitka Červenková -Štěpánka Strnadová -Lukáš Smith -Květoslava Hantlová -Andrea Daňková -Antonín Tyrlik -Markéta Cahová -Rudolf Rejnart -Iakov Havriš -Lidie Šmídová -Zdenka Kňourková -Monika Císařová -Radim Knobloch -Monika Ouředníková -Jakub Sháněl -Jana Kolářová -Czeslaw Hrtoň -Ludmila Maštalířová -Robert Horák -Hana Šibalová -Jiří Stoy -Daniel Junghans -Jindřich Šteffl -Pavel Cabák -Renáta Cibulková -Věra Kyselová -Markéta Slípková -Radka Stránská -Jan Suchý -Bohuslav Janiš -Aneta Kopecká -Lukáš Sokol -Helena Foffová -Mykhaylo Hejtmánek -Lucie Synková -Mariia Fořtová -Marcela Rumlová -David Bělík -Marie Pešková -Renata Hřídelová -Václav Mánek -Josef Straka -Zdeňka Račková -Petr Prášek -Zdeněk Janda -Jaroslava Dalihodová -Milan Plášek -Magdalena Sobanská -Eva Dvořáková -Ivana Hinštová -Aleš Bolovanský -Ilona Šmerdová -Tomáš Alinče -Karel Páral -Alena Němcová -Jaroslav Peterka -František Svoboda -Jarmila Dědičová -Michal Pospíšil -Roman Hrstka -Lenka Pudová -Anna Buroňová -Ondřej Dopita -Martin Kleber -Miroslav Pindur -Vlastimil Šíp -Libor Svatý -Kateřina Kasálková -Vladimír Zaremba -Petra Machová -Ladislav Loučka -Mária Klemmová -Adéla Pelikánová -Stanislav Rýc -Patrick Drha -Martina Matušková -Lumír Válka -Libuše Procházková -Květoslava Vacková -Ginette Loudová -Dagmar Zvonková -Veronika Suchánková -Růžena Vaníčková -Antonín Lach -Samuel Hosnedl -Heidi Johnová -Květuše Jeřábková -Miroslava Míšková -Jitka Kotrčová -Šárka Šrautová -Taťána Furmánková -Soňa Serynková -Pavla Popelová -Božena Borkovcová -Milena Lušťková -Jonáš Vyhlídal -Darko Vojtěch -Adam Valder -Jiřina Pavelková -Vlastislav Urban -Michaela Oličová -Anežka Svobodová -Tereza Dzurková -Radek Vondráček -Táňa Michaličková -Marta Brachtlová -Filip Ernest -Mario Blažek -Miloslava Žvatorová -Stanislava Skřičková -Vlasta Bajerová -Dana Kociánová -Vratislav Všetečka -Lucie Neguczová -Michal Foldyna -Dušan Sasýn -Veronika Neničková -Anna Malíková -Helena Gregorová -Hana Makulová -Diana Wágnerová -Igor Petr -Zdeňka Marková -Marcela Bělíková -Miroslava Šarayová -Petr Drga -Jitka Applová -Alena Svátková -Michaela Ebenstreitová -Vilém Ševčík -Eliška Bětíková -Marie Šubrtová -Martin Buk -Matěj Pokorný -Kateřina Šmídová -Ivana Vážanová -Tomáš Hlinka -David Schoř -Philipp Hnilica -Jana Gorolová -Jan Semjon -Jiří Lieber -Vladislav Feichtinger -Miloslav Stružka -Eva Kynclová -Radek Toman -Nikola Trvajová -Vendula Štěpánková -Cvetko Kříž -Josef Čech -Čestmír Řeháček -Roman Metelka -Štěpánka Joselevičová -Thi Thanh Lieu Fialová -Věra Bučková -Ladislav Kořínek -Jaroslav Trýb -Zdeněk Maňásek -Pavel Kohout -Bedřich Mati -Miroslav Čelechovský -Ludmila Kubínová -Daniel Rehák -Karolína Adamčíková -Berta Poláchová -Iva Kozlíčková -Theodor Zeman -Jaromír Uher -Zdenka Pecková -Olga Zemanová -Tereza Markalousová -Vladislava Markovičová -Andrea Černá -Filip Král -Milan Hanuš -František Jatagandzidis -Martina Janoušková -Marek Romanyuk -Václav Břenek -Iveta Drgová -Jaroslava Trnková -Lukáš Fečko -Jiřina Bartoňková -Renata Vokřínková -Slavomír Nguyen -Karel Martsynyuk -Ondřej Jonáš -Petra Veselá -Milena Kloudová -Tamara Korejtková -Lenka Wildová -Rostislav Výborný -Edita Pokorná -Stanislav Hostýnek -Antonín Gazda -Markéta Mašková -Soňa Kabilková -Bohumila Kolerusová -Adam Roháček -Vladimír Vojkůvka -Truong Sedláček -Dana Severová -Blažej Jandura -Magdaléna Davidková -Ewa Hlavatá -Klára Strnisková -Sabina Mužíková -Jakub Konečný -Jarmila Pilařová -Erika Šubová -Božena Hrušková -Ctibor Štěpánek -Dmitri Švejcar -Anna Slabá -Petr Čechmánek -Vladimír Šantavý -Zdeněk Kleinbauer -Jaroslav Smetanka -Jiří Kocourek -Radomíra Mayerová -Milan Matásek -Josef Bednařík -Nikola Rozsívalová -Adriana Horáková -František Poula -Barbora Hromasová -Robert Hradecký -Dagmar Grivalská -Ladislav Sekyra -Marie Zikmundová -Martina Machatková -Věra Poláková -Jana Menclová -Filip Šimko -Drahomíra Kosíková -Martin Šabík -Mirijam Baierlová -Ludmila Chlupová -Thi Lan Huong Šístková -Jolana Frydryšková -Lukáš Selucký -Jan Vlodarčík -Hana Stibůrková -Andrea Poulová -Petra Kostková -Miroslav Hock -Zoran Hýbl -Antonín Špička -Tomáš Tóth -Albína Lencová -Pavel Hlaváček -Libor Toman -Ondřej Šenk -Daniela Čapková -Radek Kováč -Radim Novák -Kamila Hrdinová -Jaroslava Krásová -Miluše Jugová -Vlasta Vopařilová -Karel Kudža -Zuzana Krátká -Iva Fabianová -Stanislav Šrom -Tetyana Kalousová -Alois Slavíček -Anita Agata Němečková -Libuše Solichová -Eva Potočárová -Samuel Holub -Michal Štupalský -Kateřina Šlichtová -Lenka Staňková -Miroslava Švecová -Renata Davidová -Rostislav Štěpán -Valerie Šilhavá -Yuriy Pecuch -Lucie Humlová -Václav Pisklák -Miloslav Kavoň -Božena Chabadová -Natálie Bosáková -Miloslava Pazderová -Zdenka Tenzerová -Helena Mandová -Jaromír Komůrka -Jitka Jurkechová -Iryna Divácká -Valentina Janšová -Alexandra Tancošová -Dominik Pitel -Markéta Menšíková -Alena Kadeřábková -Marcela Červenková -Jarmila Kopřivová -Iveta Stejskalová -Magdaléna Rušarová -Jakub Martinek -Tereza Vegrichtová -Přemysl Babica -Irena Kasíková -Roman Bobot -Emil Šperlín -Šárka Hronovská -Pavla Walterová -Ulrika Škorupová -Aleš Směja -Michaela Chromková -Zdeňka Hájková -Denisa Nečasová -Veronika Bognerová -Vojtěch Berger - diff --git a/examples/anon_data/msg.src_iface b/examples/anon_data/src_iface similarity index 100% rename from examples/anon_data/msg.src_iface rename to examples/anon_data/src_iface diff --git a/examples/anon_data/msg.src_ip b/examples/anon_data/src_ip similarity index 100% rename from examples/anon_data/msg.src_ip rename to examples/anon_data/src_ip diff --git a/examples/anon_data/msg.username b/examples/anon_data/username similarity index 100% rename from examples/anon_data/msg.username rename to examples/anon_data/username diff --git a/examples/logs/.DS_Store b/examples/logs/.DS_Store new file mode 100644 index 0000000..16e9c9b Binary files /dev/null and b/examples/logs/.DS_Store differ diff --git a/examples/logs/lm-2024-06-09_0000.gz b/examples/logs/lm-2024-06-09_0000.gz new file mode 100644 index 0000000..88cdd22 Binary files /dev/null and b/examples/logs/lm-2024-06-09_0000.gz differ diff --git a/go.mod b/go.mod index 662393f..dc2e5f8 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require github.com/stretchr/testify v1.9.0 require ( github.com/davecgh/go-spew v1.1.1 // indirect + github.com/nsf/jsondiff v0.0.0-20230430225905-43f6cf3098c1 github.com/pmezard/go-difflib v1.0.0 // indirect golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7 gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index fd1583e..6bc9d5c 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/nsf/jsondiff v0.0.0-20230430225905-43f6cf3098c1 h1:dOYG7LS/WK00RWZc8XGgcUTlTxpp3mKhdR2Q9z9HbXM= +github.com/nsf/jsondiff v0.0.0-20230430225905-43f6cf3098c1/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= diff --git a/internal/anonymizer/anonymizer.go b/internal/anonymizer/anonymizer.go index 9e73aef..f9cae9e 100644 --- a/internal/anonymizer/anonymizer.go +++ b/internal/anonymizer/anonymizer.go @@ -5,50 +5,48 @@ import ( "log/slog" "strings" + "github.com/logmanager-oss/logveil/internal/proof" "golang.org/x/exp/rand" ) type Anonymizer struct { - csvData []map[string]string - anonData map[string][]string - randFunc func(int) int + anonData map[string][]string + randFunc func(int) int + proofWriter *proof.Proof } -func New(csvData []map[string]string, anonData map[string][]string) *Anonymizer { +func New(anonData map[string][]string, proofWriter *proof.Proof) *Anonymizer { return &Anonymizer{ - csvData: csvData, - anonData: anonData, - randFunc: rand.Intn, + anonData: anonData, + randFunc: rand.Intn, + proofWriter: proofWriter, } } -func (an *Anonymizer) anonymize() []string { - var output []string - for _, logLine := range an.csvData { - for field, value := range logLine { - if field == "raw" { - continue - } +func (an *Anonymizer) Anonymize(logLine map[string]string) string { + for field, value := range logLine { + if field == "raw" { + continue + } + + if value == "" { + continue + } - if value == "" { - continue - } + if anonValues, exists := an.anonData[field]; exists { + newAnonValue := anonValues[an.randFunc(len(anonValues))] - if anonValues, exists := an.anonData[field]; exists { - newAnonValue := anonValues[an.randFunc(len(anonValues))] + an.proofWriter.Write(value, newAnonValue) - slog.Debug(fmt.Sprintf("Replacing the values for field %s. From %s to %s.\n", field, value, newAnonValue)) + slog.Debug(fmt.Sprintf("Replacing the values for field %s. From %s to %s.\n", field, value, newAnonValue)) - logLine["raw"] = strings.Replace(logLine["raw"], value, newAnonValue, -1) - } + logLine["raw"] = strings.Replace(logLine["raw"], value, newAnonValue, -1) } - - output = append(output, fmt.Sprint(logLine["raw"])) } - return output + return logLine["raw"] } -func (an *Anonymizer) setRandFunc(randFunc func(int) int) { +func (an *Anonymizer) SetRandFunc(randFunc func(int) int) { an.randFunc = randFunc } diff --git a/internal/anonymizer/anonymizer_test.go b/internal/anonymizer/anonymizer_test.go index d3321cd..75d01a5 100644 --- a/internal/anonymizer/anonymizer_test.go +++ b/internal/anonymizer/anonymizer_test.go @@ -3,40 +3,38 @@ package anonymizer import ( "testing" - "github.com/logmanager-oss/logveil/internal/parser" + "github.com/logmanager-oss/logveil/internal/loader" + "github.com/logmanager-oss/logveil/internal/proof" "github.com/stretchr/testify/assert" ) func TestAnonimizer_AnonymizeData(t *testing.T) { tests := []struct { - name string - anonDataDir string - inputFile string - expectedOutput []string + name string + anonymizingDataDir string + input map[string]string + expectedOutput string }{ { - name: "Test AnonymizeData", - anonDataDir: "../../examples/anon_data", - inputFile: "../../examples/logs/example_logs.csv", - expectedOutput: []string{"{\"@timestamp\": \"2024-06-05T14:59:27.000+00:00\", \"msg.src_ip\":\"10.10.10.1\", \"username\":\"miloslav.illes\", \"organization\":\"Microsoft\"}"}, + name: "Test AnonymizeData", + anonymizingDataDir: "../../examples/anon_data", + input: map[string]string{"@timestamp": "2024-06-05T14:59:27.000+00:00", "src_ip": "10.10.10.1", "username": "miloslav.illes", "organization": "Microsoft", "raw": "2024-06-05T14:59:27.000+00:00, 10.10.10.1, miloslav.illes, Microsoft"}, + expectedOutput: "2024-06-05T14:59:27.000+00:00, 10.20.0.53, ladislav.dosek, Apple", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - fieldNames, csvData, err := parser.ParseCSV(tt.inputFile) + anonymizingData, err := loader.Load(tt.anonymizingDataDir) if err != nil { - t.Fatalf("reading input file %s: %v", tt.inputFile, err) + t.Fatalf("loading anonymizing data from dir %s: %v", tt.anonymizingDataDir, err) } - anonData, err := parser.ParseAnonData(tt.anonDataDir, fieldNames) - if err != nil { - t.Fatalf("loading anonymizing data from dir %s: %v", tt.anonDataDir, err) - } - - anonymizer := New(csvData, anonData) - anonymizer.setRandFunc(func(int) int { return 0 }) - output := anonymizer.anonymize() + proofWriter := proof.New(true) + anonymizer := New(anonymizingData, proofWriter) + // Disabling randomization so we know which values to expect + anonymizer.SetRandFunc(func(int) int { return 1 }) + output := anonymizer.Anonymize(tt.input) assert.Equal(t, tt.expectedOutput, output) }) diff --git a/internal/anonymizer/runner.go b/internal/anonymizer/runner.go deleted file mode 100644 index f9261e7..0000000 --- a/internal/anonymizer/runner.go +++ /dev/null @@ -1,44 +0,0 @@ -package anonymizer - -import ( - "fmt" - "log/slog" - - "github.com/logmanager-oss/logveil/internal/flags" - "github.com/logmanager-oss/logveil/internal/parser" - "github.com/logmanager-oss/logveil/internal/writer" -) - -func Run() { - slog.Info("Anonymization process started...") - - anonDataDir, inputFile, outputFile := flags.Load() - - fieldNames, csvData, err := parser.ParseCSV(inputFile) - if err != nil { - slog.Error("reading input file %s: %v", inputFile, err) - return - } - - anonData, err := parser.ParseAnonData(anonDataDir, fieldNames) - if err != nil { - slog.Error("loading anonymizing data from dir %s: %v", anonDataDir, err) - return - } - - anonymizer := New(csvData, anonData) - anonymizedData := anonymizer.anonymize() - if outputFile != "" { - outputwriter := &writer.Output{ - Output: anonymizedData, - } - err := outputwriter.Write(outputFile) - if err != nil { - slog.Error("writing anonymized data to output file %s: %v", outputFile, err) - } - } else { - fmt.Println(anonymizedData) - } - - slog.Info("All done. Exiting...") -} diff --git a/internal/flags/flags.go b/internal/flags/flags.go index 7ccd760..4c15ad3 100644 --- a/internal/flags/flags.go +++ b/internal/flags/flags.go @@ -3,43 +3,59 @@ package flags import ( "errors" "fmt" - "io/fs" "os" ) -type input string - -func (f *input) String() string { - return fmt.Sprint(*f) -} +func validateInput(inputPath string) func(string) error { + return func(flagValue string) error { + fileInfo, err := os.Stat(flagValue) + if err != nil { + return err + } -func (f *input) Set(value string) error { - _, err := os.Stat(value) - if err != nil { - if errors.Is(err, fs.ErrNotExist) { - return fmt.Errorf("Provided file or dir %s does not exist. Aborting.", value) + if fileInfo.IsDir() { + return fmt.Errorf("Output file %s cannot be a directory.\n", flagValue) } - } - *f = input(value) + inputPath = flagValue - return nil + return nil + } } -type output string +func validateOutput(outputPath string) func(string) error { + return func(flagValue string) error { + fileInfo, err := os.Stat(flagValue) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil + } + return err + } -func (f *output) String() string { - return fmt.Sprint(*f) -} + if fileInfo.IsDir() { + return fmt.Errorf("Output file %s cannot be a directory.\n", flagValue) + } -func (f *output) Set(value string) error { - file, err := os.Create(value) - if err != nil { - return err + outputPath = flagValue + + return nil } - defer file.Close() +} - *f = output(value) +func validateDir(dir string) func(string) error { + return func(flagValue string) error { + fileInfo, err := os.Stat(flagValue) + if err != nil { + return err + } - return nil + if !fileInfo.IsDir() { + return fmt.Errorf("Path to anonymization data %s needs to be a directory.\n", flagValue) + } + + dir = flagValue + + return nil + } } diff --git a/internal/flags/initalize.go b/internal/flags/initalize.go index 46a5f8a..f5f4024 100644 --- a/internal/flags/initalize.go +++ b/internal/flags/initalize.go @@ -2,25 +2,23 @@ package flags import ( "flag" - "log/slog" ) -func Load() (string, string, string) { - var anonDataDir input - flag.Var(&anonDataDir, "d", "Path to directory with anonymizing data") +func LoadAndValidate() (string, string, string, bool, bool, bool) { + var anonymizationDataPath string + flag.Func("d", "Path to directory with anonymizing data", validateDir(anonymizationDataPath)) - var inputFile input - flag.Var(&inputFile, "i", "Path to input file containing logs to be anonymized") + var inputPath string + flag.Func("i", "Path to input file containing logs to be anonymized", validateInput(inputPath)) - var outputFile output - flag.Var(&outputFile, "o", "Path to output file containing anonymized logs") + var outputPath string + flag.Func("o", "Path to output file (default: Stdout)", validateOutput(outputPath)) - var verbose = flag.Bool("v", false, "Enable verbose logging") - flag.Parse() + var isVerbose = flag.Bool("v", false, "Enable verbose logging (default: Disabled)") + var isLmExport = flag.Bool("e", false, "Change input file type to LM export (default: LM Backup)") + var isProofWriter = flag.Bool("p", true, "Disable proof wrtier (default: Enabled)") - if *verbose { - slog.SetLogLoggerLevel(slog.LevelDebug) - } + flag.Parse() - return anonDataDir.String(), inputFile.String(), outputFile.String() + return anonymizationDataPath, inputPath, outputPath, *isVerbose, *isLmExport, *isProofWriter } diff --git a/internal/inputs/backup.go b/internal/inputs/backup.go new file mode 100644 index 0000000..2f1ef82 --- /dev/null +++ b/internal/inputs/backup.go @@ -0,0 +1,64 @@ +package inputs + +import ( + "bufio" + "compress/gzip" + "encoding/json" + "fmt" + "io" + "os" + + "github.com/logmanager-oss/logveil/internal/anonymizer" +) + +type LmBackup struct { + Source LmLog `json:"_source"` +} + +type LmLog struct { + Raw string `json:"raw"` + Msg map[string]interface{} `json:"msg"` +} + +func AnonymizeLmBackup(input *os.File, output io.Writer, anonymizer *anonymizer.Anonymizer) error { + gzReader, err := gzip.NewReader(input) + if err != nil { + return fmt.Errorf("error creating gzip reader: %w", err) + } + defer gzReader.Close() + + scanner := bufio.NewScanner(gzReader) + + for scanner.Scan() { + line := scanner.Bytes() + + lmBackup := &LmBackup{} + err = json.Unmarshal(line, &lmBackup) + if err != nil { + return fmt.Errorf("unmarshaling log line: %w", err) + } + + // Convert map[string]interface{} to map[string]string as requred by anonymizer + logLine := make(map[string]string) + for key, value := range lmBackup.Source.Msg { + strKey := fmt.Sprintf("%v", key) + strValue := fmt.Sprintf("%v", value) + + logLine[strKey] = strValue + } + logLine["raw"] = lmBackup.Source.Raw + + anonymizedLogLine := anonymizer.Anonymize(logLine) + + _, err = io.WriteString(output, fmt.Sprintln(anonymizedLogLine)) + if err != nil { + return fmt.Errorf("writing anonymized data: %v", err) + } + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("error reading input: %w", err) + } + + return nil +} diff --git a/internal/inputs/backup_test.go b/internal/inputs/backup_test.go new file mode 100644 index 0000000..b1d3cec --- /dev/null +++ b/internal/inputs/backup_test.go @@ -0,0 +1,75 @@ +package inputs + +import ( + "bytes" + "os" + "testing" + + "github.com/logmanager-oss/logveil/internal/anonymizer" + "github.com/logmanager-oss/logveil/internal/parser" + "github.com/logmanager-oss/logveil/internal/proof" + "github.com/logmanager-oss/logveil/internal/utils" + "github.com/stretchr/testify/assert" +) + +func TestLmBackup(t *testing.T) { + tests := []struct { + name string + isProofWriterEnabled bool + inputFilename string + anonDataDir string + expectedOutput string + expectedProof []map[string]interface{} + }{ + { + name: "Test Test LM Backup Anonymizer", + isProofWriterEnabled: true, + inputFilename: "../../examples/logs/lm-2024-06-09_0000.gz", + anonDataDir: "../../examples/anon_data", + expectedOutput: "<189>date=2024-11-06 time=12:29:25 devname=\"LM-FW-70F-Praha\" devid=\"FGT70FTK22012016\" eventtime=1730892565525108329 tz=\"+0100\" logid=\"0000000013\" type=\"traffic\" subtype=\"forward\" level=\"notice\" vd=\"root\" srcip=10.20.0.53 srcport=57158 srcintf=\"lan1\" srcintfrole=\"wan\" dstip=227.51.221.89 dstport=80 dstintf=\"lan1\" dstintfrole=\"lan\" srccountry=\"China\" dstcountry=\"Czech Republic\" sessionid=179455916 proto=6 action=\"client-rst\" policyid=9 policytype=\"policy\" poluuid=\"d8ccb3e4-74d4-51ef-69a3-73b41f46df74\" policyname=\"Gitlab web from all\" service=\"HTTP\" trandisp=\"noop\" duration=6 sentbyte=80 rcvdbyte=44 sentpkt=2 rcvdpkt=1 appcat=\"unscanned\" srchwvendor=\"H3C\" devtype=\"Router\" mastersrcmac=\"00:23:89:39:a4:ef\" srcmac=\"00:23:89:39:a4:ef\" srcserver=0 dsthwvendor=\"H3C\" dstdevtype=\"Router\" masterdstmac=\"00:23:89:39:a4:fa\" dstmac=\"00:23:89:39:a4:fa\" dstserver=0\n", + expectedProof: []map[string]interface{}{ + {"original": "dev-uplink", "new": "lan1"}, + {"original": "95.80.197.108", "new": "227.51.221.89"}, + {"original": "27.221.126.209", "new": "10.20.0.53"}, + {"original": "wan1-lm", "new": "lan1"}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + input, err := os.Open(tt.inputFilename) + if err != nil { + t.Fatal(err) + } + defer input.Close() + + anonData, err := parser.LoadAnonData(tt.anonDataDir) + if err != nil { + t.Fatal(err) + } + + proofWriter := proof.New(tt.isProofWriterEnabled) + anonymizer := anonymizer.New(anonData, proofWriter) + // Disabling randomization so we know which values to expect + anonymizer.SetRandFunc(func(int) int { return 1 }) + + var output bytes.Buffer + err = AnonymizeLmBackup(input, &output, anonymizer) + if err != nil { + t.Fatal(err) + } + assert.Equal(t, tt.expectedOutput, output.String()) + + proofWriter.Close() + + actualProof, err := utils.UnpackProofOutput() + if err != nil { + t.Fatal(err) + } + + assert.ElementsMatch(t, tt.expectedProof, actualProof) + + os.Remove("proof.json") + }) + } +} diff --git a/internal/inputs/export.go b/internal/inputs/export.go new file mode 100644 index 0000000..d97e0fc --- /dev/null +++ b/internal/inputs/export.go @@ -0,0 +1,47 @@ +package inputs + +import ( + "encoding/csv" + "fmt" + "io" + "os" + "strings" + + "github.com/logmanager-oss/logveil/internal/anonymizer" +) + +func AnonymizeLmExport(input *os.File, output io.Writer, anonymizer *anonymizer.Anonymizer) error { + csvReader := csv.NewReader(input) + + // First element of the csvReader contains field names + fieldNames, err := csvReader.Read() + if err != nil { + return err + } + + // Trimming prefix from field names + for i, fieldName := range fieldNames { + fieldNames[i] = strings.TrimPrefix(fieldName, "msg.") + } + + for { + row, err := csvReader.Read() + if err != nil { + break + } + + logLine := make(map[string]string) + for i, val := range row { + logLine[fieldNames[i]] = val + } + + anonymizedLogLine := anonymizer.Anonymize(logLine) + + _, err = io.WriteString(output, fmt.Sprintln(anonymizedLogLine)) + if err != nil { + return fmt.Errorf("writing anonymized data: %v", err) + } + } + + return nil +} diff --git a/internal/inputs/export_test.go b/internal/inputs/export_test.go new file mode 100644 index 0000000..f22caf0 --- /dev/null +++ b/internal/inputs/export_test.go @@ -0,0 +1,74 @@ +package inputs + +import ( + "bytes" + "os" + "testing" + + "github.com/logmanager-oss/logveil/internal/anonymizer" + "github.com/logmanager-oss/logveil/internal/parser" + "github.com/logmanager-oss/logveil/internal/proof" + "github.com/logmanager-oss/logveil/internal/utils" + "github.com/stretchr/testify/assert" +) + +func TestLmExport(t *testing.T) { + tests := []struct { + name string + isProofWriterEnabled bool + inputFilename string + anonDataDir string + expectedOutput string + expectedProof []map[string]interface{} + }{ + { + name: "Test LM Export Anonymizer", + isProofWriterEnabled: true, + inputFilename: "../../examples/logs/example_logs.csv", + anonDataDir: "../../examples/anon_data", + expectedOutput: "{\"@timestamp\": \"2024-06-05T14:59:27.000+00:00\", \"msg.src_ip\":\"10.20.0.53\", \"username\":\"ladislav.dosek\", \"organization\":\"Apple\"}\n", + expectedProof: []map[string]interface{}{ + {"original": "89.239.31.49", "new": "10.20.0.53"}, + {"original": "test.user@test.cz", "new": "ladislav.dosek"}, + {"original": "TESTuser.test.com", "new": "Apple"}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + input, err := os.Open(tt.inputFilename) + if err != nil { + t.Fatal(err) + } + defer input.Close() + + anonData, err := parser.LoadAnonData(tt.anonDataDir) + if err != nil { + t.Fatal(err) + } + + proofWriter := proof.New(tt.isProofWriterEnabled) + anonymizer := anonymizer.New(anonData, proofWriter) + // Disabling randomization so we know which values to expect + anonymizer.SetRandFunc(func(int) int { return 1 }) + + var output bytes.Buffer + err = AnonymizeLmExport(input, &output, anonymizer) + if err != nil { + t.Fatal(err) + } + assert.Equal(t, tt.expectedOutput, output.String()) + + proofWriter.Close() + + actualProof, err := utils.UnpackProofOutput() + if err != nil { + t.Fatal(err) + } + + assert.ElementsMatch(t, tt.expectedProof, actualProof) + + os.Remove("proof.json") + }) + } +} diff --git a/internal/loader/loader.go b/internal/loader/loader.go new file mode 100644 index 0000000..18e798a --- /dev/null +++ b/internal/loader/loader.go @@ -0,0 +1,54 @@ +package loader + +import ( + "bufio" + "fmt" + "log" + "log/slog" + "os" + "path/filepath" +) + +func Load(anonDataDir string) (map[string][]string, error) { + var anonData = make(map[string][]string) + + files, err := os.ReadDir(anonDataDir) + if err != nil { + log.Fatal(err) + } + + for _, file := range files { + if file.IsDir() { + continue + } + + data, err := loadAnonymizingData(filepath.Join(anonDataDir, file.Name())) + if err != nil { + return nil, fmt.Errorf("loading anonymizing data from file %s: %v", file.Name(), err) + } + + anonData[file.Name()] = data + slog.Debug(fmt.Sprintf("Loaded anonymizing data for field: %s; values loaded: %d\n", file.Name(), len(data))) + } + + return anonData, nil +} + +func loadAnonymizingData(filepath string) ([]string, error) { + anonDataFile, err := os.OpenFile(filepath, os.O_RDONLY, os.ModePerm) + if err != nil { + return nil, err + } + + var anonData []string + scanner := bufio.NewScanner(anonDataFile) + for scanner.Scan() { + anonData = append(anonData, scanner.Text()) + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("error reading anon data: %w", err) + } + + return anonData, anonDataFile.Close() +} diff --git a/internal/loader/loader_test.go b/internal/loader/loader_test.go new file mode 100644 index 0000000..9c89274 --- /dev/null +++ b/internal/loader/loader_test.go @@ -0,0 +1,57 @@ +package loader + +import ( + "bufio" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestAnonimizer_Anondataloader(t *testing.T) { + tests := []struct { + name string + anonDataDir string + expectedFields []string + }{ + { + name: "Test Anondataloader", + anonDataDir: "../../examples/anon_data", + expectedFields: []string{"dst_iface", "dst_ip", "ip", "name", "organization", "src_iface", "src_ip", "username"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + anonData, err := Load(tt.anonDataDir) + if err != nil { + t.Fatal(err) + } + + for field, value := range anonData { + assert.Contains(t, tt.expectedFields, field) + assert.Equal(t, readLines(t, filepath.Join(tt.anonDataDir, field)), value) + } + }) + } +} + +func readLines(t *testing.T, path string) []string { + file, err := os.Open(path) + if err != nil { + t.Fatal(err) + } + defer file.Close() + + var lines []string + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } + + if scanner.Err() != nil { + t.Fatal(err) + } + + return lines +} diff --git a/internal/parser/anondataparser.go b/internal/parser/anondataparser.go index 5806815..9c584f6 100644 --- a/internal/parser/anondataparser.go +++ b/internal/parser/anondataparser.go @@ -2,42 +2,33 @@ package parser import ( "bufio" - "errors" "fmt" - "io/fs" + "log" "log/slog" "os" "path/filepath" ) -// ParseAnonData reads text files from provided directory based on provided field names. -// In other words if file name matches one of the provided field names, it is loaded into the map[fieldName][]anonymizationValues. -// Returned map will be used in anonymization process to match original values with corresponding anonymization values. -func ParseAnonData(anonDataDir string, fieldNames []string) (map[string][]string, error) { +func LoadAnonData(anonDataDir string) (map[string][]string, error) { var anonData = make(map[string][]string) - for i := range fieldNames { - if fieldNames[i] == "raw" { - continue - } + files, err := os.ReadDir(anonDataDir) + if err != nil { + log.Fatal(err) + } - filename := filepath.Join(anonDataDir, fieldNames[i]) - _, err := os.Stat(filename) - if err != nil { - if errors.Is(err, fs.ErrNotExist) { - slog.Debug(fmt.Sprintf("Anonymizing data not found for field %s. Skipping.\n", fieldNames[i])) - continue - } - return nil, err + for _, file := range files { + if file.IsDir() { + continue } - data, err := loadAnonymizingData(filename) + data, err := loadAnonymizingData(filepath.Join(anonDataDir, file.Name())) if err != nil { - return nil, fmt.Errorf("loading anonymizing data from file %s: %v", filename, err) + return nil, fmt.Errorf("loading anonymizing data from file %s: %v", file.Name(), err) } - anonData[fieldNames[i]] = data - slog.Debug(fmt.Sprintf("Loaded anonymizing data for field: %s; values loaded: %d\n", fieldNames[i], len(data))) + anonData[file.Name()] = data + slog.Debug(fmt.Sprintf("Loaded anonymizing data for field: %s; values loaded: %d\n", file.Name(), len(data))) } return anonData, nil @@ -50,9 +41,13 @@ func loadAnonymizingData(filepath string) ([]string, error) { } var anonData []string - anonDataFileScanner := bufio.NewScanner(anonDataFile) - for anonDataFileScanner.Scan() { - anonData = append(anonData, anonDataFileScanner.Text()) + scanner := bufio.NewScanner(anonDataFile) + for scanner.Scan() { + anonData = append(anonData, scanner.Text()) + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("error reading anon data: %w", err) } return anonData, anonDataFile.Close() diff --git a/internal/parser/anondataparser_test.go b/internal/parser/anondataparser_test.go index f701bec..53e887f 100644 --- a/internal/parser/anondataparser_test.go +++ b/internal/parser/anondataparser_test.go @@ -1,6 +1,9 @@ package parser import ( + "bufio" + "os" + "path/filepath" "testing" "github.com/stretchr/testify/assert" @@ -8,29 +11,47 @@ import ( func TestAnonimizer_Anondataloader(t *testing.T) { tests := []struct { - name string - anonDataDir string - fieldNames []string - expectedAnonData map[string][]string + name string + anonDataDir string + expectedFields []string }{ { - name: "Test Anondataloader", - fieldNames: []string{"msg.organization"}, - anonDataDir: "../../examples/anon_data", - expectedAnonData: map[string][]string{ - "msg.organization": { - "Microsoft", "Apple", "H&P", "IBM", - }, - }, + name: "Test Anondataloader", + anonDataDir: "../../examples/anon_data", + expectedFields: []string{"dst_iface", "dst_ip", "ip", "name", "organization", "src_iface", "src_ip", "username"}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - anonData, err := ParseAnonData(tt.anonDataDir, tt.fieldNames) + anonData, err := LoadAnonData(tt.anonDataDir) if err != nil { t.Fatal(err) } - assert.Equal(t, tt.expectedAnonData, anonData) + + for field, value := range anonData { + assert.Contains(t, tt.expectedFields, field) + assert.Equal(t, readLines(t, filepath.Join(tt.anonDataDir, field)), value) + } }) } } + +func readLines(t *testing.T, path string) []string { + file, err := os.Open(path) + if err != nil { + t.Fatal(err) + } + defer file.Close() + + var lines []string + scanner := bufio.NewScanner(file) + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } + + if scanner.Err() != nil { + t.Fatal(err) + } + + return lines +} diff --git a/internal/parser/csvparser.go b/internal/parser/csvparser.go deleted file mode 100644 index 1364e7b..0000000 --- a/internal/parser/csvparser.go +++ /dev/null @@ -1,46 +0,0 @@ -package parser - -import ( - "encoding/csv" - "log/slog" - "os" -) - -// ParseCSV takes a CSV file containing logs and transforms it into a list of maps, where each map entry represents a log line. -// Such format is required to be able to modify log data (replace original values with anonymous values). -// It is also returning names of the CSV columns. Names of the columns (field names) are needed to grab corresponding anonymization data. -func ParseCSV(filename string) ([]string, []map[string]string, error) { - file, err := os.Open(filename) - if err != nil { - return nil, nil, err - } - defer func(fs *os.File) { - if err := fs.Close(); err != nil { - slog.Error(err.Error()) - } - }(file) - - csvReader := csv.NewReader(file) - - // First element of the csvReader contains field names - fieldNames, err := csvReader.Read() - if err != nil { - return nil, nil, err - } - - var csvData []map[string]string - for { - row, err := csvReader.Read() - if err != nil { - break - } - - m := make(map[string]string) - for i, val := range row { - m[fieldNames[i]] = val - } - csvData = append(csvData, m) - } - - return fieldNames, csvData, nil -} diff --git a/internal/parser/csvparser_test.go b/internal/parser/csvparser_test.go deleted file mode 100644 index 349bbd5..0000000 --- a/internal/parser/csvparser_test.go +++ /dev/null @@ -1,38 +0,0 @@ -package parser - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestAnonimizer_CSVloader(t *testing.T) { - tests := []struct { - name string - filename string - expectedFieldNames []string - expectedValues []map[string]string - }{ - { - name: "Test CSVLoader", - filename: "../../examples/logs/example_logs.csv", - expectedFieldNames: []string{"@timestamp", "raw", "msg.src_ip", "msg.username", "msg.organization"}, - expectedValues: []map[string]string{{ - "@timestamp": "2024-06-05T14:59:27.000+00:00", - "msg.organization": "TESTuser.test.com", - "msg.src_ip": "89.239.31.49", "msg.username": "test.user@test.cz", - "raw": "{\"@timestamp\": \"2024-06-05T14:59:27.000+00:00\", \"msg.src_ip\":\"89.239.31.49\", \"username\":\"test.user@test.cz\", \"organization\":\"TESTuser.test.com\"}", - }}, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fieldNames, csvData, err := ParseCSV(tt.filename) - if err != nil { - t.Fatal(err) - } - assert.Equal(t, tt.expectedFieldNames, fieldNames) - assert.Equal(t, tt.expectedValues, csvData) - }) - } -} diff --git a/internal/proof/proof.go b/internal/proof/proof.go new file mode 100644 index 0000000..8f85d45 --- /dev/null +++ b/internal/proof/proof.go @@ -0,0 +1,77 @@ +package proof + +import ( + "bufio" + "encoding/json" + "fmt" + "log/slog" + "os" + + "github.com/logmanager-oss/logveil/internal/utils" +) + +type Proof struct { + isEnabled bool + writer *bufio.Writer + file *os.File +} + +func New(isEnabled bool) *Proof { + var writer *bufio.Writer + var file *os.File + + if isEnabled { + var err error + proofFile, err := os.OpenFile("proof.json", os.O_RDWR|os.O_CREATE, 0644) + if err != nil { + slog.Error("opening/creating proof file", "error", err) + return nil + } + + writer = bufio.NewWriter(proofFile) + file = proofFile + } + + return &Proof{ + isEnabled: isEnabled, + writer: writer, + file: file, + } +} + +func (p *Proof) Write(originalValue string, maskedValue string) { + if !p.isEnabled { + return + } + + proof := struct { + OriginalValue string `json:"original"` + MaskedValue string `json:"new"` + }{ + OriginalValue: originalValue, + MaskedValue: maskedValue, + } + + bytes, err := json.Marshal(proof) + if err != nil { + slog.Error("marshalling anonymisation proof", "error", err) + } + + _, err = fmt.Fprintf(p.writer, "%s\n", bytes) + if err != nil { + slog.Error("writing anonymisation proof", "error", err) + } +} + +func (p *Proof) Close() { + if !p.isEnabled { + return + } + + err := p.writer.Flush() + if err != nil { + slog.Error("flushing buffer", "error", err) + } + + utils.CloseFile(p.file) +} diff --git a/internal/proof/proof_test.go b/internal/proof/proof_test.go new file mode 100644 index 0000000..ede9f1f --- /dev/null +++ b/internal/proof/proof_test.go @@ -0,0 +1,61 @@ +package proof + +import ( + "bytes" + "io" + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestProof_Write(t *testing.T) { + tests := []struct { + name string + isProofWriterEnabled bool + originalValue string + maskedValue string + expectedOutput string + }{ + { + name: "Test case 1: write proof", + isProofWriterEnabled: true, + originalValue: "test", + maskedValue: "masked", + expectedOutput: "{\"original\":\"test\",\"new\":\"masked\"}\n", + }, + { + name: "Test case 2: proof writer disabled", + isProofWriterEnabled: false, + originalValue: "test", + maskedValue: "masked", + expectedOutput: "", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := New(tt.isProofWriterEnabled) + + p.Write(tt.originalValue, tt.maskedValue) + + p.Close() + + file, err := os.OpenFile("proof.json", os.O_RDWR|os.O_CREATE, 0644) + if err != nil { + t.Fatal(err) + } + + buf := bytes.NewBuffer(nil) + _, err = io.Copy(buf, file) + if err != nil { + t.Fatal(err) + } + + file.Close() + + assert.Equal(t, tt.expectedOutput, buf.String()) + + os.Remove("proof.json") + }) + } +} diff --git a/internal/runner/backup.go b/internal/runner/backup.go new file mode 100644 index 0000000..92747ac --- /dev/null +++ b/internal/runner/backup.go @@ -0,0 +1,64 @@ +package runner + +import ( + "bufio" + "compress/gzip" + "encoding/json" + "fmt" + "io" + "os" + + "github.com/logmanager-oss/logveil/internal/anonymizer" +) + +type LmBackup struct { + Source LmLog `json:"_source"` +} + +type LmLog struct { + Raw string `json:"raw"` + Msg map[string]interface{} `json:"msg"` +} + +func AnonymizeLmBackup(input *os.File, output io.Writer, anonymizer *anonymizer.Anonymizer) error { + gzReader, err := gzip.NewReader(input) + if err != nil { + return fmt.Errorf("error creating gzip reader: %w", err) + } + defer gzReader.Close() + + scanner := bufio.NewScanner(gzReader) + + for scanner.Scan() { + line := scanner.Bytes() + + lmBackup := &LmBackup{} + err = json.Unmarshal(line, &lmBackup) + if err != nil { + return fmt.Errorf("unmarshaling log line: %w", err) + } + + // Convert map[string]interface{} to map[string]string as requred by anonymizer + logLine := make(map[string]string) + for key, value := range lmBackup.Source.Msg { + strKey := fmt.Sprintf("%v", key) + strValue := fmt.Sprintf("%v", value) + + logLine[strKey] = strValue + } + logLine["raw"] = lmBackup.Source.Raw + + anonymizedLogLine := anonymizer.Anonymize(logLine) + + _, err = fmt.Fprintln(output, anonymizedLogLine) + if err != nil { + return fmt.Errorf("writing log line to buffer: %v", err) + } + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("error reading input: %w", err) + } + + return nil +} diff --git a/internal/runner/backup_test.go b/internal/runner/backup_test.go new file mode 100644 index 0000000..9081c5a --- /dev/null +++ b/internal/runner/backup_test.go @@ -0,0 +1,74 @@ +package runner + +import ( + "bytes" + "os" + "testing" + + "github.com/logmanager-oss/logveil/internal/anonymizer" + "github.com/logmanager-oss/logveil/internal/loader" + "github.com/logmanager-oss/logveil/internal/proof" + "github.com/logmanager-oss/logveil/internal/utils" + "github.com/stretchr/testify/assert" +) + +func TestLmBackup(t *testing.T) { + tests := []struct { + name string + inputFilename string + anonymizingDataDir string + expectedOutput string + expectedProof []map[string]interface{} + }{ + { + name: "Test Test LM Backup Anonymizer", + inputFilename: "../../examples/logs/lm-2024-06-09_0000.gz", + anonymizingDataDir: "../../examples/anon_data", + expectedOutput: "<189>date=2024-11-06 time=12:29:25 devname=\"LM-FW-70F-Praha\" devid=\"FGT70FTK22012016\" eventtime=1730892565525108329 tz=\"+0100\" logid=\"0000000013\" type=\"traffic\" subtype=\"forward\" level=\"notice\" vd=\"root\" srcip=10.20.0.53 srcport=57158 srcintf=\"lan1\" srcintfrole=\"wan\" dstip=227.51.221.89 dstport=80 dstintf=\"lan1\" dstintfrole=\"lan\" srccountry=\"China\" dstcountry=\"Czech Republic\" sessionid=179455916 proto=6 action=\"client-rst\" policyid=9 policytype=\"policy\" poluuid=\"d8ccb3e4-74d4-51ef-69a3-73b41f46df74\" policyname=\"Gitlab web from all\" service=\"HTTP\" trandisp=\"noop\" duration=6 sentbyte=80 rcvdbyte=44 sentpkt=2 rcvdpkt=1 appcat=\"unscanned\" srchwvendor=\"H3C\" devtype=\"Router\" mastersrcmac=\"00:23:89:39:a4:ef\" srcmac=\"00:23:89:39:a4:ef\" srcserver=0 dsthwvendor=\"H3C\" dstdevtype=\"Router\" masterdstmac=\"00:23:89:39:a4:fa\" dstmac=\"00:23:89:39:a4:fa\" dstserver=0\n", + expectedProof: []map[string]interface{}{ + {"original": "dev-uplink", "new": "lan1"}, + {"original": "95.80.197.108", "new": "227.51.221.89"}, + {"original": "27.221.126.209", "new": "10.20.0.53"}, + {"original": "wan1-lm", "new": "lan1"}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + input, err := os.Open(tt.inputFilename) + if err != nil { + t.Fatal(err) + } + defer input.Close() + + var output bytes.Buffer + + anonymizingData, err := loader.Load(tt.anonymizingDataDir) + if err != nil { + t.Fatal(err) + } + proofWriter := proof.New(true) + anonymizer := anonymizer.New(anonymizingData, proofWriter) + // Disabling randomization so we know which values to expect + anonymizer.SetRandFunc(func(int) int { return 1 }) + + err = AnonymizeLmBackup(input, &output, anonymizer) + if err != nil { + t.Fatal(err) + } + + proofWriter.Close() + + actualProof, err := utils.UnpackProofOutput() + if err != nil { + t.Fatal(err) + } + + assert.ElementsMatch(t, tt.expectedProof, actualProof) + + os.Remove("proof.json") + + assert.Equal(t, tt.expectedOutput, output.String()) + }) + } +} diff --git a/internal/runner/export.go b/internal/runner/export.go new file mode 100644 index 0000000..c04f70e --- /dev/null +++ b/internal/runner/export.go @@ -0,0 +1,47 @@ +package runner + +import ( + "encoding/csv" + "fmt" + "io" + "os" + "strings" + + "github.com/logmanager-oss/logveil/internal/anonymizer" +) + +func AnonymizeLmExport(input *os.File, output io.Writer, anonymizer *anonymizer.Anonymizer) error { + csvReader := csv.NewReader(input) + + // First element of the csvReader contains field names + fieldNames, err := csvReader.Read() + if err != nil { + return err + } + + // Trimming prefix from field names + for i, fieldName := range fieldNames { + fieldNames[i] = strings.TrimPrefix(fieldName, "msg.") + } + + for { + row, err := csvReader.Read() + if err != nil { + break + } + + logLine := make(map[string]string) + for i, val := range row { + logLine[fieldNames[i]] = val + } + + anonymizedLogLine := anonymizer.Anonymize(logLine) + + _, err = fmt.Fprintln(output, anonymizedLogLine) + if err != nil { + return fmt.Errorf("writing log line to buffer: %v", err) + } + } + + return nil +} diff --git a/internal/runner/export_test.go b/internal/runner/export_test.go new file mode 100644 index 0000000..5906509 --- /dev/null +++ b/internal/runner/export_test.go @@ -0,0 +1,74 @@ +package runner + +import ( + "bytes" + "os" + "testing" + + "github.com/logmanager-oss/logveil/internal/anonymizer" + "github.com/logmanager-oss/logveil/internal/loader" + "github.com/logmanager-oss/logveil/internal/proof" + "github.com/logmanager-oss/logveil/internal/utils" + "github.com/stretchr/testify/assert" +) + +func TestLmExport(t *testing.T) { + tests := []struct { + name string + inputFilename string + outputFilename string + anonymizingData string + expectedOutput string + expectedProof []map[string]interface{} + }{ + { + name: "Test LM Export Anonymizer", + inputFilename: "../../examples/logs/example_logs.csv", + anonymizingData: "../../examples/anon_data", + expectedOutput: "{\"@timestamp\": \"2024-06-05T14:59:27.000+00:00\", \"msg.src_ip\":\"10.20.0.53\", \"username\":\"ladislav.dosek\", \"organization\":\"Apple\"}\n", + expectedProof: []map[string]interface{}{ + {"original": "89.239.31.49", "new": "10.20.0.53"}, + {"original": "test.user@test.cz", "new": "ladislav.dosek"}, + {"original": "TESTuser.test.com", "new": "Apple"}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + input, err := os.Open(tt.inputFilename) + if err != nil { + t.Fatal(err) + } + defer input.Close() + + var output bytes.Buffer + + anonymizingData, err := loader.Load(tt.anonymizingData) + if err != nil { + t.Fatal(err) + } + proofWriter := proof.New(true) + anonymizer := anonymizer.New(anonymizingData, proofWriter) + // Disabling randomization so we know which values to expect + anonymizer.SetRandFunc(func(int) int { return 1 }) + + err = AnonymizeLmExport(input, &output, anonymizer) + if err != nil { + t.Fatal(err) + } + + assert.Equal(t, tt.expectedOutput, output.String()) + + proofWriter.Close() + + actualProof, err := utils.UnpackProofOutput() + if err != nil { + t.Fatal(err) + } + + assert.ElementsMatch(t, tt.expectedProof, actualProof) + + os.Remove("proof.json") + }) + } +} diff --git a/internal/utils/utils.go b/internal/utils/utils.go new file mode 100644 index 0000000..2775fab --- /dev/null +++ b/internal/utils/utils.go @@ -0,0 +1,41 @@ +package utils + +import ( + "bufio" + "encoding/json" + "log/slog" + "os" +) + +func CloseFile(fs *os.File) { + err := fs.Close() + if err != nil { + slog.Error(err.Error()) + } +} + +func UnpackProofOutput() ([]map[string]interface{}, error) { + outputFile, err := os.OpenFile("proof.json", os.O_RDWR|os.O_CREATE, 0644) + if err != nil { + return nil, err + } + + var output []map[string]interface{} + scanner := bufio.NewScanner(outputFile) + for scanner.Scan() { + var unpackedLine map[string]interface{} + line := scanner.Bytes() + err := json.Unmarshal(line, &unpackedLine) + if err != nil { + return nil, err + } + output = append(output, unpackedLine) + } + + err = scanner.Err() + if err != nil { + return nil, err + } + + return output, nil +} diff --git a/internal/writer/writer.go b/internal/writer/writer.go deleted file mode 100644 index a2e3207..0000000 --- a/internal/writer/writer.go +++ /dev/null @@ -1,32 +0,0 @@ -package writer - -import ( - "fmt" - "log/slog" - "os" -) - -type Output struct { - Output []string -} - -func (o *Output) Write(filename string) error { - file, err := os.Create(filename) - if err != nil { - return err - } - defer func(fs *os.File) { - if err := fs.Close(); err != nil { - slog.Error(err.Error()) - } - }(file) - - for _, line := range o.Output { - _, err := file.WriteString(line + "\n") - if err != nil { - return fmt.Errorf("writing anonymized data to output file %s: %v", filename, err) - } - } - - return nil -} diff --git a/internal/writer/writer_test.go b/internal/writer/writer_test.go deleted file mode 100644 index ed96e77..0000000 --- a/internal/writer/writer_test.go +++ /dev/null @@ -1,45 +0,0 @@ -package writer - -import ( - "os" - "strings" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestAnonimizer_Outputwriter(t *testing.T) { - tests := []struct { - name string - outputFile string - expectedOutput string - }{ - { - name: "Test Output Writer", - outputFile: "output.txt", - expectedOutput: "{\"@timestamp\": \"2024-06-05T14:59:27.000+00:00\", \"msg.src_ip\":\"10.10.10.1\", \"username\":\"miloslav.illes\", \"organization\":\"Microsoft\"}", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - outputwriter := &Output{ - Output: []string{tt.expectedOutput}, - } - - defer os.Remove(tt.outputFile) - - err := outputwriter.Write(tt.outputFile) - if err != nil { - t.Fatal(err) - } - - data, err := os.ReadFile(tt.outputFile) - if err != nil { - t.Fatal(err) - } - - assert.Equal(t, tt.expectedOutput, strings.TrimRight(string(data), "\n")) - }) - } -}