diff --git a/cmd/logveil/logveil.go b/cmd/logveil/logveil.go new file mode 100644 index 0000000..ebace4f --- /dev/null +++ b/cmd/logveil/logveil.go @@ -0,0 +1,65 @@ +package logveil + +import ( + "bufio" + "log/slog" + "os" + + "github.com/logmanager-oss/logveil/internal/anonymizer" + "github.com/logmanager-oss/logveil/internal/flags" + "github.com/logmanager-oss/logveil/internal/loader" + "github.com/logmanager-oss/logveil/internal/runner" +) + +func Run() { + slog.Info("Anonymization process started...") + + anonymizingDataDir, inputPath, outputPath, isVerbose, isLmExport := flags.LoadAndValidate() + + if isVerbose { + slog.SetLogLoggerLevel(slog.LevelDebug) + } + + inputReader, err := os.Open(inputPath) + if err != nil { + return + } + defer inputReader.Close() + + var outputFile *os.File + if outputPath != "" { + outputFile, err := os.Create(outputPath) + if err != nil { + return + } + defer outputFile.Close() + } else { + outputFile = os.Stdout + } + + outputWriter := bufio.NewWriter(outputFile) + defer outputWriter.Flush() + + anonymizingData, err := loader.Load(anonymizingDataDir) + if err != nil { + slog.Error("loading anonymizing data from dir %s: %v", anonymizingDataDir, err) + return + } + anonymizer := anonymizer.New(anonymizingData) + + if isLmExport { + err := runner.AnonymizeLmExport(inputReader, outputWriter, anonymizer) + if err != nil { + slog.Error("reading lm export input file %s: %v", inputReader.Name(), err) + return + } + } else { + err := runner.AnonymizeLmBackup(inputReader, outputWriter, anonymizer) + if err != nil { + slog.Error("reading lm backup input file %s: %v", inputReader.Name(), err) + return + } + } + + slog.Info("All done. 
Exiting...") +} diff --git a/cmd/main.go b/cmd/main.go index 9f0cf5d..f1c1e3b 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -1,7 +1,7 @@ package main -import "github.com/logmanager-oss/logveil/internal/anonymizer" +import "github.com/logmanager-oss/logveil/cmd/logveil" func main() { - anonymizer.Run() + logveil.Run() } diff --git a/examples/.DS_Store b/examples/.DS_Store new file mode 100644 index 0000000..50f486d Binary files /dev/null and b/examples/.DS_Store differ diff --git a/examples/anon_data/msg.dst_iface b/examples/anon_data/dst_iface similarity index 100% rename from examples/anon_data/msg.dst_iface rename to examples/anon_data/dst_iface diff --git a/examples/anon_data/msg.dst_ip b/examples/anon_data/dst_ip similarity index 100% rename from examples/anon_data/msg.dst_ip rename to examples/anon_data/dst_ip diff --git a/examples/anon_data/msg.ip b/examples/anon_data/ip similarity index 100% rename from examples/anon_data/msg.ip rename to examples/anon_data/ip diff --git a/examples/anon_data/msg.name b/examples/anon_data/name similarity index 100% rename from examples/anon_data/msg.name rename to examples/anon_data/name diff --git a/examples/anon_data/msg.organization b/examples/anon_data/organization similarity index 100% rename from examples/anon_data/msg.organization rename to examples/anon_data/organization diff --git a/examples/anon_data/source-names-cz.txt b/examples/anon_data/source-names-cz.txt deleted file mode 100644 index aee4697..0000000 --- a/examples/anon_data/source-names-cz.txt +++ /dev/null @@ -1,601 +0,0 @@ -Miloslav Illéš -Ladislav Došek -Kateřina Janečková -Josef Varga -Miroslav Škamrala -Jiří Filip -Patrik Topič -Petr Fabiánek -Andrea Randýsková -Jaroslava Strnadová -Alena Čuříková -Jan Berger -Anna Ellingerová -Eliška Halajová -Kamila Tomešová -Vojtěch Le Nguyen -Šárka Haňáková -Lenka Ouvínová -Antonín Miksa -Marie Scheidelová -Bohuslava Hlaváčová -Stanislava Janáková -Simona Šíchová -Jitka Konečníková -Jiřina Pečenková -Bronislav Machů 
-Zdeňka Hofmanová -Veronika Mandátová -Jaroslav Bukovjan -Irena Dobrovolná -Jana Schmidtová -František Tomeček -Pavel Voženílek -Günter Andreas Mlejnecký -Roman Miler -Jindřich Grác -Liana Hutěčková -Helena Danihelková -Hana Rybová -Martin Palička -David Černý -Ludvík Pospíšil -Nela Ryšavá -Věra Tichá -Eva Lukášová -Václav Krčílek -Miloš Večeřa -Magdalena Rašmanová -Vladimíra Kůrková -Danuše Žatecká -Tomáš Matlocha -Vladimír Pelikán -Marcela Pelikánová -Zdeněk Bartal -Ludmila Žůrková -Milan Burián -Jarmila Hamplová -Michal Vrba -Lucie Hladíková -Daniel Pachovský -Ruslan Filip Ondryáš -Lukáš Sedlecký -Petra Šrámková -Olga Ludvíková -Monika Sýkorová -Kristina Viznarová -Karel Zadražil -Soňa Kašpárková -Karolína Nguyen Thi -Stanislav Nový -Robert Zedníček -Juraj Žufan -Ctibor Znišťal -Blahoslav Sedlák -Jaromír Buchta -Libor Dušek -Michaela Baršová -Song Hui Petržilková -Denis Václavík -Vlasta Kalousová -Zdena Zvonařová -Marek Srnec -Daša Canalová -Štěpán Štrouchal -Ondřej Černík -Dana Feslová -Daniela Rodáková -Matěj Pinkas -Iva Vávrová -Markéta Ernstová -Božena Kratochvílová -Viking Kašpar -Hildegarda Kuchařová -Nicole Novotná -Filip Rigó -Adéla Cabáková -Gabriela Slováčková -Ivana Staňková -Jakub Líbal -Zuzana Folwarczná -Jan Kadlec -Stanislav Bužek -Václav Čajan -Jiří Batelka -Jaroslav Klabeček -Hana Sýkorová -Miriam Vostárková -Michal Gargula -Šárka Šmídová -Robert Jiříček -Dominik Matyáš -Pavel Novák -Vladimír Ostrovka -František Parzyk -Františka Karbanová -Josef Kakara -Daniel Veselka -Zuzana Myslivcová -Anna Kapešová -Roman Krasnyanyk -Vlasta Čechová -Marie Hájková -Jorga Bradlerová -Eva Čejková -Dáša Bodáková -Alena Navrátilová -Renata Macigová -Bohuslav Musil -Jaroslava Ochodková -Ivana Janačová -Tomáš Ognar -Jana Velechovská -Petr Kořínek -Pavol Přichystal -Martina Pytlíková -Lucie Louvarová -Ludmila Vošická -Zdeněk Lokaj -Milan Kokta -Libuše Králová -Miloslav Novotný -Lenka Kranerová -Lukáš Šopík -Miloslava Pešková -Martin Benda -Jiřina Strnadová -Miroslav 
Bakalár -Ilona Pálková -Ladislav Fabík -Jolana Kohoutová -Radek Ivan -Klára Babáková -Eduardo Luis Fuksa -Věra Řezníčková -Karel Smolík -David Záruba -Markéta Machníková -Marcela Švecová -Filip Ramzer -Veronika Kleinová -Antonín Slavík -Leopold Groulík -Jarmila Hladíková -Radimír Kos -Ivan Kostka -Hynek Šulc -Rudolf Paulus -Miroslava Chmelíčková -Marek Jiruš -Gabriela Jágerová -Anežka Jaklová -Petra Samková -Jerett Josef Špalek -Terezia Zapletalová -Zdeňka Barlová -Alenka Kratochvílová -Milena Bradáčová -Jakub Charvát -Dana Filipová -Barbora Pluhařová -Drahomíra Divišová -Kateřina Plačková -Tuan Berka -Helena Sehnalíková -Ondřej Lakatoš -Irena Holišová -Luděk Dorazil -Dušan Kučera -Peter Popelář -Matěj Horáček -Daniela Bílková -Alessio Antonio Hašek -Iveta Vrabcová -Aurelie Stehlíková -Ingrid Otáhalová -Jitka Sněhotová -Lucia Korbelová -Alžběta Řechtáčková -Nicola Šrámková -Aneta Jeřábková -Jana Jílková -Pavel Adamíček -Michaela Stará -Zdeňka Jandová -Miroslav Venzara -Marie Kletečková -Veronika Bednáriková -Jan Vršecký -Eva Mužíková -Martin Kadidlo -Linda Bártíková -Jiří Závadský -Ondřej Macek -Věra Holečková -Libuše Šustrová -Milan Baran -Josef Jareš -Jaroslav Ježek -Petr Řičař -František Cíla -Blanka Josková -Miroslava Černá -Kristýna Štolcová -Stanislav Rod -Simona Kloudová -Petra Mezlová -Jarmila Muczková -Hana Vozňáková -Jiřina Domorádová -Kateřina Kanderová -Radek Jech -Ferdinand Krčmař -Václav Hejdušek -Vratislav Kerner -Tomáš Kment -Michael Šmídl -Jaroslava Štěchová -Lucie Veselá -Renata Dytrychová -Jakub Wagner -Dan Neumann -Zdeněk Kučera -Anna Gáborová -Pavlína Křížková -Daniel Čermák -Marta Hammerschmiedová -Tadeáš Pavlík -Lenka Dvořáková -Vítězslav Rambousek -Tereza Hadvičáková -Karel Friedrich -Ivana Šandová -Dominik Vandlíček -Růžena Hájková -Nela Srněnská -Gabriela Dragounová -Ladislav Braun -Michala Halíková -Matěj Šerák -Zuzana Ligocká -Miloslav Jelen -Michal Němec -Kornelie Seibertová -Alena Hanzlová -Adéla Koláčková -Jan Joshua Kopčil -Filip 
Bárek -Ung Odehnal -Sabina Kopáčová -Božena Šestáková -Vlastislav Nguyen Minh -Klára Vičarová -Martina Sojková -Anežka Třísková -Lea Hanušová -Pavla Vénosová -Oldřich Králíček -Ludmila Vrzalová -Ivan Toman -Miroslaw Zbigniew Kratochvíl -Vladimír Rubina -Olga Závrská -Marcela Pražáková -Helena Janoušková -Vlasta Hrdličková -David Tesárek -Jaromír Pícha -Jitka Červenková -Štěpánka Strnadová -Lukáš Smith -Květoslava Hantlová -Andrea Daňková -Antonín Tyrlik -Markéta Cahová -Rudolf Rejnart -Iakov Havriš -Lidie Šmídová -Zdenka Kňourková -Monika Císařová -Radim Knobloch -Monika Ouředníková -Jakub Sháněl -Jana Kolářová -Czeslaw Hrtoň -Ludmila Maštalířová -Robert Horák -Hana Šibalová -Jiří Stoy -Daniel Junghans -Jindřich Šteffl -Pavel Cabák -Renáta Cibulková -Věra Kyselová -Markéta Slípková -Radka Stránská -Jan Suchý -Bohuslav Janiš -Aneta Kopecká -Lukáš Sokol -Helena Foffová -Mykhaylo Hejtmánek -Lucie Synková -Mariia Fořtová -Marcela Rumlová -David Bělík -Marie Pešková -Renata Hřídelová -Václav Mánek -Josef Straka -Zdeňka Račková -Petr Prášek -Zdeněk Janda -Jaroslava Dalihodová -Milan Plášek -Magdalena Sobanská -Eva Dvořáková -Ivana Hinštová -Aleš Bolovanský -Ilona Šmerdová -Tomáš Alinče -Karel Páral -Alena Němcová -Jaroslav Peterka -František Svoboda -Jarmila Dědičová -Michal Pospíšil -Roman Hrstka -Lenka Pudová -Anna Buroňová -Ondřej Dopita -Martin Kleber -Miroslav Pindur -Vlastimil Šíp -Libor Svatý -Kateřina Kasálková -Vladimír Zaremba -Petra Machová -Ladislav Loučka -Mária Klemmová -Adéla Pelikánová -Stanislav Rýc -Patrick Drha -Martina Matušková -Lumír Válka -Libuše Procházková -Květoslava Vacková -Ginette Loudová -Dagmar Zvonková -Veronika Suchánková -Růžena Vaníčková -Antonín Lach -Samuel Hosnedl -Heidi Johnová -Květuše Jeřábková -Miroslava Míšková -Jitka Kotrčová -Šárka Šrautová -Taťána Furmánková -Soňa Serynková -Pavla Popelová -Božena Borkovcová -Milena Lušťková -Jonáš Vyhlídal -Darko Vojtěch -Adam Valder -Jiřina Pavelková -Vlastislav Urban -Michaela Oličová 
-Anežka Svobodová -Tereza Dzurková -Radek Vondráček -Táňa Michaličková -Marta Brachtlová -Filip Ernest -Mario Blažek -Miloslava Žvatorová -Stanislava Skřičková -Vlasta Bajerová -Dana Kociánová -Vratislav Všetečka -Lucie Neguczová -Michal Foldyna -Dušan Sasýn -Veronika Neničková -Anna Malíková -Helena Gregorová -Hana Makulová -Diana Wágnerová -Igor Petr -Zdeňka Marková -Marcela Bělíková -Miroslava Šarayová -Petr Drga -Jitka Applová -Alena Svátková -Michaela Ebenstreitová -Vilém Ševčík -Eliška Bětíková -Marie Šubrtová -Martin Buk -Matěj Pokorný -Kateřina Šmídová -Ivana Vážanová -Tomáš Hlinka -David Schoř -Philipp Hnilica -Jana Gorolová -Jan Semjon -Jiří Lieber -Vladislav Feichtinger -Miloslav Stružka -Eva Kynclová -Radek Toman -Nikola Trvajová -Vendula Štěpánková -Cvetko Kříž -Josef Čech -Čestmír Řeháček -Roman Metelka -Štěpánka Joselevičová -Thi Thanh Lieu Fialová -Věra Bučková -Ladislav Kořínek -Jaroslav Trýb -Zdeněk Maňásek -Pavel Kohout -Bedřich Mati -Miroslav Čelechovský -Ludmila Kubínová -Daniel Rehák -Karolína Adamčíková -Berta Poláchová -Iva Kozlíčková -Theodor Zeman -Jaromír Uher -Zdenka Pecková -Olga Zemanová -Tereza Markalousová -Vladislava Markovičová -Andrea Černá -Filip Král -Milan Hanuš -František Jatagandzidis -Martina Janoušková -Marek Romanyuk -Václav Břenek -Iveta Drgová -Jaroslava Trnková -Lukáš Fečko -Jiřina Bartoňková -Renata Vokřínková -Slavomír Nguyen -Karel Martsynyuk -Ondřej Jonáš -Petra Veselá -Milena Kloudová -Tamara Korejtková -Lenka Wildová -Rostislav Výborný -Edita Pokorná -Stanislav Hostýnek -Antonín Gazda -Markéta Mašková -Soňa Kabilková -Bohumila Kolerusová -Adam Roháček -Vladimír Vojkůvka -Truong Sedláček -Dana Severová -Blažej Jandura -Magdaléna Davidková -Ewa Hlavatá -Klára Strnisková -Sabina Mužíková -Jakub Konečný -Jarmila Pilařová -Erika Šubová -Božena Hrušková -Ctibor Štěpánek -Dmitri Švejcar -Anna Slabá -Petr Čechmánek -Vladimír Šantavý -Zdeněk Kleinbauer -Jaroslav Smetanka -Jiří Kocourek -Radomíra Mayerová -Milan Matásek 
-Josef Bednařík -Nikola Rozsívalová -Adriana Horáková -František Poula -Barbora Hromasová -Robert Hradecký -Dagmar Grivalská -Ladislav Sekyra -Marie Zikmundová -Martina Machatková -Věra Poláková -Jana Menclová -Filip Šimko -Drahomíra Kosíková -Martin Šabík -Mirijam Baierlová -Ludmila Chlupová -Thi Lan Huong Šístková -Jolana Frydryšková -Lukáš Selucký -Jan Vlodarčík -Hana Stibůrková -Andrea Poulová -Petra Kostková -Miroslav Hock -Zoran Hýbl -Antonín Špička -Tomáš Tóth -Albína Lencová -Pavel Hlaváček -Libor Toman -Ondřej Šenk -Daniela Čapková -Radek Kováč -Radim Novák -Kamila Hrdinová -Jaroslava Krásová -Miluše Jugová -Vlasta Vopařilová -Karel Kudža -Zuzana Krátká -Iva Fabianová -Stanislav Šrom -Tetyana Kalousová -Alois Slavíček -Anita Agata Němečková -Libuše Solichová -Eva Potočárová -Samuel Holub -Michal Štupalský -Kateřina Šlichtová -Lenka Staňková -Miroslava Švecová -Renata Davidová -Rostislav Štěpán -Valerie Šilhavá -Yuriy Pecuch -Lucie Humlová -Václav Pisklák -Miloslav Kavoň -Božena Chabadová -Natálie Bosáková -Miloslava Pazderová -Zdenka Tenzerová -Helena Mandová -Jaromír Komůrka -Jitka Jurkechová -Iryna Divácká -Valentina Janšová -Alexandra Tancošová -Dominik Pitel -Markéta Menšíková -Alena Kadeřábková -Marcela Červenková -Jarmila Kopřivová -Iveta Stejskalová -Magdaléna Rušarová -Jakub Martinek -Tereza Vegrichtová -Přemysl Babica -Irena Kasíková -Roman Bobot -Emil Šperlín -Šárka Hronovská -Pavla Walterová -Ulrika Škorupová -Aleš Směja -Michaela Chromková -Zdeňka Hájková -Denisa Nečasová -Veronika Bognerová -Vojtěch Berger - diff --git a/examples/anon_data/msg.src_iface b/examples/anon_data/src_iface similarity index 100% rename from examples/anon_data/msg.src_iface rename to examples/anon_data/src_iface diff --git a/examples/anon_data/msg.src_ip b/examples/anon_data/src_ip similarity index 100% rename from examples/anon_data/msg.src_ip rename to examples/anon_data/src_ip diff --git a/examples/anon_data/msg.username b/examples/anon_data/username similarity 
// Anonymizer replaces field values found in a log line with values picked from
// per-field lists of anonymizing data.
type Anonymizer struct {
	// anonData maps a field name to the candidate replacement values.
	anonData map[string][]string
	// randFunc picks an index in [0, n); it is swappable for deterministic tests.
	randFunc func(int) int
}

// New returns an Anonymizer that draws replacements from anonData using
// rand.Intn to choose among the candidates.
func New(anonData map[string][]string) *Anonymizer {
	return &Anonymizer{
		anonData: anonData,
		randFunc: rand.Intn,
	}
}

// Anonymize replaces, inside logLine["raw"], every occurrence of each non-empty
// field value that has anonymizing data, and returns the resulting raw line.
// The "raw" entry of logLine is updated in place. Fields without replacement
// candidates (missing or empty list) are left untouched.
func (an *Anonymizer) Anonymize(logLine map[string]string) string {
	for field, value := range logLine {
		if field == "raw" || value == "" {
			continue
		}

		// A missing key yields a nil slice, so one length check covers both
		// "no data for this field" and "data file was empty". Without it an
		// empty list would reach randFunc(0) and panic.
		anonValues := an.anonData[field]
		if len(anonValues) == 0 {
			continue
		}

		newAnonValue := anonValues[an.randFunc(len(anonValues))]

		slog.Debug(fmt.Sprintf("Replacing the values for field %s. From %s to %s.\n", field, value, newAnonValue))

		logLine["raw"] = strings.ReplaceAll(logLine["raw"], value, newAnonValue)
	}

	return logLine["raw"]
}

// SetRandFunc overrides the function used to pick a replacement index.
func (an *Anonymizer) SetRandFunc(randFunc func(int) int) {
	an.randFunc = randFunc
}
loader.Load(tt.anonymizingDataDir) if err != nil { - t.Fatalf("reading input file %s: %v", tt.inputFile, err) + t.Fatalf("loading anonymizing data from dir %s: %v", tt.anonymizingDataDir, err) } - anonData, err := parser.ParseAnonData(tt.anonDataDir, fieldNames) - if err != nil { - t.Fatalf("loading anonymizing data from dir %s: %v", tt.anonDataDir, err) - } - - anonymizer := New(csvData, anonData) - anonymizer.setRandFunc(func(int) int { return 0 }) - output := anonymizer.anonymize() + anonymizer := New(anonymizingData) + // Disabling randomization so we know which values to expect + anonymizer.SetRandFunc(func(int) int { return 1 }) + output := anonymizer.Anonymize(tt.input) assert.Equal(t, tt.expectedOutput, output) }) diff --git a/internal/anonymizer/runner.go b/internal/anonymizer/runner.go deleted file mode 100644 index f9261e7..0000000 --- a/internal/anonymizer/runner.go +++ /dev/null @@ -1,44 +0,0 @@ -package anonymizer - -import ( - "fmt" - "log/slog" - - "github.com/logmanager-oss/logveil/internal/flags" - "github.com/logmanager-oss/logveil/internal/parser" - "github.com/logmanager-oss/logveil/internal/writer" -) - -func Run() { - slog.Info("Anonymization process started...") - - anonDataDir, inputFile, outputFile := flags.Load() - - fieldNames, csvData, err := parser.ParseCSV(inputFile) - if err != nil { - slog.Error("reading input file %s: %v", inputFile, err) - return - } - - anonData, err := parser.ParseAnonData(anonDataDir, fieldNames) - if err != nil { - slog.Error("loading anonymizing data from dir %s: %v", anonDataDir, err) - return - } - - anonymizer := New(csvData, anonData) - anonymizedData := anonymizer.anonymize() - if outputFile != "" { - outputwriter := &writer.Output{ - Output: anonymizedData, - } - err := outputwriter.Write(outputFile) - if err != nil { - slog.Error("writing anonymized data to output file %s: %v", outputFile, err) - } - } else { - fmt.Println(anonymizedData) - } - - slog.Info("All done. 
// validateInput returns a flag.Func callback that accepts a value only when it
// names an existing path that is not a directory, and stores the accepted
// value in *inputPath.
func validateInput(inputPath *string) func(string) error {
	return func(flagValue string) error {
		fileInfo, err := os.Stat(flagValue)
		if err != nil {
			return err
		}

		if fileInfo.IsDir() {
			return fmt.Errorf("Input file %s cannot be a directory.\n", flagValue)
		}

		*inputPath = flagValue

		return nil
	}
}

// validateOutput returns a flag.Func callback that accepts a value when it
// either does not exist yet or names an existing non-directory path, and
// stores the accepted value in *outputPath.
func validateOutput(outputPath *string) func(string) error {
	return func(flagValue string) error {
		fileInfo, err := os.Stat(flagValue)
		switch {
		case errors.Is(err, os.ErrNotExist):
			// A path that does not exist yet is valid and must still be recorded.
		case err != nil:
			return err
		case fileInfo.IsDir():
			return fmt.Errorf("Output file %s cannot be a directory.\n", flagValue)
		}

		*outputPath = flagValue

		return nil
	}
}

// validateDir returns a flag.Func callback that accepts a value only when it
// names an existing directory, and stores the accepted value in *dir.
func validateDir(dir *string) func(string) error {
	return func(flagValue string) error {
		fileInfo, err := os.Stat(flagValue)
		if err != nil {
			return err
		}

		if !fileInfo.IsDir() {
			return fmt.Errorf("Path to anonymization data %s needs to be a directory.\n", flagValue)
		}

		*dir = flagValue

		return nil
	}
}

// LoadAndValidate registers the command-line flags, parses them and returns,
// in order: the anonymizing data directory, the input path, the output path
// (empty when not given), the verbose switch and the LM-export switch.
func LoadAndValidate() (string, string, string, bool, bool) {
	// The validators write through these pointers; passing the strings by
	// value would leave all three paths empty after parsing.
	var anonymizationDataPath, inputPath, outputPath string

	flag.Func("d", "Path to directory with anonymizing data", validateDir(&anonymizationDataPath))
	flag.Func("i", "Path to input file containing logs to be anonymized", validateInput(&inputPath))
	flag.Func("o", "Path to output file (default: Stdout)", validateOutput(&outputPath))

	isVerbose := flag.Bool("v", false, "Enable verbose logging")
	isLmExport := flag.Bool("e", false, "Change input file type to LM export (default input file type is LM Backup)")

	flag.Parse()

	return anonymizationDataPath, inputPath, outputPath, *isVerbose, *isLmExport
}
// Load reads every regular file directly inside anonDataDir and returns a map
// from file name to the lines of that file. Sub-directories are skipped. The
// file name is used as the field name the values belong to.
func Load(anonDataDir string) (map[string][]string, error) {
	files, err := os.ReadDir(anonDataDir)
	if err != nil {
		// Report the failure to the caller instead of terminating the process.
		return nil, fmt.Errorf("reading anonymizing data dir %s: %w", anonDataDir, err)
	}

	anonData := make(map[string][]string, len(files))
	for _, file := range files {
		if file.IsDir() {
			continue
		}

		data, err := loadAnonymizingData(filepath.Join(anonDataDir, file.Name()))
		if err != nil {
			return nil, fmt.Errorf("loading anonymizing data from file %s: %w", file.Name(), err)
		}

		anonData[file.Name()] = data
		slog.Debug(fmt.Sprintf("Loaded anonymizing data for field: %s; values loaded: %d\n", file.Name(), len(data)))
	}

	return anonData, nil
}

// loadAnonymizingData returns the lines of the file at path, one slice entry
// per line.
func loadAnonymizingData(path string) ([]string, error) {
	anonDataFile, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	// Deferred so the handle is released on the scanner-error path as well.
	defer anonDataFile.Close()

	var anonData []string
	scanner := bufio.NewScanner(anonDataFile)
	for scanner.Scan() {
		anonData = append(anonData, scanner.Text())
	}

	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("error reading anon data: %w", err)
	}

	return anonData, nil
}
Skipping.\n", fieldNames[i])) - continue - } - return nil, err - } - - data, err := loadAnonymizingData(filename) - if err != nil { - return nil, fmt.Errorf("loading anonymizing data from file %s: %v", filename, err) - } - - anonData[fieldNames[i]] = data - slog.Debug(fmt.Sprintf("Loaded anonymizing data for field: %s; values loaded: %d\n", fieldNames[i], len(data))) - } - - return anonData, nil -} - -func loadAnonymizingData(filepath string) ([]string, error) { - anonDataFile, err := os.OpenFile(filepath, os.O_RDONLY, os.ModePerm) - if err != nil { - return nil, err - } - - var anonData []string - anonDataFileScanner := bufio.NewScanner(anonDataFile) - for anonDataFileScanner.Scan() { - anonData = append(anonData, anonDataFileScanner.Text()) - } - - return anonData, anonDataFile.Close() -} diff --git a/internal/parser/anondataparser_test.go b/internal/parser/anondataparser_test.go deleted file mode 100644 index f701bec..0000000 --- a/internal/parser/anondataparser_test.go +++ /dev/null @@ -1,36 +0,0 @@ -package parser - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestAnonimizer_Anondataloader(t *testing.T) { - tests := []struct { - name string - anonDataDir string - fieldNames []string - expectedAnonData map[string][]string - }{ - { - name: "Test Anondataloader", - fieldNames: []string{"msg.organization"}, - anonDataDir: "../../examples/anon_data", - expectedAnonData: map[string][]string{ - "msg.organization": { - "Microsoft", "Apple", "H&P", "IBM", - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - anonData, err := ParseAnonData(tt.anonDataDir, tt.fieldNames) - if err != nil { - t.Fatal(err) - } - assert.Equal(t, tt.expectedAnonData, anonData) - }) - } -} diff --git a/internal/parser/csvparser.go b/internal/parser/csvparser.go deleted file mode 100644 index 1364e7b..0000000 --- a/internal/parser/csvparser.go +++ /dev/null @@ -1,46 +0,0 @@ -package parser - -import ( - "encoding/csv" - "log/slog" - 
"os" -) - -// ParseCSV takes a CSV file containing logs and transforms it into a list of maps, where each map entry represents a log line. -// Such format is required to be able to modify log data (replace original values with anonymous values). -// It is also returning names of the CSV columns. Names of the columns (field names) are needed to grab corresponding anonymization data. -func ParseCSV(filename string) ([]string, []map[string]string, error) { - file, err := os.Open(filename) - if err != nil { - return nil, nil, err - } - defer func(fs *os.File) { - if err := fs.Close(); err != nil { - slog.Error(err.Error()) - } - }(file) - - csvReader := csv.NewReader(file) - - // First element of the csvReader contains field names - fieldNames, err := csvReader.Read() - if err != nil { - return nil, nil, err - } - - var csvData []map[string]string - for { - row, err := csvReader.Read() - if err != nil { - break - } - - m := make(map[string]string) - for i, val := range row { - m[fieldNames[i]] = val - } - csvData = append(csvData, m) - } - - return fieldNames, csvData, nil -} diff --git a/internal/parser/csvparser_test.go b/internal/parser/csvparser_test.go deleted file mode 100644 index 349bbd5..0000000 --- a/internal/parser/csvparser_test.go +++ /dev/null @@ -1,38 +0,0 @@ -package parser - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestAnonimizer_CSVloader(t *testing.T) { - tests := []struct { - name string - filename string - expectedFieldNames []string - expectedValues []map[string]string - }{ - { - name: "Test CSVLoader", - filename: "../../examples/logs/example_logs.csv", - expectedFieldNames: []string{"@timestamp", "raw", "msg.src_ip", "msg.username", "msg.organization"}, - expectedValues: []map[string]string{{ - "@timestamp": "2024-06-05T14:59:27.000+00:00", - "msg.organization": "TESTuser.test.com", - "msg.src_ip": "89.239.31.49", "msg.username": "test.user@test.cz", - "raw": "{\"@timestamp\": \"2024-06-05T14:59:27.000+00:00\", 
\"msg.src_ip\":\"89.239.31.49\", \"username\":\"test.user@test.cz\", \"organization\":\"TESTuser.test.com\"}", - }}, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fieldNames, csvData, err := ParseCSV(tt.filename) - if err != nil { - t.Fatal(err) - } - assert.Equal(t, tt.expectedFieldNames, fieldNames) - assert.Equal(t, tt.expectedValues, csvData) - }) - } -} diff --git a/internal/runner/backup.go b/internal/runner/backup.go new file mode 100644 index 0000000..92747ac --- /dev/null +++ b/internal/runner/backup.go @@ -0,0 +1,64 @@ +package runner + +import ( + "bufio" + "compress/gzip" + "encoding/json" + "fmt" + "io" + "os" + + "github.com/logmanager-oss/logveil/internal/anonymizer" +) + +type LmBackup struct { + Source LmLog `json:"_source"` +} + +type LmLog struct { + Raw string `json:"raw"` + Msg map[string]interface{} `json:"msg"` +} + +func AnonymizeLmBackup(input *os.File, output io.Writer, anonymizer *anonymizer.Anonymizer) error { + gzReader, err := gzip.NewReader(input) + if err != nil { + return fmt.Errorf("error creating gzip reader: %w", err) + } + defer gzReader.Close() + + scanner := bufio.NewScanner(gzReader) + + for scanner.Scan() { + line := scanner.Bytes() + + lmBackup := &LmBackup{} + err = json.Unmarshal(line, &lmBackup) + if err != nil { + return fmt.Errorf("unmarshaling log line: %w", err) + } + + // Convert map[string]interface{} to map[string]string as requred by anonymizer + logLine := make(map[string]string) + for key, value := range lmBackup.Source.Msg { + strKey := fmt.Sprintf("%v", key) + strValue := fmt.Sprintf("%v", value) + + logLine[strKey] = strValue + } + logLine["raw"] = lmBackup.Source.Raw + + anonymizedLogLine := anonymizer.Anonymize(logLine) + + _, err = fmt.Fprintln(output, anonymizedLogLine) + if err != nil { + return fmt.Errorf("writing log line to buffer: %v", err) + } + } + + if err := scanner.Err(); err != nil { + return fmt.Errorf("error reading input: %w", err) + } + + return nil +} diff 
--git a/internal/runner/backup_test.go b/internal/runner/backup_test.go new file mode 100644 index 0000000..384ef24 --- /dev/null +++ b/internal/runner/backup_test.go @@ -0,0 +1,53 @@ +package runner + +import ( + "bytes" + "os" + "testing" + + "github.com/logmanager-oss/logveil/internal/anonymizer" + "github.com/logmanager-oss/logveil/internal/loader" + "github.com/stretchr/testify/assert" +) + +func TestLmBackup(t *testing.T) { + tests := []struct { + name string + inputFilename string + anonymizingDataDir string + expectedOutput string + }{ + { + name: "Test Test LM Backup Anonymizer", + inputFilename: "../../examples/logs/lm-2024-06-09_0000.gz", + anonymizingDataDir: "../../examples/anon_data", + expectedOutput: "<189>date=2024-11-06 time=12:29:25 devname=\"LM-FW-70F-Praha\" devid=\"FGT70FTK22012016\" eventtime=1730892565525108329 tz=\"+0100\" logid=\"0000000013\" type=\"traffic\" subtype=\"forward\" level=\"notice\" vd=\"root\" srcip=10.20.0.53 srcport=57158 srcintf=\"lan1\" srcintfrole=\"wan\" dstip=227.51.221.89 dstport=80 dstintf=\"lan1\" dstintfrole=\"lan\" srccountry=\"China\" dstcountry=\"Czech Republic\" sessionid=179455916 proto=6 action=\"client-rst\" policyid=9 policytype=\"policy\" poluuid=\"d8ccb3e4-74d4-51ef-69a3-73b41f46df74\" policyname=\"Gitlab web from all\" service=\"HTTP\" trandisp=\"noop\" duration=6 sentbyte=80 rcvdbyte=44 sentpkt=2 rcvdpkt=1 appcat=\"unscanned\" srchwvendor=\"H3C\" devtype=\"Router\" mastersrcmac=\"00:23:89:39:a4:ef\" srcmac=\"00:23:89:39:a4:ef\" srcserver=0 dsthwvendor=\"H3C\" dstdevtype=\"Router\" masterdstmac=\"00:23:89:39:a4:fa\" dstmac=\"00:23:89:39:a4:fa\" dstserver=0\n", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + input, err := os.Open(tt.inputFilename) + if err != nil { + t.Fatal(err) + } + defer input.Close() + + var output bytes.Buffer + + anonymizingData, err := loader.Load(tt.anonymizingDataDir) + if err != nil { + t.Fatal(err) + } + anonymizer := 
anonymizer.New(anonymizingData) + // Disabling randomization so we know which values to expect + anonymizer.SetRandFunc(func(int) int { return 1 }) + + err = AnonymizeLmBackup(input, &output, anonymizer) + if err != nil { + t.Fatal(err) + } + + assert.Equal(t, tt.expectedOutput, output.String()) + }) + } +} diff --git a/internal/runner/export.go b/internal/runner/export.go new file mode 100644 index 0000000..c04f70e --- /dev/null +++ b/internal/runner/export.go @@ -0,0 +1,47 @@ +package runner + +import ( + "encoding/csv" + "fmt" + "io" + "os" + "strings" + + "github.com/logmanager-oss/logveil/internal/anonymizer" +) + +func AnonymizeLmExport(input *os.File, output io.Writer, anonymizer *anonymizer.Anonymizer) error { + csvReader := csv.NewReader(input) + + // First element of the csvReader contains field names + fieldNames, err := csvReader.Read() + if err != nil { + return err + } + + // Trimming prefix from field names + for i, fieldName := range fieldNames { + fieldNames[i] = strings.TrimPrefix(fieldName, "msg.") + } + + for { + row, err := csvReader.Read() + if err != nil { + break + } + + logLine := make(map[string]string) + for i, val := range row { + logLine[fieldNames[i]] = val + } + + anonymizedLogLine := anonymizer.Anonymize(logLine) + + _, err = fmt.Fprintln(output, anonymizedLogLine) + if err != nil { + return fmt.Errorf("writing log line to buffer: %v", err) + } + } + + return nil +} diff --git a/internal/runner/export_test.go b/internal/runner/export_test.go new file mode 100644 index 0000000..3a22878 --- /dev/null +++ b/internal/runner/export_test.go @@ -0,0 +1,54 @@ +package runner + +import ( + "bytes" + "os" + "testing" + + "github.com/logmanager-oss/logveil/internal/anonymizer" + "github.com/logmanager-oss/logveil/internal/loader" + "github.com/stretchr/testify/assert" +) + +func TestLmExport(t *testing.T) { + tests := []struct { + name string + inputFilename string + outputFilename string + anonymizingData string + expectedOutput string + }{ + 
{ + name: "Test LM Export Anonymizer", + inputFilename: "../../examples/logs/example_logs.csv", + anonymizingData: "../../examples/anon_data", + expectedOutput: "{\"@timestamp\": \"2024-06-05T14:59:27.000+00:00\", \"msg.src_ip\":\"10.20.0.53\", \"username\":\"ladislav.dosek\", \"organization\":\"Apple\"}\n", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + input, err := os.Open(tt.inputFilename) + if err != nil { + t.Fatal(err) + } + defer input.Close() + + var output bytes.Buffer + + anonData, err := loader.Load(tt.anonymizingData) + if err != nil { + t.Fatal(err) + } + anonymizer := anonymizer.New(anonData) + // Disabling randomization so we know which values to expect + anonymizer.SetRandFunc(func(int) int { return 1 }) + + err = AnonymizeLmExport(input, &output, anonymizer) + if err != nil { + t.Fatal(err) + } + + assert.Equal(t, tt.expectedOutput, output.String()) + }) + } +} diff --git a/internal/writer/writer.go b/internal/writer/writer.go deleted file mode 100644 index a2e3207..0000000 --- a/internal/writer/writer.go +++ /dev/null @@ -1,32 +0,0 @@ -package writer - -import ( - "fmt" - "log/slog" - "os" -) - -type Output struct { - Output []string -} - -func (o *Output) Write(filename string) error { - file, err := os.Create(filename) - if err != nil { - return err - } - defer func(fs *os.File) { - if err := fs.Close(); err != nil { - slog.Error(err.Error()) - } - }(file) - - for _, line := range o.Output { - _, err := file.WriteString(line + "\n") - if err != nil { - return fmt.Errorf("writing anonymized data to output file %s: %v", filename, err) - } - } - - return nil -} diff --git a/internal/writer/writer_test.go b/internal/writer/writer_test.go deleted file mode 100644 index ed96e77..0000000 --- a/internal/writer/writer_test.go +++ /dev/null @@ -1,45 +0,0 @@ -package writer - -import ( - "os" - "strings" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestAnonimizer_Outputwriter(t *testing.T) { - tests := 
[]struct { - name string - outputFile string - expectedOutput string - }{ - { - name: "Test Output Writer", - outputFile: "output.txt", - expectedOutput: "{\"@timestamp\": \"2024-06-05T14:59:27.000+00:00\", \"msg.src_ip\":\"10.10.10.1\", \"username\":\"miloslav.illes\", \"organization\":\"Microsoft\"}", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - outputwriter := &Output{ - Output: []string{tt.expectedOutput}, - } - - defer os.Remove(tt.outputFile) - - err := outputwriter.Write(tt.outputFile) - if err != nil { - t.Fatal(err) - } - - data, err := os.ReadFile(tt.outputFile) - if err != nil { - t.Fatal(err) - } - - assert.Equal(t, tt.expectedOutput, strings.TrimRight(string(data), "\n")) - }) - } -}