From 1957532d22a86dd83a933b30a92416fd31fb6063 Mon Sep 17 00:00:00 2001 From: Yanick Fratantonio Date: Mon, 3 Feb 2025 21:14:01 +0000 Subject: [PATCH 1/3] python: go back to -dev version --- python/src/magika/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/src/magika/__init__.py b/python/src/magika/__init__.py index e0934e95..c9d1642d 100644 --- a/python/src/magika/__init__.py +++ b/python/src/magika/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. -__version__ = "0.6.1-rc1" +__version__ = "0.6.1-dev" import dotenv From 947bbab5a3c31292f3b989579da0c77b274f2bf5 Mon Sep 17 00:00:00 2001 From: Yanick Fratantonio Date: Mon, 3 Feb 2025 21:14:12 +0000 Subject: [PATCH 2/3] assets: more models thresholds tuning --- assets/models/standard_v3_0/config.min.json | 2 +- python/src/magika/models/standard_v3_0/config.min.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/models/standard_v3_0/config.min.json b/assets/models/standard_v3_0/config.min.json index 3175be20..5ef5147d 100644 --- a/assets/models/standard_v3_0/config.min.json +++ b/assets/models/standard_v3_0/config.min.json @@ -1 +1 @@ -{"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.9,"pascal":0.95},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"version_major":3} +{"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.9,"ocaml":0.9,"pascal":0.95,"rst":0.9,"sql":0.9,"tsv":0.9},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"version_major":3} \ No newline at end of file diff --git a/python/src/magika/models/standard_v3_0/config.min.json b/python/src/magika/models/standard_v3_0/config.min.json index 3175be20..5ef5147d 100644 --- a/python/src/magika/models/standard_v3_0/config.min.json +++ b/python/src/magika/models/standard_v3_0/config.min.json @@ -1 +1 @@ -{"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.9,"pascal":0.95},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"version_major":3} +{"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.9,"ocaml":0.9,"pascal":0.95,"rst":0.9,"sql":0.9,"tsv":0.9},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"version_major":3} \ No newline at end of file From ebcc5e23e33d2c2bcfcda22dc9e9ee9dc0b17ba3 Mon Sep 17 00:00:00 2001 From: Yanick Fratantonio Date: Mon, 3 Feb 2025 21:15:23 +0000 Subject: [PATCH 3/3] rust: update model thresholds --- rust/lib/src/model.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/lib/src/model.rs b/rust/lib/src/model.rs index cc1fe827..e537d8f8 100644 --- a/rust/lib/src/model.rs +++ b/rust/lib/src/model.rs @@ -33,7 +33,7 @@ pub(crate) const CONFIG: ModelConfig = ModelConfig { }; #[rustfmt::skip] -const THRESHOLDS: [f32; ContentType::SIZE] = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]; +const THRESHOLDS: [f32; ContentType::SIZE] = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]; const OVERWRITE_MAP: [ContentType; ContentType::SIZE] = [ ContentType::_3gp, ContentType::Ace,