Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

assets/python: models thresholds tuning #942

Merged
merged 3 commits into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion assets/models/standard_v3_0/config.min.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.9,"pascal":0.95},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"version_major":3}
{"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.9,"ocaml":0.9,"pascal":0.95,"rst":0.9,"sql":0.9,"tsv":0.9},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"version_major":3}
2 changes: 1 addition & 1 deletion python/src/magika/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.


__version__ = "0.6.1-rc1"
__version__ = "0.6.1-dev"


import dotenv
Expand Down
2 changes: 1 addition & 1 deletion python/src/magika/models/standard_v3_0/config.min.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.9,"pascal":0.95},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"version_major":3}
{"beg_size":1024,"mid_size":0,"end_size":1024,"use_inputs_at_offsets":false,"medium_confidence_threshold":0.5,"min_file_size_for_dl":8,"padding_token":256,"block_size":4096,"target_labels_space":["3gp","ace","ai","aidl","apk","applebplist","appleplist","asm","asp","autohotkey","autoit","awk","batch","bazel","bib","bmp","bzip","c","cab","cat","chm","clojure","cmake","cobol","coff","coffeescript","cpp","crt","crx","cs","csproj","css","csv","dart","deb","dex","dicom","diff","dm","dmg","doc","dockerfile","docx","dsstore","dwg","dxf","elf","elixir","emf","eml","epub","erb","erlang","flac","flv","fortran","gemfile","gemspec","gif","gitattributes","gitmodules","go","gradle","groovy","gzip","h5","handlebars","haskell","hcl","hlp","htaccess","html","icns","ico","ics","ignorefile","ini","internetshortcut","ipynb","iso","jar","java","javabytecode","javascript","jinja","jp2","jpeg","json","jsonl","julia","kotlin","latex","lha","lisp","lnk","lua","m3u","m4","macho","makefile","markdown","matlab","mht","midi","mkv","mp3","mp4","mscompress","msi","mum","npy","npz","nupkg","objectivec","ocaml","odp","ods","odt","ogg","one","onnx","otf","outlook","parquet","pascal","pcap","pdb","pdf","pebin","pem","perl","php","pickle","png","po","postscript","powershell","ppt","pptx","prolog","proteindb","proto","psd","python","pythonbytecode","pytorch","qt","r","randombytes","randomtxt","rar","rdf","rpm","rst","rtf","ruby","rust","scala","scss","sevenzip","sgml","shell","smali","snap","solidity","sql","sqlite","squashfs","srt","stlbinary","stltext","sum","svg","swf","swift","tar","tcl","textproto","tga","thumbsdb","tiff","toml","torrent","tsv","ttf","twig","txt","typescript","vba","vcxproj","verilog","vhdl","vtt","vue","wasm","wav","webm","webp","winregistry","wmf","woff","woff2","xar","xls","xlsb","xlsx","xml","xpi","xz","yaml","yara","zig","zip","zlibstream"],"thresholds":{"handlebars":0.9,"ignorefile":0.95,"latex":0.95,"markdown":0.9,"ocaml":0.9,"pascal":0.95,"rst":0.9,"sql":0.9,"tsv":0.9},"overwrite_map":{"randombytes":"unknown","randomtxt":"txt"},"version_major":3}
2 changes: 1 addition & 1 deletion rust/lib/src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pub(crate) const CONFIG: ModelConfig = ModelConfig {
};

#[rustfmt::skip]
const THRESHOLDS: [f32; ContentType::SIZE] = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5];
const THRESHOLDS: [f32; ContentType::SIZE] = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.95, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5];
const OVERWRITE_MAP: [ContentType; ContentType::SIZE] = [
ContentType::_3gp,
ContentType::Ace,
Expand Down
Loading