Skip to content

Commit

Permalink
gh workflow: add poc to show different onnxruntime results on windows
Browse files Browse the repository at this point in the history
  • Loading branch information
reyammer committed Jan 22, 2025
1 parent 0cc5ed5 commit 0ae5b1b
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 0 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/onnxruntime-windows-diff-poc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Proof-of-concept workflow: runs the same magika inference and feature
# extraction on Linux, macOS and Windows so the printed results, hex dumps
# and file hashes can be compared across operating systems.
name: POC for debugging windows onnxruntime diff

on:
  workflow_dispatch:
  pull_request:

permissions:
  contents: read

jobs:
  run-inference:
    strategy:
      # The point of this job is to compare output across every OS, so do not
      # cancel the remaining legs when one of them fails.
      fail-fast: false
      matrix:
        python-version: ["3.12"]
        os: ["ubuntu-latest", "macos-latest", "windows-latest"]
    runs-on: ${{ matrix.os }}
    steps:
      # Must run before checkout so the test file is checked out with the
      # line endings stored in the repository on every OS.
      - name: Disable automatic git CRLF conversion
        run: git config --global core.autocrlf false
      - name: Checkout repository
        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871  # pin@v4
      - name: Setup Python
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b  # pin@v5
        with:
          python-version: '${{ matrix.python-version }}'
      - name: Install magika with pip
        run: python3 -m pip install magika==0.6.0rc3
      - name: Run magika inference on the test file
        run: python3 -c "import magika, pathlib; m = magika.Magika(); res = m.identify_path(pathlib.Path('tests_data/basic/python/code.py')); print(res)"
      - name: Extract and print features of the test file
        run: python3 ./python/scripts/extract_features.py

      # Hex dump of the test file, to spot byte-level differences
      # (for example line endings) between operating systems.
      - name: Hex dump (Windows)
        if: matrix.os == 'windows-latest'
        shell: pwsh
        run: |
          Format-Hex tests_data/basic/python/code.py
      - name: Hex dump (Linux and macOS)
        if: matrix.os != 'windows-latest'
        run: hexdump -C tests_data/basic/python/code.py

      # SHA-256 of the test file, using the tool available on each OS.
      - name: File hash (Windows)
        if: matrix.os == 'windows-latest'
        shell: pwsh
        run: |
          Get-FileHash tests_data/basic/python/code.py
      - name: File hash (Linux)
        if: matrix.os == 'ubuntu-latest'
        run: sha256sum tests_data/basic/python/code.py
      - name: File hash (macOS)
        if: matrix.os == 'macos-latest'
        run: shasum -a256 tests_data/basic/python/code.py

39 changes: 39 additions & 0 deletions python/scripts/extract_features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import hashlib
from pathlib import Path

import magika

# Test file location: three directory levels above this script, then into
# tests_data/basic/python/code.py.
test_path = Path(__file__).parents[2].joinpath(
    "tests_data", "basic", "python", "code.py"
)

m = magika.Magika()

# Options passed to magika's feature extractor. NOTE(review): this calls a
# private (underscore-prefixed) method, so the signature may change between
# magika releases - confirm against the pinned version.
_feature_options = {
    "beg_size": 1024,
    "mid_size": 0,
    "end_size": 1024,
    "padding_token": 256,
    "block_size": 4096,
    "use_inputs_at_offsets": False,
}

fs = m._extract_features_from_path(test_path, **_feature_options)


def serialize(fs):
    """Return a SHA-256 hex digest identifying the extracted features.

    The digest is computed over the ASCII-encoded ``str()`` form of
    ``fs.beg + fs.end``, so two feature sets hash equal exactly when that
    textual form is identical.
    """
    combined = fs.beg + fs.end
    hasher = hashlib.sha256()
    hasher.update(str(combined).encode("ascii"))
    return hasher.hexdigest()


# Print the extracted features and their digest so runs on different
# operating systems can be compared line by line.
print(fs)
print(serialize(fs))

# Read the file with a plain binary open(); the context manager guarantees the
# handle is closed even if read() raises.
with open(test_path, "rb") as f:
    content_bytes = f.read()
print(f"content bytes len: {len(content_bytes)}")

# Read it again through pathlib to cross-check the first read.
content_bytes_2 = test_path.read_bytes()
print(f"content bytes 2 len: {len(content_bytes_2)}")

# Size reported by the filesystem, for comparison with the two lengths above.
print(f"file size: {test_path.stat().st_size}")

# Iterating over bytes yields ints, so this lists the first 64 byte values.
content_bytes_ints = list(content_bytes[:64])
print(content_bytes_ints)

0 comments on commit 0ae5b1b

Please sign in to comment.