From 253b6aea3a66628e3e63d27c57ae7f471cb949d2 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Wed, 3 Jan 2024 13:57:45 +0200 Subject: [PATCH] Convert script from old tensorflow model to onnx (#482) * tf2onnx convert approach * add ml_model integrity check * description added * Python3.10 checked * move script to experiment * removed extra file * Delete credsweeper/ml_model/ml_model.onnx.md5 --- .github/workflows/check.yml | 34 ++------------- cicd/README.md | 1 - cicd/git_workflow.sh | 62 ---------------------------- credsweeper/ml_model/ml_model.onnx | Bin 165415 -> 165124 bytes experiment/tf2onnx/requirements.txt | 8 ++++ experiment/tf2onnx/tf2onnx.sh | 17 ++++++++ 6 files changed, 28 insertions(+), 94 deletions(-) delete mode 100755 cicd/git_workflow.sh create mode 100644 experiment/tf2onnx/requirements.txt create mode 100755 experiment/tf2onnx/tf2onnx.sh diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 04ae609ba..da4cc9f28 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -53,40 +53,12 @@ jobs: done exit ${n} - # # # git workflow + # # # ml_model integrity - - name: Get latest release tag name + - name: Check ml_model.onnx integrity if: ${{ always() && steps.code_checkout.conclusion == 'success' }} run: | - if [ "pull_request" == "${{ github.event_name }}" ]; then - API_RELEASE_URL=$(echo "${{ github.event.pull_request.base.repo.releases_url }}") - else - API_RELEASE_URL=$(echo "${{ github.event.repository.releases_url }}") - fi - echo "'${API_RELEASE_URL}'" # dbg - API_RELEASE_URL=$(echo "${API_RELEASE_URL}" | sed 's|.....$||') - echo "'${API_RELEASE_URL}'" # dbg - API_RELEASE_URL=$(echo "${API_RELEASE_URL}/latest") - echo "'${API_RELEASE_URL}'" # dbg - LATEST_RELEASE_TAG=$( - curl \ - --silent \ - --header "Accept: application/vnd.github.v3+json" \ - --header "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ - ${API_RELEASE_URL} \ - | \ - jq \ - --raw-output \ - '.tag_name' \ - ) - echo "LATEST_RELEASE_TAG='${LATEST_RELEASE_TAG}'" - export GIT_ANCESTOR=${LATEST_RELEASE_TAG} - if bash cicd/git_workflow.sh; then - echo "GIT workflow OK" - else - echo "Please, rebase the branch after ${LATEST_RELEASE_TAG}" - exit 1 - fi + md5sum --binary credsweeper/ml_model/ml_model.onnx | grep 8cb870a200d7bc07893aacec38f54033 # # # Python setup diff --git a/cicd/README.md b/cicd/README.md index 436346de9..c26c01276 100644 --- a/cicd/README.md +++ b/cicd/README.md @@ -2,4 +2,3 @@ The following files are used for: * **benchmark.txt** - template scores to compare it with benchmark - * **git_workflow.sh** - checks whether branch in is pure rebased after latest release diff --git a/cicd/git_workflow.sh b/cicd/git_workflow.sh deleted file mode 100755 index e5a264e16..000000000 --- a/cicd/git_workflow.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/bash -set -e - -if [ -z "${GIT_ANCESTOR}" ]; then - echo "GIT_ANCESTOR is empty!" - exit 1 -fi - -head_hash=$(git log --pretty=%H -n 1 HEAD) -ancestor_hash=$(git log --pretty=%H -n 1 ${GIT_ANCESTOR}) - -if ! git merge-base --is-ancestor ${ancestor_hash} ${head_hash}; then - echo "${ancestor_hash} is not ancestor of ${head_hash}" - exit 1 -fi - -declare -A commits - -function git_test() -{ - echo -e -n "\ntest for ${1}" - - if [ "${ancestor_hash}" == "${1}" ]; then - echo "This commit is searched ${ancestor_hash}" - return 0 - fi - - if [ -v commits[${1}] ]; then - echo -n " - already checked" - return 0 - else - echo -n " - need investigation" - commits[${1}]+=1 - fi - - local has_parents=false - for commit in $(git log --pretty=%P -n 1 ${1}); do - has_parents=true - echo -n " - parent: ${commit}" - if ! git_test ${commit}; then - echo " - commit ${commit} fail" - return 1 - fi - done - - if ! ${has_parents}; then - echo " - the end. Commit ${1} has no parents" - return 1 - fi - - echo "end" - return 0 -} - -if ! git_test ${head_hash}; then - echo "FAIL: ${head_hash} is not pure rebased to ${ancestor_hash}" - exit 1 -fi - -echo "OK" - -exit 0 diff --git a/credsweeper/ml_model/ml_model.onnx b/credsweeper/ml_model/ml_model.onnx index c22d4c87950822b0c6daeaff82f1070f2bcb6058..bd64a77345434a3154301d08f3c131b4b716954d 100644 GIT binary patch delta 3583 zcma)9-*4MQ9PhbF>VCT{!RwDCO-X77>sqz5+c<3^L0P(XD57PwLkzab@@khwq)ADf zsTx8p+A6ea6I2F;7X%W&BqRjVgd!e_$RAJ%Bp!GfVtZv0@GxmF6Wry8<1DpZKiGHY zk3aYMe(vsj=dW+V^=&v!(@clDGM>$3)&p*SjE|0SVL_HBC!_ISZUFp=0Me<9BBnBH z1(i9!q)Mu?Qb?bXa%xIVWiyIABc;?lx%w;W31D%t(24xDzNf z>vMgEDENi~E8fKlFL;)-nY=1W>q=e}`3a%kpk(AqL!{oIBbb-Wr3`7;9bW&u72qdF zU+_l?EJ-LQ8x&&9MKc~UAlgUHz7QW7$KIGui#m?tBigr4fA9ke4#;U9B;g#os^qeH zk;jfjCa?h0fC)Hm1UC&+2N@>p#e!v=dp+)exn29nVNRRzk!5nm=?r4Yvo@HgS=a3P znv{{@Jp^`y8*#36ns{ExeYA~EWzXOmImg~Og6JDsiHW= zV>W2iSnWG!U+@PCOecaNKcv9=Nuntlhj%%NEvD)8<{rv;bDW`JEGD%ph`%+4;NNH`r6WF)}2L8H{Eeo*Lz{Oe+ z%%P=pY8l&%69I9iQNx*tyuk*~K@-Txu|XDogmEwFqZPv}U4|;#QLA90WoQQ0ne_Zn zZ)gcXM=)JAg)sql)VZP88gggBh^bRH7#bR4&0=FQqu70&)mdt^8)A!8y`f<(N3`nJ zazaC^!CC^Ui*0Z;#76K0GB_KVBw{aD774a?hcb9epzuUBJLgrr-;s1IJkBog_3rej z7|CjB-}kV7(HanXrV-m71(tL-y?npGtk@6^66qvS$~M5!3eY%$?^9sMK3!usm`5fB zxK|WOE#&HlhnQL4+B6d)DRI&QAO!JdZpQ$p%ovJ;4%DN4&-na;9mGYXOxU7uex8@q z`9d20MQ|WeFirVH-DrS`T6G#EvLcoUmTf;N%kUKfTPDUj!0@7yF5v!ntNwb95ftkn zcX)`vnlecRsV1hCY+4qxYj{uQ zg;;Ugh59LhFJ5pV--e$4k~C_oUX^b<615{|4Zy{cG_@~4&uB@y_P%XcyXBxMdYzv0 zqC|BcPj23FoO*ftS^CNsyKjuV{oI9xk9QZ&^_5^7jb1B#>Ov>44V=ShyF*s@;JtQQ z#=)V4*>MNE!rcz^euzS6D>(DReYu+(NjmP_@$L?ne{ZLYh?X(0ZF=atebo2hukasO z2!@-k*AiZ@Wz?(PMHMfe#MtU#Cb@YRl|G-Po}VhsyQz~?M|M0I@ehfqddx{~dP;lm zQa`ts9=?O`{>45#j#reNs>tD$Ty{0Qnif~HvXU7%g5YAg~?e!#1;!WH*Y2!FsDy4=7|7`pdfk>RT0jnvH02NeXS>m-~k**Va zQ=))yQA-6VtrTrhda0BfD(VR&f;ga@5bB9Sk>Ci3TU9u5LojduytX3{V9DP3pLz4m z8}H@!;loWh%K3PoR!UY>b$#eSVmvpVn!LIRPy<*ltBO=sSJpIMzZf{?Us#e?l*#ze zasBPUSg*ke2aE@NM8Ivrr>IH5&`DA@iB1VBdDxK%?2KH~Vt!z>O4z$Y82FepWjvuj z%O2Bj1$wvYT_oRN{9C^av%eB>={p(zY1<$?uUt^7HRXk*WaUaEvY39(HW2nRu#Pd= z0aqDtVvX%*IvI!V#fnaOmO1D%jjx=I6Ae?XkvhXPWdr9UO?w(>5(s{WMZoV6_{Z`Z7Ww_k!wKU9W`F3wr7>fti?w@) z$!;@E_a+FtW8&0bteZ7iT7+ATbKdwJbh-to2SnlbURk=;RP*+XyVBVn?A<_xPjr9m2&1z+Ws^l|`*wQ3YSAQhi;n7I%mz#zW^)xmZ+G zsaD=l!V&WfGtfxVz9Iq(_CRFlKIn9@g3n^G5kC{4mec0!P*Vt+cfY2|RgK>MWY!)y zMJ*bFoGozn17ENZ8==?#Nt@B@UBJG}fTPv#ck!OIvpfZFirzbWRSJA6;V#aN(pJ5* zZlWWmO>{&=iJ~KB{|!1KZVwv~q#bmScF+-X4RmCLq}wL1%Y zSC~)6Z%khrXj6)5L;0Y8>e9j3w=0HcgnF9Wwy?Ldd+`J_oej0>#0=F`ho^SX1MP|>_rW3PNckfNq&CVafQ{$_erWB*4YUO;iw5A%nR$i78FO4<5 w3l2e;BYZyq&#kGi(f)|Qa7p~0+CG1oIpP&3kUqEl)nz6MxR9_3d8Rk;7s%5XLI3~& diff --git a/experiment/tf2onnx/requirements.txt b/experiment/tf2onnx/requirements.txt new file mode 100644 index 000000000..e83f5bc8f --- /dev/null +++ b/experiment/tf2onnx/requirements.txt @@ -0,0 +1,8 @@ +h5py==3.10.0 +keras==2.13.1 +numpy==1.23.5 +onnx==1.15.0 +protobuf==3.20.3 +tensorflow==2.13.1 +tf2onnx==1.16.0 +wrapt==1.14.1 diff --git a/experiment/tf2onnx/tf2onnx.sh b/experiment/tf2onnx/tf2onnx.sh new file mode 100755 index 000000000..f7d659a68 --- /dev/null +++ b/experiment/tf2onnx/tf2onnx.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +set -e + +# tensorflow model may be obtained like this: git restore -s be06d6059f0def4f0fdb50444c08db4ce542173e -- ml_model.h5 +# use virtual environment and the requirements.txt - there are very specific luke combination of packages verions +# python -m venv .venv +# . .venv/bin/activate +# python -m pip install --upgrade pip +# python -m pip install --requirement requirements.txt + +# [optional] thransform model form h5 to saved directory +python -c 'import tensorflow as tf;model=tf.keras.models.load_model("ml_model.h5");model.save("ml_model")' +# transform the model +python -m tf2onnx.convert --saved-model ml_model --output ml_model.onnx --verbose --rename-inputs feature_input,line_input +# md5sum for integrity +md5sum --binary ml_model.onnx