Skip to content

Commit

Permalink
Benchmark categories renaming (#498)
Browse files: browse the repository at this point in the history
* precision .6f for scores

* renamed categories

* round result values

* custom branch for benchmark

* BM scores fix

* diff --ignore-all-space

* BM scores fix 2

* Apply suggestions from code review
  • Loading branch information
babenek authored Jan 31, 2024
1 parent 2a53a44 commit 4ef5209
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 24 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ jobs:
if: ${{ 'pull_request' == github.event_name }}
# update cicd/benchmark.txt with uploaded artifact if a difference is found
run: |
- diff temp/CredSweeper/cicd/benchmark.txt benchmark.txt
+ diff --ignore-all-space --ignore-blank-lines temp/CredSweeper/cicd/benchmark.txt benchmark.txt
- name: Checkout CredSweeper on push event
if: ${{ 'pull_request' != github.event_name }}
Expand All @@ -144,7 +144,7 @@ jobs:
if: ${{ 'pull_request' != github.event_name }}
# update cicd/benchmark.txt with uploaded artifact if a difference is found
run: |
- diff CredSweeper/cicd/benchmark.txt benchmark.txt
+ diff --ignore-all-space --ignore-blank-lines CredSweeper/cicd/benchmark.txt benchmark.txt
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

Expand Down
26 changes: 13 additions & 13 deletions cicd/benchmark.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
DATA: 19071512 valid lines. MARKUP: 64428 items
Category Positives Negatives Template
-------------------------- ----------- ----------- ----------
- Authentication Key & Token 91 2653 32
+ Authentication Credentials 91 2653 32
+ Cryptographic Primitives 54 171 1
Generic Secret 1064 29577 225
Generic Token 329 3718 555
Other 822 3759 546
Password 1400 7127 4139
Predefined Pattern 377 5289 11
Private Key 1011 1477
- Seed, Salt, Nonce 54 171 1
TOTAL: 5148 53771 5509
FileType FileNumber ValidLines Positives Negatives Template
--------------- ------------ ------------ ----------- ----------- ----------
Expand Down Expand Up @@ -234,14 +234,14 @@ FileType FileNumber ValidLines Positives Negatives Templat
TOTAL: 10188 19071512 5148 53771 5509
Detected Credentials: 6105
credsweeper result_cnt : 5224, lost_cnt : 0, true_cnt : 4350, false_cnt : 874
- Category TP FP TN FN FPR FNR ACC PRC RCL F1
- -------------------------- ---- ---- -------- ---- ---------- --------- -------- -------- -------- --------
- Authentication Key & Token 76 78 2607 15 0.0290503 0.164835 0.966499 0.493506 0.835165 0.620408
- Generic Secret 980 64 29738 84 0.00214751 0.0789474 0.995205 0.938697 0.921053 0.929791
- Generic Token 292 33 4240 37 0.00772291 0.112462 0.984789 0.898462 0.887538 0.892966
- Other 574 370 3935 248 0.0859466 0.301703 0.879462 0.608051 0.698297 0.650057
- Password 1010 248 11018 390 0.0220131 0.278571 0.949629 0.802862 0.721429 0.75997
- Predefined Pattern 360 67 5233 17 0.0126415 0.0450928 0.985203 0.843091 0.954907 0.895522
- Private Key 1011 0 1477 0 1 1 1 1
- Seed, Salt, Nonce 47 14 158 7 0.0813954 0.12963 0.90708 0.770492 0.87037 0.817391
- 4350 874 19065490 798 4.584e-05 0.155012 0.999912 0.832695 0.844988 0.838797
+ Category TP FP TN FN FPR FNR ACC PRC RCL F1
+ -------------------------- ---- ---- -------- ---- -------- -------- -------- -------- -------- --------
+ Authentication Credentials 76 78 2607 15 0.029050 0.164835 0.966499 0.493506 0.835165 0.620408
+ Cryptographic Primitives 47 14 158 7 0.081395 0.129630 0.907080 0.770492 0.870370 0.817391
+ Generic Secret 980 64 29738 84 0.002148 0.078947 0.995205 0.938697 0.921053 0.929791
+ Generic Token 292 33 4240 37 0.007723 0.112462 0.984789 0.898462 0.887538 0.892966
+ Other 574 370 3935 248 0.085947 0.301703 0.879462 0.608051 0.698297 0.650057
+ Password 1010 248 11018 390 0.022013 0.278571 0.949629 0.802862 0.721429 0.759970
+ Predefined Pattern 360 67 5233 17 0.012642 0.045093 0.985203 0.843091 0.954907 0.895522
+ Private Key 1011 0 1477 0 1.000000 1.000000 1.000000 1.000000
+ 4350 874 19065490 798 0.000046 0.155012 0.999912 0.832695 0.844988 0.838797
6 changes: 3 additions & 3 deletions experiment/augmentation/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def obfuscate_row(row, meta, secret_creds):
obfuscated_value = get_obfuscated_value(value, pattern)
else:
if meta.WithWords == "1" and meta.Category not in [
- "Authentication Key & Token", #
+ "Authentication Credentials", #
"Generic Secret", #
"Generic Token" #
]:
Expand Down Expand Up @@ -272,9 +272,9 @@ def aug_dir(arg):
"Password", #
"Generic Secret", #
"Predefined Pattern", #
- "Seed, Salt, Nonce", #
+ "Cryptographic Primitives", #
"Generic Token", #
- "Authentication Key & Token" #
+ "Authentication Credentials" #
]
meta_df = meta_df[meta_df["Category"].isin(augument_list)]
exts = get_extentions(meta_df)
Expand Down
12 changes: 6 additions & 6 deletions experiment/src/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ def eval_no_model(df: pd.DataFrame, df_missing: pd.DataFrame):
f1: float = (2 * precision * recall) / (precision + recall)

report = f"TP : {true_positive}, FP : {false_positive}, TN : {true_negative}, " \
- f"FN : {false_negative}, FPR : {false_positive_rate:.10f}, " \
- f"FNR : {false_negative_rate:.10f}, PRC : {precision:.10f}, " \
- f"RCL : {recall:.10f}, F1 : {f1:.10f}"
+ f"FN : {false_negative}, FPR : {false_positive_rate:.6f}, " \
+ f"FNR : {false_negative_rate:.6f}, PRC : {precision:.6f}, " \
+ f"RCL : {recall:.6f}, F1 : {f1:.6f}"
print(report)


Expand Down Expand Up @@ -146,9 +146,9 @@ def eval_with_model(df: pd.DataFrame, df_missing: pd.DataFrame, predictions: np.
f1: float = (2 * precision * recall) / (precision + recall)

report = f"TP : {true_positive}, FP : {false_positive}, TN : {true_negative}, " \
- f"FN : {false_negative}, FPR : {false_positive_rate:.10f}, " \
- f"FNR : {false_negative_rate:.10f}, PRC : {precision:.10f}, " \
- f"RCL : {recall:.10f}, F1 : {f1:.10f}"
+ f"FN : {false_negative}, FPR : {false_positive_rate:.6f}, " \
+ f"FNR : {false_negative_rate:.6f}, PRC : {precision:.6f}, " \
+ f"RCL : {recall:.6f}, F1 : {f1:.6f}"
print(report)


Expand Down

0 comments on commit 4ef5209

Please sign in to comment.