Skip to content

Commit

Permalink
Update ValueFilePathCheck filter (#407)
Browse files Browse the repository at this point in the history
* Update filter ValueFilePathCheck

* benchmark scores fix and style

* Update credsweeper/filters/value_file_path_check.py
  • Loading branch information
babenek authored Aug 18, 2023
1 parent c757496 commit 2bbb1f4
Show file tree
Hide file tree
Showing 10 changed files with 234 additions and 22 deletions.
10 changes: 5 additions & 5 deletions cicd/benchmark.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@ Predefined Pattern 326 2 40
Private Key 1001 1 3
Seed, Salt, Nonce 39 4 4
TOTAL: 4739 63313 5644
Detected Credentials: 4738
credsweeper result_cnt : 4222, lost_cnt : 0, true_cnt : 3852, false_cnt : 370
Detected Credentials: 4744
credsweeper result_cnt : 4227, lost_cnt : 0, true_cnt : 3857, false_cnt : 370
Category TP FP TN FN FPR FNR ACC PRC RCL F1
-------------------------- ---- ---- -------- ---- ---------- --------- -------- -------- -------- --------
Authentication Key & Token 51 4 28 16 0.125 0.238806 0.79798 0.927273 0.761194 0.836066
Generic Secret 971 2 216 84 0.00917431 0.0796209 0.932443 0.997945 0.920379 0.957594
Generic Token 284 7 596 49 0.0116086 0.147147 0.940171 0.975945 0.852853 0.910256
Generic Token 287 7 596 46 0.0116086 0.138138 0.943376 0.97619 0.861862 0.91547
Other 253 237 63533 262 0.00371648 0.508738 0.992238 0.516327 0.491262 0.503483
Password 982 116 4164 421 0.0271028 0.300071 0.905508 0.894353 0.699929 0.785286
Password 984 116 4164 419 0.0271028 0.298646 0.90586 0.894545 0.701354 0.786256
Predefined Pattern 309 2 40 17 0.0476191 0.0521472 0.94837 0.993569 0.947853 0.970173
Private Key 967 0 4 34 0.033966 0.966169 1 0.966034 0.982724
Seed, Salt, Nonce 35 2 6 4 0.25 0.102564 0.87234 0.945946 0.897436 0.921053
3852 370 19429349 887 1.904e-05 0.18717 0.999935 0.912364 0.81283 0.859725
3857 370 19429349 882 1.904e-05 0.186115 0.999936 0.912467 0.813885 0.860361
2 changes: 1 addition & 1 deletion credsweeper/filters/value_entropy_base32_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
return True
entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE32_CHARS.value)
min_entropy = ValueEntropyBase32Check.get_min_data_entropy(len(line_data.value))
return min_entropy > entropy
return min_entropy > entropy or 0 == min_entropy

@staticmethod
def get_min_data_entropy(x: int) -> float:
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/filters/value_entropy_base36_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
return True
entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE36_CHARS.value)
min_entropy = ValueEntropyBase36Check.get_min_data_entropy(len(line_data.value))
return min_entropy > entropy
return min_entropy > entropy or 0 == min_entropy

@staticmethod
def get_min_data_entropy(x: int) -> float:
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/filters/value_entropy_base64_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
else:
entropy = Util.get_shannon_entropy(line_data.value, Chars.BASE64STD_CHARS.value)
min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(line_data.value))
return min_entropy > entropy
return min_entropy > entropy or 0 == min_entropy

@staticmethod
def get_min_data_entropy(x: int) -> float:
Expand Down
30 changes: 20 additions & 10 deletions credsweeper/filters/value_file_path_check.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from credsweeper.common.constants import Chars
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter
from credsweeper.filters import Filter, ValueEntropyBase64Check
from credsweeper.utils import Util


class ValueFilePathCheck(Filter):
r"""Check that candidate value is a path or not.
Check if a value contains either '/' or ':\' separators (but not both)
and do not have any special characters ( !$`&*()+)
and do not have any special characters ( !$@`&*()+)
"""

def __init__(self, config: Config = None) -> None:
Expand All @@ -27,13 +29,21 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
"""
if not line_data.value:
return True
contains_unix_separator = '/' in line_data.value
contains_windows_separator = ':\\' in line_data.value
contains_special_characters = False
for i in " !$`&*()+":
if i in line_data.value:
contains_special_characters = True
value = line_data.value
contains_unix_separator = '/' in value and not value.endswith('=')
if contains_unix_separator:
# base64 encoded data might look like linux path
min_entropy = ValueEntropyBase64Check.get_min_data_entropy(len(value))
# get minimal entropy to compare with shannon entropy of found value
# min_entropy == 0 means that the value cannot be checked with the entropy due to high variance
contains_unix_separator = (0 == min_entropy
or min_entropy > Util.get_shannon_entropy(value, Chars.BASE64STD_CHARS.value))
# low shannon entropy suggests that the value may not be a highly randomized base64 value
contains_windows_separator = ':\\' in value
for i in " !$@`&*()+":
if i in value:
break
if (contains_unix_separator ^ contains_windows_separator) and not contains_special_characters:
return True
else:
if contains_unix_separator ^ contains_windows_separator:
return True
return False
6 changes: 3 additions & 3 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
SAMPLES_FILES_COUNT: int = 108

# credentials count after scan
SAMPLES_CRED_COUNT: int = 105
SAMPLES_CRED_LINE_COUNT: int = 116
SAMPLES_CRED_COUNT: int = 108
SAMPLES_CRED_LINE_COUNT: int = 119

# credentials count after post-processing
SAMPLES_POST_CRED_COUNT: int = 99
SAMPLES_POST_CRED_COUNT: int = 102

# with option --doc
SAMPLES_IN_DOC = 72
Expand Down
66 changes: 66 additions & 0 deletions tests/data/depth_3.json
Original file line number Diff line number Diff line change
Expand Up @@ -1323,6 +1323,72 @@
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "VALIDATED_KEY",
"ml_probability": 0.99799,
"rule": "Secret",
"severity": "medium",
"line_data_list": [
{
"line": "secret_looks_like_linux_path_1=\"/VnpmUGWxhQW9KQAwrL2ZYdDJPNG1PQjYxMXNPaF\"",
"line_num": 2,
"path": "tests/samples/key.hs",
"info": "tests/samples/key.hs|RAW",
"value": "/VnpmUGWxhQW9KQAwrL2ZYdDJPNG1PQjYxMXNPaF",
"variable": "secret_looks_like_linux_path_1",
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.8341837197791895,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "VALIDATED_KEY",
"ml_probability": 0.99801,
"rule": "Secret",
"severity": "medium",
"line_data_list": [
{
"line": "secret_looks_like_linux_path_2=\"VnpmUGWxhQW/9KQAwrL2ZYd/DJPNG1PQjYxMXNPF\"",
"line_num": 3,
"path": "tests/samples/key.hs",
"info": "tests/samples/key.hs|RAW",
"value": "VnpmUGWxhQW/9KQAwrL2ZYd/DJPNG1PQjYxMXNPF",
"variable": "secret_looks_like_linux_path_2",
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.784183719779189,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "VALIDATED_KEY",
"ml_probability": 0.99794,
"rule": "Secret",
"severity": "medium",
"line_data_list": [
{
"line": "secret_looks_like_linux_path_3=\"VnpmUGWxhQW/9KQAwrL2ZYdDJPNG1PQjYxMXNPF=\"",
"line_num": 4,
"path": "tests/samples/key.hs",
"info": "tests/samples/key.hs|RAW",
"value": "VnpmUGWxhQW/9KQAwrL2ZYdDJPNG1PQjYxMXNPF=",
"variable": "secret_looks_like_linux_path_3",
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.8341837197791895,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "VALIDATED_KEY",
Expand Down
66 changes: 66 additions & 0 deletions tests/data/ml_threshold_0.json
Original file line number Diff line number Diff line change
Expand Up @@ -1323,6 +1323,72 @@
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Secret",
"severity": "medium",
"line_data_list": [
{
"line": "secret_looks_like_linux_path_1=\"/VnpmUGWxhQW9KQAwrL2ZYdDJPNG1PQjYxMXNPaF\"",
"line_num": 2,
"path": "tests/samples/key.hs",
"info": "",
"value": "/VnpmUGWxhQW9KQAwrL2ZYdDJPNG1PQjYxMXNPaF",
"variable": "secret_looks_like_linux_path_1",
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.8341837197791895,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Secret",
"severity": "medium",
"line_data_list": [
{
"line": "secret_looks_like_linux_path_2=\"VnpmUGWxhQW/9KQAwrL2ZYd/DJPNG1PQjYxMXNPF\"",
"line_num": 3,
"path": "tests/samples/key.hs",
"info": "",
"value": "VnpmUGWxhQW/9KQAwrL2ZYd/DJPNG1PQjYxMXNPF",
"variable": "secret_looks_like_linux_path_2",
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.784183719779189,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Secret",
"severity": "medium",
"line_data_list": [
{
"line": "secret_looks_like_linux_path_3=\"VnpmUGWxhQW/9KQAwrL2ZYdDJPNG1PQjYxMXNPF=\"",
"line_num": 4,
"path": "tests/samples/key.hs",
"info": "",
"value": "VnpmUGWxhQW/9KQAwrL2ZYdDJPNG1PQjYxMXNPF=",
"variable": "secret_looks_like_linux_path_3",
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.8341837197791895,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
Expand Down
66 changes: 66 additions & 0 deletions tests/data/output.json
Original file line number Diff line number Diff line change
Expand Up @@ -1257,6 +1257,72 @@
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "VALIDATED_KEY",
"ml_probability": 0.99799,
"rule": "Secret",
"severity": "medium",
"line_data_list": [
{
"line": "secret_looks_like_linux_path_1=\"/VnpmUGWxhQW9KQAwrL2ZYdDJPNG1PQjYxMXNPaF\"",
"line_num": 2,
"path": "tests/samples/key.hs",
"info": "",
"value": "/VnpmUGWxhQW9KQAwrL2ZYdDJPNG1PQjYxMXNPaF",
"variable": "secret_looks_like_linux_path_1",
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.8341837197791895,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "VALIDATED_KEY",
"ml_probability": 0.99801,
"rule": "Secret",
"severity": "medium",
"line_data_list": [
{
"line": "secret_looks_like_linux_path_2=\"VnpmUGWxhQW/9KQAwrL2ZYd/DJPNG1PQjYxMXNPF\"",
"line_num": 3,
"path": "tests/samples/key.hs",
"info": "",
"value": "VnpmUGWxhQW/9KQAwrL2ZYd/DJPNG1PQjYxMXNPF",
"variable": "secret_looks_like_linux_path_2",
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.784183719779189,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "VALIDATED_KEY",
"ml_probability": 0.99794,
"rule": "Secret",
"severity": "medium",
"line_data_list": [
{
"line": "secret_looks_like_linux_path_3=\"VnpmUGWxhQW/9KQAwrL2ZYdDJPNG1PQjYxMXNPF=\"",
"line_num": 4,
"path": "tests/samples/key.hs",
"info": "",
"value": "VnpmUGWxhQW/9KQAwrL2ZYdDJPNG1PQjYxMXNPF=",
"variable": "secret_looks_like_linux_path_3",
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.8341837197791895,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
Expand Down
6 changes: 5 additions & 1 deletion tests/samples/key.hs
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
prKeyValid=LS0tLS1CRUdJTiBQUklWQVRFIENDcUdTTTQ5QXdFSEJHMHdhd0lCQVFRZ0ViVnpmUGWxhQW9KQWwrLzZYdDJPNG1PQjYxMXNPaFJBTkNBQVNnRlRLandKQUFVOTVnKysvdnpLV0hrekFWbU5NSQp0QjV2VGpaT09Jd25FYjcwTXNXWkZJeVVGRDFQOUd3c3R6NCtha0hYN3ZJOEJINmhIbUJtZmVRbAotLS0tLUVORCBQUklWJNR0J5cUdTTTQ5QW5aUHhmQXl4cUUKWlYwNdFR0QVRFIEtFWS0tLS0tCgtFWS0tLS0tCk1JR0hBZ0VBTU==
prKeyValid=LS0tLS1CRUdJTiBQUklWQVRFIENDcUdTTTQ5QXdFSEJHMHdhd0lCQVFRZ0ViVnpmUGWxhQW9KQWwrLzZYdDJPNG1PQjYxMXNPaFJBTkNBQVNnRlRLandKQUFVOTVnKysvdnpLV0hrekFWbU5NSQp0QjV2VGpaT09Jd25FYjcwTXNXWkZJeVVGRDFQOUd3c3R6NCtha0hYN3ZJOEJINmhIbUJtZmVRbAotLS0tLUVORCBQUklWJNR0J5cUdTTTQ5QW5aUHhmQXl4cUUKWlYwNdFR0QVRFIEtFWS0tLS0tCgtFWS0tLS0tCk1JR0hBZ0VBTU==
secret_looks_like_linux_path_1="/VnpmUGWxhQW9KQAwrL2ZYdDJPNG1PQjYxMXNPaF"
secret_looks_like_linux_path_2="VnpmUGWxhQW/9KQAwrL2ZYd/DJPNG1PQjYxMXNPF"
secret_looks_like_linux_path_3="VnpmUGWxhQW/9KQAwrL2ZYdDJPNG1PQjYxMXNPF="
secret_looks_like_linux_path__="VnpmUGWxhQW/9KQAwrL2ZYd/DJPNG1PQjEXAMPLE"

0 comments on commit 2bbb1f4

Please sign in to comment.