Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IP addresses rules and filters #394

Merged
merged 4 commits into from
Aug 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions cicd/benchmark.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
DATA: 19434458 valid lines. MARKUP: 73890 items
DATA: 19434458 valid lines. MARKUP: 74507 items
Category Positives Negatives Template
-------------------------- ----------- ----------- ----------
Authentication Key & Token 67 1 31
Generic Secret 1055 15 203
Generic Token 333 45 558
Other 708 63136 635
Other 1074 63387 635
Password 1403 110 4170
Predefined Pattern 326 2 40
Private Key 1001 1 3
Seed, Salt, Nonce 39 4 4
TOTAL: 4932 63314 5644
Detected Credentials: 4938
credsweeper result_cnt : 4421, lost_cnt : 0, true_cnt : 4050, false_cnt : 371
TOTAL: 5298 63565 5644
Detected Credentials: 5637
credsweeper result_cnt : 5118, lost_cnt : 0, true_cnt : 4416, false_cnt : 702
Category TP FP TN FN FPR FNR ACC PRC RCL F1
-------------------------- ---- ---- -------- ---- ---------- --------- -------- -------- -------- --------
Authentication Key & Token 51 4 28 16 0.125 0.238806 0.79798 0.927273 0.761194 0.836066
Generic Secret 971 2 216 84 0.00917431 0.0796209 0.932443 0.997945 0.920379 0.957594
Generic Token 287 7 596 46 0.0116086 0.138138 0.943376 0.97619 0.861862 0.91547
Other 446 238 63533 262 0.0037321 0.370057 0.992246 0.652047 0.629943 0.640805
Password 984 116 4164 419 0.0271028 0.298646 0.90586 0.894545 0.701354 0.786256
Other 812 562 63460 262 0.00877823 0.243948 0.987342 0.590975 0.756052 0.663399
Password 984 123 4157 419 0.0287383 0.298646 0.904628 0.888889 0.701354 0.784064
Predefined Pattern 309 2 40 17 0.0476191 0.0521472 0.94837 0.993569 0.947853 0.970173
Private Key 967 0 4 34 0.033966 0.966169 1 0.966034 0.982724
Seed, Salt, Nonce 35 2 6 4 0.25 0.102564 0.87234 0.945946 0.897436 0.921053
4050 371 19429155 882 1.909e-05 0.178832 0.999936 0.916082 0.821168 0.866032
4416 702 19428458 882 3.613e-05 0.166478 0.999919 0.862837 0.833522 0.847926
1 change: 1 addition & 0 deletions credsweeper/filters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from credsweeper.filters.value_first_word_check import ValueFirstWordCheck
from credsweeper.filters.value_grafana_check import ValueGrafanaCheck
from credsweeper.filters.value_iban_check import ValueIbanCheck
from credsweeper.filters.value_ip_check import ValueIPCheck
from credsweeper.filters.value_json_web_token_check import ValueJsonWebTokenCheck
from credsweeper.filters.value_last_word_check import ValueLastWordCheck
from credsweeper.filters.value_length_check import ValueLengthCheck
Expand Down
43 changes: 43 additions & 0 deletions credsweeper/filters/value_ip_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import contextlib
import ipaddress

from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter


class ValueIPCheck(Filter):
    """Filter out IP address candidates that are not worth reporting.

    A candidate is dropped when:

    * its value is empty or is not a valid IPv4/IPv6 address;
    * it is an IPv4 address and the source line contains one of
      ``FALSE_POSITIVE_MARKERS`` (case-insensitive);
    * the address is loopback, private, reserved, link-local or multicast.
    """

    # Lower-case substrings looked up in the lower-cased source line for IPv4 candidates.
    # NOTE(review): presumably dotted quads on such lines are version/OID/section/RFC
    # numbers rather than real addresses - confirm against the rule's test samples.
    FALSE_POSITIVE_MARKERS = ["version", "oid", "section", "rfc"]

    def __init__(self, config: Config = None) -> None:
        """Create the filter; ``config`` is accepted but not used by this filter."""

    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
        """Run filter checks on received credential candidate data 'line_data'.

        Args:
            line_data: credential candidate data
            target: multiline target from which line data was obtained

        Return:
            True, if need to filter candidate and False if left

        """
        if not line_data.value:
            return True

        # Only the parsing step is expected to fail: ip_address() raises ValueError
        # for anything that is not a valid IPv4 or IPv6 address. Keeping the guard
        # this narrow stops unrelated errors from being masked as "filtered".
        try:
            ip = ipaddress.ip_address(line_data.value)
        except ValueError:
            return True

        # The marker check applies to IPv4 values only.
        if 4 == ip.version:
            line_lower = target.line.lower()
            if any(marker in line_lower for marker in ValueIPCheck.FALSE_POSITIVE_MARKERS):
                return True

        # Addresses from special-purpose ranges are filtered for both IP versions.
        return ip.is_loopback or ip.is_private or ip.is_reserved or ip.is_link_local or ip.is_multicast
24 changes: 24 additions & 0 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,30 @@
- api
doc_available: false

# Rule "IPv4": pattern rule reported with "info" severity.
# The "value" group captures four dot-separated groups of 1-3 digits; the first
# digit of a three-digit group is limited to 0-2, so textually out-of-range
# groups such as 299 can still match here.
# Boundaries enforced by the regex:
#   - before the value: start of line, or a character other than ".", a digit or a letter;
#   - after the value: not a "/N" or "/NN" suffix (negative lookahead, first digit 1-3
#     when two digits are present), and then end of line or a character other than
#     ".", a digit, a letter or "$".
# Matches are passed to the ValueIPCheck filter listed under filter_type.
# NOTE(review): min_line_len and required_substrings presumably let the scanner skip
# lines shorter than 10 characters or without "." before the regex runs - confirm
# against the rule loader.
- name: IPv4
severity: info
type: pattern
values:
- (^|[^.0-9a-zA-Z])(?P<value>[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2})(?!/([123]?[0-9])([^0-9]|$))([^.0-9a-zA-Z$]|$)
filter_type:
- ValueIPCheck
min_line_len: 10
required_substrings:
- "."
doc_available: false

# Rule "IPv6": pattern rule reported with "info" severity.
# The "value" group captures: 0-4 hex digits, ":", then up to six repetitions of
# an optionally ":"-prefixed group of 1-4 hex digits with an optional trailing ":",
# then ":" and a final group of 1-4 hex digits.
# Boundaries enforced by the regex:
#   - before the value: start of line, or a character other than ":", a digit or a letter;
#   - after the value: end of line, or a character other than ":", a digit or a letter.
# Matches are passed to the ValueIPCheck filter listed under filter_type.
# NOTE(review): min_line_len and required_substrings presumably let the scanner skip
# lines shorter than 10 characters or without ":" before the regex runs - confirm
# against the rule loader.
- name: IPv6
severity: info
type: pattern
values:
- (^|[^:0-9a-zA-Z])(?P<value>[0-9A-Fa-f]{0,4}:(:?[0-9A-Fa-f]{1,4}:?){0,6}:[0-9A-Fa-f]{1,4})([^:0-9a-zA-Z]|$)
filter_type:
- ValueIPCheck
min_line_len: 10
required_substrings:
- ":"
doc_available: false

- name: AWS Client ID
severity: high
type: pattern
Expand Down
8 changes: 4 additions & 4 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from pathlib import Path

# total number of files in test samples
SAMPLES_FILES_COUNT: int = 109
SAMPLES_FILES_COUNT: int = 111

# credentials count after scan
SAMPLES_CRED_COUNT: int = 112
SAMPLES_CRED_LINE_COUNT: int = 123
SAMPLES_CRED_COUNT: int = 118
SAMPLES_CRED_LINE_COUNT: int = 129

# credentials count after post-processing
SAMPLES_POST_CRED_COUNT: int = 106
SAMPLES_POST_CRED_COUNT: int = 112

# with option --doc
SAMPLES_IN_DOC = 72
Expand Down
132 changes: 132 additions & 0 deletions tests/data/depth_3.json
Original file line number Diff line number Diff line change
Expand Up @@ -1213,6 +1213,138 @@
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "IPv4",
"severity": "info",
"line_data_list": [
{
"line": "100.64.0.0\u2013100.127.255.255",
"line_num": 14,
"path": "tests/samples/ipv4",
"info": "tests/samples/ipv4|RAW",
"value": "100.64.0.0",
"variable": null,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 1.5253496664211537,
"valid": false
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "IPv4",
"severity": "info",
"line_data_list": [
{
"line": "192.88.99.0\u2013192.88.99.255",
"line_num": 26,
"path": "tests/samples/ipv4",
"info": "tests/samples/ipv4|RAW",
"value": "192.88.99.0",
"variable": null,
"entropy_validation": {
"iterator": "BASE36_CHARS",
"entropy": 1.9018695860849921,
"valid": false
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "IPv6",
"severity": "info",
"line_data_list": [
{
"line": "2004:5678::9324",
"line_num": 12,
"path": "tests/samples/ipv6",
"info": "tests/samples/ipv6|RAW",
"value": "2004:5678::9324",
"variable": null,
"entropy_validation": {
"iterator": "BASE36_CHARS",
"entropy": 2.725512476486815,
"valid": false
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "IPv6",
"severity": "info",
"line_data_list": [
{
"line": "2004::5678:9",
"line_num": 13,
"path": "tests/samples/ipv6",
"info": "tests/samples/ipv6|RAW",
"value": "2004::5678:9",
"variable": null,
"entropy_validation": {
"iterator": "BASE36_CHARS",
"entropy": 2.5220552088742005,
"valid": false
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "IPv6",
"severity": "info",
"line_data_list": [
{
"line": "2041:0000:140F::875B:131B",
"line_num": 14,
"path": "tests/samples/ipv6",
"info": "tests/samples/ipv6|RAW",
"value": "2041:0000:140F::875B:131B",
"variable": null,
"entropy_validation": {
"iterator": "HEX_CHARS",
"entropy": 2.6146939516467023,
"valid": false
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "IPv6",
"severity": "info",
"line_data_list": [
{
"line": "2041:0:140F::875B:131B",
"line_num": 15,
"path": "tests/samples/ipv6",
"info": "tests/samples/ipv6|RAW",
"value": "2041:0:140F::875B:131B",
"variable": null,
"entropy_validation": {
"iterator": "HEX_CHARS",
"entropy": 2.684338637030481,
"valid": false
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "VALIDATED_KEY",
Expand Down
Loading