diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 834c908fd1..16fdd6b236 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -62,7 +62,8 @@ jobs:
     - name: Checkout CredData
       uses: actions/checkout@v3
       with:
-        repository: Samsung/CredData
+        repository: babenek/CredData
+        ref: ipmarkup
 
     - name: Cache data
       id: cache-data
diff --git a/credsweeper/filters/__init__.py b/credsweeper/filters/__init__.py
index ce16128e49..316e8a8a54 100644
--- a/credsweeper/filters/__init__.py
+++ b/credsweeper/filters/__init__.py
@@ -19,6 +19,7 @@
 from credsweeper.filters.value_first_word_check import ValueFirstWordCheck
 from credsweeper.filters.value_grafana_check import ValueGrafanaCheck
 from credsweeper.filters.value_iban_check import ValueIbanCheck
+from credsweeper.filters.value_ip_check import ValueIPCheck
 from credsweeper.filters.value_json_web_token_check import ValueJsonWebTokenCheck
 from credsweeper.filters.value_last_word_check import ValueLastWordCheck
 from credsweeper.filters.value_length_check import ValueLengthCheck
diff --git a/credsweeper/filters/value_ip_check.py b/credsweeper/filters/value_ip_check.py
new file mode 100644
index 0000000000..472565e0f3
--- /dev/null
+++ b/credsweeper/filters/value_ip_check.py
@@ -0,0 +1,51 @@
+import contextlib
+import ipaddress
+
+from credsweeper.config import Config
+from credsweeper.credentials import LineData
+from credsweeper.file_handler.analysis_target import AnalysisTarget
+from credsweeper.filters import Filter
+
+
+class ValueIPCheck(Filter):
+    """Filter out IP address candidates that are not worth reporting.
+
+    A candidate is dropped when its value is not a parsable IPv4/IPv6 address,
+    when it is an IPv4 value on a line containing one of FALSE_POSITIVE_MARKERS,
+    or when the address is loopback, private, reserved, link-local or multicast.
+    """
+
+    # Lower-case substrings; an IPv4-looking value on a line containing any of
+    # them is filtered (e.g. "version 8.8.8.8" or "rfc 1.2.3.4").
+    FALSE_POSITIVE_MARKERS = ["version", "oid", "section", "rfc"]
+
+    def __init__(self, config: Config = None) -> None:
+        """Create the filter; 'config' is accepted but not used."""
+
+    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
+        """Run filter checks on received credential candidate data 'line_data'.
+
+        Args:
+            line_data: credential candidate data
+            target: multiline target from which line data was obtained
+
+        Return:
+            True, if need to filter candidate and False if left
+
+        """
+        if not line_data.value:
+            return True
+
+        # Only a failed parse is an expected error here: ip_address() raises
+        # ValueError for text that is not a valid address. Anything else is a
+        # real bug and must not be silently turned into "filtered".
+        with contextlib.suppress(ValueError):
+            ip = ipaddress.ip_address(line_data.value)
+            if ip.version == 4:
+                line_lower = target.line.lower()
+                if any(marker in line_lower for marker in ValueIPCheck.FALSE_POSITIVE_MARKERS):
+                    return True
+            return ip.is_loopback or ip.is_private or ip.is_reserved or ip.is_link_local or ip.is_multicast
+
+        # The value could not be parsed as an IP address at all.
+        return True
diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml
index 2bf5b1f7ec..124f5b4845 100644
--- a/credsweeper/rules/config.yaml
+++ b/credsweeper/rules/config.yaml
@@ -44,6 +44,30 @@
     - api
   doc_available: false
 
+- name: IPv4
+  severity: info
+  type: pattern
+  values:
+    - (^|[^.0-9a-zA-Z])(?P<value>[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2})(?!/([123]?[0-9])([^0-9]|$))([^.0-9a-zA-Z$]|$)
+  filter_type:
+    - ValueIPCheck
+  min_line_len: 10
+  required_substrings:
+    - "."
+ doc_available: false + +- name: IPv6 + severity: info + type: pattern + values: + - (^|[^:0-9a-zA-Z])(?P<value>[0-9A-Fa-f]{0,4}:(:?[0-9A-Fa-f]{1,4}:?){0,6}:[0-9A-Fa-f]{1,4})([^:0-9a-zA-Z]|$) + filter_type: + - ValueIPCheck + min_line_len: 10 + required_substrings: + - ":" + doc_available: false + - name: AWS Client ID severity: high type: pattern diff --git a/tests/__init__.py b/tests/__init__.py index 1c19bd753b..7ce2f706c7 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,14 +1,14 @@ from pathlib import Path # total number of files in test samples -SAMPLES_FILES_COUNT: int = 108 +SAMPLES_FILES_COUNT: int = 110 # credentials count after scan -SAMPLES_CRED_COUNT: int = 105 -SAMPLES_CRED_LINE_COUNT: int = 116 +SAMPLES_CRED_COUNT: int = 111 +SAMPLES_CRED_LINE_COUNT: int = 122 # credentials count after post-processing -SAMPLES_POST_CRED_COUNT: int = 99 +SAMPLES_POST_CRED_COUNT: int = 105 # with option --doc SAMPLES_IN_DOC = 72 diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index 15c7b3a727..a4d2f69126 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -1213,6 +1213,138 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv4", + "severity": "info", + "line_data_list": [ + { + "line": "100.64.0.0\u2013100.127.255.255", + "line_num": 14, + "path": "tests/samples/ipv4", + "info": "tests/samples/ipv4|RAW", + "value": "100.64.0.0", + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 1.5253496664211537, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv4", + "severity": "info", + "line_data_list": [ + { + "line": "192.88.99.0\u2013192.88.99.255", + "line_num": 26, + "path": "tests/samples/ipv4", + "info": "tests/samples/ipv4|RAW", + "value": "192.88.99.0", + "variable": null, + "entropy_validation": { + "iterator": 
"BASE36_CHARS", + "entropy": 1.9018695860849921, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv6", + "severity": "info", + "line_data_list": [ + { + "line": "2004:5678::9324", + "line_num": 12, + "path": "tests/samples/ipv6", + "info": "tests/samples/ipv6|RAW", + "value": "2004:5678::9324", + "variable": null, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 2.725512476486815, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv6", + "severity": "info", + "line_data_list": [ + { + "line": "2004::5678:9", + "line_num": 13, + "path": "tests/samples/ipv6", + "info": "tests/samples/ipv6|RAW", + "value": "2004::5678:9", + "variable": null, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 2.5220552088742005, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv6", + "severity": "info", + "line_data_list": [ + { + "line": "2041:0000:140F::875B:131B", + "line_num": 14, + "path": "tests/samples/ipv6", + "info": "tests/samples/ipv6|RAW", + "value": "2041:0000:140F::875B:131B", + "variable": null, + "entropy_validation": { + "iterator": "HEX_CHARS", + "entropy": 2.6146939516467023, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv6", + "severity": "info", + "line_data_list": [ + { + "line": "2041:0:140F::875B:131B", + "line_num": 15, + "path": "tests/samples/ipv6", + "info": "tests/samples/ipv6|RAW", + "value": "2041:0:140F::875B:131B", + "variable": null, + "entropy_validation": { + "iterator": "HEX_CHARS", + "entropy": 2.684338637030481, + "valid": false + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": 
"VALIDATED_KEY", diff --git a/tests/data/ml_threshold_0.json b/tests/data/ml_threshold_0.json index 3aadf674c9..903d013694 100644 --- a/tests/data/ml_threshold_0.json +++ b/tests/data/ml_threshold_0.json @@ -1213,6 +1213,138 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv4", + "severity": "info", + "line_data_list": [ + { + "line": "100.64.0.0\u2013100.127.255.255", + "line_num": 14, + "path": "tests/samples/ipv4", + "info": "", + "value": "100.64.0.0", + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 1.5253496664211537, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv4", + "severity": "info", + "line_data_list": [ + { + "line": "192.88.99.0\u2013192.88.99.255", + "line_num": 26, + "path": "tests/samples/ipv4", + "info": "", + "value": "192.88.99.0", + "variable": null, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 1.9018695860849921, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv6", + "severity": "info", + "line_data_list": [ + { + "line": "2004:5678::9324", + "line_num": 12, + "path": "tests/samples/ipv6", + "info": "", + "value": "2004:5678::9324", + "variable": null, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 2.725512476486815, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv6", + "severity": "info", + "line_data_list": [ + { + "line": "2004::5678:9", + "line_num": 13, + "path": "tests/samples/ipv6", + "info": "", + "value": "2004::5678:9", + "variable": null, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 2.5220552088742005, + "valid": false + } + } + ] + }, + { + 
"api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv6", + "severity": "info", + "line_data_list": [ + { + "line": "2041:0000:140F::875B:131B", + "line_num": 14, + "path": "tests/samples/ipv6", + "info": "", + "value": "2041:0000:140F::875B:131B", + "variable": null, + "entropy_validation": { + "iterator": "HEX_CHARS", + "entropy": 2.6146939516467023, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv6", + "severity": "info", + "line_data_list": [ + { + "line": "2041:0:140F::875B:131B", + "line_num": 15, + "path": "tests/samples/ipv6", + "info": "", + "value": "2041:0:140F::875B:131B", + "variable": null, + "entropy_validation": { + "iterator": "HEX_CHARS", + "entropy": 2.684338637030481, + "valid": false + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", diff --git a/tests/data/output.json b/tests/data/output.json index 7a986acb56..185ebb5e0b 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -1147,6 +1147,138 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv4", + "severity": "info", + "line_data_list": [ + { + "line": "100.64.0.0\u2013100.127.255.255", + "line_num": 14, + "path": "tests/samples/ipv4", + "info": "", + "value": "100.64.0.0", + "variable": null, + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 1.5253496664211537, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv4", + "severity": "info", + "line_data_list": [ + { + "line": "192.88.99.0\u2013192.88.99.255", + "line_num": 26, + "path": "tests/samples/ipv4", + "info": "", + "value": "192.88.99.0", + "variable": null, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 
1.9018695860849921, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv6", + "severity": "info", + "line_data_list": [ + { + "line": "2004:5678::9324", + "line_num": 12, + "path": "tests/samples/ipv6", + "info": "", + "value": "2004:5678::9324", + "variable": null, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 2.725512476486815, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv6", + "severity": "info", + "line_data_list": [ + { + "line": "2004::5678:9", + "line_num": 13, + "path": "tests/samples/ipv6", + "info": "", + "value": "2004::5678:9", + "variable": null, + "entropy_validation": { + "iterator": "BASE36_CHARS", + "entropy": 2.5220552088742005, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv6", + "severity": "info", + "line_data_list": [ + { + "line": "2041:0000:140F::875B:131B", + "line_num": 14, + "path": "tests/samples/ipv6", + "info": "", + "value": "2041:0000:140F::875B:131B", + "variable": null, + "entropy_validation": { + "iterator": "HEX_CHARS", + "entropy": 2.6146939516467023, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "NOT_AVAILABLE", + "ml_probability": null, + "rule": "IPv6", + "severity": "info", + "line_data_list": [ + { + "line": "2041:0:140F::875B:131B", + "line_num": 15, + "path": "tests/samples/ipv6", + "info": "", + "value": "2041:0:140F::875B:131B", + "variable": null, + "entropy_validation": { + "iterator": "HEX_CHARS", + "entropy": 2.684338637030481, + "valid": false + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", diff --git a/tests/samples/ipv4 b/tests/samples/ipv4 new file mode 100644 index 0000000000..4067eab809 --- 
/dev/null +++ b/tests/samples/ipv4 @@ -0,0 +1,42 @@ +# wrong values: +299.199.99.0 +321.500.312.32 +99.199.299.0 +version 8.8.8.8 +rfc 1.2.3.4 + +# list from https://en.wikipedia.org/wiki/Reserved_IP_addresses +0.0.0.0/8 +0.0.0.0–0.255.255.255 +10.0.0.0/8 +10.0.0.0–10.255.255.255 +100.64.0.0/10 +100.64.0.0–100.127.255.255 +127.0.0.0/8 +127.0.0.0–127.255.255.255 +169.254.0.0/16 +169.254.0.0–169.254.255.255 +172.16.0.0/12 +172.16.0.0–172.31.255.255 +192.0.0.0/24 +192.0.0.0–192.0.0.255 +192.0.2.0/24 +192.0.2.0–192.0.2.255 +192.88.99.0/24 +192.88.99.0–192.88.99.255 +192.168.0.0/16 +192.168.0.0–192.168.255.255 +198.18.0.0/15 +198.18.0.0–198.19.255.255 +198.51.100.0/24 +198.51.100.0–198.51.100.255 +203.0.113.0/24 +203.0.113.0–203.0.113.255 +224.0.0.0/4 +224.0.0.0–239.255.255.255 +233.252.0.0/24 +233.252.0.0-233.252.0.255 +240.0.0.0/4 +240.0.0.0–255.255.255.254 +255.255.255.255/32 +255.255.255.255 \ No newline at end of file diff --git a/tests/samples/ipv6 b/tests/samples/ipv6 new file mode 100644 index 0000000000..57df85ab8b --- /dev/null +++ b/tests/samples/ipv6 @@ -0,0 +1,16 @@ +# not an ipv6 +abba:03911 +1234::5678::9 + +# loopback +0000:0000:0000:0000:0000:0000:0000:0001 +::1 # fill +2001:db8:85a3:8d3:1319:8a2e:370:7348 private +fe80::1ff:fe23:4567:890a # link_local + +# dummy but valid +2004:5678::9324 +2004::5678:9 +2041:0000:140F::875B:131B +2041:0:140F::875B:131B + diff --git a/tests/samples/test.html b/tests/samples/test.html index 832df90d14..f087acc701 100644 --- a/tests/samples/test.html +++ b/tests/samples/test.html @@ -35,7 +35,7 @@ <tr> <td class="confluenceTd">2 </td> - <td class="confluenceTd">10.0.0.1 + <td class="confluenceTd">127.0.0.1 </td> <td class="confluenceTd">user </td>