Samsung · babenek · Aug 28, 2023 · Aug 23, 2023 · Aug 23, 2023 · Aug 23, 2023
@@ -1,25 +1,25 @@
-DATA: 19434458 valid lines. MARKUP: 73890 items
+DATA: 19434458 valid lines. MARKUP: 74507 items
 Category                      Positives    Negatives    Template
 --------------------------  -----------  -----------  ----------
 Authentication Key & Token           67            1          31
 Generic Secret                     1055           15         203
 Generic Token                       333           45         558
-Other                               708        63136         635
+Other                              1074        63387         635
 Password                           1403          110        4170
 Predefined Pattern                  326            2          40
 Private Key                        1001            1           3
 Seed, Salt, Nonce                    39            4           4
-TOTAL:                             4932        63314        5644
-Detected Credentials: 4938
-credsweeper result_cnt : 4421, lost_cnt : 0, true_cnt : 4050, false_cnt : 371
+TOTAL:                             5298        63565        5644
+Detected Credentials: 5637
+credsweeper result_cnt : 5118, lost_cnt : 0, true_cnt : 4416, false_cnt : 702
 Category                      TP    FP        TN    FN         FPR        FNR       ACC       PRC       RCL        F1
 --------------------------  ----  ----  --------  ----  ----------  ---------  --------  --------  --------  --------
 Authentication Key & Token    51     4        28    16  0.125       0.238806   0.79798   0.927273  0.761194  0.836066
 Generic Secret               971     2       216    84  0.00917431  0.0796209  0.932443  0.997945  0.920379  0.957594
 Generic Token                287     7       596    46  0.0116086   0.138138   0.943376  0.97619   0.861862  0.91547
-Other                        446   238     63533   262  0.0037321   0.370057   0.992246  0.652047  0.629943  0.640805
-Password                     984   116      4164   419  0.0271028   0.298646   0.90586   0.894545  0.701354  0.786256
+Other                        812   562     63460   262  0.00877823  0.243948   0.987342  0.590975  0.756052  0.663399
+Password                     984   123      4157   419  0.0287383   0.298646   0.904628  0.888889  0.701354  0.784064
 Predefined Pattern           309     2        40    17  0.0476191   0.0521472  0.94837   0.993569  0.947853  0.970173
 Private Key                  967     0         4    34              0.033966   0.966169  1         0.966034  0.982724
 Seed, Salt, Nonce             35     2         6     4  0.25        0.102564   0.87234   0.945946  0.897436  0.921053
-                            4050   371  19429155   882  1.909e-05   0.178832   0.999936  0.916082  0.821168  0.866032
+                            4416   702  19428458   882  3.613e-05   0.166478   0.999919  0.862837  0.833522  0.847926
@@ -19,6 +19,7 @@
 from credsweeper.filters.value_first_word_check import ValueFirstWordCheck
 from credsweeper.filters.value_grafana_check import ValueGrafanaCheck
 from credsweeper.filters.value_iban_check import ValueIbanCheck
+from credsweeper.filters.value_ip_check import ValueIPCheck
 from credsweeper.filters.value_json_web_token_check import ValueJsonWebTokenCheck
 from credsweeper.filters.value_last_word_check import ValueLastWordCheck
 from credsweeper.filters.value_length_check import ValueLengthCheck

@@ -0,0 +1,43 @@
+import contextlib
+import ipaddress
+
+from credsweeper.config import Config
+from credsweeper.credentials import LineData
+from credsweeper.file_handler.analysis_target import AnalysisTarget
+from credsweeper.filters import Filter
+
+
+class ValueIPCheck(Filter):
+    """Filter out IP address candidates that are not worth reporting.
+
+    A candidate is filtered when its value is empty, cannot be parsed as an IP address,
+    is a loopback, private, reserved, link-local or multicast address,
+    or (IPv4 only) sits on a line that contains one of FALSE_POSITIVE_MARKERS.
+    """
+
+    # Lowercase substrings searched in the lowercased source line of an IPv4 candidate.
+    # Presumably they flag lines where a dotted quad is a version number, an OID,
+    # a section number or an RFC reference rather than an address - confirm with the rule author.
+    # NOTE(review): the match is a plain substring test, so "oid" also matches inside longer words.
+    FALSE_POSITIVE_MARKERS = ["version", "oid", "section", "rfc"]
+
+    def __init__(self, config: Config = None) -> None:
+        # 'config' is accepted but not used by this filter.
+        pass
+
+    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
+        """Run filter checks on received credential candidate data 'line_data'.
+
+        Args:
+            line_data: credential candidate data; 'line_data.value' is parsed as an IP address
+            target: multiline target from which line data was obtained; 'target.line' is searched for markers
+
+        Return:
+            True, if need to filter candidate and False if left
+
+        """
+        # Nothing to validate - filter the candidate.
+        if not line_data.value:
+            return True
+
+        # Any exception raised in this body is swallowed (e.g. the ValueError that
+        # ipaddress.ip_address raises for a string that is not a valid address);
+        # execution then falls through to the final 'return True'.
+        with contextlib.suppress(Exception):
+            ip = ipaddress.ip_address(line_data.value)
+            # The marker check is applied to IPv4 values only.
+            if 4 == ip.version:
+                line_lower = target.line.lower()
+                for i in ValueIPCheck.FALSE_POSITIVE_MARKERS:
+                    if i in line_lower:
+                        return True
+            # Addresses from these special ranges are filtered for both IP versions.
+            if ip.is_loopback or ip.is_private or ip.is_reserved or ip.is_link_local or ip.is_multicast:
+                return True
+            # A parsable address outside the ranges above - keep the candidate.
+            return False
+
+        # Reached only when an exception was suppressed above - filter the candidate.
+        return True
@@ -56,6 +56,30 @@
     - api
   doc_available: false
 
+- name: IPv4
+  severity: info
+  type: pattern
+  # The value is four dot-separated groups, each matched loosely by [0-2]?[0-9]{1,2}
+  # (so groups up to 299 pass the regex); ValueIPCheck is listed below as the filter.
+  # The value must not be preceded by '.', a digit or a letter, nor followed by
+  # '.', a digit, a letter or '$'.
+  # The negative lookahead rejects a value followed by '/' and a number 0-39 (a prefix-length suffix).
+  values:
+    - (^|[^.0-9a-zA-Z])(?P<value>[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2})(?!/([123]?[0-9])([^0-9]|$))([^.0-9a-zA-Z$]|$)
+  filter_type:
+    - ValueIPCheck
+  min_line_len: 10
+  required_substrings:
+    - "."
+  doc_available: false
+
+- name: IPv6
+  severity: info
+  type: pattern
+  # Loose shape match for colon-separated hexadecimal groups: up to 4 hex digits and ':',
+  # then up to six groups of 1-4 hex digits with optional ':' on either side,
+  # and finally ':' followed by 1-4 hex digits.
+  # The value must not be adjacent to ':', a digit or a letter on either side.
+  # ValueIPCheck is listed below as the filter for the matched value.
+  values:
+    - (^|[^:0-9a-zA-Z])(?P<value>[0-9A-Fa-f]{0,4}:(:?[0-9A-Fa-f]{1,4}:?){0,6}:[0-9A-Fa-f]{1,4})([^:0-9a-zA-Z]|$)
+  filter_type:
+    - ValueIPCheck
+  min_line_len: 10
+  required_substrings:
+    - ":"
+  doc_available: false
+
 - name: AWS Client ID
   severity: high
   type: pattern

@@ -1,14 +1,14 @@
 from pathlib import Path
 
 # total number of files in test samples
-SAMPLES_FILES_COUNT: int = 109
+SAMPLES_FILES_COUNT: int = 111
 
 # credentials count after scan
-SAMPLES_CRED_COUNT: int = 112
-SAMPLES_CRED_LINE_COUNT: int = 123
+SAMPLES_CRED_COUNT: int = 118
+SAMPLES_CRED_LINE_COUNT: int = 129
 
 # credentials count after post-processing
-SAMPLES_POST_CRED_COUNT: int = 106
+SAMPLES_POST_CRED_COUNT: int = 112
 
 # with option --doc
 SAMPLES_IN_DOC = 72

@@ -1213,6 +1213,138 @@
             }
         ]
     },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "IPv4",
+        "severity": "info",
+        "line_data_list": [
+            {
+                "line": "100.64.0.0\u2013100.127.255.255",
+                "line_num": 14,
+                "path": "tests/samples/ipv4",
+                "info": "tests/samples/ipv4|RAW",
+                "value": "100.64.0.0",
+                "variable": null,
+                "entropy_validation": {
+                    "iterator": "BASE64_CHARS",
+                    "entropy": 1.5253496664211537,
+                    "valid": false
+                }
+            }
+        ]
+    },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "IPv4",
+        "severity": "info",
+        "line_data_list": [
+            {
+                "line": "192.88.99.0\u2013192.88.99.255",
+                "line_num": 26,
+                "path": "tests/samples/ipv4",
+                "info": "tests/samples/ipv4|RAW",
+                "value": "192.88.99.0",
+                "variable": null,
+                "entropy_validation": {
+                    "iterator": "BASE36_CHARS",
+                    "entropy": 1.9018695860849921,
+                    "valid": false
+                }
+            }
+        ]
+    },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "IPv6",
+        "severity": "info",
+        "line_data_list": [
+            {
+                "line": "2004:5678::9324",
+                "line_num": 12,
+                "path": "tests/samples/ipv6",
+                "info": "tests/samples/ipv6|RAW",
+                "value": "2004:5678::9324",
+                "variable": null,
+                "entropy_validation": {
+                    "iterator": "BASE36_CHARS",
+                    "entropy": 2.725512476486815,
+                    "valid": false
+                }
+            }
+        ]
+    },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "IPv6",
+        "severity": "info",
+        "line_data_list": [
+            {
+                "line": "2004::5678:9",
+                "line_num": 13,
+                "path": "tests/samples/ipv6",
+                "info": "tests/samples/ipv6|RAW",
+                "value": "2004::5678:9",
+                "variable": null,
+                "entropy_validation": {
+                    "iterator": "BASE36_CHARS",
+                    "entropy": 2.5220552088742005,
+                    "valid": false
+                }
+            }
+        ]
+    },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "IPv6",
+        "severity": "info",
+        "line_data_list": [
+            {
+                "line": "2041:0000:140F::875B:131B",
+                "line_num": 14,
+                "path": "tests/samples/ipv6",
+                "info": "tests/samples/ipv6|RAW",
+                "value": "2041:0000:140F::875B:131B",
+                "variable": null,
+                "entropy_validation": {
+                    "iterator": "HEX_CHARS",
+                    "entropy": 2.6146939516467023,
+                    "valid": false
+                }
+            }
+        ]
+    },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "IPv6",
+        "severity": "info",
+        "line_data_list": [
+            {
+                "line": "2041:0:140F::875B:131B",
+                "line_num": 15,
+                "path": "tests/samples/ipv6",
+                "info": "tests/samples/ipv6|RAW",
+                "value": "2041:0:140F::875B:131B",
+                "variable": null,
+                "entropy_validation": {
+                    "iterator": "HEX_CHARS",
+                    "entropy": 2.684338637030481,
+                    "valid": false
+                }
+            }
+        ]
+    },
     {
         "api_validation": "NOT_AVAILABLE",
         "ml_validation": "VALIDATED_KEY",