Skip to content

Commit

Permalink
Optimizations (#415)
Browse files Browse the repository at this point in the history
* [skip actions] [auxiliary] 2023-08-31T11:37:29+03:00

* optimization
  • Loading branch information
babenek authored Sep 7, 2023
1 parent fd24912 commit 9a1d3ff
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 15 deletions.
1 change: 1 addition & 0 deletions credsweeper/deep_scanner/deep_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ def structure_scan(
info=f"{struct_provider.info}|STRUCT:{key}")
new_candidates = self.structure_scan(val_struct_provider, depth, recursive_limit_size)
candidates.extend(new_candidates)

elif isinstance(value, bytes):
bytes_struct_provider = DataContentProvider(data=value,
file_path=struct_provider.file_path,
Expand Down
5 changes: 5 additions & 0 deletions credsweeper/file_handler/analysis_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ def line_strip_len(self) -> int:
"""cached value"""
return len(self.line_strip)

@cached_property
def line_strip_lower(self) -> str:
    """Lower-cased copy of `line_strip`.

    Declared as a cached property, so the conversion is done on first access
    and the stored result is returned afterwards.

    Return:
        `self.line_strip` converted with `str.lower()`

    """
    return self.line_strip.lower()

@cached_property
def lines(self) -> List[str]:
"""cached value"""
Expand Down
5 changes: 3 additions & 2 deletions credsweeper/filters/value_ip_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@ def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
with contextlib.suppress(Exception):
ip = ipaddress.ip_address(line_data.value)
if 4 == ip.version:
line_lower = target.line.lower()
# use line_strip_lower because the property should already be cached
line_strip_lower = target.line_strip_lower
for i in ValueIPCheck.FALSE_POSITIVE_MARKERS:
if i in line_lower:
if i in line_strip_lower:
return True
if ip.is_loopback or ip.is_private or ip.is_reserved or ip.is_link_local or ip.is_multicast:
return True
Expand Down
6 changes: 3 additions & 3 deletions credsweeper/rules/rule.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import re
from functools import cached_property
from typing import Dict, List, Optional, Union
from typing import Dict, List, Optional, Union, Set

from credsweeper import validations, filters
from credsweeper.common.constants import RuleType, Severity, MAX_LINE_LENGTH
Expand Down Expand Up @@ -68,7 +68,7 @@ def __init__(self, config: Config, rule_dict: Dict) -> None:
self.__filters = self._init_filters(rule_dict.get(Rule.FILTER_TYPE))
self.__use_ml = bool(rule_dict.get(Rule.USE_ML))
self.__validations = self._init_validations(rule_dict.get(Rule.VALIDATIONS))
self.__required_substrings = [i.strip().lower() for i in rule_dict.get(Rule.REQUIRED_SUBSTRINGS, [])]
self.__required_substrings = set(i.strip().lower() for i in rule_dict.get(Rule.REQUIRED_SUBSTRINGS, []))
self.__has_required_substrings = bool(self.__required_substrings)
required_regex = rule_dict.get(Rule.REQUIRED_REGEX)
if required_regex and not isinstance(required_regex, str):
Expand Down Expand Up @@ -220,7 +220,7 @@ def _assert_rule_mandatory_fields(rule_template: Dict) -> None:
raise ValueError(f"Malformed rule config file. Contain rule with missing fields: {missing_fields}.")

@cached_property
def required_substrings(self) -> List[str]:
def required_substrings(self) -> Set[str]:
"""required_substrings getter"""
return self.__required_substrings

Expand Down
2 changes: 1 addition & 1 deletion credsweeper/scanner/scan_type/pem_key_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def detect_pem_key(cls, config: Config, rule: Rule, target: AnalysisTarget) -> L
logger.debug("Filtered with entropy %f '%s'", entropy_validator.entropy, key_data)
return []
# OPENSSH format has multiple AAAAA pattern
if "OPENSSH" not in target.line and cls.pem_pattern_check.equal_pattern_check(key_data):
if "OPENSSH" not in target.line_strip and cls.pem_pattern_check.equal_pattern_check(key_data):
logger.debug("Filtered with ValuePemPatternCheck %s", target)
return []
# all OK - return line data with all lines which include PEM
Expand Down
2 changes: 1 addition & 1 deletion credsweeper/scanner/scan_type/scan_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def _get_candidate(cls, config: Config, rule: Rule, target: AnalysisTarget) -> O
remove current line. None otherwise
"""
if config.exclude_lines and target.line.strip() in config.exclude_lines:
if config.exclude_lines and target.line_strip in config.exclude_lines:
return None

line_data = cls.get_line_data(config=config, target=target, pattern=rule.patterns[0], filters=rule.filters)
Expand Down
22 changes: 14 additions & 8 deletions credsweeper/scanner/scanner.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import re
from pathlib import Path
from typing import List, Type, Tuple, Union, Dict, Generator
from typing import List, Type, Tuple, Union, Dict, Generator, Set

from credsweeper.app import APP_PATH
from credsweeper.common.constants import RuleType, MIN_VARIABLE_LENGTH, MIN_SEPARATOR_LENGTH, MIN_VALUE_LENGTH, \
Expand Down Expand Up @@ -43,6 +43,14 @@ def __init__(self, config: Config, rule_path: Union[None, str, Path]) -> None:
self.min_len = min(self.min_pattern_len, self.min_keyword_len, self.min_pem_key_len, self.min_multi_len,
MIN_VARIABLE_LENGTH + MIN_SEPARATOR_LENGTH + MIN_VALUE_LENGTH)

@staticmethod
def _substring_check(substrings: Set[str], text: str) -> bool:
    """Check whether `text` contains at least one of the given substrings.

    Args:
        substrings: substrings to look for; each one is tested with the `in` operator
        text: string that is searched

    Return:
        True if any item of `substrings` occurs in `text`.
        False otherwise, including when `substrings` is empty.

    """
    # any() stops at the first hit, exactly like the explicit loop with early return
    return any(substring in text for substring in substrings)

def _set_rules_scanners(self, rule_path: Union[None, str, Path]) -> None:
"""Auxiliary method to fill rules, determine min_pattern_len and set scanners"""
if rule_path is None:
Expand Down Expand Up @@ -125,21 +133,19 @@ def scan(self, provider: ContentProvider) -> List[Candidate]:
matched_multi = target_line_stripped_len >= self.min_multi_len

if not (matched_keyword or matched_pem_key or matched_pattern or matched_multi):
# the target may be skipped based on its length alone, because not all rules have required_substrings
continue

# use lower case for required substring
target_line_stripped_lower = target_line_stripped.lower()
target_line_stripped_lower = target.line_strip_lower
# cache of regex results, to avoid verifying the same regex more than once
matched_regex: Dict[re.Pattern, bool] = {}

for rule, scanner in self.yield_rule_scanner(target_line_stripped_len, matched_pattern, matched_keyword,
matched_pem_key, matched_multi):
for substring in rule.required_substrings:
if substring in target_line_stripped_lower:
break
else:
if rule.has_required_substrings:
continue
if rule.has_required_substrings \
and not self._substring_check(rule.required_substrings, target_line_stripped_lower):
continue

# common regex might be triggered for the same target
if rule.required_regex:
Expand Down

0 comments on commit 9a1d3ff

Please sign in to comment.