Skip to content

Commit

Permalink
keyword rules for --doc only (#420)
Browse files Browse the repository at this point in the history
* refactoring to cover special characters in hex encoded data

* with new markup

* fix

* benchmark scores

* Update .github/workflows/benchmark.yml

Co-authored-by: ShinHyung Choi <[email protected]>

* rules for --doc only

* requirements.txt updated

* update sample

* SINGLE_STR_PAIR: apply value for strong password

* PASS keyword updated

* version up

* Remove SINGLE_STR_PAIR rule and modify other rules
- PIN keyword removed
- The verb strings of delimiter are removed

* [skip actions] [dockeyword] 2023-10-19T10:33:37+03:00

* Delete tests/samples/doc_single_str_pair

* fix samples count

* Apply suggestions from code review

* Apply suggestions from code review

---------

Co-authored-by: ShinHyung Choi <[email protected]>
  • Loading branch information
babenek and csh519 authored Oct 20, 2023
1 parent f31f2bb commit 19bbb24
Show file tree
Hide file tree
Showing 18 changed files with 25,044 additions and 476 deletions.
1 change: 1 addition & 0 deletions credsweeper/common/keyword_checklist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -904,6 +904,7 @@ unstable
until
update
upload
used
username
using
usually
Expand Down
92 changes: 92 additions & 0 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,95 @@
- name: SECRET_PAIR
severity: medium
type: pattern
values:
- (?P<variable>[`'\"]?(?i:token|secret|key|키|암호|암호화|토큰)[`'\"]?)((\s)*[=:](\s)*)(?P<quote>[`'\"(])?(?P<value>\S{4,})(?(quote)[)`'\"])
filter_type:
- ValueAllowlistCheck
min_line_len: 10
required_substrings:
- token
- secret
- key
- ":"
- "/"
- "="
- 키
- 암호
- 암호화
- 토큰
doc_only: true

- name: PASSWD_PAIR
severity: medium
type: pattern
values:
- (?P<variable>[`'\"]?(?i:(?<!id[ :/])pa[as]swo?r?ds?|pwd?|p/w|비밀번호|비번|패스워드|암호)[`'\"]?)((\s)*[=:](\s)*)(?P<quote>[`'\"(])?(?P<value>\S{4,})(?(quote)[)`'\"])
filter_type:
- ValueAllowlistCheck
min_line_len: 10
required_substrings:
- pass
- sword
- ":"
- "/"
- "="
- 비밀번호
- 비번
- 패스워드
- 암호
doc_only: true

- name: IP_ID_PASSWORD_TRIPLE
severity: medium
type: pattern
values:
- (^|(?P<variable>(?i:\bip[\s/]+id[\s/]+pw[\s/:]*))|(?P<url>://)|\s)(?P<ip>[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2}\.[0-2]?[0-9]{1,2})((?P<lpar>\s*\()?\s*|(?(variable)[\s,/]+|\s*(?(url)[,]|[,/])\s*))[\w.-]{3,}[\s,/]+(?P<value>(?(lpar)[^)\s/]{4,}|(?(url)[^\s/]{4,}|[^\s]{4,})))
filter_type:
- ValueAllowlistCheck
min_line_len: 10
required_substrings:
- "."
doc_only: true

- name: ID_PAIR_PASSWD_PAIR
severity: medium
type: pattern
values:
- (?P<ddash>--)?(?P<variable>\w*(?i:pa[as]swords?|passwd?|pwd|\bp/w|\bpw|비밀번호|비번|패스워드|암호))\s*?(?(ddash)[ =]|[:=/>-]{1,2})\s*?(?P<quote>[`'\"]+)?(?P<value>\S{3,}?)(?(quote)(?P=quote)|\b)
- (?P<ddash>--)?(?P<variable>(?i:user\s*)?(?i:id|login|account|root|admin|user|name|wifi|role|host|default|계정|아이디))\s*?(?(ddash)[ =]|[ :=])\s*?(?P<value>\S+)
filter_type:
- ValueAllowlistCheck
min_line_len: 10
required_substrings:
- pass
- sword
- p/w
- pw
- 비밀번호
- 비번
- 패스워드
- 암호
doc_only: true

- name: ID_PASSWD_PAIR
severity: medium
type: pattern
values:
- (?P<variable>[\w.-]*(?i:(?P<id>\bid\b)|id\b|user|name|계정|아이디)[\w.-]*(?(id)[ :(/]+|[:(/]+)(?i:pa[as]swo?r?ds?|pwd?|비밀번호|비번|패스워드|암호))\)?(\s*->\s*|[ =:)(/]+|\s+is\s+|\s+are\s+|\s*는\s*|\s*은\s*|\s*설정은\s*)\(?(?P<id_value>[\w.-]{2,31})[ :\(/\"',]+(?P<value>[^\s}\])\"']{4,31})
filter_type:
- ValueAllowlistCheck
- ValueDictionaryKeywordCheck
min_line_len: 10
required_substrings:
- pw
- pass
- sword
- 비밀번호
- 비번
- 패스워드
- 암호
doc_only: true

- name: PII
severity: info
type: keyword
Expand Down
7 changes: 7 additions & 0 deletions credsweeper/rules/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class Rule:
REQUIRED_REGEX = "required_regex"
VALIDATIONS = "validations"
DOC_AVAILABLE = "doc_available" # True - by default
DOC_ONLY = "doc_only" # False - by default

def __init__(self, config: Config, rule_dict: Dict) -> None:
self.config = config
Expand Down Expand Up @@ -75,6 +76,7 @@ def __init__(self, config: Config, rule_dict: Dict) -> None:
self.__required_regex = re.compile(required_regex) if required_regex else None
self.__min_line_len = int(rule_dict.get(Rule.MIN_LINE_LEN, MAX_LINE_LENGTH))
self.__doc_available: bool = rule_dict.get(Rule.DOC_AVAILABLE, True)
self.__doc_only: bool = rule_dict.get(Rule.DOC_ONLY, False)

def _malformed_rule_error(self, rule_dict: Dict, field: str):
raise ValueError(f"Malformed rule '{self.__rule_name}'."
Expand Down Expand Up @@ -241,3 +243,8 @@ def min_line_len(self) -> int:
def doc_available(self) -> bool:
"""doc_available getter"""
return self.__doc_available

@cached_property
def doc_only(self) -> bool:
"""doc_only getter"""
return self.__doc_only
7 changes: 5 additions & 2 deletions credsweeper/scanner/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,13 @@ def _is_available(self, rule: Rule) -> bool:
return False
if self.config.doc:
# apply only available for doc scanning rules
if rule.doc_available:
if rule.doc_available or rule.doc_only:
return True
else:
return True
if rule.doc_only:
return False
else:
return True
return False

def yield_rule_scanner(
Expand Down
10 changes: 5 additions & 5 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from pathlib import Path

# total number of files in test samples
SAMPLES_FILES_COUNT: int = 114
SAMPLES_FILES_COUNT: int = 120

# credentials count after scan
SAMPLES_CRED_COUNT: int = 129
SAMPLES_CRED_LINE_COUNT: int = 148
SAMPLES_CRED_COUNT: int = 383
SAMPLES_CRED_LINE_COUNT: int = 402

# credentials count after post-processing
SAMPLES_POST_CRED_COUNT: int = 122
SAMPLES_POST_CRED_COUNT: int = 293

# with option --doc
SAMPLES_IN_DOC = 80
SAMPLES_IN_DOC = 426

# archived credentials that are not found without --depth
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 16
Expand Down
Loading

0 comments on commit 19bbb24

Please sign in to comment.