Skip to content

Commit

Permalink
Phone number detection (#409)
Browse files Browse the repository at this point in the history
* [skip actions] [phone] 2023-08-16T10:14:11+03:00

* [skip actions] [phone] 2023-08-16T10:57:52+03:00

* [skip actions] [phone] 2023-08-16T12:22:56+03:00

* [skip actions] [phone] 2023-08-16T12:41:28+03:00

* Check phone number with length

* custom_benchmark

* fix test samples

* fix test

* fix benchmark

* Update .github/workflows/benchmark.yml
  • Loading branch information
babenek authored Aug 23, 2023
1 parent 2bbb1f4 commit 21bef6d
Show file tree
Hide file tree
Showing 9 changed files with 553 additions and 11 deletions.
14 changes: 7 additions & 7 deletions cicd/benchmark.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
DATA: 19434458 valid lines. MARKUP: 73696 items
DATA: 19434458 valid lines. MARKUP: 73890 items
Category Positives Negatives Template
-------------------------- ----------- ----------- ----------
Authentication Key & Token 67 1 31
Generic Secret 1055 15 203
Generic Token 333 45 558
Other 515 63135 635
Other 708 63136 635
Password 1403 110 4170
Predefined Pattern 326 2 40
Private Key 1001 1 3
Seed, Salt, Nonce 39 4 4
TOTAL: 4739 63313 5644
Detected Credentials: 4744
credsweeper result_cnt : 4227, lost_cnt : 0, true_cnt : 3857, false_cnt : 370
TOTAL: 4932 63314 5644
Detected Credentials: 4938
credsweeper result_cnt : 4421, lost_cnt : 0, true_cnt : 4050, false_cnt : 371
Category TP FP TN FN FPR FNR ACC PRC RCL F1
-------------------------- ---- ---- -------- ---- ---------- --------- -------- -------- -------- --------
Authentication Key & Token 51 4 28 16 0.125 0.238806 0.79798 0.927273 0.761194 0.836066
Generic Secret 971 2 216 84 0.00917431 0.0796209 0.932443 0.997945 0.920379 0.957594
Generic Token 287 7 596 46 0.0116086 0.138138 0.943376 0.97619 0.861862 0.91547
Other 253 237 63533 262 0.00371648 0.508738 0.992238 0.516327 0.491262 0.503483
Other 446 238 63533 262 0.0037321 0.370057 0.992246 0.652047 0.629943 0.640805
Password 984 116 4164 419 0.0271028 0.298646 0.90586 0.894545 0.701354 0.786256
Predefined Pattern 309 2 40 17 0.0476191 0.0521472 0.94837 0.993569 0.947853 0.970173
Private Key 967 0 4 34 0.033966 0.966169 1 0.966034 0.982724
Seed, Salt, Nonce 35 2 6 4 0.25 0.102564 0.87234 0.945946 0.897436 0.921053
3857 370 19429349 882 1.904e-05 0.186115 0.999936 0.912467 0.813885 0.860361
4050 371 19429155 882 1.909e-05 0.178832 0.999936 0.916082 0.821168 0.866032
1 change: 1 addition & 0 deletions credsweeper/filters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from credsweeper.filters.value_pattern_check import ValuePatternCheck
from credsweeper.filters.value_pattern_length_check import ValuePatternLengthCheck
from credsweeper.filters.value_pem_pattern_check import ValuePemPatternCheck
from credsweeper.filters.value_phone_check import ValuePhoneCheck
from credsweeper.filters.value_similarity_check import ValueSimilarityCheck
from credsweeper.filters.value_split_keyword_check import ValueSplitKeywordCheck
from credsweeper.filters.value_string_type_check import ValueStringTypeCheck
Expand Down
258 changes: 258 additions & 0 deletions credsweeper/filters/value_phone_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter


class ValuePhoneCheck(Filter):
    """Check that value may be a phone number.

    The value is matched against a table of country calling codes: it is
    treated as a phone number when it starts with a known code and the count
    of remaining characters is allowed for that code.
    """

    # Country calling code -> allowed number of characters AFTER the code.
    #   int   - exactly that many
    #   list  - any one of the listed lengths
    #   tuple - inclusive range (min, max)
    # https://en.wikipedia.org/wiki/List_of_country_calling_codes
    # https://en.wikipedia.org/wiki/List_of_mobile_telephone_prefixes_by_country
    PREFIX_LENGTH = {
        "1": 10,
        "20": 10,
        "211": 7,
        "212": 9,
        "213": 9,
        "216": 8,
        "218": 10,
        "220": 7,
        "221": 9,
        "222": 8,
        "223": 8,
        "224": 9,
        "225": 8,
        "226": 8,
        "227": 8,
        "228": 8,
        "229": 8,
        "230": 8,
        "231": (8, 9),
        "232": 8,
        "233": 9,
        "234": 8,
        "235": 6,
        "236": 8,
        "237": 9,
        "238": 7,
        "239": 7,
        "240": 9,
        "241": 7,
        "242": 9,
        "243": 7,
        "244": 9,
        "245": 9,
        "246": 7,
        "248": 7,
        "249": 7,
        "250": 9,
        "251": 9,
        "252": (8, 9),
        "253": 10,
        "254": 10,
        "255": 7,
        "256": 7,
        "257": 8,
        "258": 12,
        "260": 9,
        "261": 7,
        "262": (9, 10),
        "263": 9,
        "264": 7,
        "265": [7, 9],
        "266": 8,
        "267": 7,
        "268": 8,
        "269": 7,
        "27": 9,
        "290": 4,
        "291": 7,
        "297": 7,
        "298": 5,
        "299": 6,
        "30": 10,
        "31": 9,
        "32": 9,
        "33": 9,
        "34": 9,
        "350": 8,
        "351": 9,
        "352": 9,
        "353": 9,
        "354": 7,
        "355": 9,
        "356": 8,
        "357": 8,
        "358": (8, 11),
        "359": 9,
        "36": 9,
        "370": 8,
        "371": 8,
        "372": 8,
        "373": 8,
        "374": 6,
        "375": 9,
        "376": 6,
        "377": 8,
        "378": 10,
        "379": 10,
        "380": 9,
        "381": 9,
        "382": 8,
        "383": 8,
        "385": 9,
        "386": 9,
        "387": 8,
        "389": 8,
        "39": 10,
        "40": 10,
        "41": 9,
        "420": 9,
        "421": 9,
        "423": 7,
        "43": (10, 11),
        "44": 10,
        "45": 8,
        "46": 7,
        "47": (8, 10),
        "48": 9,
        "49": 10,
        "500": 5,
        "501": 7,
        "502": 8,
        "503": 8,
        "504": 8,
        "505": 8,
        "506": 8,
        "507": 8,
        "508": 6,
        "509": 8,
        "51": 9,
        "52": 10,
        "53": 8,
        "54": 10,
        "55": 11,
        "56": 9,
        "57": 10,
        "58": 7,
        "590": [9, 12],
        "591": 9,
        "592": 7,
        "593": 9,
        "594": 9,
        "595": 9,
        "596": 9,
        "597": (6, 7),
        "598": 8,
        "599": 7,
        "60": 7,
        "61": (9, 10),
        "62": 11,
        "63": 10,
        "64": (8, 9),
        "65": 8,
        "66": 9,
        "670": 7,
        "672": [6, 9],
        "673": 7,
        "674": 7,
        "675": 8,
        "676": 5,
        "677": 7,
        "678": 5,
        "679": 7,
        "680": 7,
        "681": 6,
        "682": 5,
        "683": 4,
        "685": 5,
        "686": 8,
        "687": 6,
        "688": 5,
        "689": 8,
        "690": 5,
        "691": 7,
        "692": 7,
        "7": 10,
        "81": 10,
        "82": (9, 10),
        "84": 9,
        "850": (3, 10),
        "852": 8,
        "853": 8,
        "855": 9,
        "856": (8, 9),
        "86": 11,
        "870": 9,
        "880": 10,
        "886": 9,
        "90": 11,
        "91": 10,
        "92": 10,
        "93": 9,
        "94": 7,
        "95": (7, 10),
        "960": 7,
        "961": (7, 8),
        "962": (8, 9),
        "963": 7,
        "964": 10,
        "965": 8,
        "966": 9,
        "967": 9,
        "968": 8,
        "970": 9,
        "971": 9,
        "972": 9,
        "973": 8,
        "974": 8,
        "975": 7,
        "976": 8,
        "977": 10,
        "98": 11,
        "992": 9,
        "993": 8,
        "994": 9,
        "995": 9,
        "996": 9,
        "998": 9,
    }

    def __init__(self, config: Config = None) -> None:
        """Prepare the filter.

        Args:
            config: not used by this filter

        """
        # exclusive upper bound for range() in run(): longest calling code plus one
        self.prefix_limit = 1 + max(len(x) for x in self.PREFIX_LENGTH)

    @staticmethod
    def _length_matches(expected, actual: int) -> bool:
        """Tell whether 'actual' satisfies a single PREFIX_LENGTH entry."""
        if isinstance(expected, int):
            return actual == expected
        if isinstance(expected, list):
            return actual in expected
        if isinstance(expected, tuple):
            return expected[0] <= actual <= expected[1]
        return False

    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
        """Run filter checks on received credential candidate data 'line_data'.

        Args:
            line_data: credential candidate data
            target: multiline target from which line data was obtained

        Return:
            True, if the value is None or does not match any known calling code
            with an allowed length. False, if the value looks like a phone number.

        """
        if line_data.value is None:
            return True

        value = line_data.value
        # Drop only a real leading '+': slicing unconditionally would eat the
        # first digit of a value that comes without the sign.
        if value.startswith("+"):
            value = value[1:]
        value_len = len(value)

        # Calling codes have different sizes, so try every possible prefix size;
        # a shorter prefix with a wrong length must not stop a longer one from matching.
        for prefix_size in range(1, self.prefix_limit):
            expected = self.PREFIX_LENGTH.get(value[:prefix_size])
            if expected is not None and self._length_matches(expected, value_len - prefix_size):
                return False
        return True
12 changes: 12 additions & 0 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# Rule for values written like an international phone number ('+' followed by digits).
- name: Phone
severity: info
type: pattern
values:
# The captured value is '+', one digit 1-9 and then 6 to 14 more digits.
# It must be preceded by line start or a non-alphanumeric character, and
# followed by line end or a character that is neither alphanumeric nor '='.
- (^|[^0-9A-Za-z])(?P<value>\+[1-9][0-9]{6,14})([^=0-9A-Za-z]|$)
filter_type:
# ValuePhoneCheck compares the calling code and the remaining length with its table.
- ValuePhoneCheck
min_line_len: 10
required_substrings:
- "+"
doc_available: false

- name: VIN
severity: info
type: pattern
Expand Down
8 changes: 4 additions & 4 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from pathlib import Path

# total number of files in test samples
SAMPLES_FILES_COUNT: int = 108
SAMPLES_FILES_COUNT: int = 109

# credentials count after scan
SAMPLES_CRED_COUNT: int = 108
SAMPLES_CRED_LINE_COUNT: int = 119
SAMPLES_CRED_COUNT: int = 112
SAMPLES_CRED_LINE_COUNT: int = 123

# credentials count after post-processing
SAMPLES_POST_CRED_COUNT: int = 102
SAMPLES_POST_CRED_COUNT: int = 106

# with option --doc
SAMPLES_IN_DOC = 72
Expand Down
Loading

0 comments on commit 21bef6d

Please sign in to comment.