diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 630077e3b..10d3439b4 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -422,7 +422,7 @@ jobs: # crc32 should be changed python -m credsweeper --banner # run quick scan - python -m credsweeper --log debug --path ../tests/samples --save-json + python -m credsweeper --ml_providers AzureExecutionProvider,CPUExecutionProvider --log debug --path ../tests/samples --save-json NEW_MODEL_FOUND_SAMPLES=$(jq '.|length' output.json) if [ 10 -gt ${NEW_MODEL_FOUND_SAMPLES} ]; then echo "Failure: found ${NEW_MODEL_FOUND_SAMPLES} credentials" diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index caae36812..2f0fa0e46 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -27,6 +27,15 @@ jobs: fetch-depth: 0 ref: ${{ github.event.pull_request.head.sha }} + # # # ml_config & ml_model integrity + + - name: Check ml_model.onnx integrity + if: ${{ always() && steps.code_checkout.conclusion == 'success' }} + run: | + md5sum --binary credsweeper/ml_model/ml_config.json | grep 2b29c5e1aa199d14b788652bd542c7c0 + md5sum --binary credsweeper/ml_model/ml_model.onnx | grep 88f37978fc0599ac8d1bf732ad40c077 + + # # # line ending - name: Check for text file ending @@ -53,14 +62,6 @@ jobs: done exit ${n} - # # # ml_model integrity - - - name: Check ml_model.onnx integrity - if: ${{ always() && steps.code_checkout.conclusion == 'success' }} - run: | - md5sum --binary credsweeper/ml_model/ml_model.onnx | grep 88f37978fc0599ac8d1bf732ad40c077 - md5sum --binary credsweeper/ml_model/model_config.json | grep 2b29c5e1aa199d14b788652bd542c7c0 - # # # Python setup - name: Set up Python diff --git a/credsweeper/__main__.py b/credsweeper/__main__.py index 15b0a05c1..f025e5d51 100644 --- a/credsweeper/__main__.py +++ b/credsweeper/__main__.py @@ -117,7 +117,6 @@ def get_arguments() -> Namespace: dest="export_log_config", metavar="PATH") 
parser.add_argument("--rules", - nargs="?", help="path of rule config file (default: credsweeper/rules/config.yaml). " f"severity:{[i.value for i in Severity]} " f"type:{[i.value for i in RuleType]}", @@ -131,13 +130,11 @@ def get_arguments() -> Namespace: dest="severity", type=severity_levels) parser.add_argument("--config", - nargs="?", help="use custom config (default: built-in)", default=None, dest="config_path", metavar="PATH") parser.add_argument("--log_config", - nargs="?", help="use custom log config (default: built-in)", default=None, dest="log_config_path", @@ -178,15 +175,27 @@ def get_arguments() -> Namespace: default=16, required=False, metavar="POSITIVE_INT") - ml_provider_group = parser.add_mutually_exclusive_group() - ml_provider_group.add_argument("--azure", - help="enable AzureExecutionProvider for onnx", - dest="azure", - action="store_true") - ml_provider_group.add_argument("--cuda", - help="enable CUDAExecutionProvider for onnx", - dest="cuda", - action="store_true") + parser.add_argument("--ml_config", + help="use external config for ml model", + type=str, + default=None, + dest="ml_config", + required=False, + metavar="PATH") + parser.add_argument("--ml_model", + help="use external ml model", + type=str, + default=None, + dest="ml_model", + required=False, + metavar="PATH") + parser.add_argument("--ml_providers", + help="comma separated list of providers for onnx (CPUExecutionProvider is used by default)", + type=str, + default=None, + dest="ml_providers", + required=False, + metavar="STR") parser.add_argument("--api_validation", help="add credential api validation option to credsweeper pipeline. 
" "External API is used to reduce FP for some rule types.", @@ -297,8 +306,9 @@ def scan(args: Namespace, content_provider: AbstractProvider, json_filename: Opt pool_count=args.jobs, ml_batch_size=args.ml_batch_size, ml_threshold=args.ml_threshold, - azure=args.azure, - cuda=args.cuda, + ml_config=args.ml_config, + ml_model=args.ml_model, + ml_providers=args.ml_providers, find_by_ext=args.find_by_ext, depth=args.depth, doc=args.doc, diff --git a/credsweeper/app.py b/credsweeper/app.py index e254ebbc4..f60b28394 100644 --- a/credsweeper/app.py +++ b/credsweeper/app.py @@ -49,8 +49,9 @@ def __init__(self, pool_count: int = 1, ml_batch_size: Optional[int] = None, ml_threshold: Union[float, ThresholdPreset] = ThresholdPreset.medium, - azure: bool = False, - cuda: bool = False, + ml_config: Union[None, str, Path] = None, + ml_model: Union[None, str, Path] = None, + ml_providers: Optional[str] = None, find_by_ext: bool = False, depth: int = 0, doc: bool = False, @@ -78,6 +79,9 @@ def __init__(self, pool_count: int value, number of parallel processes to use ml_batch_size: int value, size of the batch for model inference ml_threshold: float or string value to specify threshold for the ml model + ml_config: str or Path to set custom config of ml model + ml_model: str or Path to set custom ml model + ml_providers: str - comma separated list with providers find_by_ext: boolean - files will be reported by extension depth: int - how deep container files will be scanned doc: boolean - document-specific scanning @@ -113,8 +117,9 @@ def __init__(self, self.sort_output = sort_output self.ml_batch_size = ml_batch_size if ml_batch_size and 0 < ml_batch_size else 16 self.ml_threshold = ml_threshold - self.azure = azure - self.cuda = cuda + self.ml_config = ml_config + self.ml_model = ml_model + self.ml_providers = ml_providers self.ml_validator = None self.__log_level = log_level @@ -187,7 +192,12 @@ def ml_validator(self) -> MlValidator: """ml_validator getter""" from 
credsweeper.ml_model import MlValidator if not self.__ml_validator: - self.__ml_validator: MlValidator = MlValidator(threshold=self.ml_threshold) + self.__ml_validator: MlValidator = MlValidator( + threshold=self.ml_threshold, # + ml_config=self.ml_config, # + ml_model=self.ml_model, # + ml_providers=self.ml_providers, # + ) assert self.__ml_validator, "self.__ml_validator was not initialized" return self.__ml_validator diff --git a/credsweeper/ml_model/model_config.json b/credsweeper/ml_model/ml_config.json similarity index 100% rename from credsweeper/ml_model/model_config.json rename to credsweeper/ml_model/ml_config.json diff --git a/credsweeper/ml_model/ml_validator.py b/credsweeper/ml_model/ml_validator.py index 743501f04..97011a6e5 100644 --- a/credsweeper/ml_model/ml_validator.py +++ b/credsweeper/ml_model/ml_validator.py @@ -1,7 +1,8 @@ +import hashlib import logging -import os import string -from typing import List, Tuple, Union +from pathlib import Path +from typing import List, Tuple, Union, Optional import numpy as np import onnxruntime as ort @@ -21,35 +22,56 @@ class MlValidator: CHAR_INDEX = {char: index for index, char in enumerate('\0' + string.printable + NON_ASCII)} NUM_CLASSES = len(CHAR_INDEX) - def __init__(self, threshold: Union[float, ThresholdPreset], azure: bool = False, cuda: bool = False) -> None: + def __init__( + self, # + threshold: Union[float, ThresholdPreset], # + ml_config: Union[None, str, Path] = None, # + ml_model: Union[None, str, Path] = None, # + ml_providers: Optional[str] = None) -> None: """Init Args: threshold: decision threshold + ml_config: path to ml config + ml_model: path to ml model + ml_providers: comma separated list of providers https://onnxruntime.ai/docs/execution-providers/ """ - dir_path = os.path.dirname(os.path.realpath(__file__)) - model_file_path = os.path.join(dir_path, "ml_model.onnx") - if azure: - provider = "AzureExecutionProvider" - elif cuda: - provider = "CUDAExecutionProvider" + dir_path = 
Path(__file__).parent + + if ml_config: + ml_config_path = Path(ml_config) + else: + ml_config_path = dir_path / "ml_config.json" + with open(ml_config_path, "rb") as f: + md5_config = hashlib.md5(f.read()).hexdigest() + + if ml_model: + ml_model_path = Path(ml_model) + else: + ml_model_path = dir_path / "ml_model.onnx" + with open(ml_model_path, "rb") as f: + md5_model = hashlib.md5(f.read()).hexdigest() + + if ml_providers: + providers = ml_providers.split(',') else: - provider = "CPUExecutionProvider" - self.model_session = ort.InferenceSession(model_file_path, providers=[provider]) + providers = ["CPUExecutionProvider"] + self.model_session = ort.InferenceSession(ml_model_path, providers=providers) - model_details = Util.json_load(os.path.join(dir_path, "model_config.json")) + model_config = Util.json_load(ml_config_path) if isinstance(threshold, float): self.threshold = threshold - elif isinstance(threshold, ThresholdPreset) and "thresholds" in model_details: - self.threshold = model_details["thresholds"][threshold.value] + elif isinstance(threshold, ThresholdPreset) and "thresholds" in model_config: + self.threshold = model_config["thresholds"][threshold.value] else: self.threshold = 0.5 self.common_feature_list = [] self.unique_feature_list = [] - logger.info("Init ML validator, model file path: %s", model_file_path) - logger.debug("ML validator details: %s", model_details) - for feature_definition in model_details["features"]: + logger.info("Init ML validator with %s provider; config:'%s' md5:%s model:'%s' md5:%s", providers, + ml_config_path, md5_config, ml_model_path, md5_model) + logger.debug("ML validator details: %s", model_config) + for feature_definition in model_config["features"]: feature_class = feature_definition["type"] kwargs = feature_definition.get("kwargs", {}) feature_constructor = getattr(features, feature_class, None) diff --git a/docs/source/guide.rst b/docs/source/guide.rst index b09434954..ebebbc67f 100644 --- a/docs/source/guide.rst 
+++ b/docs/source/guide.rst @@ -13,9 +13,13 @@ Get all argument list: .. code-block:: text - usage: python -m credsweeper [-h] (--path PATH [PATH ...] | --diff_path PATH [PATH ...] | --export_config [PATH] | --export_log_config [PATH]) [--rules [PATH]] [--severity SEVERITY] [--config [PATH]] - [--log_config [PATH]] [--denylist PATH] [--find-by-ext] [--depth POSITIVE_INT] [--no-filters] [--doc] [--ml_threshold FLOAT_OR_STR] [--ml_batch_size POSITIVE_INT] - [--azure | --cuda] [--api_validation] [--jobs POSITIVE_INT] [--skip_ignored] [--save-json [PATH]] [--save-xlsx [PATH]] [--hashed] [--subtext] [--sort] [--log LOG_LEVEL] [--size_limit SIZE_LIMIT] + usage: python -m credsweeper [-h] (--path PATH [PATH ...] | --diff_path PATH [PATH ...] | --export_config [PATH] | --export_log_config [PATH]) + [--rules PATH] [--severity SEVERITY] [--config PATH] [--log_config PATH] [--denylist PATH] + [--find-by-ext] [--depth POSITIVE_INT] [--no-filters] [--doc] [--ml_threshold FLOAT_OR_STR] + [--ml_batch_size POSITIVE_INT] [--ml_config PATH] [--ml_model PATH] [--ml_providers STR] + [--api_validation] [--jobs POSITIVE_INT] [--skip_ignored] [--save-json [PATH]] + [--save-xlsx [PATH]] [--hashed] [--subtext] [--sort] [--log LOG_LEVEL] + [--size_limit SIZE_LIMIT] [--banner] [--version] options: -h, --help show this help message and exit @@ -27,10 +31,10 @@ Get all argument list: exporting default config to file (default: config.json) --export_log_config [PATH] exporting default logger config to file (default: log.yaml) - --rules [PATH] path of rule config file (default: credsweeper/rules/config.yaml). severity:['critical', 'high', 'medium', 'low', 'info'] type:['keyword', 'pattern', 'pem_key', 'multi'] + --rules PATH path of rule config file (default: credsweeper/rules/config.yaml). 
severity:['critical', 'high', 'medium', 'low', 'info'] type:['keyword', 'pattern', 'pem_key', 'multi'] --severity SEVERITY set minimum level for rules to apply ['critical', 'high', 'medium', 'low', 'info'](default: 'Severity.INFO', case insensitive) - --config [PATH] use custom config (default: built-in) - --log_config [PATH] use custom log config (default: built-in) + --config PATH use custom config (default: built-in) + --log_config PATH use custom log config (default: built-in) --denylist PATH path to a plain text file with lines or secrets to ignore --find-by-ext find files by predefined extension --depth POSITIVE_INT additional recursive search in data (experimental) @@ -41,8 +45,9 @@ Get all argument list: 'highest'] (default: medium) --ml_batch_size POSITIVE_INT, -b POSITIVE_INT batch size for model inference (default: 16) - --azure enable AzureExecutionProvider for onnx - --cuda enable CUDAExecutionProvider for onnx + --ml_config PATH use external config for ml model + --ml_model PATH use external ml model + --ml_providers STR comma separated list of providers for onnx (CPUExecutionProvider is used by default) --api_validation add credential api validation option to credsweeper pipeline. External API is used to reduce FP for some rule types. 
--jobs POSITIVE_INT, -j POSITIVE_INT number of parallel processes to use (default: 1) diff --git a/experiment/main.py b/experiment/main.py index cee074e6c..cb69c9a6a 100644 --- a/experiment/main.py +++ b/experiment/main.py @@ -216,6 +216,13 @@ def main(cred_data_location: str, jobs: int) -> str: # print in last line the name print(f"\nYou can find your model in:\n{_model_file_name}") + # convert the model to onnx right now command = f"{sys.executable} -m tf2onnx.convert --saved-model {_model_file_name}" \ f" --output {pathlib.Path(__file__).parent.parent}/credsweeper/ml_model/ml_model.onnx --verbose" subprocess.check_call(command, shell=True, cwd=pathlib.Path(__file__).parent) + + # to keep the hash in log + command = f"md5sum {pathlib.Path(__file__).parent.parent}/credsweeper/ml_model/ml_model.onnx" + subprocess.check_call(command, shell=True, cwd=pathlib.Path(__file__).parent) + command = f"md5sum {pathlib.Path(__file__).parent.parent}/credsweeper/ml_model/ml_config.json" + subprocess.check_call(command, shell=True, cwd=pathlib.Path(__file__).parent) diff --git a/experiment/src/model_config_preprocess.py b/experiment/src/model_config_preprocess.py index 4ad50b30d..5d060df12 100644 --- a/experiment/src/model_config_preprocess.py +++ b/experiment/src/model_config_preprocess.py @@ -7,10 +7,10 @@ def model_config_preprocess(df_all: pd.DataFrame) -> Dict[str, float]: - model_config_path = APP_PATH / "ml_model" / "model_config.json" + model_config_path = APP_PATH / "ml_model" / "ml_config.json" model_config = Util.json_load(model_config_path) - # check whether all extensions from meta are in model_config.json + # check whether all extensions from meta are in ml_config.json for x in model_config["features"]: if "FileExtension" == x["type"]: diff --git a/tests/ml_model/test_ml_validator.py b/tests/ml_model/test_ml_validator.py index 369c4ce88..b6e93fd42 100644 --- a/tests/ml_model/test_ml_validator.py +++ b/tests/ml_model/test_ml_validator.py @@ -30,15 +30,14 @@ def 
setUp(self): config_dict["size_limit"] = None self.config = Config(config_dict) - def test_ml_validator_simple_p(self): - - def validate(_candidate: Candidate) -> Tuple[bool, float]: - """Validate single credential candidate.""" - candidate_key = CandidateKey(_candidate.line_data_list[0]) - sample_as_batch = [(candidate_key, [_candidate])] - is_cred_batch, probability_batch = self.ml_validator.validate_groups(sample_as_batch, 1) - return is_cred_batch[0], probability_batch[0] + def validate(self, _candidate: Candidate) -> Tuple[bool, float]: + """Validate single credential candidate.""" + candidate_key = CandidateKey(_candidate.line_data_list[0]) + sample_as_batch = [(candidate_key, [_candidate])] + is_cred_batch, probability_batch = self.ml_validator.validate_groups(sample_as_batch, 1) + return is_cred_batch[0], probability_batch[0] + def test_ml_validator_simple_p(self): candidate = Candidate.get_dummy_candidate(self.config, "main.py", ".py", "info") candidate.rule_name = "Password" candidate.line_data_list[0].line = 'password="Ahga%$FiQ@Ei8"' @@ -47,25 +46,56 @@ def validate(_candidate: Candidate) -> Tuple[bool, float]: candidate.line_data_list[0].value_end = 25 candidate.line_data_list[0].value = "Ahga%$FiQ@Ei8" - decision, probability = validate(candidate) + decision, probability = self.validate(candidate) self.assertAlmostEqual(0.9996037483215332, probability, delta=NEGLIGIBLE_ML_THRESHOLD) - candidate.line_data_list[0].path = "sample.py" + candidate.line_data_list[0].path = "sample.yaml" candidate.line_data_list[0].file_type = ".yaml" - decision, probability = validate(candidate) - self.assertAlmostEqual(0.9994515776634216, probability, delta=NEGLIGIBLE_ML_THRESHOLD) + decision, probability = self.validate(candidate) + self.assertAlmostEqual(0.9993805885314941, probability, delta=NEGLIGIBLE_ML_THRESHOLD) candidate.line_data_list[0].path = "test.zip" candidate.line_data_list[0].file_type = ".zip" - decision, probability = validate(candidate) + decision, 
probability = self.validate(candidate) self.assertAlmostEqual(0.9992872476577759, probability, delta=NEGLIGIBLE_ML_THRESHOLD) candidate.line_data_list[0].path = "other.txt" candidate.line_data_list[0].file_type = ".txt" - decision, probability = validate(candidate) + decision, probability = self.validate(candidate) self.assertAlmostEqual(0.9987422823905945, probability, delta=NEGLIGIBLE_ML_THRESHOLD) def test_ml_validator_auxiliary_p(self): + candidate = Candidate.get_dummy_candidate(self.config, "mycred", "", "") + candidate.rule_name = "Secret" + candidate.line_data_list[0].line = "secret=238475614782" + candidate.line_data_list[0].variable = "secret" + candidate.line_data_list[0].value_start = 7 + candidate.line_data_list[0].value_end = 43 + candidate.line_data_list[0].value = "238475614782" + # auxiliary candidate for a pattern rule - without variable + aux_candidate = copy.deepcopy(candidate) + aux_candidate.line_data_list[0].variable = None + + # todo: the scores are low for current ML model - will be changed after train + + candidate_key = CandidateKey(candidate.line_data_list[0]) + sample_as_batch = [(candidate_key, [candidate])] + is_cred_batch, probability_batch = self.ml_validator.validate_groups(sample_as_batch, 2) + self.assertAlmostEqual(0.9774117469787598, probability_batch[0], delta=NEGLIGIBLE_ML_THRESHOLD) + + # auxiliary rule which was not trained - keeps the same ML probability + aux_candidate.rule_name = "PASSWD_PAIR" + sample_as_batch = [(candidate_key, [candidate, aux_candidate])] + is_cred_batch, probability_batch = self.ml_validator.validate_groups(sample_as_batch, 2) + self.assertAlmostEqual(0.9774117469787598, probability_batch[0], delta=NEGLIGIBLE_ML_THRESHOLD) + + # auxiliary rule in train increases ML probability + aux_candidate.rule_name = "Token" + sample_as_batch = [(candidate_key, [candidate, aux_candidate])] + is_cred_batch, probability_batch = self.ml_validator.validate_groups(sample_as_batch, 2) + 
self.assertAlmostEqual(0.9825288653373718, probability_batch[0], delta=NEGLIGIBLE_ML_THRESHOLD) + + def test_ml_validator_auxiliary_n(self): candidate = Candidate.get_dummy_candidate(self.config, "secret", "", "") candidate.rule_name = "Secret" candidate.line_data_list[0].line = "secret=bace4d19-dead-beef-cafe-9129474bcd81" diff --git a/tests/test_app.py b/tests/test_app.py index c960b78f9..030b235cf 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -221,10 +221,10 @@ def test_it_works_n(self) -> None: " | --export_config [PATH]" \ " | --export_log_config [PATH]" \ ")" \ - " [--rules [PATH]]" \ + " [--rules PATH]" \ " [--severity SEVERITY]" \ - " [--config [PATH]]" \ - " [--log_config [PATH]]" \ + " [--config PATH]" \ + " [--log_config PATH]" \ " [--denylist PATH]" \ " [--find-by-ext]" \ " [--depth POSITIVE_INT]" \ @@ -232,7 +232,9 @@ def test_it_works_n(self) -> None: " [--doc]" \ " [--ml_threshold FLOAT_OR_STR]" \ " [--ml_batch_size POSITIVE_INT]" \ - " [--azure | --cuda] " \ + " [--ml_config PATH]" \ + " [--ml_model PATH]" \ + " [--ml_providers STR] " \ " [--api_validation]" \ " [--jobs POSITIVE_INT]" \ " [--skip_ignored]" \ @@ -323,7 +325,7 @@ def test_help_p(self) -> None: else: text = ' '.join([text, line]) expected = " ".join(text.split()) - self.maxDiff = 65536 + self.maxDiff = None self.assertEqual(expected, output) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -684,3 +686,65 @@ def test_doc_n(self) -> None: _stdout, _stderr = self._m_credsweeper(["--doc", "--path", str(SAMPLES_PATH), "--save-json", json_filename]) report = Util.json_load(json_filename) self.assertEqual(SAMPLES_IN_DOC, len(report)) + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + + def test_external_ml_n(self) -> None: + # not existed ml_config + _stdout, _stderr = self._m_credsweeper( + ["--ml_config", "not_existed_file", "--path", + str(APP_PATH), "--log", "CRITICAL"]) + self.assertEqual(0, len(_stderr)) + 
self.assertIn("CRITICAL", _stdout) + # not existed ml_model + _stdout, _stderr = self._m_credsweeper( + ["--ml_model", "not_existed_file", "--path", + str(APP_PATH), "--log", "CRITICAL"]) + self.assertEqual(0, len(_stderr)) + self.assertIn("CRITICAL", _stdout) + # wrong config + with tempfile.TemporaryDirectory() as tmp_dir: + json_filename = os.path.join(tmp_dir, f"{__name__}.json") + with open(json_filename, "w") as f: + f.write('{}') + _stdout, _stderr = self._m_credsweeper( + ["--ml_config", json_filename, "--path", + str(APP_PATH), "--log", "CRITICAL"]) + self.assertEqual(0, len(_stderr)) + self.assertIn("CRITICAL", _stdout) + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + + def test_external_ml_p(self) -> None: + log_pattern = re.compile( + r".*Init ML validator with .+ provider; config:'.+' md5:([0-9a-f]{32}) model:'.+' md5:([0-9a-f]{32})") + _stdout, _stderr = self._m_credsweeper(["--path", str(APP_PATH), "--log", "INFO"]) + self.assertEqual(0, len(_stderr)) + self.assertNotIn("CRITICAL", _stdout) + for i in _stdout.splitlines(): + if log_match := re.match(log_pattern, i): + md5_config = log_match.group(1) + md5_model = log_match.group(2) + break + else: + self.fail(f"'Init ML validator' not found in {_stdout}") + with tempfile.TemporaryDirectory() as tmp_dir: + custom_ml_config = os.path.join(tmp_dir, f"{__name__}.json") + shutil.copyfile(APP_PATH / "ml_model" / "ml_config.json", custom_ml_config) + custom_ml_model = os.path.join(tmp_dir, f"{__name__}.onnx") + shutil.copyfile(APP_PATH / "ml_model" / "ml_model.onnx", custom_ml_model) + with open(custom_ml_config, "a") as f: + f.write("\n\n\n") + args = [ + "--ml_config", custom_ml_config, "--ml_model", custom_ml_model, "--path", + str(APP_PATH), "--log", "INFO" + ] + _stdout, _stderr = self._m_credsweeper(args) + self.assertEqual("", _stderr) + self.assertNotIn("CRITICAL", _stdout) + # model hash is the same + self.assertIn(md5_model, _stdout) + # hash of ml config will 
be different + self.assertNotIn(md5_config, _stdout) + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # diff --git a/tests/test_main.py b/tests/test_main.py index 3d2eb37d6..24ca464d6 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -256,6 +256,9 @@ def test_report_p(self, mock_get_arguments) -> None: jobs=1, ml_threshold=NEGLIGIBLE_ML_THRESHOLD, ml_batch_size=16, + ml_config=None, + ml_model=None, + ml_providers=None, depth=0, doc=False, size_limit="1G",