Move the hash/timestamp helpers out of JSDatabaseManager into a standalone
surfactant/database_manager/database_utils module and declare the toml
dependency those helpers use.

NOTE(review): the post-image blob ids on the "index" lines were not recomputed
for the edited contents. js_file.py no longer calls hashlib or toml in the
hunks shown here; check whether its imports of them are still needed.

diff --git a/pyproject.toml b/pyproject.toml
index edf44c55..00e1dfa5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -55,6 +55,7 @@ dependencies = [
     "flask==3.*",
     "tomlkit==0.13.*",
     "requests>=2.32.3",
+    "toml>=0.10.2",
 ]
 dynamic = ["version"]
 
diff --git a/surfactant/database_manager/__init__.py b/surfactant/database_manager/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/surfactant/database_manager/database_utils.py b/surfactant/database_manager/database_utils.py
new file mode 100644
index 00000000..c3b7cab3
--- /dev/null
+++ b/surfactant/database_manager/database_utils.py
@@ -0,0 +1,65 @@
+"""Helpers for tracking the hash and download timestamp of pattern databases."""
+
+import hashlib
+from pathlib import Path
+from typing import Dict, Optional, Union
+
+import toml
+
+
+def calculate_hash(data: str) -> str:
+    """Return the hex SHA-256 digest of ``data`` encoded as UTF-8."""
+    return hashlib.sha256(data.encode("utf-8")).hexdigest()
+
+
+def load_hash_and_timestamp(
+    hash_file_path: Union[str, Path], pattern_key: str, pattern_file: str
+) -> Optional[Dict[str, str]]:
+    """Load the stored record for one pattern file from a TOML hash file.
+
+    Args:
+        hash_file_path: Path of the TOML file holding the records.
+        pattern_key: Top-level table to look in.
+        pattern_file: Entry within that table to return.
+
+    Returns:
+        The stored record, or None when the file, the table, or the entry
+        does not exist.
+    """
+    try:
+        # TOML documents are UTF-8; do not depend on the platform default.
+        with open(hash_file_path, "r", encoding="utf-8") as f:
+            hash_data = toml.load(f)
+    except FileNotFoundError:
+        return None
+    return hash_data.get(pattern_key, {}).get(pattern_file)
+
+
+def save_hash_and_timestamp(
+    hash_file_path: Union[str, Path],
+    pattern_key: str,
+    pattern_file: str,
+    source: str,
+    hash_value: str,
+    timestamp: str,
+) -> None:
+    """Store the source, hash, and timestamp for one pattern file.
+
+    Existing content of the TOML file, including other entries under
+    ``pattern_key``, is preserved; only the ``pattern_file`` entry is replaced.
+    """
+    try:
+        with open(hash_file_path, "r", encoding="utf-8") as f:
+            hash_data = toml.load(f)
+    except FileNotFoundError:
+        # First save: start from an empty document.
+        hash_data = {}
+
+    hash_data.setdefault(pattern_key, {})[pattern_file] = {
+        "source": source,
+        "hash": hash_value,
+        "timestamp": timestamp,
+    }
+
+    with open(hash_file_path, "w", encoding="utf-8") as f:
+        toml.dump(hash_data, f)
diff --git a/surfactant/infoextractors/js_file.py b/surfactant/infoextractors/js_file.py
index 54314037..c8e4fcfb 100644
--- a/surfactant/infoextractors/js_file.py
+++ b/surfactant/infoextractors/js_file.py
@@ -16,6 +16,11 @@
 from surfactant.configmanager import ConfigManager
+from surfactant.database_manager.database_utils import (
+    calculate_hash,
+    load_hash_and_timestamp,
+    save_hash_and_timestamp,
+)
 from surfactant.sbomtypes import SBOM, Software
 
 
 class JSDatabaseManager:
     def __init__(self):
@@ -23,6 +28,9 @@ def __init__(self):
         self.hash_file_path = (
             ConfigManager().get_data_dir_path() / "infoextractors" / "js_library_patterns.toml"
         )
+        self.pattern_key = "js_library_patterns"
+        self.pattern_file = "js_library_patterns.json"
+        self.source = "jsfile.retirejs"
 
     def load_db(self) -> None:
         js_lib_file = (
@@ -40,48 +48,7 @@ def load_db(self) -> None:
 
     def get_database(self) -> Optional[Dict[str, Any]]:
         return self.js_lib_database
-
-    def calculate_hash(self, data: str) -> str:
-        return hashlib.sha256(data.encode("utf-8")).hexdigest()
-
-    def load_hash_and_timestamp(self) -> Optional[Dict[str, str]]:
-        try:
-            with open(self.hash_file_path, "r") as f:
-                hash_data = toml.load(f)
-                return hash_data.get("js_library_patterns", {}).get("js_library_patterns.json")
-        except FileNotFoundError:
-            return None
-
-    def save_hash_and_timestamp(self, hash_value: str, timestamp: str) -> None:
-        # Try to load existing data
-        try:
-            with open(self.hash_file_path, "r") as f:
-                hash_data = toml.load(f)
-        except FileNotFoundError:
-            # If the file does not exist, start with an empty dictionary
-            hash_data = {}
-
-        # Prepare the new data to be added/updated
-        new_data = {
-            "js_library_patterns": {
-                "js_library_patterns.json": {
-                    "source": "jsfile.retirejs",
-                    "hash": hash_value,
-                    "timestamp": timestamp,
-                }
-            }
-        }
-
-        # Update the existing data with the new data
-        if "js_library_patterns" in hash_data:
-            hash_data["js_library_patterns"].update(new_data["js_library_patterns"])
-        else:
-            hash_data.update(new_data)
-
-        # Save the updated data back to the file
-        with open(self.hash_file_path, "w") as f:
-            toml.dump(hash_data, f)
-
+
 
 js_db_manager = JSDatabaseManager()
 
@@ -178,8 +145,12 @@ def strip_irrelevant_data(retirejs_db: dict) -> dict:
 def update_db() -> str:
     raw_data = download_database()
     if raw_data is not None:
-        new_hash = js_db_manager.calculate_hash(raw_data)
-        current_data = js_db_manager.load_hash_and_timestamp()
+        new_hash = calculate_hash(raw_data)
+        current_data = load_hash_and_timestamp(
+            js_db_manager.hash_file_path,
+            js_db_manager.pattern_key,
+            js_db_manager.pattern_file,
+        )
 
         if current_data and new_hash == current_data.get("hash"):
             return "No update occurred. Database is up-to-date."
@@ -192,8 +163,14 @@ def update_db() -> str:
         json_file_path = path / "js_library_patterns.json"
         with open(json_file_path, "w") as f:
             json.dump(cleaned, f, indent=4)
-
-        js_db_manager.save_hash_and_timestamp(new_hash, download_timestamp)
+        save_hash_and_timestamp(
+            js_db_manager.hash_file_path,
+            js_db_manager.pattern_key,
+            js_db_manager.pattern_file,
+            js_db_manager.source,
+            new_hash,
+            download_timestamp,
+        )
         return "Update complete."
     return "No update occurred."
 