Skip to content

Commit

Permalink
Refactor JSDatabaseManager to use shared database utilities
Browse files Browse the repository at this point in the history
- Moved hash calculation and TOML file operations to a new module, `surfactant/database_manager/database_utils.py`, for better modularity and reuse.
- Updated `JSDatabaseManager` to utilize the new utility functions `calculate_hash`, `load_hash_and_timestamp`, and `save_hash_and_timestamp`.
- Replaced direct `toml` operations with calls to the shared utility functions to handle hash and timestamp management.
- Improved code organization by separating concerns and reducing redundancy.
- Updated import statements to reflect the new module structure.
- Ensured backward compatibility with existing functionality and improved maintainability.
  • Loading branch information
willis89pr committed Jan 22, 2025
1 parent b740467 commit 2144c41
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 46 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ dependencies = [
"flask==3.*",
"tomlkit==0.13.*",
"requests>=2.32.3",
"toml>=0.10.2",
]
dynamic = ["version"]

Expand Down
Empty file.
47 changes: 47 additions & 0 deletions surfactant/database_manager/database_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import toml
from typing import Optional, Dict
import hashlib

def calculate_hash(data: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``data`` encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(data.encode("utf-8"))
    return hasher.hexdigest()


def load_hash_and_timestamp(hash_file_path, pattern_key: str, pattern_file: str) -> Optional[Dict[str, str]]:
    """Load the stored hash/timestamp record for one pattern file.

    Args:
        hash_file_path: Path of the TOML file that holds the records.
        pattern_key: Name of the top-level TOML table to look in.
        pattern_file: Key of the record inside that table.

    Returns:
        The value stored at ``[pattern_key][pattern_file]``, or ``None`` when
        the TOML file does not exist or the entry is absent.
    """
    try:
        # TOML documents are UTF-8 by specification, so do not rely on the
        # platform's locale encoding when reading the file.
        with open(hash_file_path, "r", encoding="utf-8") as f:
            hash_data = toml.load(f)
    except FileNotFoundError:
        return None
    # Walk the nested structure; a missing table or entry yields None.
    return hash_data.get(pattern_key, {}).get(pattern_file)

def save_hash_and_timestamp(hash_file_path, pattern_key: str, pattern_file: str, source: str, hash_value: str, timestamp: str) -> None:
    """Store the hash/timestamp record for one pattern file in a TOML file.

    Existing content of the TOML file is kept; only the entry at
    ``[pattern_key][pattern_file]`` is added or replaced.

    Args:
        hash_file_path: Path of the TOML file to update (created if missing).
        pattern_key: Name of the top-level TOML table to write into.
        pattern_file: Key of the record inside that table.
        source: Value written to the record's ``source`` field.
        hash_value: Value written to the record's ``hash`` field.
        timestamp: Value written to the record's ``timestamp`` field.
    """
    # TOML documents are UTF-8 by specification, so both the read and the
    # write pin the encoding instead of using the platform's locale default.
    try:
        with open(hash_file_path, "r", encoding="utf-8") as f:
            hash_data = toml.load(f)
    except FileNotFoundError:
        # No file yet: start from an empty document.
        hash_data = {}

    # Create the table on first use, then add or overwrite this one record
    # while leaving any sibling records in the table untouched.
    hash_data.setdefault(pattern_key, {})[pattern_file] = {
        "source": source,
        "hash": hash_value,
        "timestamp": timestamp,
    }

    # Write the updated data back to the TOML file.
    with open(hash_file_path, "w", encoding="utf-8") as f:
        toml.dump(hash_data, f)
55 changes: 9 additions & 46 deletions surfactant/infoextractors/js_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,18 @@
from surfactant.configmanager import ConfigManager
from surfactant.sbomtypes import SBOM, Software

from surfactant.database_manager.database_utils import calculate_hash, load_hash_and_timestamp, save_hash_and_timestamp


class JSDatabaseManager:
def __init__(self):
self.js_lib_database = None
self.hash_file_path = (
ConfigManager().get_data_dir_path() / "infoextractors" / "js_library_patterns.toml"
)
self.pattern_key = "js_library_patterns"
self.pattern_file = "js_library_patterns.json"
self.source = "jsfile.retirejs"

def load_db(self) -> None:
js_lib_file = (
Expand All @@ -40,48 +45,7 @@ def load_db(self) -> None:

def get_database(self) -> Optional[Dict[str, Any]]:
return self.js_lib_database

def calculate_hash(self, data: str) -> str:
return hashlib.sha256(data.encode("utf-8")).hexdigest()

def load_hash_and_timestamp(self) -> Optional[Dict[str, str]]:
try:
with open(self.hash_file_path, "r") as f:
hash_data = toml.load(f)
return hash_data.get("js_library_patterns", {}).get("js_library_patterns.json")
except FileNotFoundError:
return None

def save_hash_and_timestamp(self, hash_value: str, timestamp: str) -> None:
# Try to load existing data
try:
with open(self.hash_file_path, "r") as f:
hash_data = toml.load(f)
except FileNotFoundError:
# If the file does not exist, start with an empty dictionary
hash_data = {}

# Prepare the new data to be added/updated
new_data = {
"js_library_patterns": {
"js_library_patterns.json": {
"source": "jsfile.retirejs",
"hash": hash_value,
"timestamp": timestamp,
}
}
}

# Update the existing data with the new data
if "js_library_patterns" in hash_data:
hash_data["js_library_patterns"].update(new_data["js_library_patterns"])
else:
hash_data.update(new_data)

# Save the updated data back to the file
with open(self.hash_file_path, "w") as f:
toml.dump(hash_data, f)



js_db_manager = JSDatabaseManager()

Expand Down Expand Up @@ -178,8 +142,8 @@ def strip_irrelevant_data(retirejs_db: dict) -> dict:
def update_db() -> str:
raw_data = download_database()
if raw_data is not None:
new_hash = js_db_manager.calculate_hash(raw_data)
current_data = js_db_manager.load_hash_and_timestamp()
new_hash = calculate_hash(raw_data)
current_data = load_hash_and_timestamp(js_db_manager.hash_file_path, js_db_manager.pattern_key, js_db_manager.pattern_file)
if current_data and new_hash == current_data.get("hash"):
return "No update occurred. Database is up-to-date."

Expand All @@ -192,8 +156,7 @@ def update_db() -> str:
json_file_path = path / "js_library_patterns.json"
with open(json_file_path, "w") as f:
json.dump(cleaned, f, indent=4)

js_db_manager.save_hash_and_timestamp(new_hash, download_timestamp)
save_hash_and_timestamp(js_db_manager.hash_file_path, js_db_manager.pattern_key, js_db_manager.pattern_file, js_db_manager.source, new_hash, download_timestamp)
return "Update complete."
return "No update occurred."

Expand Down

0 comments on commit 2144c41

Please sign in to comment.