Adding xGitGuard pre commit hook #11

Open
wants to merge 27 commits into base: main
Changes from 1 commit
Commits (27)
e6885c1
Pushing my initial code before I make any major changes
Nov 17, 2023
4c9a85f
Adding very rough initial commit, stripping the unneeded github fetches
Nov 17, 2023
6708474
Change the file name to something more logical
Nov 17, 2023
7c9b042
Functionally finding secrets and outputting them to a CSV
Nov 17, 2023
5a64889
Performing minor cleanup
Nov 17, 2023
e1210a1
Adding file names to output csv
Nov 17, 2023
b68ac29
Remove unneeded fields and add logs to output for pre-commit hook
Nov 19, 2023
2760d6e
Adding prototype pre-commit script
Nov 19, 2023
1e53416
Complete first layer of parallelization, reducing time to run by abou…
Nov 20, 2023
ddfbd89
Adding threading for long commits, moved vectorization into files for…
Nov 22, 2023
015c86e
Create an installer for the pre-commit hook
Nov 22, 2023
93b88b2
Create an installer for the pre-commit hook
BuiltInParris Nov 22, 2023
8314aa9
Merge branch 'pre-commit-hook' of https://github.com/BuiltInParris/xG…
BuiltInParris Nov 22, 2023
afd7098
Pushing my initial code before I make any major changes
BuiltInParris Nov 17, 2023
326ae63
Adding very rough initial commit, stripping the unneeded github fetches
BuiltInParris Nov 17, 2023
867e5a0
Change the file name to something more logical
BuiltInParris Nov 17, 2023
1576bc9
Functionally finding secrets and outputting them to a CSV
BuiltInParris Nov 17, 2023
18134b3
Performing minor cleanup
BuiltInParris Nov 17, 2023
4ca638e
Adding file names to output csv
BuiltInParris Nov 17, 2023
9a1cd52
Remove unneeded fields and add logs to output for pre-commit hook
BuiltInParris Nov 19, 2023
230e6b6
Adding prototype pre-commit script
BuiltInParris Nov 19, 2023
d91bf86
Complete first layer of parallelization, reducing time to run by abou…
BuiltInParris Nov 20, 2023
b2d0346
Adding threading for long commits, moved vectorization into files for…
BuiltInParris Nov 22, 2023
17d4a95
Create an installer for the pre-commit hook
BuiltInParris Nov 22, 2023
8c137b9
Create an installer for the pre-commit hook
BuiltInParris Nov 22, 2023
4ab1f53
Merge branch 'pre-commit-hook' of https://github.com/BuiltInParris/xG…
BuiltInParris Nov 22, 2023
5e872da
Merge branch 'pre-commit-hook' of https://github.com/BuiltInParris/xG…
BuiltInParris Nov 22, 2023
Remove unneeded fields and add logs to output for pre-commit hook
BuiltInParris committed Nov 22, 2023
commit 9a1cd52c268baf3959246b5f2d5f63ff1a0eb80f
10 changes: 5 additions & 5 deletions xgitguard/common/logger.py
@@ -20,8 +20,7 @@
import os
from datetime import datetime


def create_logger(log_level=20, console_logging=True, log_dir=None, log_file_name=None):
def create_logger(log_level=20, console_logging=True, log_dir=None, log_file_name=None, show_current_run_logs=True):
"""
Create logging class and return
params: log_level - int - Default - 10
@@ -42,15 +41,15 @@ def create_logger(log_level=20, console_logging=True, log_dir=None, log_file_nam
)

# add file handler to logger
logger.addHandler(set_file_handler(logger_name, formatter, log_dir, log_file_name))
logger.addHandler(set_file_handler(logger_name, formatter, log_dir, log_file_name, show_current_run_logs))

if console_logging:
logger.addHandler(set_console_handler(formatter))

return logger


def set_file_handler(logger_name, formatter, log_dir, log_file_name):
def set_file_handler(logger_name, formatter, log_dir, log_file_name, show_current_run_logs):
"""Setting File streaming Handler"""
# define file handler and set formatter
if log_dir and os.path.exists(log_dir):
@@ -65,7 +64,8 @@ def set_file_handler(logger_name, formatter, log_dir, log_file_name):
log_file = os.path.join(log_dir, log_file_name)
file_handler = logging.FileHandler(log_file)
file_handler.setFormatter(formatter)
print(f"Current run logs file: {log_file}")
if(show_current_run_logs):
print(f"Current run logs file: {log_file}")
return file_handler


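
The new `show_current_run_logs` flag lets callers suppress the "Current run logs file: ..." console print while still writing to the log file. A minimal usage sketch (the import path and log locations here are illustrative assumptions, mirroring how the pre-commit script puts `xgitguard/` on `sys.path`):

```python
# Illustrative only: assumes xgitguard/ is on sys.path so common.logger resolves.
from common.logger import create_logger

# Quiet logger for hook runs: file logging still happens, but the
# "Current run logs file: ..." print is suppressed.
logger = create_logger(
    log_level=10,
    console_logging=True,
    log_dir="logs",                       # hypothetical directory
    log_file_name="xgg_precommit.log",    # hypothetical file name
    show_current_run_logs=False,
)
logger.info("pre-commit scan started")
```
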
96 changes: 96 additions & 0 deletions xgitguard/config/secondary_keys_creds.csv
@@ -0,0 +1,96 @@
keyword
--password
--token
?accesskeyid
?access_token
access_key
access_key_id
access_key_secret
access_secret
access_token
account_sid
agfa
algolia_api_key
amazon_secret_access_key
api_key
api_key_secret
api_key_sid
app_token
artifacts_bucket
artifacts_secret
atoken
auth
auth_token
aws_access_key
aws_access_key_id
aws_secret_access_key
aws_secret_key
bintray_key
cf_password
client_secret
cloudflare_api_key
codecov_token
consumer_secret
coveralls_repo_token
coverity_scan_token
cred
customer_secret
database_password
datadog_api_key
db_password
db_pw
deploy_password
deploy_token
dockerhubpassword
docker_hub_password
docker_key
docker_pass
docker_passwd
docker_password
encryption_password
file_password
firebase_token
ftp_password
ftp_pw
get_token
gh_token
github_access_token
github_api_key
github_auth
github_key
github_oauth_token
github_password
github_pwd
github_token
gpg_passphrase
heroku_api_key
key
keystore_pass
mapbox_access_token
mysql_password
npm_auth_token
npm_token
oauth_token
os_password
pass
passphrase
password
private_key
publish_key
pypi_password
release_token
repotoken
s3_access_key
s3_access_key_id
s3_key
s3_secret_key
sauce_access_key
secret
secret_key_base
signing_key
sonar_token
sonatype_password
sshpass
sshpassword
token
user_secret
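
This is a one-column CSV (header `keyword`) of secondary credential keywords that the pre-commit detector loads via `configs.read_secondary_keywords(file_name="secondary_keys_creds.csv")`. The `ConfigsData` internals are not part of this diff, so the following is only an approximation of what that load amounts to, not the project's actual implementation:

```python
import pandas as pd

# Approximation of reading the keyword list; ConfigsData's real logic
# may differ (path resolution, deduplication, lowercasing, etc.).
def read_secondary_keywords(file_path="xgitguard/config/secondary_keys_creds.csv"):
    df = pd.read_csv(file_path)  # single "keyword" column
    return df["keyword"].dropna().str.strip().tolist()

keywords = read_secondary_keywords()
print(f"Total Secondary Keywords: {len(keywords)}")  # mirrors the log line in run_detection
```
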
7 changes: 1 addition & 6 deletions xgitguard/config/xgg_configs.yaml
@@ -86,16 +86,11 @@ secrets:
"File",
"Secret",
"Code",
"Detected_Timestamp",
"Key_Weight",
"SKey_Count",
"Entropy",
"Dictionary_Similarity",
"Score",
"Year",
"Month",
"Day",
"Hour",
"Score"
]
public_data_collector_columns:
[
119 changes: 65 additions & 54 deletions xgitguard/git-precommit-hook/precommit_cred_detections.py
@@ -59,7 +59,7 @@
MODULE_DIR = os.path.dirname(os.path.realpath('__file__'))

parent_dir = os.path.dirname(MODULE_DIR)
sys.path.insert(0, parent_dir)
sys.path.insert(0, '/Users/sparri919/Documents/GitHub/xGitGuard/xgitguard')


from common.configs_read import ConfigsData
@@ -78,7 +78,7 @@
file_prefix = "xgg_"


def calculate_confidence(secondary_keyword, extension, secret):
def calculate_confidence(secondary_keyword, secret):
"""
Calculates confidence scores for given Keywords
params: secondary_keyword - string
@@ -117,7 +117,7 @@ def calculate_confidence(secondary_keyword, extension, secret):
return [sum([secondary_keyword_value, extension_value]), entro, d_match[0]]


def format_detection(file, skeyword, code_contents, secrets, skeyword_count, extension):
def format_detection(file, skeyword, code_contents, secrets, skeyword_count):
"""
Format the secret data from the given code content and other data
Format the secrets data in the required format
@@ -141,7 +141,7 @@ def format_detection(file, skeyword, code_contents, secrets, skeyword_count, ext
logger.debug("<<<< 'Current Executing Function calculate_confidence loop' >>>>")
for secret in secrets:
# Calculate confidence values for detected secrets
confidence_score = calculate_confidence(skeyword, extension, secret)
confidence_score = calculate_confidence(skeyword, secret)
if confidence_score[1] > 1.5:
valid_secret_row = [value for value in secret_data]
secret_lines = re.findall(".*" + secret + ".*$", code_contents, re.MULTILINE)
@@ -188,8 +188,6 @@ def format_detection(file, skeyword, code_contents, secrets, skeyword_count, ext
# Mask the current secret
masked_secret = mask_data(code_line, secret)
valid_secret_row.append(masked_secret)

valid_secret_row.append(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
valid_secret_row.append(confidence_score[0])
count_score = math.log2(50) / (math.log2(skeyword_count + 1) + 1)
valid_secret_row.append(count_score)
@@ -200,18 +198,14 @@ def format_detection(file, skeyword, code_contents, secrets, skeyword_count, ext
confidence_score[0] + confidence_score[1] + count_score + d_match
)
now = datetime.now()
valid_secret_row.append(now.year)
valid_secret_row.append(now.month)
valid_secret_row.append(now.day)
valid_secret_row.append(now.hour)
secrets_data_list.append(valid_secret_row)
valid_secret = False
logger.debug(f"Current formatted secrets_data_list count: {len(secrets_data_list)}")
# logger.debug(f"secrets_data_list: {secrets_data_list}")
return secrets_data_list


def process_file_diffs(code_contents, search_query, extension):
def process_file_diffs(code_contents, search_query):
"""
Extract secret values using regex
Format the secrets detected
@@ -245,7 +239,7 @@ def process_file_diffs(code_contents, search_query, extension):
if len(secrets_data) >= 1 and len(secrets_data) <= 20:
clean_line = "".join(line).lower()[1:].strip()
secret_data_list = format_detection(file,
skeyword, "".join(clean_line).lower(), secrets_data, skeyword_count, extension
skeyword, "".join(clean_line).lower(), secrets_data, skeyword_count
)
if secret_data_list:
for secret_data in secret_data_list:
Expand All @@ -259,7 +253,7 @@ def process_file_diffs(code_contents, search_query, extension):
return secrets_data_list


def process_search_results(git_changes, search_query, ml_prediction, extension):
def process_search_results(git_changes, search_query, ml_prediction, total_secrets_map):
"""
For the user code content
Format and clean the code content
@@ -279,7 +273,7 @@ def process_search_results(git_changes, search_query, ml_prediction, extension):
detection_writes_per_query = 0
detections_per_query = 0
global file_prefix
secrets_detected = process_file_diffs(git_changes, search_query, extension)
secrets_detected = process_file_diffs(git_changes, search_query)
detections = len(secrets_detected)
if secrets_detected:
detections_per_query = detections
@@ -351,6 +345,16 @@ def process_search_results(git_changes, search_query, ml_prediction, extension):
configs.output_dir,
"xgg_precommit_creds_detected.csv",
)
specific_secrets_attributes = secrets_detected_df.loc[:,['File','Code','Score']]
file = ""
for index, row in specific_secrets_attributes.iterrows():
if row['Code'] not in total_secrets_map:
total_secrets_map[row['Code']] = 1
if(row['File'] != file):
print("The following credentials have been detected:\n")
print(f"File: {row['File']}")
file = row['File']
print(f"Code: {row['Code']}\nScore: {row['Score']}\n")
write_to_csv_file(
secrets_detected_df, secrets_detected_file
)
@@ -433,7 +437,7 @@ def run_detection(secondary_keywords=[], extensions=[], ml_prediction=False):
sys.exit(1)
else:
# Get the secondary_keywords from secondary_keywords file
configs.read_secondary_keywords(file_name="secondary_creds.csv")
configs.read_secondary_keywords(file_name="secondary_keys_creds.csv")
logger.info(f"Total Secondary Keywords: {len(configs.secondary_keywords)}")

if extensions:
@@ -475,50 +479,57 @@ def run_detection(secondary_keywords=[], extensions=[], ml_prediction=False):
logger.info(f"No Search query to process. Ending.")
sys.exit(1)

total_secrets_map = dict()
# Loop over each extension for each search query
for extension in configs.extensions:
for search_query in search_query_list:
detection_writes_per_query = 0
detections_per_query = 0
logger.info(
f"******* Processing Search Query: '{search_query} extension:{extension}' *******"
)
try:
# Search GitHub and return search response confidence_score
total_processed_search += 1
git_changes = subprocess.check_output(["git", "diff", "--staged"]).decode("utf-8")
# If search has detections, process the code changes
if git_changes:
(
detection_writes_per_query,
detections_per_query,
) = process_search_results(
git_changes, search_query, ml_prediction, extension
)
logger.info(
f"Detection writes in current search query: {detection_writes_per_query}"
)
total_detection_writes += detection_writes_per_query
else:
# time.sleep(2)
logger.info(
f"Search '{search_query}' returns no results. Continuing..."
)
continue
except Exception as e:
logger.error(f"Process Error: {e}")
logger.info(f"Current Total Processed Search: {total_processed_search}")
logger.info(f"Current Total Detections Write: {total_detection_writes}")

#for extension in configs.extensions:
for search_query in search_query_list:
detection_writes_per_query = 0
detections_per_query = 0
logger.info(
f"Total: {total_search_pairs} "
+ f"Processed: {total_processed_search} "
+ f"Detected: {detections_per_query} "
+ f"Total Writes: {detection_writes_per_query} "
f"******* Processing Search Query: '{search_query}"
)
try:
# Search GitHub and return search response confidence_score
total_processed_search += 1
git_changes = subprocess.check_output(["git", "diff", "--staged"]).decode("utf-8")
# If search has detections, process the code changes
if git_changes:
(
detection_writes_per_query,
detections_per_query,
) = process_search_results(
git_changes, search_query, ml_prediction, total_secrets_map
)
logger.info(
f"Detection writes in current search query: {detection_writes_per_query}"
)
total_detection_writes += detection_writes_per_query
else:
# time.sleep(2)
logger.info(
f"Search '{search_query}' returns no results. Continuing..."
)
continue
except Exception as e:
logger.error(f"Process Error: {e}")

logger.info(f"Current Total Processed Search: {total_processed_search}")
logger.info(f"Current Total Detections Write: {total_detection_writes}")

logger.info(
f"Total: {total_search_pairs} "
+ f"Processed: {total_processed_search} "
+ f"Detected: {detections_per_query} "
+ f"Total Writes: {detection_writes_per_query} "
)

logger.info(f"Total Processed Search: {total_processed_search}")
logger.info(f"Total Detections Write: {total_detection_writes}")

if total_detection_writes > 0:
print("\nIf these are not credentials, use --no-verify to bypass this check:")
print("\ngit commit -m 'message' --no-verify")

return True


@@ -533,7 +544,7 @@ def setup_logger(log_level=10, console_logging=True):
global logger
# Creates a logger
logger = create_logger(
log_level, console_logging, log_dir=log_dir, log_file_name=log_file_name
log_level, console_logging, log_dir=log_dir, log_file_name=log_file_name, show_current_run_logs=False
)


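
The "Create an installer for the pre-commit hook" commits in this PR (not included in this commit's diff) wire this script up as a Git pre-commit hook. A hypothetical sketch of what such a hook wrapper could look like, assuming the detection script signals failure through its exit code; the actual installer, paths, and exit behaviour are defined elsewhere in the PR:

```python
#!/usr/bin/env python3
# Hypothetical .git/hooks/pre-commit wrapper; the real installer in this PR may differ.
import subprocess
import sys

# Assumed location of the detection script inside the repository.
HOOK_SCRIPT = "xgitguard/git-precommit-hook/precommit_cred_detections.py"

def main():
    # Run the detector against the staged diff; it prints any detections
    # and a hint about `git commit --no-verify`.
    result = subprocess.run([sys.executable, HOOK_SCRIPT])
    # Block the commit if the detector exited non-zero.
    sys.exit(result.returncode)

if __name__ == "__main__":
    main()
```
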