Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding xGitGuard pre commit hook #11

Open
wants to merge 27 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
e6885c1
Pushing my initial code before I make any major changes
Nov 17, 2023
4c9a85f
Adding very rough initial commit, stripping the unneeded github fetches
Nov 17, 2023
6708474
Change the file name to something more logical
Nov 17, 2023
7c9b042
Functionally finding secrets and outputting them to a CSV
Nov 17, 2023
5a64889
Performing minor cleanup
Nov 17, 2023
e1210a1
Adding file names to output csv
Nov 17, 2023
b68ac29
Remove unneeded fields and add logs to output for pre-commit hook
Nov 19, 2023
2760d6e
Adding prototype pre-commit script
Nov 19, 2023
1e53416
Complete first layer of parallelization, reducing time to run by abou…
Nov 20, 2023
ddfbd89
Adding threading for long commits, moved vectorization into files for…
Nov 22, 2023
015c86e
Create an installer for the pre-commit hook
Nov 22, 2023
93b88b2
Create an installer for the pre-commit hook
BuiltInParris Nov 22, 2023
8314aa9
Merge branch 'pre-commit-hook' of https://github.com/BuiltInParris/xG…
BuiltInParris Nov 22, 2023
afd7098
Pushing my initial code before I make any major changes
BuiltInParris Nov 17, 2023
326ae63
Adding very rough initial commit, stripping the unneeded github fetches
BuiltInParris Nov 17, 2023
867e5a0
Change the file name to something more logical
BuiltInParris Nov 17, 2023
1576bc9
Functionally finding secrets and outputting them to a CSV
BuiltInParris Nov 17, 2023
18134b3
Performing minor cleanup
BuiltInParris Nov 17, 2023
4ca638e
Adding file names to output csv
BuiltInParris Nov 17, 2023
9a1cd52
Remove unneeded fields and add logs to output for pre-commit hook
BuiltInParris Nov 19, 2023
230e6b6
Adding prototype pre-commit script
BuiltInParris Nov 19, 2023
d91bf86
Complete first layer of parallelization, reducing time to run by abou…
BuiltInParris Nov 20, 2023
b2d0346
Adding threading for long commits, moved vectorization into files for…
BuiltInParris Nov 22, 2023
17d4a95
Create an installer for the pre-commit hook
BuiltInParris Nov 22, 2023
8c137b9
Create an installer for the pre-commit hook
BuiltInParris Nov 22, 2023
4ab1f53
Merge branch 'pre-commit-hook' of https://github.com/BuiltInParris/xG…
BuiltInParris Nov 22, 2023
5e872da
Merge branch 'pre-commit-hook' of https://github.com/BuiltInParris/xG…
BuiltInParris Nov 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions xgitguard/common/configs_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import logging
import os
import sys
import pickle

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
Expand Down Expand Up @@ -176,6 +177,39 @@ def read_confidence_values(self, file_name="confidence_values.csv"):
f"confidence_values file is not present/readable: {self.confidence_values_file}"
)


def read_cached_dictionary_words(
    self,
    file_name="dictionary_words.csv",
    vectorizer_file="vectorizer.pkl",
    count_matrix_file="count_matrix.pkl",
):
    """
    Read the given dictionary words csv file in config path
    Load the pre-computed (pickled) vectorizer and count matrix from the
    same config path instead of re-fitting them
    Set the Class Variables for further use
    params: file_name - string - dictionary words csv file name
    params: vectorizer_file - string - pickled vectorizer file name
    params: count_matrix_file - string - pickled count matrix file name
    raises: Exception - if the csv is empty/unreadable or the cached
            pickles cannot be loaded
    """
    logger.debug("<<<< 'Current Executing Function' >>>>")
    self.dictionary_words_file = os.path.join(self.config_dir, file_name)
    self.dictionary_words = read_csv_file(
        self.dictionary_words_file, output="dataframe", header=0
    )

    # Guard clause: nothing to do without dictionary words
    if self.dictionary_words.empty:
        message = (
            f"dictionary_words file is not present/readable: "
            f"{self.dictionary_words_file}"
        )
        logger.error(message)
        raise Exception(message)

    # Resolve the cache files relative to the config directory so the code
    # works on any machine, not only where an absolute path happens to exist
    vectorizer_path = os.path.join(self.config_dir, vectorizer_file)
    count_matrix_path = os.path.join(self.config_dir, count_matrix_file)
    try:
        # NOTE(review): pickle.load can execute arbitrary code; these files
        # must only ever come from the trusted config directory
        with open(vectorizer_path, "rb") as file:
            self.dict_words_vc = pickle.load(file)
        with open(count_matrix_path, "rb") as file:
            count = pickle.load(file)
        # presumably a scipy sparse count matrix (sum -> np.matrix -> getA1)
        # - TODO confirm against the code that writes count_matrix.pkl
        self.dict_words_ct = np.log10(count.sum(axis=0).getA1())
    except Exception as e:
        logger.error(f"Count Vectorizer Error: {e}")
        # Keep the original exception attached as the cause for debugging
        raise Exception(f"Count Vectorizer Error: {e}") from e

def read_dictionary_words(self, file_name="dictionary_words.csv"):
"""
Read the given dictionary words csv file in config path
Expand Down
45 changes: 45 additions & 0 deletions xgitguard/common/data_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
from urlextract import URLExtract



# Matches e-mail-like tokens (local-part@domain.tld).
# Raw string: "\." must reach the regex engine untouched; in a normal string
# literal it is an invalid escape sequence that newer Python versions warn on.
specialCharacterRegex = re.compile(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+")

def remove_url_from_keys(code_content):
"""
Replace special chars in the given code content data
Expand Down Expand Up @@ -118,6 +121,48 @@ def remove_url_from_creds(code_content, key):
codes_list = code_data.split()
return codes_list

# Every character treated as a token separator, mapped to a space in one pass.
_CREDENTIAL_SEPARATOR_TABLE = str.maketrans(
    dict.fromkeys("'\"#%&()*+,-./:;<=>?[\\]`{|}~", " ")
)


def clean_credentials(code_content):
    """
    Strip e-mail-like tokens and special chars from the given code content
    and split the remainder into whitespace separated tokens
    params: code_content - string - code data
    returns: codes_list - list - Cleaned up code tokens
    """
    # E-mail addresses are blanked first, while their "@", "." and "-" are
    # still intact for the pattern to match
    code_data = specialCharacterRegex.sub(" ", code_content)
    # Single translate pass instead of one str.replace call per character
    code_data = code_data.translate(_CREDENTIAL_SEPARATOR_TABLE)
    return code_data.split()

def keys_extractor(code_content):
"""
Expand Down
10 changes: 5 additions & 5 deletions xgitguard/common/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
import os
from datetime import datetime


def create_logger(log_level=20, console_logging=True, log_dir=None, log_file_name=None):
def create_logger(log_level=20, console_logging=True, log_dir=None, log_file_name=None, show_current_run_logs=True):
"""
Create logging class and return
params: log_level - int - Default - 10
Expand All @@ -42,15 +41,15 @@ def create_logger(log_level=20, console_logging=True, log_dir=None, log_file_nam
)

# add file handler to logger
logger.addHandler(set_file_handler(logger_name, formatter, log_dir, log_file_name))
logger.addHandler(set_file_handler(logger_name, formatter, log_dir, log_file_name, show_current_run_logs))

if console_logging:
logger.addHandler(set_console_handler(formatter))

return logger


def set_file_handler(logger_name, formatter, log_dir, log_file_name):
def set_file_handler(logger_name, formatter, log_dir, log_file_name, show_current_run_logs):
"""Setting File streaming Handler"""
# define file handler and set formatter
if log_dir and os.path.exists(log_dir):
Expand All @@ -65,7 +64,8 @@ def set_file_handler(logger_name, formatter, log_dir, log_file_name):
log_file = os.path.join(log_dir, log_file_name)
file_handler = logging.FileHandler(log_file)
file_handler.setFormatter(formatter)
print(f"Current run logs file: {log_file}")
if(show_current_run_logs):
print(f"Current run logs file: {log_file}")
return file_handler


Expand Down
Binary file added xgitguard/config/count_matrix.pkl
Binary file not shown.
96 changes: 96 additions & 0 deletions xgitguard/config/secondary_keys_creds.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
keyword
--password
--token
?accesskeyid
?access_token
access_key
access_key_id
access_key_secret
access_secret
access_token
account_sid
agfa
algolia_api_key
amazon_secret_access_key
api_key
api_key_secret
api_key_sid
app_token
artifacts_bucket
artifacts_secret
atoken
auth
auth_token
aws_access_key
aws_access_key_id
aws_secret_access_key
aws_secret_key
bintray_key
cf_password
client_secret
cloudflare_api_key
codecov_token
consumer_secret
coveralls_repo_token
coverity_scan_token
cred
customer_secret
database_password
datadog_api_key
db_password
db_pw
deploy_password
deploy_token
dockerhubpassword
docker_hub_password
docker_key
docker_pass
docker_passwd
docker_password
encryption_password
file_password
firebase_token
ftp_password
ftp_pw
get_token
gh_token
github_access_token
github_api_key
github_auth
github_key
github_oauth_token
github_password
github_pwd
github_token
gpg_passphrase
heroku_api_key
key
keystore_pass
mapbox_access_token
mysql_password
npm_auth_token
npm_token
oauth_token
os_password
pass
passphrase
password
private_key
publish_key
pypi_password
release_token
repotoken
s3_access_key
s3_access_key_id
s3_key
s3_secret_key
sauce_access_key
secret
secret_key_base
signing_key
sonar_token
sonatype_password
sshpass
sshpassword
token
user_secret
Binary file added xgitguard/config/vectorizer.pkl
Binary file not shown.
11 changes: 11 additions & 0 deletions xgitguard/config/xgg_configs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,17 @@ secrets:
"Day",
"Hour",
]
precommit_data_collector_columns:
[
"File",
"Secret",
"Code",
"Key_Weight",
"SKey_Count",
"Entropy",
"Dictionary_Similarity",
"Score"
]
public_data_collector_columns:
[
"Source",
Expand Down
7 changes: 7 additions & 0 deletions xgitguard/executables/create_install_files_zip
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/sh
# Build the pre-commit detector with PyInstaller and bundle it, together with
# the pre-commit hook script, into xgitguard-precommit-hook.zip.
# Expected to be run from the directory that contains the "pre-commit" script.

# Abort on the first failure so a failed build or "cd" cannot lead to zipping
# from the wrong directory.
set -e

# Hidden imports are listed explicitly; presumably PyInstaller's static
# analysis does not detect them on its own - confirm before removing any.
pyinstaller ../git-precommit-hook/xgitguard_precommit_cred_detections.py \
    --hidden-import=sklearn.feature_extraction.text \
    --hidden-import=urlextract \
    --hidden-import=sklearn.model_selection \
    --hidden-import=sklearn.ensemble

# Hook script goes in at the archive root.
zip -r xgitguard-precommit-hook.zip pre-commit

# Add the built executable and its support directory from the build output,
# also at the archive root.
cd dist/xgitguard_precommit_cred_detections
zip -r ../../xgitguard-precommit-hook.zip xgitguard_precommit_cred_detections
zip -r ../../xgitguard-precommit-hook.zip _internal

# NOTE(review): 777 makes the archive world-writable; 644 is probably enough.
chmod 777 ../../xgitguard-precommit-hook.zip
7 changes: 7 additions & 0 deletions xgitguard/executables/pre-commit
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/sh
# xGitGuard pre-commit hook: run the credential detector and block the commit
# when its output mentions "no-verify" (presumably the detector prints a
# "git commit --no-verify" hint when it finds something - confirm).
#
# NOTE(review): the detector's exit status is ignored; if the executable is
# missing or crashes without that marker the commit is allowed.

results=$(~/gitconfig/hooks/xgitguard_precommit_cred_detections)
printf '%s\n' "$results"

# POSIX pattern match instead of the bash-only "<<<" here-string, so the hook
# also works when /bin/sh is not bash.
case "$results" in
    *no-verify*) exit 1 ;;
    *) exit 0 ;;
esac
4 changes: 4 additions & 0 deletions xgitguard/executables/xgitguard-pre-commit-installer
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/sh
# Install the xGitGuard pre-commit hook for the current user.
# Expects xgitguard-precommit-hook.zip in the current directory.

# Stop at the first failing step so git is never pointed at a half-installed
# hooks directory.
set -e

# -p creates ~/gitconfig as well as ~/gitconfig/hooks.
mkdir -p ~/gitconfig/hooks
unzip xgitguard-precommit-hook.zip -d ~/gitconfig/hooks

# Register the directory as the global hooks path. "git config --global"
# needs a key; core.hooksPath is the setting that makes git run hooks from
# this directory. Note that it replaces each repository's own .git/hooks.
git config --global core.hooksPath ~/gitconfig/hooks
Empty file.
Loading