Skip to content

Commit

Permalink
Main
Browse files (browse the repository at this point in the history)
  • Loading branch information
tedoaba committed Sep 17, 2024
1 parent 03be817 commit 0059bca
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 21 deletions.
21 changes: 5 additions & 16 deletions scripts/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,15 @@
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../src')))

import pandas as pd
from data_loader import load_data, preprocess_data, display_basic_info, print_unique_values
from eda import summarize_data, check_missing_values, correlation_analysis, handle_missing_values, get_numerical_columns, correlation_analysis, data_compression
from visualization import plot_histogram, plot_scatter, plot_missing_values, plot_histograms, plot_categorical_bars, plot_correlation_matrix, plot_postalcode_premium, plot_premium_vs_claims
from data_loader import load_data, display_basic_info, print_unique_values
from eda import check_missing_values, handle_missing_values, get_numerical_columns, correlation_analysis, data_compression
from visualization import plot_missing_values, plot_histograms, plot_categorical_bars, plot_correlation_matrix, plot_postalcode_premium, plot_premium_vs_claims

def main():
# Data Collection
file_path = '../data/cleaned_insurance_data.csv'
df = load_data(file_path)

#df = preprocess_data(df)

# EDA tasks
#summary = summarize_data(df)
#missing = check_missing_values(df)
#correlation = correlation_analysis(df)

# Visualization tasks
#plot_histogram(df, 'TotalPremium')
#plot_scatter(df, 'TotalPremium', 'TotalClaims', 'PostalCode')

#print("EDA complete!")

# Display basic information about the dataset
display_basic_info(df)
print_unique_values(df)
Expand All @@ -48,5 +35,7 @@ def main():
plot_postalcode_premium(df)
plot_premium_vs_claims(df)

print("EDA complete!")

if __name__ == '__main__':
main()
5 changes: 0 additions & 5 deletions src/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@ def load_data(file_path):
"""Load the dataset from a CSV file."""
return pd.read_csv(file_path, delimiter='|')

def preprocess_data(df):
    """Fill missing values and parse the ``TransactionMonth`` column.

    The frame is modified in place and the same object is returned, so
    callers may either use the return value or keep their own reference.

    Args:
        df: DataFrame that contains a ``TransactionMonth`` column.

    Returns:
        The same ``df`` object after preprocessing.

    Raises:
        KeyError: If ``df`` has no ``TransactionMonth`` column.
    """
    # Every missing cell, in every column, becomes the literal 'Unknown'.
    df.fillna('Unknown', inplace=True)
    # errors='coerce' turns anything unparseable into NaT -- including the
    # 'Unknown' placeholders written into this column by the line above.
    df['TransactionMonth'] = pd.to_datetime(df['TransactionMonth'], errors='coerce')
    return df
def display_basic_info(df):
print("Data Overview")
print(df.head())
Expand Down

0 comments on commit 0059bca

Please sign in to comment.