Add comprehensive error handling across ML-CaPsule #1207

Open · wants to merge 2 commits into base: master
48 changes: 48 additions & 0 deletions Diabetes_Prediction/diabetes_prediction.py
@@ -0,0 +1,48 @@
from utils.error_handler import error_handler, DataValidationError, ModelError
import logging
import pandas as pd
import numpy as np

@error_handler
def load_data(file_path):
    try:
        df = pd.read_csv(file_path)
        if df.empty:
            raise DataValidationError("Empty dataset loaded")
        return df
    except FileNotFoundError:
        raise DataValidationError(f"Dataset not found at {file_path}")

@error_handler
def preprocess_data(df):
    if not isinstance(df, pd.DataFrame):
        raise DataValidationError("Input must be a pandas DataFrame")

    # Impute missing values with column means (numeric columns only)
    if df.isnull().sum().any():
        logging.warning("Missing values detected in the dataset")
        df = df.fillna(df.mean(numeric_only=True))

    # Check for invalid values
    numeric_columns = df.select_dtypes(include=[np.number]).columns
    for col in numeric_columns:
        if (df[col] < 0).any():
            raise DataValidationError(f"Negative values found in column {col}")

    return df

@error_handler
def train_model(X_train, y_train, model_type='random_forest'):
    if len(X_train) != len(y_train):
        raise DataValidationError("Feature and target dimensions do not match")

    try:
        if model_type == 'random_forest':
            from sklearn.ensemble import RandomForestClassifier
            model = RandomForestClassifier(random_state=42)
        else:
            raise ValueError(f"Unsupported model type: {model_type}")

        model.fit(X_train, y_train)
        return model
    except Exception as e:
        raise ModelError(f"Model training failed: {str(e)}")
46 changes: 46 additions & 0 deletions Heart_Disease_Prediction/heart_disease_prediction.py
@@ -0,0 +1,46 @@
from utils.error_handler import error_handler, DataValidationError, ModelError
import logging
import pandas as pd
import numpy as np

@error_handler
def validate_heart_data(df):
    required_columns = [
        'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs',
        'restecg', 'thalach', 'exang', 'oldpeak', 'slope',
        'ca', 'thal', 'target'
    ]

    # Check required columns
    missing_cols = [col for col in required_columns if col not in df.columns]
    if missing_cols:
        raise DataValidationError(f"Missing required columns: {missing_cols}")

    # Validate value ranges
    if not (df['age'] >= 0).all():
        raise DataValidationError("Age cannot be negative")
    if not df['sex'].isin([0, 1]).all():
        raise DataValidationError("Sex must be binary (0 or 1)")
    if not (df['trestbps'] > 0).all():
        raise DataValidationError("Blood pressure must be positive")

    return True

@error_handler
def prepare_heart_data(df):
    try:
        validate_heart_data(df)

        # Impute missing values with column means (numeric columns only)
        if df.isnull().sum().any():
            logging.warning("Missing values found - applying mean imputation")
            df = df.fillna(df.mean(numeric_only=True))

        # Feature scaling (keep the binary target column unscaled)
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        numeric_cols = [c for c in df.select_dtypes(include=[np.number]).columns if c != 'target']
        df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

        return df
    except Exception as e:
        raise DataValidationError(f"Data preparation failed: {str(e)}")
38 changes: 38 additions & 0 deletions utils/error_handler.py
@@ -0,0 +1,38 @@
import logging
import sys
from functools import wraps

# Configure logging to both a file and stdout
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('ml_capsule.log'),
        logging.StreamHandler(sys.stdout)
    ]
)

class MLCapsuleError(Exception):
    """Base exception class for ML-CaPsule"""
    pass

class DataValidationError(MLCapsuleError):
    """Raised when data validation fails"""
    pass

class ModelError(MLCapsuleError):
    """Raised when model operations fail"""
    pass

def error_handler(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except MLCapsuleError as e:
            # Project-specific errors are logged and re-raised unchanged
            logging.error(f"ML-CaPsule error in {func.__name__}: {str(e)}")
            raise
        except Exception as e:
            # Anything else is wrapped so callers only need to handle MLCapsuleError
            logging.error(f"Unexpected error in {func.__name__}: {str(e)}")
            raise MLCapsuleError(f"Function {func.__name__} failed: {str(e)}") from e
    return wrapper
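
Since DataValidationError and ModelError both derive from MLCapsuleError, callers can catch a single type for every failure the decorator reports. A minimal sketch of that pattern; demo_divide is a hypothetical function, not part of this PR:

# Hypothetical demo of the decorator's wrapping behavior.
@error_handler
def demo_divide(a, b):
    return a / b

try:
    demo_divide(1, 0)  # ZeroDivisionError is logged, then re-raised as MLCapsuleError
except MLCapsuleError as e:
    print(f"Caught: {e}")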
159 changes: 159 additions & 0 deletions utils/model_evaluation.ipynb
@@ -0,0 +1,159 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Model Evaluation and Cross-Validation\n",
    "\n",
    "This notebook contains functions for evaluating classification models and performing cross-validation. It includes error handling to ensure robustness.\n",
    "\n",
    "## 1. Importing Libraries\n",
    "\n",
    "We will start by importing the necessary libraries."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import necessary libraries\n",
    "from utils.error_handler import error_handler, ModelError\n",
    "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n",
    "from sklearn.model_selection import cross_val_score\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Evaluating Classification Models\n",
    "\n",
    "The `evaluate_classification_model` function computes various evaluation metrics for classification models, including accuracy, precision, recall, and F1 score. It also includes error handling to manage mismatched dimensions between predictions and true labels."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@error_handler\n",
    "def evaluate_classification_model(y_true, y_pred):\n",
    "    \"\"\"\n",
    "    Evaluate the classification model using various metrics.\n",
    "\n",
    "    Parameters:\n",
    "    - y_true: array-like, true labels\n",
    "    - y_pred: array-like, predicted labels\n",
    "\n",
    "    Returns:\n",
    "    - metrics: dict, containing accuracy, precision, recall, and F1 score\n",
    "    \"\"\"\n",
    "    if len(y_true) != len(y_pred):\n",
    "        raise ModelError(\"Prediction and ground truth dimensions do not match\")\n",
    "\n",
    "    metrics = {\n",
    "        'accuracy': accuracy_score(y_true, y_pred),\n",
    "        'precision': precision_score(y_true, y_pred, average='weighted'),\n",
    "        'recall': recall_score(y_true, y_pred, average='weighted'),\n",
    "        'f1': f1_score(y_true, y_pred, average='weighted')\n",
    "    }\n",
    "\n",
    "    return metrics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Cross-Validation\n",
    "\n",
    "The `cross_validate_model` function performs cross-validation on a given model and dataset. It returns the mean and standard deviation of the scores obtained during cross-validation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@error_handler\n",
    "def cross_validate_model(model, X, y, cv=5):\n",
    "    \"\"\"\n",
    "    Perform cross-validation on the given model.\n",
    "\n",
    "    Parameters:\n",
    "    - model: the model to evaluate\n",
    "    - X: array-like, feature data\n",
    "    - y: array-like, target labels\n",
    "    - cv: int, number of cross-validation folds\n",
    "\n",
    "    Returns:\n",
    "    - dict: containing mean score, standard deviation, and individual scores\n",
    "    \"\"\"\n",
    "    scores = cross_val_score(model, X, y, cv=cv)\n",
    "    return {\n",
    "        'mean_score': np.mean(scores),\n",
    "        'std_score': np.std(scores),\n",
    "        'scores': scores\n",
    "    }"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Example Usage\n",
    "\n",
    "Below is an example of how to use the above functions with a sample dataset and model. Note that the model is evaluated on its own training data here, so the reported metrics are optimistic."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Example usage\n",
    "if __name__ == \"__main__\":\n",
    "    from sklearn.datasets import load_iris\n",
    "    from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "    # Load sample data\n",
    "    data = load_iris()\n",
    "    X, y = data.data, data.target\n",
    "\n",
    "    # Train a model\n",
    "    model = RandomForestClassifier(random_state=42)\n",
    "    model.fit(X, y)\n",
    "\n",
    "    # Make predictions\n",
    "    y_pred = model.predict(X)\n",
    "\n",
    "    # Evaluate the model\n",
    "    metrics = evaluate_classification_model(y, y_pred)\n",
    "    print(\"Evaluation Metrics:\", metrics)\n",
    "\n",
    "    # Perform cross-validation\n",
    "    cv_results = cross_validate_model(model, X, y, cv=5)\n",
    "    print(\"Cross-Validation Results:\", cv_results)"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
34 changes: 34 additions & 0 deletions utils/model_evaluation.py
@@ -0,0 +1,34 @@
from utils.error_handler import error_handler, ModelError
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

@error_handler
def evaluate_classification_model(y_true, y_pred):
    if len(y_true) != len(y_pred):
        raise ModelError("Prediction and ground truth dimensions do not match")

    try:
        metrics = {
            'accuracy': accuracy_score(y_true, y_pred),
            'precision': precision_score(y_true, y_pred, average='weighted'),
            'recall': recall_score(y_true, y_pred, average='weighted'),
            'f1': f1_score(y_true, y_pred, average='weighted')
        }

        return metrics
    except Exception as e:
        raise ModelError(f"Model evaluation failed: {str(e)}")

@error_handler
def cross_validate_model(model, X, y, cv=5):
    from sklearn.model_selection import cross_val_score

    try:
        scores = cross_val_score(model, X, y, cv=cv)
        return {
            'mean_score': np.mean(scores),
            'std_score': np.std(scores),
            'scores': scores
        }
    except Exception as e:
        raise ModelError(f"Cross-validation failed: {str(e)}")