diff --git a/Diabetes_Prediction/diabetes_prediction.py b/Diabetes_Prediction/diabetes_prediction.py
new file mode 100644
index 000000000..d04425ec2
--- /dev/null
+++ b/Diabetes_Prediction/diabetes_prediction.py
@@ -0,0 +1,49 @@
+import logging
+from utils.error_handler import error_handler, DataValidationError, ModelError
+import pandas as pd
+import numpy as np
+
+@error_handler
+def load_data(file_path):
+    try:
+        df = pd.read_csv(file_path)
+        if df.empty:
+            raise DataValidationError("Empty dataset loaded")
+        return df
+    except FileNotFoundError:
+        raise DataValidationError(f"Dataset not found at {file_path}")
+
+@error_handler
+def preprocess_data(df):
+    if not isinstance(df, pd.DataFrame):
+        raise DataValidationError("Input must be a pandas DataFrame")
+
+    # Check for missing values
+    if df.isnull().sum().any():
+        logging.warning("Missing values detected in the dataset")
+        df = df.fillna(df.mean(numeric_only=True))
+
+    # Check for invalid values
+    numeric_columns = df.select_dtypes(include=[np.number]).columns
+    for col in numeric_columns:
+        if (df[col] < 0).any():
+            raise DataValidationError(f"Negative values found in column {col}")
+
+    return df
+
+@error_handler
+def train_model(X_train, y_train, model_type='random_forest'):
+    if len(X_train) != len(y_train):
+        raise DataValidationError("Feature and target dimensions do not match")
+
+    try:
+        if model_type == 'random_forest':
+            from sklearn.ensemble import RandomForestClassifier
+            model = RandomForestClassifier(random_state=42)
+        else:
+            raise ValueError(f"Unsupported model type: {model_type}")
+
+        model.fit(X_train, y_train)
+        return model
+    except Exception as e:
+        raise ModelError(f"Model training failed: {str(e)}")
\ No newline at end of file
diff --git a/Heart_Disease_Prediction/heart_disease_prediction.py b/Heart_Disease_Prediction/heart_disease_prediction.py
new file mode 100644
index 000000000..60b49b422
--- /dev/null
+++ b/Heart_Disease_Prediction/heart_disease_prediction.py
@@ -0,0 +1,47 @@
+import logging
+from utils.error_handler import error_handler, DataValidationError, ModelError
+import pandas as pd
+import numpy as np
+
+@error_handler
+def validate_heart_data(df):
+    required_columns = [
+        'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs',
+        'restecg', 'thalach', 'exang', 'oldpeak', 'slope',
+        'ca', 'thal', 'target'
+    ]
+
+    # Check required columns
+    missing_cols = [col for col in required_columns if col not in df.columns]
+    if missing_cols:
+        raise DataValidationError(f"Missing required columns: {missing_cols}")
+
+    # Validate value ranges
+    if not (df['age'] >= 0).all():
+        raise DataValidationError("Age cannot be negative")
+    if not df['sex'].isin([0, 1]).all():
+        raise DataValidationError("Sex must be binary (0 or 1)")
+    if not (df['trestbps'] > 0).all():
+        raise DataValidationError("Blood pressure must be positive")
+
+    return True
+
+@error_handler
+def prepare_heart_data(df):
+    try:
+        validate_heart_data(df)
+
+        # Handle missing values
+        if df.isnull().sum().any():
+            logging.warning("Missing values found - applying mean imputation")
+            df = df.fillna(df.mean(numeric_only=True))
+
+        # Feature scaling (leave the binary target unscaled)
+        from sklearn.preprocessing import StandardScaler
+        scaler = StandardScaler()
+        numeric_cols = df.select_dtypes(include=[np.number]).columns.drop('target')
+        df[numeric_cols] = scaler.fit_transform(df[numeric_cols])
+
+        return df
+    except Exception as e:
+        raise DataValidationError(f"Data preparation failed: {str(e)}")
\ No newline at end of file
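As a quick orientation (not part of the patch itself), here is a minimal sketch of how the diabetes helpers above are meant to compose. The CSV path and the `Outcome` target column are assumptions for illustration, and the import assumes the repository root is on `sys.path`:

```python
# Hypothetical end-to-end usage of the helpers added above.
from sklearn.model_selection import train_test_split

from Diabetes_Prediction.diabetes_prediction import load_data, preprocess_data, train_model

df = load_data("Diabetes_Prediction/diabetes.csv")  # assumed path, not part of this diff
df = preprocess_data(df)

# 'Outcome' is the usual Pima-dataset target name; adjust to your dataset.
X = df.drop(columns=["Outcome"])
y = df["Outcome"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = train_model(X_train, y_train, model_type="random_forest")
print("Test accuracy:", model.score(X_test, y_test))
```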
diff --git a/utils/error_handler.py b/utils/error_handler.py
new file mode 100644
index 000000000..7fe4346b8
--- /dev/null
+++ b/utils/error_handler.py
@@ -0,0 +1,38 @@
+import logging
+import sys
+from functools import wraps
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler('ml_capsule.log'),
+        logging.StreamHandler(sys.stdout)
+    ]
+)
+
+class MLCapsuleError(Exception):
+    """Base exception class for ML-CaPsule"""
+    pass
+
+class DataValidationError(MLCapsuleError):
+    """Raised when data validation fails"""
+    pass
+
+class ModelError(MLCapsuleError):
+    """Raised when model operations fail"""
+    pass
+
+def error_handler(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except MLCapsuleError as e:
+            logging.error(f"ML-CaPsule error in {func.__name__}: {str(e)}")
+            raise
+        except Exception as e:
+            logging.error(f"Unexpected error in {func.__name__}: {str(e)}")
+            raise MLCapsuleError(f"Function {func.__name__} failed: {str(e)}") from e
+    return wrapper
\ No newline at end of file
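To make the decorator's contract concrete, here is a small sketch using an invented `risky_divide` function: a project exception (`MLCapsuleError` or a subclass) is logged and re-raised as-is, while any other exception is logged and wrapped in the project's base exception:

```python
from utils.error_handler import error_handler, MLCapsuleError

@error_handler
def risky_divide(a, b):
    # Deliberately fragile function, used only to demonstrate the wrapper.
    return a / b

try:
    risky_divide(1, 0)  # raises ZeroDivisionError inside the wrapped call
except MLCapsuleError as e:
    # The original error was logged, then surfaced as the base exception,
    # with the ZeroDivisionError preserved as e.__cause__.
    print(e)  # Function risky_divide failed: division by zero
```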
diff --git a/utils/model_evaluation.ipynb b/utils/model_evaluation.ipynb
new file mode 100644
index 000000000..54a6db483
--- /dev/null
+++ b/utils/model_evaluation.ipynb
@@ -0,0 +1,159 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Model Evaluation and Cross-Validation\n",
+    "\n",
+    "This notebook contains functions for evaluating classification models and performing cross-validation. It includes error handling to ensure robustness.\n",
+    "\n",
+    "## 1. Importing Libraries\n",
+    "\n",
+    "We will start by importing the necessary libraries."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import necessary libraries\n",
+    "from utils.error_handler import error_handler, ModelError\n",
+    "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n",
+    "from sklearn.model_selection import cross_val_score\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Evaluating Classification Models\n",
+    "\n",
+    "The `evaluate_classification_model` function computes various evaluation metrics for classification models, including accuracy, precision, recall, and F1 score. It also includes error handling to manage mismatched dimensions between predictions and true labels."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@error_handler\n",
+    "def evaluate_classification_model(y_true, y_pred):\n",
+    "    \"\"\"\n",
+    "    Evaluate the classification model using various metrics.\n",
+    "\n",
+    "    Parameters:\n",
+    "    - y_true: array-like, true labels\n",
+    "    - y_pred: array-like, predicted labels\n",
+    "\n",
+    "    Returns:\n",
+    "    - metrics: dict, containing accuracy, precision, recall, and F1 score\n",
+    "    \"\"\"\n",
+    "    if len(y_true) != len(y_pred):\n",
+    "        raise ModelError(\"Prediction and ground truth dimensions do not match\")\n",
+    "    \n",
+    "    metrics = {\n",
+    "        'accuracy': accuracy_score(y_true, y_pred),\n",
+    "        'precision': precision_score(y_true, y_pred, average='weighted'),\n",
+    "        'recall': recall_score(y_true, y_pred, average='weighted'),\n",
+    "        'f1': f1_score(y_true, y_pred, average='weighted')\n",
+    "    }\n",
+    "    \n",
+    "    return metrics"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Cross-Validation\n",
+    "\n",
+    "The `cross_validate_model` function performs cross-validation on a given model and dataset. It returns the mean and standard deviation of the scores obtained during cross-validation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@error_handler\n",
+    "def cross_validate_model(model, X, y, cv=5):\n",
+    "    \"\"\"\n",
+    "    Perform cross-validation on the given model.\n",
+    "\n",
+    "    Parameters:\n",
+    "    - model: the model to evaluate\n",
+    "    - X: array-like, feature data\n",
+    "    - y: array-like, target labels\n",
+    "    - cv: int, number of cross-validation folds\n",
+    "\n",
+    "    Returns:\n",
+    "    - dict: containing mean score, standard deviation, and individual scores\n",
+    "    \"\"\"\n",
+    "    scores = cross_val_score(model, X, y, cv=cv)\n",
+    "    return {\n",
+    "        'mean_score': np.mean(scores),\n",
+    "        'std_score': np.std(scores),\n",
+    "        'scores': scores\n",
+    "    }"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Example Usage\n",
+    "\n",
+    "The example below applies both functions to the Iris dataset with a random forest classifier. For brevity, the model is scored on the data it was trained on, which overstates its performance."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Example usage\n",
+    "if __name__ == \"__main__\":\n",
+    "    from sklearn.datasets import load_iris\n",
+    "    from sklearn.ensemble import RandomForestClassifier\n",
+    "\n",
+    "    # Load sample data\n",
+    "    data = load_iris()\n",
+    "    X, y = data.data, data.target\n",
+    "\n",
+    "    # Train a model\n",
+    "    model = RandomForestClassifier(random_state=42)\n",
+    "    model.fit(X, y)\n",
+    "\n",
+    "    # Make predictions\n",
+    "    y_pred = model.predict(X)\n",
+    "\n",
+    "    # Evaluate the model\n",
+    "    metrics = evaluate_classification_model(y, y_pred)\n",
+    "    print(\"Evaluation Metrics:\", metrics)\n",
+    "\n",
+    "    # Perform cross-validation\n",
+    "    cv_results = cross_validate_model(model, X, y, cv=5)\n",
+    "    print(\"Cross-Validation Results:\", cv_results)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/utils/model_evaluation.py b/utils/model_evaluation.py
new file mode 100644
index 000000000..2878ccd2a
--- /dev/null
+++ b/utils/model_evaluation.py
@@ -0,0 +1,34 @@
+from utils.error_handler import error_handler, ModelError
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+import numpy as np
+
+@error_handler
+def evaluate_classification_model(y_true, y_pred):
+    if len(y_true) != len(y_pred):
+        raise ModelError("Prediction and ground truth dimensions do not match")
+
+    try:
+        metrics = {
+            'accuracy': accuracy_score(y_true, y_pred),
+            'precision': precision_score(y_true, y_pred, average='weighted'),
+            'recall': recall_score(y_true, y_pred, average='weighted'),
+            'f1': f1_score(y_true, y_pred, average='weighted')
+        }
+
+        return metrics
+    except Exception as e:
+        raise ModelError(f"Model evaluation failed: {str(e)}")
+
+@error_handler
+def cross_validate_model(model, X, y, cv=5):
+    from sklearn.model_selection import cross_val_score
+
+    try:
+        scores = cross_val_score(model, X, y, cv=cv)
+        return {
+            'mean_score': np.mean(scores),
+            'std_score': np.std(scores),
+            'scores': scores
+        }
+    except Exception as e:
+        raise ModelError(f"Cross-validation failed: {str(e)}")
\ No newline at end of file
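Since the notebook's example scores the model on its own training data, here is a minimal sketch of the same utilities with a held-out split instead. This is illustration only; it assumes the repository root is on `sys.path` so that `utils` is importable:

```python
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from utils.model_evaluation import evaluate_classification_model, cross_validate_model

# Stratified split keeps class proportions in both halves.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Metrics on unseen data give a more honest picture than scoring the training set.
print("Held-out metrics:", evaluate_classification_model(y_test, model.predict(X_test)))
print("CV on training split:", cross_validate_model(model, X_train, y_train, cv=5))
```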