-
Notifications
You must be signed in to change notification settings - Fork 0
/
Stroke Prediction - Classification
1 lines (1 loc) · 33.6 KB
/
Stroke Prediction - Classification
1
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"pygments_lexer":"ipython3","nbconvert_exporter":"python","version":"3.6.4","file_extension":".py","codemirror_mode":{"name":"ipython","version":3},"name":"python","mimetype":"text/x-python"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"import pandas as pd \nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nimport sketch \nimport plotly.express as px\nfrom dataprep.eda import plot, plot_correlation\nimport tensorflow as tf\nimport autokeras as ak\n%matplotlib inline\npd.set_option('display.max_columns', 50)","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2023-10-24T07:33:57.328269Z","iopub.execute_input":"2023-10-24T07:33:57.328832Z","iopub.status.idle":"2023-10-24T07:34:06.108381Z","shell.execute_reply.started":"2023-10-24T07:33:57.328793Z","shell.execute_reply":"2023-10-24T07:34:06.107156Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df = pd.read_csv('/kaggle/input/stroke-prediction/stroke_prediction_dataset.csv')","metadata":{"execution":{"iopub.status.busy":"2023-10-24T07:34:06.1101Z","iopub.execute_input":"2023-10-24T07:34:06.11107Z","iopub.status.idle":"2023-10-24T07:34:06.218844Z","shell.execute_reply.started":"2023-10-24T07:34:06.111034Z","shell.execute_reply":"2023-10-24T07:34:06.217609Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# 
# %% [markdown]
# # EDA

# %%
# Column dtypes and non-null counts of the raw frame.
df.info()

# %%
# Summary of the object (string) columns, one row per column.
df.describe(include='O').T

# %%
# Summary of the numeric columns, rounded and colour-graded.
# NOTE: 'icefire' is not a built-in matplotlib colormap; it is registered by
# `import seaborn`, so that import must stay even though `sns` is never called.
df.describe().round().T.style.background_gradient(cmap='icefire')

# %%
# dataprep overview report for every column.
plot(df)

# %%
# Trailing semicolon suppresses the Axes repr.
df.boxplot(figsize=(10, 10), rot=20);

# %%
plot(df, 'Symptoms')

# %%
# Numeric columns compared across groups in the box plots below.
NUMERIC_FEATURES = ['Age', 'Average Glucose Level', 'Body Mass Index (BMI)', 'Stress Levels']


def box_by_group(frame, group_col, value_cols=NUMERIC_FEATURES):
    """Build a plotly box plot of several numeric columns split by one grouping column.

    Parameters
    ----------
    frame : pandas.DataFrame
        Source data; it is not modified.
    group_col : str
        Column placed on the x axis (one group of boxes per distinct value).
    value_cols : list of str
        Columns melted into long form; each becomes one colour in the figure.

    Returns
    -------
    The figure returned by ``px.box``, titled ``'<group_col> Analysis'``.
    """
    # Long form: one row per (group value, variable, value) so plotly can
    # colour by the 'variable' column that pd.melt creates.
    long_frame = pd.melt(frame, id_vars=group_col, value_vars=value_cols)
    return px.box(long_frame, x=group_col, y='value', color='variable',
                  title=f'{group_col} Analysis')

# %%
box_by_group(df, 'Gender').show()

# %%
box_by_group(df, 'Diagnosis').show()

# %%
px.scatter_3d(
    df,
    x='Age',
    y='Body Mass Index (BMI)',
    z='Average Glucose Level',
    color='Diagnosis',
    hover_data=['Gender', 'Hypertension', 'Heart Disease', 'Stress Levels',
                'Stroke History', 'Family History of Stroke'],
    title='Age vs BMI vs Average Glucose Level',
).show()
# %%
def show_pairs(frame, pairs):
    """Render one dataprep ``plot(frame, x, y)`` report per ``(x, y)`` pair.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data passed unchanged to ``plot``.
    pairs : iterable of (str, str)
        Column-name pairs, forwarded to ``plot`` in the given (x, y) order.

    ``display`` is the IPython built-in. It is required here because a notebook
    only renders the last expression of a cell automatically, so reports
    created inside a loop would otherwise be discarded.
    """
    for x_col, y_col in pairs:
        display(plot(frame, x_col, y_col))

# %% [markdown]
# ## Gender against the other columns

# %%
show_pairs(df, [
    ('Gender', 'Marital Status'),
    ('Gender', 'Hypertension'),
    ('Gender', 'Heart Disease'),
    ('Gender', 'Work Type'),
    ('Gender', 'Residence Type'),
    ('Gender', 'Smoking Status'),
    ('Gender', 'Alcohol Intake'),
    ('Gender', 'Physical Activity'),
    ('Gender', 'Stroke History'),
    ('Gender', 'Family History of Stroke'),
    ('Gender', 'Dietary Habits'),
    ('Gender', 'Blood Pressure Levels'),
    ('Gender', 'Cholesterol Levels'),
    ('Gender', 'Symptoms'),
    ('Gender', 'Diagnosis'),
])

# %% [markdown]
# ## History, lifestyle and clinical columns against numeric measurements

# %%
show_pairs(df, [
    ('Family History of Stroke', 'Age'),
    ('Stroke History', 'Age'),
    ('Hypertension', 'Stress Levels'),
    ('Work Type', 'Age'),
    ('Residence Type', 'Age'),
    ('Stroke History', 'Average Glucose Level'),
    ('Stroke History', 'Body Mass Index (BMI)'),
    ('Dietary Habits', 'Age'),
    ('Hypertension', 'Blood Pressure Levels'),
    ('Heart Disease', 'Cholesterol Levels'),
])

# %%
# Number of distinct values per column.
df.nunique()

# %% [markdown]
# ## Residence type against the other columns

# %%
show_pairs(df, [
    ('Residence Type', 'Body Mass Index (BMI)'),
    ('Residence Type', 'Average Glucose Level'),
    ('Residence Type', 'Stress Levels'),
    ('Residence Type', 'Dietary Habits'),
    ('Residence Type', 'Smoking Status'),
    ('Residence Type', 'Alcohol Intake'),
    ('Residence Type', 'Physical Activity'),
    ('Residence Type', 'Diagnosis'),
])

# %% [markdown]
# ## Diagnosis (the modelling target) against the other columns

# %%
show_pairs(df, [
    ('Diagnosis', 'Symptoms'),
    ('Diagnosis', 'Hypertension'),
    ('Diagnosis', 'Heart Disease'),
    ('Diagnosis', 'Marital Status'),
    ('Diagnosis', 'Work Type'),
    ('Diagnosis', 'Alcohol Intake'),
    ('Diagnosis', 'Smoking Status'),
    ('Diagnosis', 'Physical Activity'),
    ('Diagnosis', 'Stroke History'),
    ('Diagnosis', 'Family History of Stroke'),
    ('Diagnosis', 'Dietary Habits'),
    ('Diagnosis', 'Stress Levels'),
    ('Diagnosis', 'Blood Pressure Levels'),
    ('Diagnosis', 'Cholesterol Levels'),
])
# %% [markdown]
# # Modelling

# %%
# Columns removed before modelling. 'Cholesterol Levels' is dropped only after
# the HDL and LDL numbers have been extracted from it.
DROPPED_COLUMNS = ['Patient Name', 'Patient ID', 'Cholesterol Levels', 'Symptoms']

# String columns replaced by integer category codes, in the order their
# category listings are printed.
CATEGORICAL_COLUMNS = [
    'Gender',
    'Marital Status',
    'Work Type',
    'Residence Type',
    'Smoking Status',
    'Alcohol Intake',
    'Physical Activity',
    'Family History of Stroke',
    'Dietary Habits',
    'Diagnosis',
]


def _blood_pressure_ratio(readings):
    """Convert 'a/b' strings to the ratio a / b, rounded to two decimals.

    NOTE(review): the two numbers are presumably systolic/diastolic readings;
    confirm against the dataset description.
    """
    parts = readings.str.split('/', expand=True).astype(float)
    return (parts[0] / parts[1]).round(2)


def _cholesterol_component(levels, label):
    """Extract the integer that follows '<label>: ' in each string of `levels`.

    Raises whatever ``astype(int)`` raises when a row has no match.
    """
    # Raw f-string: `\d` must reach the regex engine unescaped.
    return levels.str.extract(rf'{label}: (\d+)')[0].astype(int)


def prepare_data(df):
    """Return a shuffled, fully numeric copy of `df` ready for modelling.

    The input frame is never modified, so the cell can be re-run safely.

    Steps
    -----
    1. If 'Blood Pressure Levels' is not numeric yet, replace it with the
       ratio of its two '/'-separated numbers.
    2. If 'HDL' / 'LDL' columns are missing, extract them from
       'Cholesterol Levels' and append them (HDL first).
    3. Drop ``DROPPED_COLUMNS`` and shuffle the rows with ``random_state=42``
       (the original index labels are kept).
    4. Replace every column in ``CATEGORICAL_COLUMNS`` with the integer codes
       of an ordered ``pd.Categorical`` whose categories are inferred from the
       data; the category listing is printed so the code -> label mapping
       can be read off.  Missing values receive the code -1.

    Steps 1 and 2 are skipped for a frame that already carries the derived
    columns, so a frame prepared by the earlier in-place cells gives the same
    result as a raw one.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns named in ``DROPPED_COLUMNS``,
        ``CATEGORICAL_COLUMNS`` and 'Blood Pressure Levels'.

    Returns
    -------
    pandas.DataFrame
        New frame with the columns described above.
    """
    frame = df.copy()

    if not pd.api.types.is_numeric_dtype(frame['Blood Pressure Levels']):
        frame['Blood Pressure Levels'] = _blood_pressure_ratio(frame['Blood Pressure Levels'])

    for label in ('HDL', 'LDL'):
        if label not in frame.columns:
            frame[label] = _cholesterol_component(frame['Cholesterol Levels'], label)

    frame = frame.drop(columns=DROPPED_COLUMNS).sample(frac=1, random_state=42)

    for column in CATEGORICAL_COLUMNS:
        encoded = pd.Categorical(frame[column], ordered=True)
        print(encoded.categories)
        # `codes` is positional, matching the (already shuffled) row order.
        frame[column] = encoded.codes

    return frame
# %%
data = prepare_data(df)

# %%
data.head()

# %%
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, ConfusionMatrixDisplay, RocCurveDisplay

# One seed for the split and the architecture search, so a
# Restart-and-Run-All evaluates on the same hold-out set every time.
SEED = 42

X = data.drop(columns='Diagnosis')
y = data['Diagnosis']

X_train, X_test, y_train, y_test = train_test_split(
    X.values, y.values, test_size=.2, random_state=SEED
)

# overwrite=True discards any previous search results in the project directory.
clf = ak.StructuredDataClassifier(overwrite=True, max_trials=10, seed=SEED)
history = clf.fit(X_train, y_train, epochs=10)

# %%
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

# %%
ConfusionMatrixDisplay.from_predictions(y_test, y_pred);

# %%
# NOTE(review): y_pred is the output of clf.predict. If that is hard class
# labels rather than scores, this curve has a single operating point --
# confirm, and pass predicted probabilities if a full ROC curve is wanted.
RocCurveDisplay.from_predictions(y_test, y_pred);

# %% [markdown]
# # Using Pycaret

# %%
# Explicit names instead of `import *`, so it is clear where they come from.
from pycaret.classification import compare_models, plot_model, setup

s = setup(data=data, target='Diagnosis', session_id=123)

# %%
best = compare_models()

# %%
plot_model(best, plot='auc')

# %%
plot_model(best, plot='confusion_matrix')

# %%
plot_model(best, plot='class_report')