-
Notifications
You must be signed in to change notification settings - Fork 0
/
Data Analysis for Stroke Prediction dataset
1 lines (1 loc) · 47 KB
/
Data Analysis for Stroke Prediction dataset
1
{"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.10.14","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":9030035,"sourceType":"datasetVersion","datasetId":3895998}],"dockerImageVersionId":30761,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"<iframe src=\"https://www.kaggle.com/embed/soumyaganesh12/modelinterpretability-strokeprediction?cellIds=5&kernelSessionId=171590140\" height=\"600\" style=\"margin: 0 auto; width: 100%; max-width: 1950px;\" frameborder=\"0\" scrolling=\"auto\" title=\"ModelInterpretability_StrokePrediction\"></iframe>","metadata":{}},{"cell_type":"code","source":"import pandas as pd\nimport seaborn as sns\nimport matplotlib.pyplot as plt\nimport plotly.express as px","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:01.48675Z","iopub.execute_input":"2024-09-19T00:50:01.487201Z","iopub.status.idle":"2024-09-19T00:50:04.851407Z","shell.execute_reply.started":"2024-09-19T00:50:01.487143Z","shell.execute_reply":"2024-09-19T00:50:04.850052Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df = 
pd.read_csv(\"/kaggle/input/stroke-prediction/stroke_prediction_dataset.csv\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:04.854159Z","iopub.execute_input":"2024-09-19T00:50:04.854971Z","iopub.status.idle":"2024-09-19T00:50:05.01092Z","shell.execute_reply.started":"2024-09-19T00:50:04.854912Z","shell.execute_reply":"2024-09-19T00:50:05.009726Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.head()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.012338Z","iopub.execute_input":"2024-09-19T00:50:05.012803Z","iopub.status.idle":"2024-09-19T00:50:05.056966Z","shell.execute_reply.started":"2024-09-19T00:50:05.012752Z","shell.execute_reply":"2024-09-19T00:50:05.05565Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.info()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.060011Z","iopub.execute_input":"2024-09-19T00:50:05.060374Z","iopub.status.idle":"2024-09-19T00:50:05.108827Z","shell.execute_reply.started":"2024-09-19T00:50:05.060334Z","shell.execute_reply":"2024-09-19T00:50:05.107447Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.describe()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.110433Z","iopub.execute_input":"2024-09-19T00:50:05.111496Z","iopub.status.idle":"2024-09-19T00:50:05.161969Z","shell.execute_reply.started":"2024-09-19T00:50:05.111438Z","shell.execute_reply":"2024-09-19T00:50:05.160648Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Data 
Cleaning","metadata":{}},{"cell_type":"code","source":"df.columns","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.163486Z","iopub.execute_input":"2024-09-19T00:50:05.163952Z","iopub.status.idle":"2024-09-19T00:50:05.1724Z","shell.execute_reply.started":"2024-09-19T00:50:05.16391Z","shell.execute_reply":"2024-09-19T00:50:05.170767Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.columns = df.columns.str.replace(\" \",\"_\").str.lower().str.strip()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.174305Z","iopub.execute_input":"2024-09-19T00:50:05.175638Z","iopub.status.idle":"2024-09-19T00:50:05.189026Z","shell.execute_reply.started":"2024-09-19T00:50:05.175552Z","shell.execute_reply":"2024-09-19T00:50:05.187416Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.columns","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.190661Z","iopub.execute_input":"2024-09-19T00:50:05.192988Z","iopub.status.idle":"2024-09-19T00:50:05.210671Z","shell.execute_reply.started":"2024-09-19T00:50:05.19294Z","shell.execute_reply":"2024-09-19T00:50:05.209365Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.duplicated().sum()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.212612Z","iopub.execute_input":"2024-09-19T00:50:05.213818Z","iopub.status.idle":"2024-09-19T00:50:05.261798Z","shell.execute_reply.started":"2024-09-19T00:50:05.213756Z","shell.execute_reply":"2024-09-19T00:50:05.260473Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.isnull().sum()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.267463Z","iopub.execute_input":"2024-09-19T00:50:05.268156Z","iopub.status.idle":"2024-09-19T00:50:05.302755Z","shell.execute_reply.started":"2024-09-19T00:50:05.268111Z","shell.execute_reply":"2024-09-19T00:50:05.301417Z"},"trusted":true},"execution_
count":null,"outputs":[]},{"cell_type":"code","source":"df.isnull().mean()*100","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.304671Z","iopub.execute_input":"2024-09-19T00:50:05.305151Z","iopub.status.idle":"2024-09-19T00:50:05.34955Z","shell.execute_reply.started":"2024-09-19T00:50:05.305084Z","shell.execute_reply":"2024-09-19T00:50:05.34812Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"symptoms\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.35101Z","iopub.execute_input":"2024-09-19T00:50:05.351379Z","iopub.status.idle":"2024-09-19T00:50:05.370211Z","shell.execute_reply.started":"2024-09-19T00:50:05.35132Z","shell.execute_reply":"2024-09-19T00:50:05.368814Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"symptoms\"].mode()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.371816Z","iopub.execute_input":"2024-09-19T00:50:05.372313Z","iopub.status.idle":"2024-09-19T00:50:05.386466Z","shell.execute_reply.started":"2024-09-19T00:50:05.372256Z","shell.execute_reply":"2024-09-19T00:50:05.38529Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"symptoms\"] = 
df[\"symptoms\"].fillna(df[\"symptoms\"].mode()[0])","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.388082Z","iopub.execute_input":"2024-09-19T00:50:05.388764Z","iopub.status.idle":"2024-09-19T00:50:05.409033Z","shell.execute_reply.started":"2024-09-19T00:50:05.388706Z","shell.execute_reply":"2024-09-19T00:50:05.407928Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"symptoms\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.410669Z","iopub.execute_input":"2024-09-19T00:50:05.411169Z","iopub.status.idle":"2024-09-19T00:50:05.435624Z","shell.execute_reply.started":"2024-09-19T00:50:05.411113Z","shell.execute_reply":"2024-09-19T00:50:05.434191Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.isnull().sum()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.437201Z","iopub.execute_input":"2024-09-19T00:50:05.437704Z","iopub.status.idle":"2024-09-19T00:50:05.47561Z","shell.execute_reply.started":"2024-09-19T00:50:05.437647Z","shell.execute_reply":"2024-09-19T00:50:05.474415Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"gender\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.47729Z","iopub.execute_input":"2024-09-19T00:50:05.477819Z","iopub.status.idle":"2024-09-19T00:50:05.492636Z","shell.execute_reply.started":"2024-09-19T00:50:05.477773Z","shell.execute_reply":"2024-09-19T00:50:05.491361Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"marital_status\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.494306Z","iopub.execute_input":"2024-09-19T00:50:05.494775Z","iopub.status.idle":"2024-09-19T00:50:05.512758Z","shell.execute_reply.started":"2024-09-19T00:50:05.494717Z","shell.execute_reply":"2024-09-19T00:50:05.511634Z"},"trusted":true},"execution_count":null,"outpu
ts":[]},{"cell_type":"code","source":"df[\"work_type\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.514237Z","iopub.execute_input":"2024-09-19T00:50:05.514975Z","iopub.status.idle":"2024-09-19T00:50:05.529303Z","shell.execute_reply.started":"2024-09-19T00:50:05.514935Z","shell.execute_reply":"2024-09-19T00:50:05.528012Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"residence_type\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.531043Z","iopub.execute_input":"2024-09-19T00:50:05.531554Z","iopub.status.idle":"2024-09-19T00:50:05.545999Z","shell.execute_reply.started":"2024-09-19T00:50:05.531511Z","shell.execute_reply":"2024-09-19T00:50:05.544548Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"smoking_status\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.548093Z","iopub.execute_input":"2024-09-19T00:50:05.548635Z","iopub.status.idle":"2024-09-19T00:50:05.565684Z","shell.execute_reply.started":"2024-09-19T00:50:05.548561Z","shell.execute_reply":"2024-09-19T00:50:05.564254Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"alcohol_intake\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.567203Z","iopub.execute_input":"2024-09-19T00:50:05.567648Z","iopub.status.idle":"2024-09-19T00:50:05.580443Z","shell.execute_reply.started":"2024-09-19T00:50:05.567602Z","shell.execute_reply":"2024-09-19T00:50:05.579141Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"physical_activity\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.582399Z","iopub.execute_input":"2024-09-19T00:50:05.582865Z","iopub.status.idle":"2024-09-19T00:50:05.597923Z","shell.execute_reply.started":"2024-09-19T00:50:05.582812Z","shell.execute_reply":"2024-09-19T00:50:
05.596542Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"family_history_of_stroke\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.59951Z","iopub.execute_input":"2024-09-19T00:50:05.599931Z","iopub.status.idle":"2024-09-19T00:50:05.6128Z","shell.execute_reply.started":"2024-09-19T00:50:05.599882Z","shell.execute_reply":"2024-09-19T00:50:05.611446Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"dietary_habits\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.614416Z","iopub.execute_input":"2024-09-19T00:50:05.614898Z","iopub.status.idle":"2024-09-19T00:50:05.629938Z","shell.execute_reply.started":"2024-09-19T00:50:05.614857Z","shell.execute_reply":"2024-09-19T00:50:05.628347Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"blood_pressure_levels\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.631513Z","iopub.execute_input":"2024-09-19T00:50:05.63227Z","iopub.status.idle":"2024-09-19T00:50:05.649218Z","shell.execute_reply.started":"2024-09-19T00:50:05.632204Z","shell.execute_reply":"2024-09-19T00:50:05.648135Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"blood_pressure_levels\"] = 
df[\"blood_pressure_levels\"].str.split(\"/\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.651347Z","iopub.execute_input":"2024-09-19T00:50:05.651833Z","iopub.status.idle":"2024-09-19T00:50:05.675234Z","shell.execute_reply.started":"2024-09-19T00:50:05.651779Z","shell.execute_reply":"2024-09-19T00:50:05.673992Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"blood_pressure_levels\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:05.688345Z","iopub.execute_input":"2024-09-19T00:50:05.688792Z","iopub.status.idle":"2024-09-19T00:50:09.523147Z","shell.execute_reply.started":"2024-09-19T00:50:05.68875Z","shell.execute_reply":"2024-09-19T00:50:09.522011Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"cholesterol_levels\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:09.524512Z","iopub.execute_input":"2024-09-19T00:50:09.52488Z","iopub.status.idle":"2024-09-19T00:50:09.540799Z","shell.execute_reply.started":"2024-09-19T00:50:09.524842Z","shell.execute_reply":"2024-09-19T00:50:09.53963Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"cholesterol_levels\"] =df[\"cholesterol_levels\"].str.replace(\",\",\" 
\").str.split()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:09.542336Z","iopub.execute_input":"2024-09-19T00:50:09.542815Z","iopub.status.idle":"2024-09-19T00:50:09.577781Z","shell.execute_reply.started":"2024-09-19T00:50:09.542764Z","shell.execute_reply":"2024-09-19T00:50:09.576674Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"cholesterol_levels\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:09.579287Z","iopub.execute_input":"2024-09-19T00:50:09.5797Z","iopub.status.idle":"2024-09-19T00:50:18.948012Z","shell.execute_reply.started":"2024-09-19T00:50:09.579656Z","shell.execute_reply":"2024-09-19T00:50:18.946652Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"symptoms\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:18.949856Z","iopub.execute_input":"2024-09-19T00:50:18.950259Z","iopub.status.idle":"2024-09-19T00:50:18.966392Z","shell.execute_reply.started":"2024-09-19T00:50:18.95022Z","shell.execute_reply":"2024-09-19T00:50:18.965129Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"diagnosis\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:18.968135Z","iopub.execute_input":"2024-09-19T00:50:18.968526Z","iopub.status.idle":"2024-09-19T00:50:18.980813Z","shell.execute_reply.started":"2024-09-19T00:50:18.968485Z","shell.execute_reply":"2024-09-19T00:50:18.979643Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.info()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:18.982396Z","iopub.execute_input":"2024-09-19T00:50:18.982825Z","iopub.status.idle":"2024-09-19T00:50:19.025752Z","shell.execute_reply.started":"2024-09-19T00:50:18.982782Z","shell.execute_reply":"2024-09-19T00:50:19.024276Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"
df[\"stress_levels\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.027207Z","iopub.execute_input":"2024-09-19T00:50:19.027627Z","iopub.status.idle":"2024-09-19T00:50:19.040077Z","shell.execute_reply.started":"2024-09-19T00:50:19.027586Z","shell.execute_reply":"2024-09-19T00:50:19.038795Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"stroke_history\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.041731Z","iopub.execute_input":"2024-09-19T00:50:19.042642Z","iopub.status.idle":"2024-09-19T00:50:19.053089Z","shell.execute_reply.started":"2024-09-19T00:50:19.042561Z","shell.execute_reply":"2024-09-19T00:50:19.051837Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"body_mass_index_(bmi)\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.05475Z","iopub.execute_input":"2024-09-19T00:50:19.055204Z","iopub.status.idle":"2024-09-19T00:50:19.067678Z","shell.execute_reply.started":"2024-09-19T00:50:19.05515Z","shell.execute_reply":"2024-09-19T00:50:19.066485Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"average_glucose_level\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.069061Z","iopub.execute_input":"2024-09-19T00:50:19.069475Z","iopub.status.idle":"2024-09-19T00:50:19.084834Z","shell.execute_reply.started":"2024-09-19T00:50:19.069435Z","shell.execute_reply":"2024-09-19T00:50:19.083447Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"hypertension\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.086227Z","iopub.execute_input":"2024-09-19T00:50:19.08662Z","iopub.status.idle":"2024-09-19T00:50:19.096867Z","shell.execute_reply.started":"2024-09-19T00:50:19.086533Z","shell.execute_reply":"2024-09-19T00:50:19.095559Z"},"trusted":true}
,"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"age\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.09838Z","iopub.execute_input":"2024-09-19T00:50:19.098857Z","iopub.status.idle":"2024-09-19T00:50:19.112314Z","shell.execute_reply.started":"2024-09-19T00:50:19.098798Z","shell.execute_reply":"2024-09-19T00:50:19.110839Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# Contiguous age bands (upper bound inclusive): 0-2, 2-12, 12-18, 18-35, 35-60; anything else is Old Aged Adults.\ndf[\"age_group\"]=df[\"age\"].apply(lambda x:\"Infant\" if (x>=0)&(x<=2)\n else (\"Child\" if (x>2)&(x<=12)\n else (\"Adolescent\"if (x>12)&(x<=18) \n else (\"Young Adults\"if (x>18)&(x<=35) \n else (\"Middle Aged Adults\" if (x>35)&(x<=60) \n else \"Old Aged Adults\")))))","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.114055Z","iopub.execute_input":"2024-09-19T00:50:19.114634Z","iopub.status.idle":"2024-09-19T00:50:19.139006Z","shell.execute_reply.started":"2024-09-19T00:50:19.114547Z","shell.execute_reply":"2024-09-19T00:50:19.137673Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"age_group\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.140723Z","iopub.execute_input":"2024-09-19T00:50:19.141187Z","iopub.status.idle":"2024-09-19T00:50:19.160029Z","shell.execute_reply.started":"2024-09-19T00:50:19.141146Z","shell.execute_reply":"2024-09-19T00:50:19.15818Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"# BMI bands use contiguous cut-offs 18.5 / 25 / 30 so no value falls between categories.\ndf[\"status_of_bmi\"] = df[\"body_mass_index_(bmi)\"].apply(lambda x:\"Underweight\" if x<18.5\n else (\"Normal weight\" if (x>=18.5)&(x<25)\n else (\"Overweight\"if (x>=25)&(x<30) \n else 
\"Obesity\")))","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.161697Z","iopub.execute_input":"2024-09-19T00:50:19.162139Z","iopub.status.idle":"2024-09-19T00:50:19.182966Z","shell.execute_reply.started":"2024-09-19T00:50:19.162098Z","shell.execute_reply":"2024-09-19T00:50:19.181686Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"status_of_bmi\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.184952Z","iopub.execute_input":"2024-09-19T00:50:19.185389Z","iopub.status.idle":"2024-09-19T00:50:19.200576Z","shell.execute_reply.started":"2024-09-19T00:50:19.18534Z","shell.execute_reply":"2024-09-19T00:50:19.199291Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"status_of_HDL\"] = df[\"cholesterol_levels\"].apply(lambda x:\"Low\" if int(x[1])<50 \n else (\"Acceptable\" if (int(x[1])>=50)&(int(x[1])<60)\n else \"High\"))","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.202084Z","iopub.execute_input":"2024-09-19T00:50:19.202553Z","iopub.status.idle":"2024-09-19T00:50:19.22722Z","shell.execute_reply.started":"2024-09-19T00:50:19.202495Z","shell.execute_reply":"2024-09-19T00:50:19.225198Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"status_of_HDL\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.228824Z","iopub.execute_input":"2024-09-19T00:50:19.229755Z","iopub.status.idle":"2024-09-19T00:50:19.24354Z","shell.execute_reply.started":"2024-09-19T00:50:19.229709Z","shell.execute_reply":"2024-09-19T00:50:19.242303Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"status_of_LDL\"] = df[\"cholesterol_levels\"].apply(lambda x:\"Optimal\" if int(x[3])<100 \n else (\"Near Optimal\" if (int(x[3])>=100)&(int(x[3])<130)\n else( \"Borderline High\" if (int(x[3])>=130)&(int(x[3])<160)\n else( \"High\" if 
(int(x[3])>=160)&(int(x[3])<190)\n else \"Very High\"))))","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.245168Z","iopub.execute_input":"2024-09-19T00:50:19.245598Z","iopub.status.idle":"2024-09-19T00:50:19.272802Z","shell.execute_reply.started":"2024-09-19T00:50:19.245527Z","shell.execute_reply":"2024-09-19T00:50:19.271524Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df[\"status_of_LDL\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.274381Z","iopub.execute_input":"2024-09-19T00:50:19.274906Z","iopub.status.idle":"2024-09-19T00:50:19.29159Z","shell.execute_reply.started":"2024-09-19T00:50:19.274862Z","shell.execute_reply":"2024-09-19T00:50:19.290294Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.head()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.293281Z","iopub.execute_input":"2024-09-19T00:50:19.293862Z","iopub.status.idle":"2024-09-19T00:50:19.328221Z","shell.execute_reply.started":"2024-09-19T00:50:19.293809Z","shell.execute_reply":"2024-09-19T00:50:19.327051Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.columns","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.329532Z","iopub.execute_input":"2024-09-19T00:50:19.329999Z","iopub.status.idle":"2024-09-19T00:50:19.33805Z","shell.execute_reply.started":"2024-09-19T00:50:19.329943Z","shell.execute_reply":"2024-09-19T00:50:19.336747Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.describe()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.339428Z","iopub.execute_input":"2024-09-19T00:50:19.339809Z","iopub.status.idle":"2024-09-19T00:50:19.384427Z","shell.execute_reply.started":"2024-09-19T00:50:19.339771Z","shell.execute_reply":"2024-09-19T00:50:19.383019Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df
.describe(include=\"O\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:19.386533Z","iopub.execute_input":"2024-09-19T00:50:19.387057Z","iopub.status.idle":"2024-09-19T00:50:32.438905Z","shell.execute_reply.started":"2024-09-19T00:50:19.387002Z","shell.execute_reply":"2024-09-19T00:50:32.437717Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.drop(columns=[\"patient_id\",\"body_mass_index_(bmi)\",\"patient_name\",\"blood_pressure_levels\",\"cholesterol_levels\"],inplace=True)","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:32.440253Z","iopub.execute_input":"2024-09-19T00:50:32.44059Z","iopub.status.idle":"2024-09-19T00:50:32.450699Z","shell.execute_reply.started":"2024-09-19T00:50:32.440537Z","shell.execute_reply":"2024-09-19T00:50:32.449592Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.head()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:32.451939Z","iopub.execute_input":"2024-09-19T00:50:32.452543Z","iopub.status.idle":"2024-09-19T00:50:32.48343Z","shell.execute_reply.started":"2024-09-19T00:50:32.45249Z","shell.execute_reply":"2024-09-19T00:50:32.482048Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.info()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:32.485221Z","iopub.execute_input":"2024-09-19T00:50:32.485733Z","iopub.status.idle":"2024-09-19T00:50:32.525383Z","shell.execute_reply.started":"2024-09-19T00:50:32.485677Z","shell.execute_reply":"2024-09-19T00:50:32.524188Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"cat_col = 
df.select_dtypes(include=object).columns","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:32.526987Z","iopub.execute_input":"2024-09-19T00:50:32.527431Z","iopub.status.idle":"2024-09-19T00:50:32.543892Z","shell.execute_reply.started":"2024-09-19T00:50:32.527377Z","shell.execute_reply":"2024-09-19T00:50:32.542732Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"cat_col = [col for col in cat_col if col != \"symptoms\"]\n","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:32.545771Z","iopub.execute_input":"2024-09-19T00:50:32.546191Z","iopub.status.idle":"2024-09-19T00:50:32.552356Z","shell.execute_reply.started":"2024-09-19T00:50:32.54615Z","shell.execute_reply":"2024-09-19T00:50:32.551194Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"cat_col","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:32.553886Z","iopub.execute_input":"2024-09-19T00:50:32.554326Z","iopub.status.idle":"2024-09-19T00:50:32.564238Z","shell.execute_reply.started":"2024-09-19T00:50:32.554265Z","shell.execute_reply":"2024-09-19T00:50:32.563091Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"for col in cat_col:\n fig = px.histogram(df, x=col,color=\"diagnosis\")\n fig.show()\n","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:32.565839Z","iopub.execute_input":"2024-09-19T00:50:32.566289Z","iopub.status.idle":"2024-09-19T00:50:36.442332Z","shell.execute_reply.started":"2024-09-19T00:50:32.566238Z","shell.execute_reply":"2024-09-19T00:50:36.441142Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"for col in cat_col:\n count_series = df[col].value_counts()\n count_df = count_series.reset_index()\n count_df.columns = [col, 'count']\n \n fig = px.pie(count_df, names=col, values='count', title=f'Pie chart of {col}')\n 
fig.show()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:36.443741Z","iopub.execute_input":"2024-09-19T00:50:36.444124Z","iopub.status.idle":"2024-09-19T00:50:37.309011Z","shell.execute_reply.started":"2024-09-19T00:50:36.444075Z","shell.execute_reply":"2024-09-19T00:50:37.307875Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"num_col =df.select_dtypes(exclude=object).columns","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:37.31045Z","iopub.execute_input":"2024-09-19T00:50:37.310817Z","iopub.status.idle":"2024-09-19T00:50:37.317491Z","shell.execute_reply.started":"2024-09-19T00:50:37.310778Z","shell.execute_reply":"2024-09-19T00:50:37.316293Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"for col in num_col:\n fig = px.box(df, x=col,color=\"diagnosis\")\n fig.show()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:37.319049Z","iopub.execute_input":"2024-09-19T00:50:37.319446Z","iopub.status.idle":"2024-09-19T00:50:37.93416Z","shell.execute_reply.started":"2024-09-19T00:50:37.319405Z","shell.execute_reply":"2024-09-19T00:50:37.933019Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.head()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:37.935429Z","iopub.execute_input":"2024-09-19T00:50:37.935806Z","iopub.status.idle":"2024-09-19T00:50:37.967141Z","shell.execute_reply.started":"2024-09-19T00:50:37.935767Z","shell.execute_reply":"2024-09-19T00:50:37.965875Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"# Data Analysis","metadata":{}},{"cell_type":"markdown","source":"## Q1: Is a family history of stroke necessary for a person to be affected?","metadata":{}},{"cell_type":"code","source":"df[\"count\"] = 
1","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:37.968551Z","iopub.execute_input":"2024-09-19T00:50:37.968944Z","iopub.status.idle":"2024-09-19T00:50:37.979103Z","shell.execute_reply.started":"2024-09-19T00:50:37.968903Z","shell.execute_reply":"2024-09-19T00:50:37.977755Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df.groupby([\"family_history_of_stroke\",\"diagnosis\"])[\"count\"].sum().reset_index()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:37.980842Z","iopub.execute_input":"2024-09-19T00:50:37.981319Z","iopub.status.idle":"2024-09-19T00:50:38.005802Z","shell.execute_reply.started":"2024-09-19T00:50:37.981265Z","shell.execute_reply":"2024-09-19T00:50:38.004502Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df.groupby([\"family_history_of_stroke\",\"diagnosis\"])[\"count\"].sum().reset_index() ,x=\"family_history_of_stroke\",y=\"count\",color=\"diagnosis\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.007252Z","iopub.execute_input":"2024-09-19T00:50:38.00772Z","iopub.status.idle":"2024-09-19T00:50:38.099658Z","shell.execute_reply.started":"2024-09-19T00:50:38.007676Z","shell.execute_reply":"2024-09-19T00:50:38.098491Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"#### There are no people infected and her family has no previous history of the disease","metadata":{}},{"cell_type":"markdown","source":"## Q2:The number of infected or uninfected people in each type of residence 
?","metadata":{}},{"cell_type":"code","source":"df.groupby([\"residence_type\",\"diagnosis\"])[\"count\"].sum().reset_index()","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.100941Z","iopub.execute_input":"2024-09-19T00:50:38.101295Z","iopub.status.idle":"2024-09-19T00:50:38.122076Z","shell.execute_reply.started":"2024-09-19T00:50:38.101257Z","shell.execute_reply":"2024-09-19T00:50:38.120788Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df.groupby([\"residence_type\",\"diagnosis\"])[\"count\"].sum().reset_index() ,x=\"residence_type\",y=\"count\",color=\"diagnosis\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.123734Z","iopub.execute_input":"2024-09-19T00:50:38.124162Z","iopub.status.idle":"2024-09-19T00:50:38.211438Z","shell.execute_reply.started":"2024-09-19T00:50:38.124112Z","shell.execute_reply":"2024-09-19T00:50:38.210265Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Q3:The age group most affected by the disease ?","metadata":{}},{"cell_type":"code","source":"df_diagnosis = df.groupby([\"age_group\", 
\"diagnosis\"])[\"count\"].sum().reset_index().sort_values(by=\"count\",ascending=False)","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.212986Z","iopub.execute_input":"2024-09-19T00:50:38.213413Z","iopub.status.idle":"2024-09-19T00:50:38.227109Z","shell.execute_reply.started":"2024-09-19T00:50:38.21337Z","shell.execute_reply":"2024-09-19T00:50:38.225514Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df_diagnosis,x=\"age_group\",y=\"count\",color=\"diagnosis\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.228923Z","iopub.execute_input":"2024-09-19T00:50:38.229433Z","iopub.status.idle":"2024-09-19T00:50:38.315298Z","shell.execute_reply.started":"2024-09-19T00:50:38.229388Z","shell.execute_reply":"2024-09-19T00:50:38.313731Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df_diagnosis_is_stroke=df_diagnosis[df_diagnosis[\"diagnosis\"]==\"Stroke\"]","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.317205Z","iopub.execute_input":"2024-09-19T00:50:38.317649Z","iopub.status.idle":"2024-09-19T00:50:38.323893Z","shell.execute_reply.started":"2024-09-19T00:50:38.317597Z","shell.execute_reply":"2024-09-19T00:50:38.322658Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df_diagnosis_is_stroke,x=\"age_group\",y=\"count\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.325476Z","iopub.execute_input":"2024-09-19T00:50:38.325954Z","iopub.status.idle":"2024-09-19T00:50:38.40375Z","shell.execute_reply.started":"2024-09-19T00:50:38.325913Z","shell.execute_reply":"2024-09-19T00:50:38.402637Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Q4: Does high blood pressure significantly affect the incidence of 
disease?","metadata":{}},{"cell_type":"code","source":"px.histogram(df,x=\"hypertension\",color=\"diagnosis\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.405091Z","iopub.execute_input":"2024-09-19T00:50:38.40548Z","iopub.status.idle":"2024-09-19T00:50:38.491657Z","shell.execute_reply.started":"2024-09-19T00:50:38.40544Z","shell.execute_reply":"2024-09-19T00:50:38.490539Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"### High blood pressure does not appear to significantly affect the incidence of the disease","metadata":{}},{"cell_type":"markdown","source":"## Q5: Does heart disease significantly affect the incidence of disease?","metadata":{}},{"cell_type":"code","source":"px.histogram(df,x=\"heart_disease\",color=\"diagnosis\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.493356Z","iopub.execute_input":"2024-09-19T00:50:38.493799Z","iopub.status.idle":"2024-09-19T00:50:38.583761Z","shell.execute_reply.started":"2024-09-19T00:50:38.493747Z","shell.execute_reply":"2024-09-19T00:50:38.582621Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"### Heart disease affects the incidence of the disease, but not to a large extent","metadata":{}},{"cell_type":"markdown","source":"## Q6: The status of BMI most affected by the disease?","metadata":{}},{"cell_type":"code","source":"df_diagnosis = df.groupby([\"status_of_bmi\", 
\"diagnosis\"])[\"count\"].sum().reset_index().sort_values(by=\"count\",ascending=False)","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.585057Z","iopub.execute_input":"2024-09-19T00:50:38.585398Z","iopub.status.idle":"2024-09-19T00:50:38.599443Z","shell.execute_reply.started":"2024-09-19T00:50:38.58536Z","shell.execute_reply":"2024-09-19T00:50:38.59829Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df_diagnosis,x=\"status_of_bmi\",y=\"count\",color=\"diagnosis\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.601343Z","iopub.execute_input":"2024-09-19T00:50:38.601747Z","iopub.status.idle":"2024-09-19T00:50:38.683747Z","shell.execute_reply.started":"2024-09-19T00:50:38.601708Z","shell.execute_reply":"2024-09-19T00:50:38.682296Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df_diagnosis_is_stroke=df_diagnosis[df_diagnosis[\"diagnosis\"]==\"Stroke\"]","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.685297Z","iopub.execute_input":"2024-09-19T00:50:38.685716Z","iopub.status.idle":"2024-09-19T00:50:38.691793Z","shell.execute_reply.started":"2024-09-19T00:50:38.685675Z","shell.execute_reply":"2024-09-19T00:50:38.690548Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df_diagnosis_is_stroke,x=\"status_of_bmi\",y=\"count\",color=\"status_of_bmi\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.693416Z","iopub.execute_input":"2024-09-19T00:50:38.693876Z","iopub.status.idle":"2024-09-19T00:50:38.805672Z","shell.execute_reply.started":"2024-09-19T00:50:38.693836Z","shell.execute_reply":"2024-09-19T00:50:38.80402Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Q7:The Status of HDL most affected by the disease ?","metadata":{}},{"cell_type":"code","source":"df_diagnosis = df.groupby([\"status_of_HDL\", 
\"diagnosis\"])[\"count\"].sum().reset_index().sort_values(by=\"count\",ascending=False)","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.807673Z","iopub.execute_input":"2024-09-19T00:50:38.809185Z","iopub.status.idle":"2024-09-19T00:50:38.826826Z","shell.execute_reply.started":"2024-09-19T00:50:38.809121Z","shell.execute_reply":"2024-09-19T00:50:38.82499Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df_diagnosis,x=\"status_of_HDL\",y=\"count\",color=\"diagnosis\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.828329Z","iopub.execute_input":"2024-09-19T00:50:38.828876Z","iopub.status.idle":"2024-09-19T00:50:38.920995Z","shell.execute_reply.started":"2024-09-19T00:50:38.82881Z","shell.execute_reply":"2024-09-19T00:50:38.919837Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df_diagnosis_is_stroke=df_diagnosis[df_diagnosis[\"diagnosis\"]==\"Stroke\"]","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.922447Z","iopub.execute_input":"2024-09-19T00:50:38.922916Z","iopub.status.idle":"2024-09-19T00:50:38.929059Z","shell.execute_reply.started":"2024-09-19T00:50:38.922874Z","shell.execute_reply":"2024-09-19T00:50:38.927773Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df_diagnosis_is_stroke,x=\"status_of_HDL\",y=\"count\",color=\"status_of_HDL\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:38.943947Z","iopub.execute_input":"2024-09-19T00:50:38.944359Z","iopub.status.idle":"2024-09-19T00:50:39.032969Z","shell.execute_reply.started":"2024-09-19T00:50:38.944321Z","shell.execute_reply":"2024-09-19T00:50:39.031823Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Q8:The Status of LDL most affected by the disease ?","metadata":{}},{"cell_type":"code","source":"df_diagnosis = df.groupby([\"status_of_LDL\", 
\"diagnosis\"])[\"count\"].sum().reset_index().sort_values(by=\"count\",ascending=False)","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.034271Z","iopub.execute_input":"2024-09-19T00:50:39.034669Z","iopub.status.idle":"2024-09-19T00:50:39.047594Z","shell.execute_reply.started":"2024-09-19T00:50:39.034627Z","shell.execute_reply":"2024-09-19T00:50:39.046554Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df_diagnosis,x=\"status_of_LDL\",y=\"count\",color=\"diagnosis\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.049169Z","iopub.execute_input":"2024-09-19T00:50:39.049688Z","iopub.status.idle":"2024-09-19T00:50:39.133871Z","shell.execute_reply.started":"2024-09-19T00:50:39.04952Z","shell.execute_reply":"2024-09-19T00:50:39.132752Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df_diagnosis_is_stroke=df_diagnosis[df_diagnosis[\"diagnosis\"]==\"Stroke\"]","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.135461Z","iopub.execute_input":"2024-09-19T00:50:39.136429Z","iopub.status.idle":"2024-09-19T00:50:39.142467Z","shell.execute_reply.started":"2024-09-19T00:50:39.136374Z","shell.execute_reply":"2024-09-19T00:50:39.141193Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df_diagnosis_is_stroke,x=\"status_of_LDL\",y=\"count\",color=\"status_of_LDL\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.144049Z","iopub.execute_input":"2024-09-19T00:50:39.144474Z","iopub.status.idle":"2024-09-19T00:50:39.243704Z","shell.execute_reply.started":"2024-09-19T00:50:39.144434Z","shell.execute_reply":"2024-09-19T00:50:39.242605Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Q9: The 10 most common symptoms of the 
disease?","metadata":{}},{"cell_type":"code","source":"# Count how often each individual symptom occurs among stroke patients and keep the 10 most frequent.\n# NOTE(review): assumes a row may list several comma-separated symptoms in one\n# string -- confirm against the data; for single-symptom rows the split is a no-op.\n(\n    df.loc[df[\"diagnosis\"] == \"Stroke\", \"symptoms\"]\n    .dropna()\n    .str.split(\",\")\n    .explode()\n    .str.strip()\n    .value_counts()\n    .head(10)\n    .rename_axis(\"symptom\")\n    .reset_index(name=\"count\")\n)","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.244989Z","iopub.execute_input":"2024-09-19T00:50:39.245337Z","iopub.status.idle":"2024-09-19T00:50:39.266447Z","shell.execute_reply.started":"2024-09-19T00:50:39.245298Z","shell.execute_reply":"2024-09-19T00:50:39.265307Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Q10: Number of people with and without a stroke diagnosis for each level of alcohol intake?","metadata":{}},{"cell_type":"code","source":"df.groupby([\"alcohol_intake\", \"diagnosis\"])[\"count\"].sum().reset_index().sort_values(by=\"count\",ascending=False)","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.268039Z","iopub.execute_input":"2024-09-19T00:50:39.26845Z","iopub.status.idle":"2024-09-19T00:50:39.288459Z","shell.execute_reply.started":"2024-09-19T00:50:39.268409Z","shell.execute_reply":"2024-09-19T00:50:39.28743Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df.groupby([\"alcohol_intake\", \"diagnosis\"])[\"count\"].sum().reset_index().sort_values(by=\"count\",ascending=False),x=\"alcohol_intake\",y=\"count\",color=\"diagnosis\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.289669Z","iopub.execute_input":"2024-09-19T00:50:39.290007Z","iopub.status.idle":"2024-09-19T00:50:39.377385Z","shell.execute_reply.started":"2024-09-19T00:50:39.289967Z","shell.execute_reply":"2024-09-19T00:50:39.376306Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Q11: Does drinking alcohol cause a stroke?","metadata":{}},{"cell_type":"code","source":"df.groupby([\"alcohol_intake\", 
\"diagnosis\"])[\"count\"].sum().reset_index().sort_values(by=\"count\",ascending=False)","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.37891Z","iopub.execute_input":"2024-09-19T00:50:39.379362Z","iopub.status.idle":"2024-09-19T00:50:39.400532Z","shell.execute_reply.started":"2024-09-19T00:50:39.379302Z","shell.execute_reply":"2024-09-19T00:50:39.399385Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"### Drinking alcohol has no effect on causing a stroke ","metadata":{}},{"cell_type":"markdown","source":"## Q12: Number of infected and uninfected males and females?","metadata":{}},{"cell_type":"code","source":"df.groupby([\"gender\", \"diagnosis\"])[\"count\"].sum().reset_index().sort_values(by=\"count\",ascending=False)","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.402054Z","iopub.execute_input":"2024-09-19T00:50:39.402435Z","iopub.status.idle":"2024-09-19T00:50:39.421429Z","shell.execute_reply.started":"2024-09-19T00:50:39.402394Z","shell.execute_reply":"2024-09-19T00:50:39.420385Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df.groupby([\"gender\", \"diagnosis\"])[\"count\"].sum().reset_index().sort_values(by=\"count\",ascending=False),x=\"gender\",y=\"count\",color=\"diagnosis\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.422646Z","iopub.execute_input":"2024-09-19T00:50:39.422983Z","iopub.status.idle":"2024-09-19T00:50:39.511558Z","shell.execute_reply.started":"2024-09-19T00:50:39.422946Z","shell.execute_reply":"2024-09-19T00:50:39.510332Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Q13:Marital status number Infected and non-infected?","metadata":{}},{"cell_type":"code","source":"df.groupby([\"marital_status\", 
\"diagnosis\"])[\"count\"].sum().reset_index().sort_values(by=\"count\",ascending=False)","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.513059Z","iopub.execute_input":"2024-09-19T00:50:39.513448Z","iopub.status.idle":"2024-09-19T00:50:39.535797Z","shell.execute_reply.started":"2024-09-19T00:50:39.513408Z","shell.execute_reply":"2024-09-19T00:50:39.534615Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df.groupby([\"marital_status\", \"diagnosis\"])[\"count\"].sum().reset_index().sort_values(by=\"count\",ascending=False),x=\"marital_status\",y=\"count\",color=\"diagnosis\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.537289Z","iopub.execute_input":"2024-09-19T00:50:39.537709Z","iopub.status.idle":"2024-09-19T00:50:39.627728Z","shell.execute_reply.started":"2024-09-19T00:50:39.537668Z","shell.execute_reply":"2024-09-19T00:50:39.626631Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"## Q14:Does Smoking cause a stroke?","metadata":{}},{"cell_type":"code","source":"df.groupby([\"smoking_status\", \"diagnosis\"])[\"count\"].sum().reset_index().sort_values(by=\"count\",ascending=False)","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.629059Z","iopub.execute_input":"2024-09-19T00:50:39.629436Z","iopub.status.idle":"2024-09-19T00:50:39.649184Z","shell.execute_reply.started":"2024-09-19T00:50:39.629397Z","shell.execute_reply":"2024-09-19T00:50:39.648075Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":"### Smoking has no effect on causing a stroke ","metadata":{}},{"cell_type":"markdown","source":"## Q15:The type of work most affected by injury?","metadata":{}},{"cell_type":"code","source":"df_diagnosis = df.groupby([\"work_type\", 
\"diagnosis\"])[\"count\"].sum().reset_index().sort_values(by=\"count\",ascending=False)","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.650505Z","iopub.execute_input":"2024-09-19T00:50:39.650888Z","iopub.status.idle":"2024-09-19T00:50:39.662998Z","shell.execute_reply.started":"2024-09-19T00:50:39.65085Z","shell.execute_reply":"2024-09-19T00:50:39.661902Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df_diagnosis,x=\"work_type\",y=\"count\",color=\"diagnosis\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.664459Z","iopub.execute_input":"2024-09-19T00:50:39.664865Z","iopub.status.idle":"2024-09-19T00:50:39.747897Z","shell.execute_reply.started":"2024-09-19T00:50:39.664827Z","shell.execute_reply":"2024-09-19T00:50:39.74659Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"df_diagnosis_is_stroke=df_diagnosis[df_diagnosis[\"diagnosis\"]==\"Stroke\"]","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.749391Z","iopub.execute_input":"2024-09-19T00:50:39.749866Z","iopub.status.idle":"2024-09-19T00:50:39.75673Z","shell.execute_reply.started":"2024-09-19T00:50:39.749812Z","shell.execute_reply":"2024-09-19T00:50:39.75554Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"px.histogram(df_diagnosis_is_stroke,x=\"work_type\",y=\"count\",color=\"work_type\")","metadata":{"execution":{"iopub.status.busy":"2024-09-19T00:50:39.758397Z","iopub.execute_input":"2024-09-19T00:50:39.759004Z","iopub.status.idle":"2024-09-19T00:50:39.855656Z","shell.execute_reply.started":"2024-09-19T00:50:39.758903Z","shell.execute_reply":"2024-09-19T00:50:39.8545Z"},"trusted":true},"execution_count":null,"outputs":[]}]}