-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpima_diabetes.py
104 lines (73 loc) · 2.68 KB
/
pima_diabetes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# -*- coding: utf-8 -*-
"""pima diabetes.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/13TquLzM6F51ZPuXV40y3AuUKfCAkki-t
"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
# Commented out IPython magic to ensure Python compatibility.
# %matplotlib inline
# Load the diabetes dataset; 'diabetes.csv' is read relative to the current
# working directory.
data = pd.read_csv('diabetes.csv')

# Target: the 'Outcome' column, kept as a single-column DataFrame.
y = data[['Outcome']]

# Features: the eight predictor columns. The explicit .copy() gives x its own
# data, so the later in-place assignment to x['SkinThickness'] modifies x
# only and does not trigger pandas' SettingWithCopyWarning.
x = data[[
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]].copy()
import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise correlation between the columns of `data`, computed once and
# reused for the plot (the labels in corrmat.index are exactly the columns
# the matrix was computed over, so re-selecting them and calling corr()
# again would reproduce the same matrix).
corrmat = data.corr()
top_corr_features = corrmat.index

# Annotated heatmap of the correlation matrix on a 15x15 inch figure.
# NOTE(review): no plt.show()/savefig call follows in this script, so the
# figure is only visible in an interactive/notebook session - confirm intent.
plt.figure(figsize=(15, 15))
g = sns.heatmap(corrmat, annot=True, cmap='RdYlGn')
def fillskinthickness(row):
    """Return a SkinThickness value for *row*, estimating it from BMI if it is 0.

    A recorded SkinThickness of 0 is treated as missing. In that case the
    value is replaced by the midpoint of the 10-unit BMI band the row falls
    into:

        20 <= BMI < 30  ->  25
        30 <= BMI < 40  ->  35
        40 <= BMI < 50  ->  45
        50 <= BMI < 60  ->  55

    Args:
        row: A mapping (e.g. a DataFrame row passed by ``DataFrame.apply``
            with ``axis=1``) providing the keys 'SkinThickness' and 'BMI'.

    Returns:
        The original SkinThickness when it is non-zero; otherwise the band
        midpoint above, or NaN when BMI lies outside [20, 60) (including a
        BMI of 0 or NaN) so that a later imputation step can fill it.
    """
    skin_thickness = row['SkinThickness']
    if skin_thickness != 0:
        return skin_thickness

    bmi = row['BMI']
    # Each band must be bounded on both sides ("and", written as a chained
    # comparison). Joining the bounds with "or" makes the first test true for
    # every number, so every missing value would be filled with 35.
    if 20 <= bmi < 30:
        return 25
    if 30 <= bmi < 40:
        return 35
    if 40 <= bmi < 50:
        return 45
    if 50 <= bmi < 60:
        return 55

    # No band applies: report the value as missing explicitly.
    return float('nan')
from sklearn.impute import SimpleImputer

# Replace zero SkinThickness readings with the BMI-based estimate, row by row.
x['SkinThickness'] = x.apply(fillskinthickness, axis=1)

# Fill any NaN remaining in the feature columns with that column's mean.
# fit_transform returns a NumPy array, so x is no longer a DataFrame after
# this point.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
x = imputer.fit_transform(x)

# The class label is deliberately NOT imputed: a mean-filled label would be
# a value that is not a class, and refitting `imputer` on y would overwrite
# the feature statistics it holds. The label is only flattened to the 1-D
# shape scikit-learn classifiers expect for a single target.
y = np.ravel(y)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 10% of the rows for testing; random_state fixes the split.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=0
)

# Standardise the features. The scaler learns mean/variance from the
# training split only and the SAME statistics are applied to the test split;
# refitting on x_test would scale the two splits inconsistently and leak
# test-set information into preprocessing.
sc_x = StandardScaler()
x_train = sc_x.fit_transform(x_train)
x_test = sc_x.transform(x_test)
from sklearn.decomposition import PCA

# Project the features onto 3 principal components. The components are
# learned from the training split only and then applied unchanged to the
# test split, so both projections share the same axes.
# `principalComponent` keeps holding the test-split projection, as it did
# before; the training-split projection is kept under its own name instead
# of being overwritten.
# NOTE(review): the model fitted further down in this script is trained on
# x_train / x_test, not on these projections - confirm whether PCA was meant
# to feed the classifier.
pca = PCA(n_components=3)
principalComponent_train = pca.fit_transform(x_train)
principalComponent = pca.transform(x_test)
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score

# scikit-learn classifiers expect a 1-D target; np.ravel is a no-op for an
# already flat array and flattens an (n, 1) column vector otherwise.
y_train_flat = np.ravel(y_train)

# Random forest with 150 trees, fitted on the training split and used to
# predict the held-out test split.
classifier = RandomForestClassifier(n_estimators=150, max_depth=100)
classifier.fit(x_train, y_train_flat)
y_pred = classifier.predict(x_test)

# 10-fold cross-validation on the training split. cross_val_score works on
# clones of the estimator, so the fitted `classifier` above is not modified
# and y_pred does not need to be recomputed afterwards.
accuracies = cross_val_score(
    estimator=classifier, X=x_train, y=y_train_flat, cv=10
)
print('Random forest 10-fold CV accuracies:', accuracies)
print('Random forest mean CV accuracy:', accuracies.mean())

# Accuracy on the data the model was fitted on, as a percentage rounded to
# 2 decimals. The variable name is kept for compatibility even though the
# model is a random forest.
acc_decision_tree = round(classifier.score(x_train, y_train_flat) * 100, 2)
print('Random forest training accuracy (%):', acc_decision_tree)

# Confusion matrix on the test split. The result gets its own name so the
# imported confusion_matrix function is not shadowed by an array.
rf_confusion_matrix = confusion_matrix(y_test, y_pred)
print(rf_confusion_matrix)
from sklearn.metrics import confusion_matrix
from xgboost import XGBClassifier

# Gradient-boosted model with default hyper-parameters. The XGBoost `model`
# itself is fitted and used for prediction here (not the previously trained
# `classifier`), so y_pred1 and the matrix below really describe XGBoost.
model = XGBClassifier()
# np.ravel passes the target as a 1-D array whether y_train is flat or an
# (n, 1) column vector.
model.fit(x_train, np.ravel(y_train))
y_pred1 = model.predict(x_test)

# Confusion matrix on the test split, stored under its own name so the
# imported confusion_matrix function stays callable.
xgb_confusion_matrix = confusion_matrix(y_test, y_pred1)
print(xgb_confusion_matrix)