forked from shamiraty/REGRESSION-ANALYSIS-WITH-Y-PREDICTED
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
161 lines (107 loc) · 4.6 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import streamlit as st
import pandas as pd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
# Silence the warning Streamlit shows for bare ``st.pyplot()`` calls that
# rely on matplotlib's global figure.
# NOTE(review): newer Streamlit releases may no longer recognise this
# option -- confirm against the Streamlit version this app is pinned to.
st.set_option('deprecation.showPyplotGlobalUse', False)
from streamlit_extras.metric_cards import style_metric_cards

# Page chrome: browser-tab title/icon and wide layout, then the main header,
# banner image and subtitle.
st.set_page_config(page_title="Dashboard", page_icon="📈", layout="wide")
st.header(" MACHINE LEARNING WORKFLOW ")
st.image("images/logo2.webp", caption="")
st.write("MULTIPLE REGRESSION WITH SSE, SE, SSR, SST, R2, ADJ[R2], RESIDUAL")

# Inject the project stylesheet as an inline <style> element. The encoding is
# spelled out so reading the file does not depend on the host's locale.
with open('styles.css', encoding='utf-8') as f:
    st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)

# Sidebar logo and title.
st.sidebar.image("images/logos.webp", caption="MULTI-VARIABLE REGRESSION")
st.sidebar.title("ADD NEW VALUE")
# Load the dataset and split it into the two predictors (X) and the
# response (Y).
df = pd.read_excel('Book1.xlsx')
X = df[['Dependant', 'Wives']]
Y = df['Projects']

# Fit the linear model Y = Bo + B1*X1 + B2*X2.
model = LinearRegression()
model.fit(X, Y)

# In-sample predictions. They are computed once; ``y_pred`` is kept as a
# second name for the same array so existing references keep working.
predictions = model.predict(X)
y_pred = predictions

# Regression coefficients (Bo, B1, B2).
intercept = model.intercept_  # Bo
coefficients = model.coef_  # B1, B2

# Coefficient of determination.
r2 = r2_score(Y, predictions)

# Adjusted R-squared penalises R-squared for the number of predictors.
# It is undefined when there are no residual degrees of freedom
# (n <= p + 1), so report NaN instead of dividing by zero or by a
# negative count.
n = len(Y)
p = X.shape[1]
residual_dof = n - p - 1
if residual_dof > 0:
    adjusted_r2 = 1 - (1 - r2) * (n - 1) / residual_dof
else:
    adjusted_r2 = float('nan')

# Sum of squared errors (actual vs. predicted) and regression sum of
# squares (predicted vs. mean of the actual values).
sse = np.sum((Y - predictions) ** 2)
ssr = np.sum((y_pred - np.mean(Y)) ** 2)
# Styling arguments shared by both groups of metric cards.
_card_style = dict(
    background_color="#FFFFFF",
    border_left_color="#9900AD",
    border_color="#1f66bd",
    box_shadow="#F71938",
)

# Fitted equation terms: the intercept plus one slope per predictor.
# Each tuple is (label, formatted value, caption shown as the delta).
with st.expander("REGRESSION COEFFICIENT"):
    _coefficient_cards = (
        ('INTERCEPT:', f'{intercept:.4f}', "(Bo)"),
        ('B1 COEFFICIENT:', f'{coefficients[0]:.4f}', " for X1 number of Dependant (B1)"),
        ('B2 COEFFICIENT', f'{coefficients[1]:.4f}', " for X2 number of Wives (B2):"),
    )
    for _slot, (_label, _value, _caption) in zip(st.columns(3), _coefficient_cards):
        _slot.metric(_label, value=_value, delta=_caption)
    # NOTE(review): styling call assumed to sit inside the expander -- confirm.
    style_metric_cards(**_card_style)

# Goodness-of-fit figures: R-squared, adjusted R-squared and SSE.
with st.expander("MEASURE OF VARIATIONS"):
    _variation_cards = (
        ('R-SQUARED:', f'{r2:.4f}', "Coefficient of Determination"),
        ('ADJUSTED R-SQUARED:', f'{adjusted_r2:.4f}', "Adj[R2]"),
        ('SUM SQUARED ERROR (SSE):', f'{sse:.4f}', "Squared(Y-Y_pred)"),
    )
    for _slot, (_label, _value, _caption) in zip(st.columns(3), _variation_cards):
        _slot.metric(_label, value=_value, delta=_caption)
    style_metric_cards(**_card_style)
# Show every observation next to its fitted value and offer the table as a
# CSV download.
with st.expander("PREDICTION TABLE"):
    _table_columns = {
        'Name': df['Name'],
        'No of Dependant': df['Dependant'],
        'No of Wives': df['Wives'],
        'Done Projects | Actual Y': Y,
        'Y_predicted': predictions,
    }
    # ``sse`` and ``ssr`` are whole-model totals, so the same value is
    # repeated on every row of these two columns.
    result_df = pd.DataFrame(_table_columns).assign(SSE=sse, SSR=ssr)
    st.dataframe(result_df, use_container_width=True)

    # Serialise without the index and hand the bytes to the download button.
    _csv_text = result_df.to_csv(index=False)
    df_download = _csv_text.encode('utf-8')
    st.download_button(
        label="DOWNLOAD PREDICTED DATASET",
        data=df_download,
        key="download_dataframe.csv",
        file_name="my_dataframe.csv",
    )
import seaborn as sns

# Residual table plus two diagnostic plots: actual-vs-predicted scatter and
# the residual density.
with st.expander("RESIDUAL & LINE OF BEST FIT"):
    # Residual = actual value minus fitted value.
    residuals = Y - predictions
    residuals_df = pd.DataFrame({'Actual': Y, 'Predicted': predictions, 'Residuals': residuals})
    st.dataframe(residuals_df, use_container_width=True)

    # The style must be set before any figure is created; setting it after
    # drawing has no effect on the plot that was just drawn.
    sns.set_style("whitegrid")

    col1, col2 = st.columns(2)
    with col1:
        # Draw on an explicit figure and pass it to Streamlit instead of
        # relying on matplotlib's global current figure.
        fig, ax = plt.subplots()
        ax.scatter(Y, predictions)
        # Reference line y = x across the range of the actual values: points
        # on it are predicted exactly. The colour is given once, via the
        # keyword, rather than also in the format string.
        y_low, y_high = Y.min(), Y.max()
        ax.plot([y_low, y_high], [y_low, y_high], '--', color='red', label='Best Fit Line')
        ax.set_xlabel('Actual Y | number of Projects')
        ax.set_ylabel('Predicted Y')
        ax.grid(True)
        ax.legend()
        st.pyplot(fig)
        # Release the figure so repeated script reruns do not accumulate
        # open matplotlib figures.
        plt.close(fig)
    with col2:
        # ``displot`` is figure-level: it creates its own figure, which is
        # exposed on the returned grid object.
        density_grid = sns.displot(residuals, kind='kde', color='blue', fill=True, legend=True)
        st.pyplot(density_grid.figure)
        plt.close(density_grid.figure)
# Sidebar form collecting one new (X1, X2) observation. The inputs are
# cleared after each submission.
with st.sidebar:
    with st.form("input_form", clear_on_submit=True):
        x1 = st.number_input("Enter Dependant",)
        x2 = st.number_input("Number of Wives",)
        submit_button = st.form_submit_button(label="Predict")

# NOTE(review): the result is rendered in the main page area rather than in
# the sidebar -- confirm this placement is the intended one.
if submit_button:
    # Build the input with the same column labels the model was fitted on,
    # so scikit-learn does not warn about missing feature names.
    new_data = pd.DataFrame([[x1, x2]], columns=X.columns)
    new_prediction = model.predict(new_data)
    # ``predict`` returns a one-element array; show the number itself,
    # rounded like the other metrics, instead of the array's repr.
    with st.expander("OPEN"):
        st.write(f"<span style='font-size: 34px;color:green;'>Predicted Output: </span> <span style='font-size: 34px;'> {new_prediction[0]:.4f}</span>", unsafe_allow_html=True)