-
Notifications
You must be signed in to change notification settings - Fork 274
/
Hours_vs_Scores_Prediction(96% accuracy)
66 lines (42 loc) · 1.54 KB
/
Hours_vs_Scores_Prediction(96% accuracy)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#Importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the study-hours / exam-scores data. The path is relative to the
# current working directory.
dataset = pd.read_csv("student_scores.csv")

# Preview the first 5 rows. Outside a notebook the value of a bare
# expression is discarded, so it has to be printed explicitly.
print(dataset.head())

# Summary of the columns (info() writes its report to stdout itself).
dataset.info()

# Exploratory scatter plot. The rest of the script treats Hours as the input
# and Scores as the value being predicted, so Hours goes on the x-axis to
# match the final "Scores vs Hours" figure.
sns.scatterplot(x="Hours", y="Scores", data=dataset)
plt.show()
# Pull the independent feature (X) and the dependent feature (y) out of the
# frame by column position, as 1-D NumPy arrays.
# NOTE(review): assumes column 0 is "Hours" and column 1 is "Scores" --
# confirm against the CSV header.
X, y = (dataset.iloc[:, position].to_numpy() for position in (0, 1))

# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split the same on every run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)
# Fit a LassoLars linear model on the training split.
from sklearn import linear_model

regressor = linear_model.LassoLars(alpha=0.01)

# scikit-learn estimators take a 2-D feature matrix (n_samples, n_features);
# there is a single feature here, so reshape both splits to one column.
X_train = X_train.reshape(-1, 1)
X_test = X_test.reshape(-1, 1)
regressor.fit(X_train, y_train)

# Predictions for the held-out rows.
y_pred = regressor.predict(X_test)

# For a regressor, score() is the coefficient of determination (R^2), not a
# classification accuracy. Report it on the held-out data as well: the
# training-set value alone says nothing about how the model generalises.
print("R^2 (train):", regressor.score(X_train, y_train))
print("R^2 (test): ", regressor.score(X_test, y_test))

# Predicted (left column) next to actual (right column) test-set values.
print(np.column_stack((y_pred, y_test)))

# Predicted score that a student may achieve after studying for 9.25 hours.
print(regressor.predict([[9.25]]))
# Plot the fitted line over the training points.
# ndarray.min()/max() return scalars; the builtin min()/max() iterate a 2-D
# array row by row and hand back 1-element arrays, which are not valid scalar
# bounds. linspace also includes the upper endpoint, so the line spans the
# whole training range instead of stopping short of the largest value.
X_grid = np.linspace(X_train.min(), X_train.max(), num=100).reshape(-1, 1)

plt.scatter(X_train, y_train, color="red")
plt.plot(X_grid, regressor.predict(X_grid), color="blue")
plt.title("Scores vs Hours")
plt.xlabel("Hours of Study")
plt.ylabel("Scores")
plt.show()