# torch_shallow_neural_classifier.py
# Forked from cgpotts/cs224u.
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data
from torch_model_base import TorchModelBase
import utils
# Module-level metadata strings.
__author__ = "Christopher Potts"
__version__ = "CS224u, Stanford, Fall 2020"
class TorchShallowNeuralClassifier(TorchModelBase):
    def __init__(self,
            hidden_dim=50,
            hidden_activation=nn.Tanh(),
            **base_kwargs):
        """
        A one-hidden-layer classifier:

        h = f(xW_xh + b_h)
        y = softmax(hW_hy + b_y)

        trained with a cross-entropy loss, where f is given by
        `hidden_activation`.

        Parameters
        ----------
        hidden_dim : int
            Dimensionality of the hidden layer.

        hidden_activation : nn.Module
            The activation function applied to the hidden layer.

        **base_kwargs
            Passed through to the base class. For details, see
            `torch_model_base.py`.

        Attributes
        ----------
        loss: nn.CrossEntropyLoss(reduction="mean")

        self.params: list
            Extends TorchModelBase.params with the names of the
            arguments of this class, to support tuning of these values
            using `sklearn.model_selection` tools.

        """
        # These are set before the base-class initializer runs, which
        # preserves the original statement order.
        self.hidden_dim = hidden_dim
        self.hidden_activation = hidden_activation
        super().__init__(**base_kwargs)
        self.loss = nn.CrossEntropyLoss(reduction="mean")
        self.params += ['hidden_dim', 'hidden_activation']

    def build_graph(self):
        """
        Assemble the network: input-to-hidden linear layer, the hidden
        activation, and a hidden-to-output linear layer. Relies on
        `self.input_dim` and `self.n_classes_`, which are set by
        `build_dataset`.

        Returns
        -------
        nn.Module

        """
        layers = [
            nn.Linear(self.input_dim, self.hidden_dim),
            self.hidden_activation,
            nn.Linear(self.hidden_dim, self.n_classes_),
        ]
        return nn.Sequential(*layers)

    def build_dataset(self, X, y=None):
        """
        Wrap the examples (and labels, if given) in a tensor dataset.

        Parameters
        ----------
        X : iterable of length `n_examples`
           Each element must have the same length.

        y: None or iterable of length `n_examples`

        Attributes
        ----------
        input_dim : int
            Set based on `X.shape[1]` after `X` has been converted to
            `np.array`.

        classes_ : list
            Sorted unique labels. Set only where `y` is specified.

        n_classes_ : int
            `len(self.classes_)`. Set only where `y` is specified.

        Returns
        -------
        torch.utils.data.TensorDataset
            Where `y=None`, the dataset will yield single tensors `X`.
            Where `y` is specified, it will yield `(X, y)` pairs.

        """
        features = np.array(X)
        self.input_dim = features.shape[1]
        features = torch.FloatTensor(features)

        # Prediction-time call: there are no labels to encode.
        if y is None:
            return torch.utils.data.TensorDataset(features)

        # Map each raw label to its position in the sorted label
        # inventory, since the loss works with integer class indices.
        self.classes_ = sorted(set(y))
        self.n_classes_ = len(self.classes_)
        label_to_index = {
            label: index for index, label in enumerate(self.classes_)}
        targets = torch.tensor([label_to_index[label] for label in y])
        return torch.utils.data.TensorDataset(features, targets)

    def score(self, X, y, device=None):
        """
        Macro-F1 of the model's predictions on `X` against `y`. Note:
        this departs from `sklearn`, where classifiers use accuracy as
        their scoring function. Using macro-F1 is more consistent with
        our course.

        This function can be used to evaluate models, but its primary
        use is in cross-validation and hyperparameter tuning.

        Parameters
        ----------
        X: np.array, shape `(n_examples, n_features)`

        y: iterable, shape `len(n_examples)`
            These can be the raw labels. They will be converted
            internally as needed. See `build_dataset`.

        device: str or None
            Allows the user to temporarily change the device used
            during prediction. This is useful if predictions require a
            lot of memory and so are better done on the CPU. After
            prediction is done, the model is returned to `self.device`.

        Returns
        -------
        float

        """
        return utils.safe_macro_f1(y, self.predict(X, device=device))

    def predict_proba(self, X, device=None):
        """
        Predicted probabilities for the examples in `X`.

        Parameters
        ----------
        X : np.array, shape `(n_examples, n_features)`

        device: str or None
            Allows the user to temporarily change the device used
            during prediction. This is useful if predictions require a
            lot of memory and so are better done on the CPU. After
            prediction is done, the model is returned to `self.device`.

        Returns
        -------
        np.array, shape `(len(X), self.n_classes_)`
            Each row of this matrix will sum to 1.0.

        """
        # `_predict` is provided by the base class; its output is
        # normalized here with a softmax over the class dimension.
        logits = self._predict(X, device=device)
        return torch.softmax(logits, dim=1).cpu().numpy()

    def predict(self, X, device=None):
        """
        Predicted labels for the examples in `X`. These are converted
        from the integers that PyTorch needs back to their original
        values in `self.classes_`.

        Parameters
        ----------
        X : np.array, shape `(n_examples, n_features)`

        device: str or None
            Allows the user to temporarily change the device used
            during prediction. This is useful if predictions require a
            lot of memory and so are better done on the CPU. After
            prediction is done, the model is returned to `self.device`.

        Returns
        -------
        list, length len(X)

        """
        best_indices = self.predict_proba(X, device=device).argmax(axis=1)
        return [self.classes_[index] for index in best_indices]
def simple_example():
    """
    Train a default `TorchShallowNeuralClassifier` on a train/test
    split of the sklearn digits dataset, print the model and a
    classification report for the held-out portion, and return the
    held-out accuracy.

    """
    from sklearn.datasets import load_digits
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import classification_report, accuracy_score

    utils.fix_random_seeds()

    digits = load_digits()
    X_train, X_test, y_train, y_test = train_test_split(
        digits.data, digits.target, test_size=0.33, random_state=42)

    classifier = TorchShallowNeuralClassifier()
    print(classifier)
    classifier.fit(X_train, y_train)

    predictions = classifier.predict(X_test)
    print("\nClassification report:")
    print(classification_report(y_test, predictions))

    return accuracy_score(y_test, predictions)
# Run the digits demo only when this file is executed as a script.
if __name__ == "__main__":
    simple_example()