-
Notifications
You must be signed in to change notification settings - Fork 0
/
engine.hpp
186 lines (175 loc) · 4.8 KB
/
engine.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#ifndef ENGINE
#define ENGINE
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>
class Tensor;
using TensorPtr = std::shared_ptr<Tensor>;

/// Scalar node of a reverse-mode automatic-differentiation graph.
///
/// Every arithmetic operation allocates a new Tensor that keeps its operands
/// alive through `prev` and stores, in `_backward`, a closure that adds the
/// node's contribution to the gradients of those operands.
///
/// Tensors that take part in an expression must be owned by a
/// std::shared_ptr (create them with std::make_shared<Tensor>(...)): the
/// member operations call shared_from_this().
class Tensor : public std::enable_shared_from_this<Tensor>
{
public:
    double data;                     ///< Forward value of this node.
    std::vector<TensorPtr> prev;     ///< Operands this node was computed from.
    std::function<void()> _backward; ///< Pushes `_grad` into the operands' `_grad`.
    double _grad;                    ///< Accumulated gradient of the output w.r.t. this node.
    bool visited;                    ///< Scratch flag used while building the topological order.

    /// @param data      forward value of the node
    /// @param _children operands the value was computed from (empty for leaves)
    Tensor(double data, std::vector<TensorPtr> _children = {})
        : data(data), prev(std::move(_children)), _backward([]() {}), _grad(0.0), visited(false)
    {
    }

    // A destructor is declared below, so the remaining special members are
    // defaulted explicitly to keep the class copyable and movable.
    Tensor(const Tensor &) = default;
    Tensor(Tensor &&) = default;
    Tensor &operator=(const Tensor &) = default;
    Tensor &operator=(Tensor &&) = default;

    /// Releases the sub-graph owned by this node without recursion, so that
    /// very deep expression chains cannot overflow the call stack through
    /// nested shared_ptr destructors.
    ~Tensor()
    {
        // Drop the closure first: its operand references are duplicated in
        // `prev`, so nothing is destroyed by this step.
        _backward = nullptr;
        std::vector<TensorPtr> pending = std::move(prev);
        while (!pending.empty())
        {
            TensorPtr node = std::move(pending.back());
            pending.pop_back();
            if (node && node.use_count() == 1)
            {
                // `node` is about to die: take over its operands so that its
                // own destructor finds nothing left to tear down.
                node->_backward = nullptr;
                for (auto &child : node->prev)
                {
                    pending.push_back(std::move(child));
                }
                node->prev.clear();
            }
        }
    }

    /// Runs back-propagation from this node, which is seeded with gradient 1.
    ///
    /// Gradients are accumulated with `+=` and never reset here, so calling
    /// backward() again on the same graph adds to the existing values.
    void backward()
    {
        _grad = 1.0;
        const auto sorted_tensors = build_topological_order();
        // Operands precede their users in the order, so walk it backwards.
        for (auto it = sorted_tensors.rbegin(); it != sorted_tensors.rend(); ++it)
        {
            if ((*it)->_backward)
            {
                (*it)->_backward();
            }
        }
    }

    /// Returns every node reachable from this one, operands before users.
    /// The `visited` flags are cleared again before returning.
    std::vector<TensorPtr> build_topological_order()
    {
        std::vector<TensorPtr> topo;
        topo_sort(topo);
        clear_visited(topo);
        return topo;
    }

    /// Appends the post-order traversal starting at this node to `order`.
    void topo_sort(std::vector<TensorPtr> &order)
    {
        topo_sort(shared_from_this(), order);
    }

    /// Appends the post-order traversal starting at `tensor` to `order`.
    ///
    /// Uses an explicit stack instead of recursion so graph depth is limited
    /// by heap memory rather than by the call stack. Operands are visited in
    /// `prev` order, exactly like a recursive depth-first walk would.
    void topo_sort(TensorPtr tensor, std::vector<TensorPtr> &order)
    {
        struct Frame
        {
            TensorPtr node;
            std::size_t next; // index of the next operand of `node` to visit
        };

        if (!tensor || tensor->visited)
        {
            return;
        }
        tensor->visited = true;

        std::vector<Frame> stack;
        stack.push_back(Frame{std::move(tensor), 0});
        while (!stack.empty())
        {
            Frame &top = stack.back();
            if (top.next < top.node->prev.size())
            {
                TensorPtr child = top.node->prev[top.next++];
                if (child && !child->visited)
                {
                    child->visited = true;
                    // `top` is not used after this push, which may reallocate.
                    stack.push_back(Frame{std::move(child), 0});
                }
            }
            else
            {
                order.push_back(std::move(top.node));
                stack.pop_back();
            }
        }
    }

    /// Resets the `visited` flag of every tensor in `order`.
    void clear_visited(std::vector<TensorPtr> &order)
    {
        for (const auto &tensor : order)
        {
            tensor->visited = false;
        }
    }

    // NOTE on the closures below: the result node is captured through a
    // weak_ptr. Capturing it as a shared_ptr would make the node own itself
    // (node -> _backward -> node) and the whole graph would never be freed.

    /// ReLU activation: max(0, x).
    TensorPtr relu()
    {
        auto self = shared_from_this();
        auto out = std::make_shared<Tensor>(std::max(0.0, data), std::vector<TensorPtr>{self});
        out->_backward = [weak_out = std::weak_ptr<Tensor>(out), self]()
        {
            if (const auto result = weak_out.lock())
            {
                self->_grad += (result->data > 0) * result->_grad;
            }
        };
        return out;
    }

    /// Tensor ^ Tensor.
    ///
    /// Both base and exponent are operands of the result. The exponent only
    /// receives a gradient (out * ln(base)) when the base is positive, where
    /// that derivative is a finite real number.
    TensorPtr pow(TensorPtr exp)
    {
        auto self = shared_from_this();
        auto out = std::make_shared<Tensor>(std::pow(data, exp->data), std::vector<TensorPtr>{self, exp});
        out->_backward = [weak_out = std::weak_ptr<Tensor>(out), exp, self]()
        {
            const auto result = weak_out.lock();
            if (!result)
            {
                return;
            }
            const double base = self->data;
            const double power = exp->data;
            self->_grad += power * std::pow(base, power - 1) * result->_grad;
            if (base > 0.0)
            {
                exp->_grad += result->data * std::log(base) * result->_grad;
            }
        };
        return out;
    }

    /// Tensor ^ double: the exponent is a constant, so only the base is an operand.
    TensorPtr pow(double exp)
    {
        auto self = shared_from_this();
        auto out = std::make_shared<Tensor>(std::pow(data, exp), std::vector<TensorPtr>{self});
        out->_backward = [weak_out = std::weak_ptr<Tensor>(out), exp, self]()
        {
            if (const auto result = weak_out.lock())
            {
                self->_grad += exp * std::pow(self->data, exp - 1) * result->_grad;
            }
        };
        return out;
    }

    /// Tensor + Tensor
    friend TensorPtr operator+(TensorPtr lhs, TensorPtr rhs)
    {
        auto out = std::make_shared<Tensor>(lhs->data + rhs->data, std::vector<TensorPtr>{lhs, rhs});
        out->_backward = [weak_out = std::weak_ptr<Tensor>(out), lhs, rhs]()
        {
            if (const auto result = weak_out.lock())
            {
                lhs->_grad += result->_grad;
                rhs->_grad += result->_grad;
            }
        };
        return out;
    }

    /// Tensor + double
    friend TensorPtr operator+(TensorPtr lhs, double rhs)
    {
        return lhs + std::make_shared<Tensor>(rhs);
    }

    /// double + Tensor
    friend TensorPtr operator+(double lhs, TensorPtr rhs)
    {
        return rhs + lhs;
    }

    /// Tensor * Tensor
    friend TensorPtr operator*(TensorPtr lhs, TensorPtr rhs)
    {
        auto out = std::make_shared<Tensor>(lhs->data * rhs->data, std::vector<TensorPtr>{lhs, rhs});
        out->_backward = [weak_out = std::weak_ptr<Tensor>(out), lhs, rhs]()
        {
            if (const auto result = weak_out.lock())
            {
                lhs->_grad += rhs->data * result->_grad;
                rhs->_grad += lhs->data * result->_grad;
            }
        };
        return out;
    }

    /// Tensor * double
    friend TensorPtr operator*(TensorPtr lhs, double rhs)
    {
        return lhs * std::make_shared<Tensor>(rhs);
    }

    /// double * Tensor
    friend TensorPtr operator*(double lhs, TensorPtr rhs)
    {
        return rhs * lhs;
    }

    /// -Tensor, expressed as a multiplication by the constant -1.
    friend TensorPtr operator-(TensorPtr rhs)
    {
        return rhs * std::make_shared<Tensor>(-1.0);
    }

    /// Tensor - Tensor
    friend TensorPtr operator-(TensorPtr lhs, TensorPtr rhs)
    {
        return lhs + (-rhs);
    }

    /// Tensor - double
    friend TensorPtr operator-(TensorPtr lhs, double rhs)
    {
        return lhs + (-rhs);
    }

    /// double - Tensor
    friend TensorPtr operator-(double lhs, TensorPtr rhs)
    {
        return lhs + (-rhs);
    }

    /// Tensor / Tensor, expressed as lhs * rhs^-1.
    friend TensorPtr operator/(TensorPtr lhs, TensorPtr rhs)
    {
        return lhs * rhs->pow(-1.0);
    }

    /// double / Tensor
    friend TensorPtr operator/(double lhs, TensorPtr rhs)
    {
        return std::make_shared<Tensor>(lhs) / rhs;
    }

    /// Tensor / double
    friend TensorPtr operator/(TensorPtr lhs, double rhs)
    {
        return lhs / std::make_shared<Tensor>(rhs);
    }

    /// Writes "Tensor(data=..., grad=...)" followed by a newline.
    friend std::ostream &operator<<(std::ostream &strm, const TensorPtr &t)
    {
        return strm << "Tensor(data=" << t->data << ", grad=" << t->_grad << ")\n";
    }
};
#endif