brain.py
"""A small DQN-style agent brain: an online Q-network plus a target
network, built with tflearn on top of TensorFlow 1.x."""
import tensorflow as tf
import tflearn


class Brain(object):
    def __init__(self, num_actions=2, learning_rate=0.001):
        self.session = tf.Session()

        # Online Q-network: maps a 10-dimensional state to one Q-value per action.
        self.s = tf.placeholder(tf.float32, [None, 10])
        net = tflearn.fully_connected(self.s, 10, activation='relu')
        self.q_values = tflearn.fully_connected(net, num_actions)
        network_params = tf.trainable_variables()

        # Target network with the same architecture, used for stable TD targets.
        self.st = tf.placeholder(tf.float32, [None, 10])
        target_net = tflearn.fully_connected(self.st, 10, activation='relu')
        self.target_q_values = tflearn.fully_connected(target_net, num_actions)
        target_network_params = tf.trainable_variables()[len(network_params):]

        # Op that copies the online network's weights into the target network.
        self.reset_target_network_params = [
            target_network_params[i].assign(network_params[i])
            for i in range(len(target_network_params))]

        # Training inputs: one-hot encoded actions and the TD targets y.
        self.a = tf.placeholder(tf.float32, [None, num_actions])
        self.y = tf.placeholder(tf.float32, [None])

        # Q-value of the taken action, selected via the one-hot mask.
        # (tf.mul/reduction_indices were removed in TF 1.0; use
        # tf.multiply and axis instead.)
        action_q_values = tf.reduce_sum(
            tf.multiply(self.q_values, self.a), axis=1)
        cost = tflearn.mean_square(action_q_values, self.y)
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        self.grad_update = optimizer.minimize(cost, var_list=network_params)

        # tf.initialize_all_variables() is deprecated; use the global initializer.
        self.session.run(tf.global_variables_initializer())
        self.session.run(self.reset_target_network_params)
        self.t = 0

    def get_action(self, vision):
        # Q-values from the online network for a single state.
        readout = self.q_values.eval(
            session=self.session,
            feed_dict={self.s: [vision]}
        )[0]
        return readout.tolist()

    def get_target_action(self, vision):
        # Q-values from the target network for a single state.
        readout = self.target_q_values.eval(
            session=self.session,
            feed_dict={self.st: [vision]}
        )[0]
        return readout.tolist()
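
A minimal usage sketch, not part of the original file: it assumes a 10-dimensional state vector, as the placeholders above require, and the random observation is purely illustrative.

# Hypothetical usage: build a Brain, query Q-values for one state,
# and pick the greedy action.
import numpy as np

brain = Brain(num_actions=2, learning_rate=0.001)
state = np.random.rand(10).tolist()   # placeholder for a real observation
q_values = brain.get_action(state)    # one Q-value per action
best_action = int(np.argmax(q_values))
print(q_values, best_action)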