-
Notifications
You must be signed in to change notification settings - Fork 0
/
MCTS.c
90 lines (64 loc) · 2.35 KB
/
MCTS.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#include "MCTS.h"
#include "Position.h"
#include "Timer.h"
#include "TreeNode.h"
#include <stdio.h>
struct Result mcts_best_move(const struct Settings* settings) {
const struct Position* position = settings->position;
if (position_filling(position)) {
struct Result result;
result.move = position_filling_move(position);
double e = position_expected(position), r = 75 + e, b = 75 - e;
sprintf(result.log, "Using filling move, expected = (%.0f, %.0f)", r, b);
result.delta_time = 0.0;
return result;
}
tree_node_precompute();
struct timeval start = timer_now();
tree_nodes_store_init();
int32_t max_iterations = settings->max_iterations;
struct TreeNode* root = tree_node_create_root(position);
int32_t iteration, max_level = 0;
for (iteration = 0; iteration < max_iterations; ++iteration) {
struct Position p = *position;
struct TreeNode* node = root;
struct TreeNode* first_move_node = NULL;
while (tree_node_fully_expanded(node)) {
node = tree_node_select(node);
if (!tree_node_fully_expanded(node)) {
break;
}
node = tree_node_select(node);
position_play(&p, node->action, node->parent->action);
if (first_move_node == NULL) {
first_move_node = node;
}
}
if (first_move_node && 2 * first_move_node->visits > max_iterations) {
break;
}
if (!tree_node_leaf(node)) {
node = tree_node_expand(node, &p);
}
int32_t level = p.turns - position->turns;
max_level = max_level > level ? max_level : level;
int32_t selected_place = node->type == PLACE ? node->action : -1;
double score = position_playout(&p, selected_place);
for (; node; node = node->parent) {
tree_node_update(node, score);
__builtin_prefetch(node->parent, 1, 1);
}
if (timer_delta_time_since(&start) >= settings->max_time) {
break;
}
}
struct Result result;
struct TreeNode *place_node = tree_node_most_visited(root),
*value_node = tree_node_most_visited(place_node);
result.move = CREATE_MOVE(value_node->action, place_node->action);
sprintf(result.log, "i=%d (%.2f, %d) -> (%.2f, %d) l=%d", iteration,
place_node->value, place_node->visits, value_node->value,
value_node->visits, max_level);
result.delta_time = timer_delta_time_since(&start);
return result;
}