-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqlearning.c
111 lines (90 loc) · 2.43 KB
/
qlearning.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "qlearning.h"
/**
 * Allocate a Q-table with every entry initialised to zero.
 *
 * The table is stored as one flat, row-major array: the value for
 * (state, action) lives at index action + state * actions_space.
 *
 * @param observations_space Number of states (rows); must be > 0.
 * @param actions_space      Number of actions per state (columns); must be > 0.
 * @return A newly allocated table, to be released with destroy_qtable(),
 *         or NULL when a dimension is not positive, the total number of
 *         cells does not fit in an int, or memory allocation fails.
 */
struct QTable *create_qtable(int observations_space, int actions_space)
{
    if ((observations_space <= 0) || (actions_space <= 0))
    {
        return NULL;
    }

    /*
     * Cell indices are computed with int arithmetic throughout this file,
     * so the total cell count has to fit in an int. Testing with a division
     * avoids the signed overflow (undefined behaviour) that performing the
     * multiplication first would trigger.
     */
    if (observations_space > INT_MAX / actions_space)
    {
        return NULL;
    }

    struct QTable *qtable = malloc(sizeof *qtable);
    if (qtable == NULL)
    {
        return NULL;
    }

    /* calloc zero-fills, so every state/action value starts at 0. */
    qtable->data = calloc((size_t)observations_space * (size_t)actions_space,
                          sizeof *qtable->data);
    if (qtable->data == NULL)
    {
        free(qtable);
        return NULL;
    }

    qtable->observations_space = observations_space;
    qtable->actions_space = actions_space;
    return qtable;
}
/**
 * Release a Q-table together with its value array.
 *
 * Passing NULL is allowed and does nothing. The pointer is received by
 * value, so this function cannot clear the caller's copy: the caller must
 * not use the pointer again after this call.
 *
 * @param qtable Table to free, or NULL.
 */
void destroy_qtable(struct QTable *qtable)
{
    if (qtable == NULL)
    {
        return;
    }

    /* free(NULL) is a no-op, so the value array needs no separate guard. */
    free(qtable->data);
    free(qtable);
}
/**
 * Store the value of one (state, action) cell.
 *
 * The call is silently ignored when the table is NULL, has no value array,
 * or when state/action fall outside the table's dimensions, so a bad index
 * can never write outside the allocation.
 *
 * @param qtable Table to modify.
 * @param state  Row index, expected in [0, observations_space).
 * @param action Column index, expected in [0, actions_space).
 * @param value  Value to store.
 */
void set_state_action_value(struct QTable *qtable, int state, int action, float value)
{
    if ((qtable == NULL) || (qtable->data == NULL))
    {
        return;
    }
    if ((state < 0) || (state >= qtable->observations_space))
    {
        return;
    }
    if ((action < 0) || (action >= qtable->actions_space))
    {
        return;
    }

    /* Row-major layout; size_t arithmetic keeps the offset free of int overflow. */
    const size_t index = (size_t)state * (size_t)qtable->actions_space + (size_t)action;
    qtable->data[index] = value;
}
/**
 * Read the value of one (state, action) cell.
 *
 * @param qtable Table to read.
 * @param state  Row index, expected in [0, observations_space).
 * @param action Column index, expected in [0, actions_space).
 * @return The stored value, or the sentinel -999 when the table is NULL,
 *         has no value array, or state/action are out of range. Note that
 *         -999 is an in-band sentinel: a cell may legitimately hold it.
 */
float get_state_action_value(struct QTable *qtable, int state, int action)
{
    if ((qtable == NULL) || (qtable->data == NULL))
    {
        return -999.0f;
    }
    if ((state < 0) || (state >= qtable->observations_space))
    {
        return -999.0f;
    }
    if ((action < 0) || (action >= qtable->actions_space))
    {
        return -999.0f;
    }

    /* Row-major layout; size_t arithmetic keeps the offset free of int overflow. */
    const size_t index = (size_t)state * (size_t)qtable->actions_space + (size_t)action;
    return qtable->data[index];
}
/**
 * Return the largest action value stored for a state.
 *
 * @param qtable Table to read.
 * @param state  Row index, expected in [0, observations_space).
 * @return The maximum value in the state's row, or the sentinel -999 when
 *         the table is NULL, has no value array, has no actions, or state
 *         is out of range. Note that -999 is an in-band sentinel: a row's
 *         maximum may legitimately equal it.
 */
float get_max_state_value(struct QTable *qtable, int state)
{
    if ((qtable == NULL) || (qtable->data == NULL))
    {
        return -999.0f;
    }
    if ((state < 0) || (state >= qtable->observations_space) || (qtable->actions_space <= 0))
    {
        return -999.0f;
    }

    /* Start of this state's row in the flat, row-major array. */
    const float *row = qtable->data + (size_t)state * (size_t)qtable->actions_space;

    float best_value = row[0];
    for (int i = 1; i < qtable->actions_space; ++i)
    {
        if (row[i] > best_value)
        {
            best_value = row[i];
        }
    }
    return best_value;
}
/**
 * Return the index of the action with the largest value for a state.
 *
 * Ties are resolved in favour of the lowest action index, because a later
 * action only replaces the current best when its value is strictly greater.
 *
 * @param qtable Table to read.
 * @param state  Row index, expected in [0, observations_space).
 * @return The best action index in [0, actions_space), or -999 when the
 *         table is NULL, has no value array, has no actions, or state is
 *         out of range.
 */
int get_best_action(struct QTable *qtable, int state)
{
    if ((qtable == NULL) || (qtable->data == NULL))
    {
        return -999;
    }
    if ((state < 0) || (state >= qtable->observations_space) || (qtable->actions_space <= 0))
    {
        return -999;
    }

    /* Start of this state's row in the flat, row-major array. */
    const float *row = qtable->data + (size_t)state * (size_t)qtable->actions_space;

    float best_value = row[0];
    int best_action = 0;
    for (int i = 1; i < qtable->actions_space; ++i)
    {
        if (row[i] > best_value)
        {
            best_value = row[i];
            best_action = i;
        }
    }
    return best_action;
}
/**
 * Print the whole table to standard output.
 *
 * Each state is written on its own line; every value is formatted with
 * "%5f" followed by a single space. A NULL table prints nothing.
 *
 * @param qtable Table to print, or NULL.
 */
void display_qtable(struct QTable *qtable)
{
    if (qtable != NULL)
    {
        const int state_count = qtable->observations_space;
        const int action_count = qtable->actions_space;

        for (int state = 0; state < state_count; ++state)
        {
            /* Offset of this state's first value in the flat array. */
            const int row_start = state * action_count;

            for (int action = 0; action < action_count; ++action)
            {
                printf("%5f ", qtable->data[row_start + action]);
            }
            putchar('\n');
        }
    }
}