learn.c
#include <math.h>
#include <time.h>
#include "chess.h"
#include "data.h"
#if defined(UNIX)
# include <unistd.h>
#endif
/* last modified 02/26/09 */
/*
*******************************************************************************
* *
* LearnBook() is used to update the book database when a game ends for any *
* reason. It uses the global "learn_value" variable and updates the book *
* based on the moves played and the value that was "learned". *
* *
* The global learn_value has two possible sources. If a game ends with a *
* real result (win, lose, or draw), then learn_value will be set to a *
* number in the interval [-300, 300] depending on the result. If there is *
* no result (the operator exits the program before a conclusion is *
* reached via quit, end, or ^C), then we use the values from the first *
* few searches after leaving book to compute a learn_value (see the *
* LearnValue() comments later in this file). *
* *
*******************************************************************************
*/
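/*
Illustrative note (not from the original comments): a decisive result
presumably maps toward the endpoints of that interval (a win near +300,
a loss near -300, a draw near 0); the exact mapping is set where the
game result is processed, outside this file.
*/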
void LearnBook() {
int nplies = 0, thisply = 0;
unsigned char buf32[4];
int i, j, cluster;
float book_learn[64], t_learn_value;
/*
************************************************************
* *
* If there is no book file, or if book learning has *
* been disabled, there is nothing to update. Otherwise *
* we clear the learning flag so that the book is *
* updated only once per game. *
* *
************************************************************
*/
if (!book_file)
return;
if (!learning)
return;
learning = 0;
Print(128, "Updating book database\n");
/*
************************************************************
* *
* Now we build a vector of book learning results. We *
* give every book move below the last point where there *
* were alternatives 100% of the learned score. We give *
* the book move played at that point 100% of the learned *
* score as well. Then we divide the learned score by *
* the number of alternatives, and propagate this score *
* back until there was another alternative, where we do *
* this again and again until we reach the top of the *
* book tree. *
* *
************************************************************
*/
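/*
A worked example (values invented for illustration): if learn_value is
+150 (t_learn_value = 1.5) and alternatives existed at two plies
(nplies = 2), then moves before the first branch point receive 0.0,
moves from the first branch point up to (but not including) the second
receive 0.75, and moves from the second branch point on receive the
full 1.5.
*/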
t_learn_value = ((float) learn_value) / 100.0;
for (i = 0; i < 64; i++)
if (learn_nmoves[i] > 1)
nplies++;
nplies = Max(nplies, 1);
for (i = 0; i < 64; i++) {
if (learn_nmoves[i] > 1)
thisply++;
book_learn[i] = t_learn_value * (float) thisply / (float) nplies;
}
/*
************************************************************
* *
* Now find the appropriate cluster, find the key we were *
* passed, and update the resulting learn value. *
* *
************************************************************
*/
for (i = 0; i < 64 && learn_seekto[i]; i++) {
if (learn_seekto[i] > 0) {
fseek(book_file, learn_seekto[i], SEEK_SET);
fread(buf32, 4, 1, book_file);
cluster = BookIn32(buf32);
BookClusterIn(book_file, cluster, book_buffer);
for (j = 0; j < cluster; j++)
if (!(learn_key[i] ^ book_buffer[j].position))
break;
if (j >= cluster)
return;
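/* blend the new result in: a zero (unset) learn value is simply replaced, while an existing value is averaged with the new one */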
if (fabs(book_buffer[j].learn) < 0.0001)
book_buffer[j].learn = book_learn[i];
else
book_buffer[j].learn = (book_buffer[j].learn + book_learn[i]) / 2.0;
fseek(book_file, learn_seekto[i] + 4, SEEK_SET);
BookClusterOut(book_file, cluster, book_buffer);
fflush(book_file);
}
}
}
/* last modified 02/26/09 */
/*
*******************************************************************************
* *
* LearnFunction() is called to compute the adjustment value added to the *
* learn counter in the opening book. It takes three pieces of information *
* into consideration to do this: the search value, the search depth that *
* produced this value, and the rating difference (Crafty-opponent) so that *
* + numbers means Crafty is expected to win, - numbers mean Crafty is ex- *
* pected to lose. *
* *
*******************************************************************************
*/
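/*
A worked example (values invented for illustration): sv = 200 with
search_depth = 15, rating_difference = 0, and trusted_value = 1 gives
sd = 5 and rd = 5, so multiplier = rating_mult_t[5] * 5 = 0.1 * 5 = 0.5
and the function returns 100.
*/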
int LearnFunction(int sv, int search_depth, int rating_difference,
int trusted_value) {
static const float rating_mult_t[11] = { .00625, .0125, .025, .05, .075, .1,
0.15, 0.2, 0.25, 0.3, 0.35
};
static const float rating_mult_ut[11] = { .25, .2, .15, .1, .05, .025, .012,
.006, .003, .001
};
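/* note: rating_mult_ut[] has only ten initializers, so in C its final element (rd == 10, i.e. a rating edge of 1000+ points with an untrusted value) is implicitly 0.0 */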
float multiplier;
int sd, rd;
sd = Max(Min(search_depth - 10, 19), 0);
rd = Max(Min(rating_difference / 200, 5), -5) + 5;
if (trusted_value)
multiplier = rating_mult_t[rd] * sd;
else
multiplier = rating_mult_ut[rd] * sd;
sv = Max(Min(sv, 600), -600);
return ((int) (sv * multiplier));
}
/* last modified 02/26/09 */
/*
*******************************************************************************
* *
* LearnValue() is used to monitor the scores over the first N moves out of *
* book. After these moves have been played, the evaluations are then used *
* to decide whether the last book move played was a reasonable choice or *
* not. (N is set by the #define LEARN_INTERVAL definition.) *
* *
* This procedure does not directly update the book. Rather, it sets the *
* global learn_value variable to represent the goodness or badness of the *
* position where we left the opening book. This will be used later to *
* update the book in the event the game ends without any sort of actual *
* result. In a normal situation, we will base our learning on the result *
* of the game, win-lose-draw. But it is possible that the game ends before *
* the final result is known. In this case, we will use the score from the *
* learn_value we compute here so that we learn _something_ from playing a *
* game fragment. *
* *
* There are three cases to be handled. (1) If the evaluation is bad right *
* out of book, or it drops enough to be considered a bad line, then the *
* book move will have its "learn" value reduced to discourage playing this *
* move again. (2) If the evaluation is even after N moves, then the learn *
* value will be increased, but by a relatively modest amount, so that a few *
* even results will offset one bad result. (3) If the evaluation is very *
* good after N moves, the learn value will be increased by a large amount *
* so that this move will be favored the next time the game is played. *
* *
*******************************************************************************
*/
void LearnValue(int search_value, int search_depth) {
int i;
int interval;
int best_eval = -999999, best_eval_p = 0;
int worst_eval = 999999, worst_eval_p = 0;
int best_after_worst_eval = -999999, worst_after_best_eval = 999999;
/*
************************************************************
* *
* If we have not been "out of book" for N moves, all *
* we need to do is take the search evaluation for the *
* search just completed and tuck it away in the book *
* learning array (book_learn_eval[]) for use later. *
* *
************************************************************
*/
if (!book_file)
return;
if (!learning || learn_value != 0)
return;
if (moves_out_of_book <= LEARN_INTERVAL) {
if (moves_out_of_book) {
book_learn_eval[moves_out_of_book - 1] = search_value;
book_learn_depth[moves_out_of_book - 1] = search_depth;
}
}
/*
************************************************************
* *
* Check the evaluations we've seen so far. If they are *
* within reason (+/- 1/3 of a pawn or so) we simply keep *
* playing and leave the book alone. If the eval is much *
* better or worse, we need to update the learning data. *
* *
************************************************************
*/
else if (moves_out_of_book == LEARN_INTERVAL + 1) {
if (moves_out_of_book < 1)
return;
Print(128, "LearnBook() executed\n");
interval = Min(LEARN_INTERVAL, moves_out_of_book);
if (interval < 2)
return;
for (i = 0; i < interval; i++) {
if (book_learn_eval[i] > best_eval) {
best_eval = book_learn_eval[i];
best_eval_p = i;
}
if (book_learn_eval[i] < worst_eval) {
worst_eval = book_learn_eval[i];
worst_eval_p = i;
}
}
if (best_eval_p < interval - 1) {
for (i = best_eval_p; i < interval; i++)
if (book_learn_eval[i] < worst_after_best_eval)
worst_after_best_eval = book_learn_eval[i];
} else
worst_after_best_eval = book_learn_eval[interval - 1];
if (worst_eval_p < interval - 1) {
for (i = worst_eval_p; i < interval; i++)
if (book_learn_eval[i] > best_after_worst_eval)
best_after_worst_eval = book_learn_eval[i];
} else
best_after_worst_eval = book_learn_eval[interval - 1];
#if defined(DEBUG)
Print(128, "Learning analysis ...\n");
Print(128, "worst=%d best=%d baw=%d wab=%d\n", worst_eval, best_eval,
best_after_worst_eval, worst_after_best_eval);
for (i = 0; i < interval; i++)
Print(128, "%d(%d) ", book_learn_eval[i], book_learn_depth[i]);
Print(128, "\n");
#endif
/*
************************************************************
* *
* We now have the best eval for the first N moves out *
* of book, the worst eval for the first N moves out of *
* book, and the worst eval that follows the best eval. *
* This will be used to recognize the following cases of *
* results that follow a book move: *
* *
************************************************************
*/
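/*
An illustrative trace (values invented): with interval evals of +80,
+85, +90, +88 we get best_eval = 90, worst_eval = 80,
best_after_worst_eval = 90, and worst_after_best_eval = 88. Case (1)
below fires because best_eval == best_after_worst_eval, so learn_value
becomes +90.
*/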
/*
************************************************************
* *
* (1) The best score is very good, and it doesn't drop *
* after following the game further. This case detects *
* those moves in book that are "good" and should be *
* played whenever possible, while avoiding the sound *
* gambits that leave us ahead in material for a short *
* while until the score starts to drop as the gambit *
* begins to show its effect. *
* *
************************************************************
*/
if (best_eval == best_after_worst_eval) {
learn_value = best_eval;
for (i = 0; i < interval; i++)
if (learn_value == book_learn_eval[i])
search_depth = Max(search_depth, book_learn_depth[i]);
}
/*
************************************************************
* *
* (2) The worst score is bad, and doesn't improve any *
* after the worst point, indicating that the book move *
* chosen was "bad" and should be avoided in the future. *
* *
************************************************************
*/
else if (worst_eval == worst_after_best_eval) {
learn_value = worst_eval;
for (i = 0; i < interval; i++)
if (learn_value == book_learn_eval[i])
search_depth = Max(search_depth, book_learn_depth[i]);
}
/*
************************************************************
* *
* (3) Things seem even out of book and remain that way *
* for N moves. We will just average those N scores and *
* use that as an approximation. *
* *
************************************************************
*/
else {
learn_value = 0;
search_depth = 0;
for (i = 0; i < interval; i++) {
learn_value += book_learn_eval[i];
search_depth += book_learn_depth[i];
}
learn_value /= interval;
search_depth /= interval;
}
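/* a negative learn_value is passed with trusted_value = 1, so score drops out of book are scaled by rating_mult_t[] while gains use rating_mult_ut[] */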
learn_value =
LearnFunction(learn_value, search_depth,
crafty_rating - opponent_rating, learn_value < 0);
}
}