learn.c
#include <math.h>
#include <time.h>
#include "chess.h"
#include "data.h"
#if defined(UNIX)
# include <unistd.h>
#endif
/* last modified 02/26/09 */
/*
*******************************************************************************
* *
* LearnBook() is used to update the book database when a game ends for any *
* reason. It uses the global "learn_value" variable and updates the book *
* based on the moves played and the value that was "learned". *
* *
* The global learn_value has two possible sources. If a game ends with a *
* real result (win, lose, or draw), then learn_value will be set to a *
* number in the interval [-300, 300] depending on the result. If there is *
* no result (the operator exits the program before a conclusion is *
* reached via quit, end, or ^C), then we use the values from the first *
* few searches after leaving book to compute a learn_value (see the *
* LearnValue() comments later in this file). *
* *
*******************************************************************************
*/
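/*
Illustrative note (not from the original comments): a decisive result
presumably maps toward the endpoints of that interval (a win near +300,
a loss near -300, a draw near 0); the exact mapping is set where the
game result is processed, outside this file.
*/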
void LearnBook() {
int nplies = 0, thisply = 0;
unsigned char buf32[4];
int i, j, cluster;
float book_learn[64], t_learn_value;
/*
************************************************************
* *
* If there is no book file, or if book learning has *
* been disabled, there is nothing to update. Otherwise *
* we clear the learning flag so that the book is *
* updated only once per game. *
* *
************************************************************
*/
if (!book_file)
return;
if (!learning)
return;
learning = 0;
Print(128, "Updating book database\n");
/*
************************************************************
* *
* Now we build a vector of book learning results. We *
* give every book move below the last point where there *
* were alternatives 100% of the learned score. We give *
* the book move played at that point 100% of the learned *
* score as well. Then we divide the learned score by *
* the number of alternatives, and propagate this score *
* back until there was another alternative, where we do *
* this again and again until we reach the top of the *
* book tree. *
* *
************************************************************
*/
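/*
A worked example (values invented for illustration): if learn_value is
+150 (t_learn_value = 1.5) and alternatives existed at two plies
(nplies = 2), then moves before the first branch point receive 0.0,
moves from the first branch point up to (but not including) the second
receive 0.75, and moves from the second branch point on receive the
full 1.5.
*/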
t_learn_value = ((float) learn_value) / 100.0;
for (i = 0; i < 64; i++)
if (learn_nmoves[i] > 1)
nplies++;
nplies = Max(nplies, 1);
for (i = 0; i < 64; i++) {
if (learn_nmoves[i] > 1)
thisply++;
book_learn[i] = t_learn_value * (float) thisply / (float) nplies;
}
/*
************************************************************
* *
* Now find the appropriate cluster, find the key we were *
* passed, and update the resulting learn value. *
* *
************************************************************
*/
for (i = 0; i < 64 && learn_seekto[i]; i++) {
if (learn_seekto[i] > 0) {
fseek(book_file, learn_seekto[i], SEEK_SET);
fread(buf32, 4, 1, book_file);
cluster = BookIn32(buf32);
BookClusterIn(book_file, cluster, book_buffer);
for (j = 0; j < cluster; j++)
if (!(learn_key[i] ^ book_buffer[j].position))
break;
if (j >= cluster)
return;
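/* blend the new result in: a zero (unset) learn value is simply replaced, while an existing value is averaged with the new one */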
if (fabs(book_buffer[j].learn) < 0.0001)
book_buffer[j].learn = book_learn[i];
else
book_buffer[j].learn = (book_buffer[j].learn + book_learn[i]) / 2.0;
fseek(book_file, learn_seekto[i] + 4, SEEK_SET);
BookClusterOut(book_file, cluster, book_buffer);
fflush(book_file);
}
}
}
/* last modified 02/26/09 */
/*
*******************************************************************************
* *
* LearnFunction() is called to compute the adjustment value added to the *
* learn counter in the opening book. It takes three pieces of information *
* into consideration to do this: the search value, the search depth that *
* produced this value, and the rating difference (Crafty-opponent) so that *
* + numbers means Crafty is expected to win, - numbers mean Crafty is ex- *
* pected to lose. *
* *
*******************************************************************************
*/
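/*
A worked example (values invented for illustration): sv = 200 with
search_depth = 15, rating_difference = 0, and trusted_value = 1 gives
sd = 5 and rd = 5, so multiplier = rating_mult_t[5] * 5 = 0.1 * 5 = 0.5
and the function returns 100.
*/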
int LearnFunction(int sv, int search_depth, int rating_difference,
int trusted_value) {
static const float rating_mult_t[11] = { .00625, .0125, .025, .05, .075, .1,
0.15, 0.2, 0.25, 0.3, 0.35
};
static const float rating_mult_ut[11] = { .25, .2, .15, .1, .05, .025, .012,
.006, .003, .001
};
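/* note: rating_mult_ut[] has only ten initializers, so in C its final element (rd == 10, i.e. a rating edge of 1000+ points with an untrusted value) is implicitly 0.0 */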
float multiplier;
int sd, rd;
sd = Max(Min(search_depth - 10, 19), 0);
rd = Max(Min(rating_difference / 200, 5), -5) + 5;
if (trusted_value)
multiplier = rating_mult_t[rd] * sd;
else
multiplier = rating_mult_ut[rd] * sd;
sv = Max(Min(sv, 600), -600);
return ((int) (sv * multiplier));
}
/* last modified 02/26/09 */
/*
*******************************************************************************
* *
* LearnValue() is used to monitor the scores over the first N moves out of *
* book. After these moves have been played, the evaluations are then used *
* to decide whether the last book move played was a reasonable choice or *
* not. (N is set by the #define LEARN_INTERVAL definition.) *
* *
* This procedure does not directly update the book. Rather, it sets the *
* global learn_value variable to represent the goodness or badness of the *
* position where we left the opening book. This will be used later to *
* update the book in the event the game ends without any sort of actual *
* result. In a normal situation, we will base our learning on the result *
* of the game, win-lose-draw. But it is possible that the game ends before *
* the final result is known. In this case, we will use the score from the *
* learn_value we compute here so that we learn _something_ from playing a *
* game fragment. *
* *
* There are three cases to be handled. (1) If the evaluation is bad right *
* out of book, or it drops enough to be considered a bad line, then the *
* book move will have its "learn" value reduced to discourage playing this *
* move again. (2) If the evaluation is even after N moves, then the learn *
* value will be increased, but by a relatively modest amount, so that a few *
* even results will offset one bad result. (3) If the evaluation is very *
* good after N moves, the learn value will be increased by a large amount *
* so that this move will be favored the next time the game is played. *
* *
*******************************************************************************
*/
void LearnValue(int search_value, int search_depth) {
int i;
int interval;
int best_eval = -999999, best_eval_p = 0;
int worst_eval = 999999, worst_eval_p = 0;
int best_after_worst_eval = -999999, worst_after_best_eval = 999999;
/*
************************************************************
* *
* If we have not been "out of book" for N moves, all *
* we need to do is take the search evaluation for the *
* search just completed and tuck it away in the book *
* learning array (book_learn_eval[]) for use later. *
* *
************************************************************
*/
if (!book_file)
return;
if (!learning || learn_value != 0)
return;
if (moves_out_of_book <= LEARN_INTERVAL) {
if (moves_out_of_book) {
book_learn_eval[moves_out_of_book - 1] = search_value;
book_learn_depth[moves_out_of_book - 1] = search_depth;
}
}
/*
************************************************************
* *
* Check the evaluations we've seen so far. If they are *
* within reason (+/- 1/3 of a pawn or so) we simply keep *
* playing and leave the book alone. If the eval is much *
* better or worse, we need to update the learning data. *
* *
************************************************************
*/
else if (moves_out_of_book == LEARN_INTERVAL + 1) {
if (moves_out_of_book < 1)
return;
Print(128, "LearnBook() executed\n");
interval = Min(LEARN_INTERVAL, moves_out_of_book);
if (interval < 2)
return;
for (i = 0; i < interval; i++) {
if (book_learn_eval[i] > best_eval) {
best_eval = book_learn_eval[i];
best_eval_p = i;
}
if (book_learn_eval[i] < worst_eval) {
worst_eval = book_learn_eval[i];
worst_eval_p = i;
}
}
if (best_eval_p < interval - 1) {
for (i = best_eval_p; i < interval; i++)
if (book_learn_eval[i] < worst_after_best_eval)
worst_after_best_eval = book_learn_eval[i];
} else
worst_after_best_eval = book_learn_eval[interval - 1];
if (worst_eval_p < interval - 1) {
for (i = worst_eval_p; i < interval; i++)
if (book_learn_eval[i] > best_after_worst_eval)
best_after_worst_eval = book_learn_eval[i];
} else
best_after_worst_eval = book_learn_eval[interval - 1];
#if defined(DEBUG)
Print(128, "Learning analysis ...\n");
Print(128, "worst=%d best=%d baw=%d wab=%d\n", worst_eval, best_eval,
best_after_worst_eval, worst_after_best_eval);
for (i = 0; i < interval; i++)
Print(128, "%d(%d) ", book_learn_eval[i], book_learn_depth[i]);
Print(128, "\n");
#endif
/*
************************************************************
* *
* We now have the best eval for the first N moves out *
* of book, the worst eval for the first N moves out of *
* book, and the worst eval that follows the best eval. *
* This will be used to recognize the following cases of *
* results that follow a book move: *
* *
************************************************************
*/
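/*
An illustrative trace (values invented): with interval evals of +80,
+85, +90, +88 we get best_eval = 90, worst_eval = 80,
best_after_worst_eval = 90, and worst_after_best_eval = 88. Case (1)
below fires because best_eval == best_after_worst_eval, so learn_value
becomes +90.
*/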
/*
************************************************************
* *
* (1) The best score is very good, and it doesn't drop *
* after following the game further. This case detects *
* those moves in book that are "good" and should be *
* played whenever possible, while avoiding the sound *
* gambits that leave us ahead in material for a short *
* while until the score starts to drop as the gambit *
* begins to show its effect. *
* *
************************************************************
*/
if (best_eval == best_after_worst_eval) {
learn_value = best_eval;
for (i = 0; i < interval; i++)
if (learn_value == book_learn_eval[i])
search_depth = Max(search_depth, book_learn_depth[i]);
}
/*
************************************************************
* *
* (2) The worst score is bad, and doesn't improve any *
* after the worst point, indicating that the book move *
* chosen was "bad" and should be avoided in the future. *
* *
************************************************************
*/
else if (worst_eval == worst_after_best_eval) {
learn_value = worst_eval;
for (i = 0; i < interval; i++)
if (learn_value == book_learn_eval[i])
search_depth = Max(search_depth, book_learn_depth[i]);
}
/*
************************************************************
* *
* (3) Things seem even out of book and remain that way *
* for N moves. We will just average those N scores and *
* use that as an approximation. *
* *
************************************************************
*/
else {
learn_value = 0;
search_depth = 0;
for (i = 0; i < interval; i++) {
learn_value += book_learn_eval[i];
search_depth += book_learn_depth[i];
}
learn_value /= interval;
search_depth /= interval;
}
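/* a negative learn_value is passed with trusted_value = 1, so score drops out of book are scaled by rating_mult_t[] while gains use rating_mult_ut[] */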
learn_value =
LearnFunction(learn_value, search_depth,
crafty_rating - opponent_rating, learn_value < 0);
}
}