diff --git a/.gitignore b/.gitignore index 36299fad..c52dfd29 100644 --- a/.gitignore +++ b/.gitignore @@ -128,3 +128,9 @@ main_config.py # avoid pushing log-files generated by uci-communication CrazyAra-log.txt +score-log.txt + +# avoid pushing dataset files used for visualization +crazyara_lichess_dataset.pgn +crazyara_lichess_dataset_stats.csv + diff --git a/CrazyAra-log-bug.txt b/CrazyAra-log-bug.txt deleted file mode 100644 index 66acb680..00000000 --- a/CrazyAra-log-bug.txt +++ /dev/null @@ -1,196 +0,0 @@ -> uci -< id name CrazyAra 0.2.0 -< id author Johannes Czech, Moritz Willig, Alena Beyer et al. -< option name UCI_Variant type combo default crazyhouse var crazyhouse -< option name context type combo default cpu var cpu var gpu -< option name use_raw_network type check default false -< option name threads type spin default 16 min 1 max 4096 -< option name batch_size type spin default 8 min 1 max 4096 -< option name playouts_empty_pockets type spin default 8192 min 56 max 8192 -< option name playouts_filled_pockets type spin default 8192 min 56 max 8192 -< option name centi_cpuct type spin default 300 min 1 max 500 -< option name centi_dirichlet_epsilon type spin default 10 min 0 max 100 -< option name centi_dirichlet_alpha type spin default 20 min 0 max 100 -< option name max_search_depth type spin default 40 min 1 max 100 -< option name centi_temperature type spin default 0 min 0 max 100 -< option name centi_clip_quantil type spin default 0 min 0 max 100 -< option name virtual_loss type spin default 3 min 0 max 10 -< option name centi_q_value_weight type spin default 70 min 0 max 100 -< option name threshold_time_for_raw_net_ms type spin default 100 min 1 max 300000 -< option name move_overhead_ms type spin default 300 min 0 max 60000 -< option name moves_left type spin default 40 min 10 max 320 -< option name extend_time_on_bad_position type check default true -< option name max_move_num_to_reduce_movetime type spin default 0 min 0 max 120 -< option name 
check_mate_in_one type check default false -< option name enable_timeout type check default false -< option name verbose type check default false -< uciok -> setoption name batch_size value 8 -< info string Updated option batch_size to 8 -> setoption name centi_clip_quantil value 0 -< info string Updated option centi_clip_quantil to 0 -> setoption name centi_cpuct value 300 -< info string Updated option centi_cpuct to 300 -> setoption name centi_dirichlet_alpha value 20 -< info string Updated option centi_dirichlet_alpha to 20 -> setoption name centi_dirichlet_epsilon value 10 -< info string Updated option centi_dirichlet_epsilon to 10 -> setoption name centi_q_value_weight value 70 -< info string Updated option centi_q_value_weight to 70 -> setoption name centi_temperature value 0 -< info string Updated option centi_temperature to 0 -> setoption name check_mate_in_one value false -< info string Updated option check_mate_in_one to false -> setoption name context value gpu -< info string Updated option context to gpu -> setoption name enable_timeout value false -< info string Updated option enable_timeout to false -> setoption name extend_time_on_bad_position value true -< info string Updated option extend_time_on_bad_position to true -> setoption name max_move_num_to_reduce_movetime value 0 -< info string Updated option max_move_num_to_reduce_movetime to 0 -> setoption name max_search_depth value 40 -< info string Updated option max_search_depth to 40 -> setoption name move_overhead_ms value 300 -< info string Updated option move_overhead_ms to 300 -> setoption name moves_left value 40 -< info string Updated option moves_left to 40 -> setoption name playouts_empty_pockets value 8192 -< info string Updated option playouts_empty_pockets to 8192 -> setoption name playouts_filled_pockets value 8192 -< info string Updated option playouts_filled_pockets to 8192 -> setoption name threads value 16 -< info string Updated option threads to 16 -> setoption name 
threshold_time_for_raw_net_ms value 100 -< info string Updated option threshold_time_for_raw_net_ms to 100 -> setoption name use_raw_network value false -< info string Updated option use_raw_network to false -> setoption name verbose value true -< info string Updated option verbose to true -> setoption name virtual_loss value 3 -< info string Updated option virtual_loss to 3 -> isready -< readyok -> setoption name UCI_Variant value crazyhouse -< info string Updated option UCI_Variant to crazyhouse -> ucinewgame -> position startpos -> isready -< readyok -> go wtime 300000 btime 300000 movestogo 40 -< info string Time for this move is 7200ms -< bestmove e2e4 -> position startpos moves e2e4 e7e5 -> isready -< readyok -> go wtime 292680 btime 288064 movestogo 39 -< info string Time for this move is 7204ms -< bestmove g1f3 -> position startpos moves e2e4 e7e5 g1f3 b8c6 -> isready -< readyok -> go wtime 285363 btime 286437 movestogo 38 -< info string Time for this move is 7209ms -< bestmove f1c4 -> position startpos moves e2e4 e7e5 g1f3 b8c6 f1c4 f8c5 -> isready -< readyok -> go wtime 278047 btime 262495 movestogo 37 -< info string Time for this move is 7214ms -< bestmove e1g1 -> position startpos moves e2e4 e7e5 g1f3 b8c6 f1c4 f8c5 e1g1 g8f6 -> isready -< readyok -> go wtime 270797 btime 245488 movestogo 36 -< info string Time for this move is 7222ms -< bestmove b1c3 -> position startpos moves e2e4 e7e5 g1f3 b8c6 f1c4 f8c5 e1g1 g8f6 b1c3 e8g8 -> isready -< readyok -> go wtime 263465 btime 230545 movestogo 35 -< info string Time for this move is 7227ms -< bestmove d2d3 -> position startpos moves e2e4 e7e5 g1f3 b8c6 f1c4 f8c5 e1g1 g8f6 b1c3 e8g8 d2d3 d7d6 -> isready -< readyok -> go wtime 256190 btime 228919 movestogo 34 -< info string Time for this move is 7235ms -< bestmove c1g5 -> position startpos moves e2e4 e7e5 g1f3 b8c6 f1c4 f8c5 e1g1 g8f6 b1c3 e8g8 d2d3 d7d6 c1g5 h7h6 -> isready -< readyok -> go wtime 248913 btime 191137 movestogo 33 -< info string Time for this 
move is 7242ms -< bestmove g5f6 -> position startpos moves e2e4 e7e5 g1f3 b8c6 f1c4 f8c5 e1g1 g8f6 b1c3 e8g8 d2d3 d7d6 c1g5 h7h6 g5f6 d8f6 -> isready -< readyok -> go wtime 241565 btime 189984 movestogo 32 -< info string Time for this move is 7248ms -< bestmove c3d5 -> position startpos moves e2e4 e7e5 g1f3 b8c6 f1c4 f8c5 e1g1 g8f6 b1c3 e8g8 d2d3 d7d6 c1g5 h7h6 g5f6 d8f6 c3d5 f6d8 -> isready -< readyok -> go wtime 234288 btime 183547 movestogo 31 -< info string Time for this move is 7257ms -< bestmove c2c3 -> position startpos moves e2e4 e7e5 g1f3 b8c6 f1c4 f8c5 e1g1 g8f6 b1c3 e8g8 d2d3 d7d6 c1g5 h7h6 g5f6 d8f6 c3d5 f6d8 c2c3 c8g4 -> isready -< readyok -> go wtime 226947 btime 150603 movestogo 30 -< info string Time for this move is 7264ms -< bestmove h2h3 -> position startpos moves e2e4 e7e5 g1f3 b8c6 f1c4 f8c5 e1g1 g8f6 b1c3 e8g8 d2d3 d7d6 c1g5 h7h6 g5f6 d8f6 c3d5 f6d8 c2c3 c8g4 h2h3 g4e6 -> isready -< readyok -> go wtime 219609 btime 143635 movestogo 29 -< info string Time for this move is 7272ms -< bestmove b2b4 -> position startpos moves e2e4 e7e5 g1f3 b8c6 f1c4 f8c5 e1g1 g8f6 b1c3 e8g8 d2d3 d7d6 c1g5 h7h6 g5f6 d8f6 c3d5 f6d8 c2c3 c8g4 h2h3 g4e6 b2b4 e6d5 -> isready -< readyok -> go wtime 212223 btime 142247 movestogo 28 -< info string Time for this move is 7279ms -< bestmove c4d5 -> position startpos moves e2e4 e7e5 g1f3 b8c6 f1c4 f8c5 e1g1 g8f6 b1c3 e8g8 d2d3 d7d6 c1g5 h7h6 g5f6 d8f6 c3d5 f6d8 c2c3 c8g4 h2h3 g4e6 b2b4 e6d5 c4d5 c5f2 -> isready -< readyok -> go wtime 204863 btime 138200 movestogo 27 -< info string Time for this move is 7287ms -< bestmove f1f2 -> position startpos moves e2e4 e7e5 g1f3 b8c6 f1c4 f8c5 e1g1 g8f6 b1c3 e8g8 d2d3 d7d6 c1g5 h7h6 g5f6 d8f6 c3d5 f6d8 c2c3 c8g4 h2h3 g4e6 b2b4 e6d5 c4d5 c5f2 f1f2 P@e3 -> isready -< readyok -> go wtime 197514 btime 130432 movestogo 26 -< info string Time for this move is 7296ms -< Traceback (most recent call last): - File "./crazyara.py", line 439, in main - perform_action(cmd_list) - File 
"./crazyara.py", line 246, in perform_action - value, selected_move, confidence, _ = mcts_agent.perform_action(gamestate) - File "/media/queensgambit/Volume/Deep_Learning/projects/CrazyAra/DeepCrazyhouse/src/domain/agent/player/MCTSAgent.py", line 461, in perform_action - value, selected_move, confidence, selected_child_idx = super().perform_action(state) - File "/media/queensgambit/Volume/Deep_Learning/projects/CrazyAra/DeepCrazyhouse/src/domain/agent/player/_Agent.py", line 32, in perform_action - value, legal_moves, self.p_vec_small = self.evaluate_board_state(state) - File "/media/queensgambit/Volume/Deep_Learning/projects/CrazyAra/DeepCrazyhouse/src/domain/agent/player/MCTSAgent.py", line 256, in evaluate_board_state - p_vec_small, p_vec_small.shape, state_in)) -Exception: Legal move list [Move.from_uci('d5f7'), Move.from_uci('d5e6'), Move.from_uci('d5c6'), Move.from_uci('d5c4'), Move.from_uci('d5b3'), Move.from_uci('f3g5'), Move.from_uci('f3e5'), Move.from_uci('f3h4'), Move.from_uci('f3d4'), Move.from_uci('f3h2'), Move.from_uci('f3d2'), Move.from_uci('f3e1'), Move.from_uci('f2e2'), Move.from_uci('f2d2'), Move.from_uci('f2c2'), Move.from_uci('f2b2'), Move.from_uci('f2f1'), Move.from_uci('g1h2'), Move.from_uci('g1h1'), Move.from_uci('g1f1'), Move.from_uci('d1a4'), Move.from_uci('d1b3'), Move.from_uci('d1e2'), Move.from_uci('d1d2'), Move.from_uci('d1c2'), Move.from_uci('d1f1'), Move.from_uci('d1e1'), Move.from_uci('d1c1'), Move.from_uci('d1b1'), Move.from_uci('a1c1'), Move.from_uci('a1b1'), Move.from_uci('b4b5'), Move.from_uci('h3h4'), Move.from_uci('d3d4'), Move.from_uci('c3c4'), Move.from_uci('g2g3'), Move.from_uci('a2a3'), Move.from_uci('g2g4'), Move.from_uci('a2a4'), Move.from_uci('N@b1'), Move.from_uci('B@b1'), Move.from_uci('N@c1'), Move.from_uci('B@c1'), Move.from_uci('N@e1'), Move.from_uci('B@e1'), Move.from_uci('N@f1'), Move.from_uci('B@f1'), Move.from_uci('N@h1'), Move.from_uci('B@h1'), Move.from_uci('N@b2'), Move.from_uci('B@b2'), 
Move.from_uci('N@c2'), Move.from_uci('B@c2'), Move.from_uci('N@d2'), Move.from_uci('B@d2'), Move.from_uci('N@e2'), Move.from_uci('B@e2'), Move.from_uci('N@h2'), Move.from_uci('B@h2'), Move.from_uci('N@a3'), Move.from_uci('B@a3'), Move.from_uci('N@b3'), Move.from_uci('B@b3'), Move.from_uci('N@g3'), Move.from_uci('B@g3'), Move.from_uci('N@a4'), Move.from_uci('B@a4'), Move.from_uci('N@c4'), Move.from_uci('B@c4'), Move.from_uci('N@d4'), Move.from_uci('B@d4'), Move.from_uci('N@f4'), Move.from_uci('B@f4'), Move.from_uci('N@g4'), Move.from_uci('B@g4'), Move.from_uci('N@h4'), Move.from_uci('B@h4'), Move.from_uci('N@a5'), Move.from_uci('B@a5'), Move.from_uci('N@b5'), Move.from_uci('B@b5'), Move.from_uci('N@c5'), Move.from_uci('B@c5'), Move.from_uci('N@f5'), Move.from_uci('B@f5'), Move.from_uci('N@g5'), Move.from_uci('B@g5'), Move.from_uci('N@h5'), Move.from_uci('B@h5'), Move.from_uci('N@a6'), Move.from_uci('B@a6'), Move.from_uci('N@b6'), Move.from_uci('B@b6'), Move.from_uci('N@e6'), Move.from_uci('B@e6'), Move.from_uci('N@f6'), Move.from_uci('B@f6'), Move.from_uci('N@g6'), Move.from_uci('B@g6'), Move.from_uci('N@d7'), Move.from_uci('B@d7'), Move.from_uci('N@e7'), Move.from_uci('B@e7'), Move.from_uci('N@h7'), Move.from_uci('B@h7'), Move.from_uci('N@b8'), Move.from_uci('B@b8'), Move.from_uci('N@c8'), Move.from_uci('B@c8'), Move.from_uci('N@e8'), Move.from_uci('B@e8'), Move.from_uci('N@h8'), Move.from_uci('B@h8')] with length 113 is uncompatible to policy vector [0.01185716 0.00592858 0.00790477 0.00691667 0.00691667 0.00592858 - 0.00592858 0.00592858 0.00592858 0.00592858 0.00592858 0.00592858 - 0.01284525 0.00988096 0.01383335 0.01185716 0.01284525 0.00790477 - 0.00790477 0.00691667 0.00988096 0.11762996 0.01580954 0.00988096 - 0.00988096 0.01086906 0.00790477 0.00691667 0.00691667 0.00691667 - 0.00691667 0.00691667 0.00691667 0.00592858 0.00790477 0.00691667 - 0.00691667 0.00691667 0.00592858 0.00592858 0.00691667 0.00889287 - 0.00691667 0.01185716 0.00790477 0.00691667 
0.00790477 0.00691667 - 0.00691667 0.00691667 0.00691667 0.00691667 0.00691667 0.00988096 - 0.00691667 0.00691667 0.00691667 0.00889287 0.00592858 0.00691667 - 0.00691667 0.01086906 0.00790477 0.01086906 0.00691667 0.00691667 - 0.00790477 0.00988096 0.00592858 0.00592858 0.00592858 0.00592858 - 0.00988096 0.00790477 0.00691667 0.01482145 0.00691667 0.00691667 - 0.00592858 0.00691667 0.00691667 0.00691667 0.01086906 0.00988096 - 0.00790477 0.00691667 0.00889287 0.01086906 0.00691667 0.00691667 - 0.00691667 0.00790477 0.00790477 0.01185716 0.00592858 0.00691667 - 0.01086906 0.01482145 0.00691667 0.00691667 0.00691667 0.00691667 - 0.00691667 0.00790477 0.00691667 0.00790477 0.00691667 0.00691667 - 0.00691667 0.00790477 0.00691667 0.00691667] with shape (112,) for board state r2q1rk1/ppp2pp1/2np3p/3Bp3/1P2P3/2PPpN1P/P4RP1/R2Q2K1[BBNbn] w - - 1 15 - diff --git a/DeepCrazyhouse/src/preprocessing/PGN2PlanesConverter.py b/DeepCrazyhouse/src/preprocessing/PGN2PlanesConverter.py index c579a6ef..0d726d96 100644 --- a/DeepCrazyhouse/src/preprocessing/PGN2PlanesConverter.py +++ b/DeepCrazyhouse/src/preprocessing/PGN2PlanesConverter.py @@ -261,6 +261,37 @@ def _filter_pgn_thread(self, queue, pgn): queue.put(batch_black_won) queue.put(batch_draw) + def filter_all_pgns(self): + """ + Filters out all games based on the given conditions in the constructor and returns all games in + :return: lst_all_pgn_sel: List of selected games in String-IO format + lst_nb_games_sel: Number of selected games for each pgn file + lst_batch_white_won: Number of white wins in each pgn file + lst_black_won: Number of black wins in each pgn file + lst_draw_won: Number of draws in each pgn file + """ + + total_games_exported = 0 + + lst_all_pgn_sel = [] + lst_nb_games_sel = [] + lst_batch_white_won = [] + lst_batch_black_won = [] + lst_batch_draw = [] + + pgns = os.listdir(self._import_dir) + for pgn_name in pgns: + self._pgn_name = pgn_name + all_pgn_sel, nb_games_sel, batch_white_won, batch_black_won, 
batch_draw = self.filter_pgn() + lst_all_pgn_sel.append(all_pgn_sel) + lst_nb_games_sel.append(nb_games_sel) + lst_batch_white_won.append(batch_white_won) + lst_batch_black_won.append(batch_black_won) + lst_batch_draw.append(batch_draw) + + return lst_all_pgn_sel, lst_nb_games_sel, lst_batch_white_won, lst_batch_black_won, lst_batch_draw + + def convert_all_pgns_to_planes(self): """ Master function which calls convert_pgn_to_planes() for all available pgns in the import directory diff --git a/DeepCrazyhouse/src/preprocessing/analyze_train_data.ipynb b/DeepCrazyhouse/src/preprocessing/analyze_train_data.ipynb new file mode 100644 index 00000000..76ad8e2d --- /dev/null +++ b/DeepCrazyhouse/src/preprocessing/analyze_train_data.ipynb @@ -0,0 +1,466 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CrazyAra\n", + "\n", + "## Data Analysis of the Training Data\n", + "\n", + "* file: analyze_train_data.ipynb\n", + "* brief: Filters out the used games of lichess crazyhouse dataset and does some analysis on it.\n", + "\n", + "* author: QueensGambit\n", + "* contact: johannes.czech@stud.tu-darmstadt.de\n", + "* version: 2018-11-28 initial version\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%reload_ext autoreload" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys, os\n", + "sys.path.insert(0,'../../../')\n", + "import os\n", + "import sys\n", + "from DeepCrazyhouse.src.preprocessing.PGN2PlanesConverter import PGN2PlanesConverter\n", + "from DeepCrazyhouse.src.runtime.ColorLogger import enable_color_logging\n", + "from DeepCrazyhouse.src.preprocessing.dataset_loader import load_pgn_dataset\n", + "import logging\n", + "from io import 
StringIO\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import chess.pgn\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "plt.style.use('seaborn-whitegrid')\n", + "enable_color_logging()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Settings\n", + "_same as_ `convert_pgn_to_planes.ipynb`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "min_elo_both = 2000\n", + "nb_games_per_file = 1000" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s_idcs, x, yv, yp, pgn_dataset = load_pgn_dataset()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pgn_dataset.tree()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "converter = PGN2PlanesConverter(limit_nb_games_to_analyze=0, nb_games_per_file=nb_games_per_file,\n", + " max_nb_files=0, min_elo_both=min_elo_both, termination_conditions=[\"Normal\"], log_lvl=logging.DEBUG,\n", + " compression='lz4', clevel=5, dataset_type='train')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lst_all_pgn_sel, lst_nb_games_sel, lst_batch_white_won, lst_batch_black_won, lst_batch_draw = converter.filter_all_pgns()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sum(lst_nb_games_sel)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "file = open('crazyara_lichess_dataset.pgn', mode='w')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "for pgn_sel in lst_all_pgn_sel:\n", + " for pgn in pgn_sel:\n", + " file.writelines(pgn.readlines())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "file.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pgn = open('crazyara_lichess_dataset.pgn')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "columns=['Event','Site','Date','Round','White','Black','Result', 'WhiteElo', 'BlackElo', 'WhiteRatingDiff', 'BlackRatingDiff', 'Termination', 'TimeControl', 'UTCDate', 'UTCTime', 'Variant']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nb_games" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(lst_all_pgn_sel[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fill the pandas dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this list contains the full data of the pandas table\n", + "data = []\n", + "\n", + "# read the first game\n", + "game = chess.pgn.read_game(pgn)\n", + "\n", + "\n", + "# read in all games one by one\n", + "for offset, headers in chess.pgn.scan_headers(pgn):\n", + "#while game is not None:\n", + " row = []\n", + " # iterate over all columns\n", + " for colname in columns:\n", + " # fill one row of data\n", + " try:\n", + " row.append(headers[colname])\n", + " except KeyError:\n", + " # add empty value if entry is missing\n", + " row.append([])\n", + " print(headers)\n", + " # add the row to the full table content\n", + " data.append(row)\n", + " # read in the next 
game\n", + " #game = chess.pgn.read_game_h(pgn)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pgn.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(data, columns=columns)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export the dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv('crazyara_lichess_dataset_stats.csv')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load the dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame.from_csv('data/crazyara_lichess_dataset_stats.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_full = pd.concat([df['White'], df['Black']])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "((df_full.value_counts()[:10] / len(df)) * 100).round(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "elo = np.concatenate((df['WhiteElo'].values, df['BlackElo'].values))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "elo.astype(np.float)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(elo[-5000:])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def example_plot(ax, fontsize=12):\n", + " ax.plot([1, 2])\n", + " ax.locator_params(nbins=3)\n", + " ax.set_xlabel('x-label', fontsize=fontsize)\n", + " ax.set_ylabel('y-label', 
fontsize=fontsize)\n", + " ax.set_title('Title', fontsize=fontsize)\n", + " \n", + "plt.close('all')\n", + "fig = plt.figure(figsize=(10*1.5,8*1.5))\n", + "\n", + "ax1 = plt.subplot(211)\n", + "ax2 = plt.subplot(425)\n", + "ax3 = plt.subplot(224)\n", + "ax4 = plt.subplot(427)\n", + "\n", + "top_x = 20\n", + "cum_perc = df_full.value_counts()[:top_x].sum() / len(df) * 100\n", + "\n", + "plt.suptitle(\"CrazyAra's Traing Data\\n569,537 Games total (%.2f\" % cum_perc + \"% \" + \"by %d players)\" % top_x, y=1.05, size=20)\n", + "\n", + "#ax = (df_full.value_counts()[:20][::-1] / len(df) * 100).plot('barh', title=\"CrazyAra's Traing Data\")\n", + "ax = (df_full.value_counts()[:top_x][::-1]).plot('barh', title=\"\\nTop %d Active Crazyhouse-Players with Matches >= 2,000 elo for both Players\\nfrom January 2016 to June 2018 (database.lichess.org/)\" % top_x, ax=ax1)\n", + "ax.set_xlabel(\"Number of Games\")\n", + "#ax.set_ylabel(\"Crazyhouse Players on lichess.org\")\n", + "\n", + "ax2.hist(elo[-5000000:])\n", + "ax2.axvline(x=elo.mean(), linewidth=2, color='lightblue')\n", + "ax2.text(elo.mean() + elo.mean()*.02,5000000 / 20, \"mean=%.2f\" % elo.mean(), fontsize=12)\n", + "ax2.set_title(\"Elo Rating\")\n", + "ax2.set_xlabel(\"Rating\")\n", + "\n", + "#example_plot(ax1)\n", + "#example_plot(ax2)\n", + "#example_plot(ax3)\n", + "\n", + "df['TimeControl'].value_counts()[:15][::-1].plot('barh', title='Time Control', ax=ax3)\n", + "ax3.set_xlabel(\"Number of Games\")\n", + "\n", + "df['Result'].value_counts()[::-1].plot('barh', ax=ax4)\n", + "ax4.set_title('Game Results')\n", + "ax4.set_xlabel(\"Number of Games\")\n", + "\n", + "plt.tight_layout()\n", + "\n", + "plt.savefig(\"plots/crazyara_training_data.png\", bbox_inches='tight')\n", + "plt.savefig(\"plots/crazyara_training_data.pdf\", bbox_inches='tight')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_full = pd.concat([df['White'], df['Black']])\n", + 
"ax = (df_full.value_counts()[:20][::-1]).plot('barh', title=\"CrazyAra's Traing Data\")\n", + "ax.set_xlabel(\"Number of Games\")\n", + "ax.set_ylabel(\"Crazyhouse Players on lichess.org\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.array(df['WhiteElo'].values, np.int).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.array(df['WhiteElo'].values, np.int).std()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/DeepCrazyhouse/src/preprocessing/convert_pgn_to_planes.ipynb b/DeepCrazyhouse/src/preprocessing/convert_pgn_to_planes.ipynb index 09052187..70da7c4d 100644 --- a/DeepCrazyhouse/src/preprocessing/convert_pgn_to_planes.ipynb +++ b/DeepCrazyhouse/src/preprocessing/convert_pgn_to_planes.ipynb @@ -9,7 +9,7 @@ "\n", "## Conversion of PGN files to Image-Plane Representation\n", "\n", - "* file: load_pgn_parallel.ipynb\n", + "* file: convert_pgn_to_planes.ipynb\n", "* brief: Loads in a png-file from the lichess crazyhouse dataset and converts it to plane representation. 
The plane representations can later be used by a convolutional neural network.\n", "\n", "* author: QueensGambit\n", diff --git a/DeepCrazyhouse/src/preprocessing/plots/crazyara_training_data.pdf b/DeepCrazyhouse/src/preprocessing/plots/crazyara_training_data.pdf new file mode 100644 index 00000000..c4fe2fcc Binary files /dev/null and b/DeepCrazyhouse/src/preprocessing/plots/crazyara_training_data.pdf differ diff --git a/DeepCrazyhouse/src/preprocessing/plots/crazyara_training_data.png b/DeepCrazyhouse/src/preprocessing/plots/crazyara_training_data.png new file mode 100644 index 00000000..b0b537df Binary files /dev/null and b/DeepCrazyhouse/src/preprocessing/plots/crazyara_training_data.png differ diff --git a/DeepCrazyhouse/src/tests/FullRoundTripTests.py b/DeepCrazyhouse/src/tests/FullRoundTripTests.py index 199e4dff..ef282560 100644 --- a/DeepCrazyhouse/src/tests/FullRoundTripTests.py +++ b/DeepCrazyhouse/src/tests/FullRoundTripTests.py @@ -7,8 +7,8 @@ Loads the plane representation for the test dataset and iterates through all board positions and moves. 
""" -from src.domain.util import * -from src.domain.crazyhouse.input_representation import planes_to_board +from DeepCrazyhouse.src.domain.util import * +from DeepCrazyhouse.src.domain.crazyhouse.input_representation import planes_to_board from DeepCrazyhouse.src.domain.crazyhouse.output_representation import policy_to_move import chess import chess.pgn @@ -17,8 +17,11 @@ from DeepCrazyhouse.src.preprocessing.PGN2PlanesConverter import PGN2PlanesConverter from multiprocessing import Pool from copy import deepcopy +from DeepCrazyhouse.src.preprocessing.dataset_loader import load_pgn_dataset # import the Colorer to have a nicer logging printout +from DeepCrazyhouse.src.runtime.ColorLogger import enable_color_logging +enable_color_logging() def board_single_game(params_inp): diff --git a/README.md b/README.md index 8e5a002e..c0c9de58 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,12 @@ See [LICENSE](https://github.com/QueensGambit/CrazyAra/blob/master/LICENSE) for * [Project management plattform on taiga.io](https://tree.taiga.io/project/queensgambit-deep-learning-project-crazyhouse/) +## Main libraries used in this project +* [python-chess](https://python-chess.readthedocs.io/en/latest/index.html): A pure Python chess library +* [MXNet](https://mxnet.incubator.apache.org/): A flexible and efficient library for deep learning +* [numpy](http://www.numpy.org/): The fundamental package for scientific computing with Python +* [zarr](https://zarr.readthedocs.io/en/stable/): An implementation of chunked, compressed, N-dimensional arrays + ## Links to other similar projects ### chess-alpha-zero diff --git a/etc/media/wiki/CrazyAra_v02/training/crazyara_training_data.png b/etc/media/wiki/CrazyAra_v02/training/crazyara_training_data.png new file mode 100644 index 00000000..b0b537df Binary files /dev/null and b/etc/media/wiki/CrazyAra_v02/training/crazyara_training_data.png differ