From ce12a9ceb4f0bba7ae36b193249f35648e4e6f52 Mon Sep 17 00:00:00 2001 From: Johannes Czech Date: Fri, 3 May 2024 11:48:06 +0200 Subject: [PATCH] Mixture of Experts MCTS (MoE MCTS) (#216) * - added game phase detection file - adjusted initial Dockerfile - minor changes to convert_pgn_to_planes.ipynb and pgn_to_planes_converter.py * - changed openspiel git * - changed openspiel git * fixed phase ids * added dataset creation option for specific phases * param changes in train_cnn.ipynb * - fixed plys_to_end list to only include values for moves that really have been used - added counter for games without positions for current phase * - changes to train_cnn to make it compatible - added analyse_game_phases.py to analyse game phase distribution and other information - minor changes * mcts phase integration working, some improvements missing * - added phase_to_nets map to make sure the right net is used for each phase - board->get_phase now expects the total amount of phases as an argument - phaseCountMap is now immediately filled * - added game phase vector to created datasets - added sample weighting to losses in pytorch training files - load_pgn_dataset() now returns a dict - added file for easily generating normalized cutechess-cli commands * minor fixes for weighted training * - fixes and improvements to prs.py from cutechess-cli - added file to generate plots based on cutechess results * - changes for continuing training from tar file (pytorch) * - added python file for training (exported notebook) * - added python file for executing cutechess shell commands * - added the option to specify additional eval sets (unweighted) to pass through the trainer agent - you can now pass a phase to load_pgn_dataset to load a non default dataset * - minor changes * - minor changes for debugging * - bugfix in train_cnn.py for additional dataloaders * - bugfix to correctly determine train iterations - added printing total positions in dataset when loading * - minor changes in 
prs.py * - minor changes for chess 960 * - reverted mode and version back to 2 and 3 * fixed bug when executing isready multiple times consecutively while setting networkLoaded back to false * alternative bugfix attempt for linux * - temporary fix for chess960 wrong training representation - adjusted cutechess run file to support 960 matches * - changes to incorporate 960 dataset analysis - new and adjusted graphs in game_phase_detector.py (should be put into a separate file) - new plots in create_cutechess_plots.py * chess960 input representation fix (c++ engine files still unadjusted and assuming a wrong input representation) * - added plot generating notebooks to git (/etc folder) - moved game phase analysis code from game_phase_detector.py to own file (analyse_game_phase_definition.py) - minor changes in train_cnn.py - adjusted .gitignore * - added support for naive movecount phases * - minor path fix in dataset_loader.py * undone temporary fix for broken chess960 input representation * - added support for phases by movecount in c++ code (currently always assumes phases by movecount) - set default value for UCI_Chess960 back to false - minor fixes * - minor plotting adjustments - added colorblind palette * - adjusted run_cutechess_experiments.py to be able to do experiments against stockfish * - added documentation * - minor assertion change in train_cnn.py * - cleaned code and removed sections that are not needed anymore * - changed underscore naming to camelCase naming in several cases * - added UCI option Game_Phase_Definition with options "lichess" and "movecount" and corresponding searchsettings enum GamePhaseDefinition * - added searchSettings to RawNetAgent to access selected gamePhaseDefinition * - aligned train_cnn.ipynb with code inside train_cnn.py * - cleaned cell outputs of main notebooks * - further notebook output cleanings * - removed files unnecessary for pull request and reverted several files back to initial state of fork * - reverted 
.gitignore and Dockerfile to older state * - .gitignore update to different previous state * Update crazyara.cpp Fix compile error * Update board.cpp Fix error: control reaches end of non-void function * Add GamePhase get_phase to states * Add GamePhase OpenSpielState::get_phase() * Update get_data_loader() to load dict instead --------- Co-authored-by: Felix Helfenstein --- DeepCrazyhouse/configs/main_config.py | 38 +- DeepCrazyhouse/configs/train_config.py | 9 +- DeepCrazyhouse/src/domain/util.py | 17 +- .../src/preprocessing/dataset_loader.py | 49 ++- .../src/preprocessing/game_phase_detector.py | 160 +++++++ .../src/preprocessing/pgn_converter_util.py | 65 +-- .../preprocessing/pgn_to_planes_converter.py | 86 +++- .../src/training/metrics_pytorch.py | 24 +- DeepCrazyhouse/src/training/train_cli_util.py | 6 +- DeepCrazyhouse/src/training/train_cnn.ipynb | 396 +++++++----------- .../src/training/trainer_agent_pytorch.py | 136 ++++-- engine/CMakeLists.txt | 4 +- engine/src/agents/agent.cpp | 4 +- engine/src/agents/agent.h | 2 +- engine/src/agents/config/searchsettings.cpp | 3 +- engine/src/agents/config/searchsettings.h | 7 + engine/src/agents/mctsagent.cpp | 24 +- engine/src/agents/mctsagent.h | 4 +- engine/src/agents/mctsagentbatch.cpp | 8 +- engine/src/agents/mctsagentbatch.h | 4 +- engine/src/agents/mctsagenttruesight.cpp | 6 +- engine/src/agents/mctsagenttruesight.h | 4 +- engine/src/agents/randomagent.cpp | 4 +- engine/src/agents/randomagent.h | 4 +- engine/src/agents/rawnetagent.cpp | 11 +- engine/src/agents/rawnetagent.h | 4 +- .../src/environments/chess_related/board.cpp | 143 +++++++ engine/src/environments/chess_related/board.h | 38 ++ .../environments/chess_related/boardstate.cpp | 5 + .../environments/chess_related/boardstate.h | 1 + .../chess_related/inputrepresentation.cpp | 1 + .../environments/fairy_state/fairystate.cpp | 6 + .../src/environments/fairy_state/fairystate.h | 1 + .../open_spiel/openspielstate.cpp | 6 + 
.../environments/open_spiel/openspielstate.h | 1 + .../stratego_related/strategostate.cpp | 6 + .../stratego_related/strategostate.h | 1 + engine/src/nn/neuralnetapi.cpp | 21 +- engine/src/nn/neuralnetapi.h | 17 + engine/src/nn/neuralnetapiuser.cpp | 44 +- engine/src/nn/neuralnetapiuser.h | 6 +- engine/src/searchthread.cpp | 29 +- engine/src/searchthread.h | 5 +- engine/src/state.h | 11 + engine/src/uci/crazyara.cpp | 55 ++- engine/src/uci/crazyara.h | 12 +- engine/src/uci/optionsuci.cpp | 1 + etc/run_cutechess_experiments.py | 71 ++++ 48 files changed, 1101 insertions(+), 459 deletions(-) create mode 100644 DeepCrazyhouse/src/preprocessing/game_phase_detector.py create mode 100644 etc/run_cutechess_experiments.py diff --git a/DeepCrazyhouse/configs/main_config.py b/DeepCrazyhouse/configs/main_config.py index f86b889b3..97e41dc6e 100644 --- a/DeepCrazyhouse/configs/main_config.py +++ b/DeepCrazyhouse/configs/main_config.py @@ -11,7 +11,12 @@ # define the default dir where the training data in plane representation is located # e.g. 
for supervised learning default_dir = "/data/planes/" -default_dir = "/data/RL/export/" +default_dir = "/data/kingbase2019_lite_pgn_months/" +#default_dir = "C:/workspace/Python/CrazyAra/data/kingbase2019_lite_pgn_months/" +#default_dir = "C:/workspace/Python/CrazyAra/data/chess960_pgns/" +phase = None # current phase to use, set to None to treat everything as a single phase +# type of phase definition, either "lichess" or "movecountX" with X determining the number of phases +phase_definition = "movecount3" if default_dir[-1] != "/": default_dir = default_dir + "/" @@ -30,29 +35,38 @@ # The test directory includes games from the month: 2017-05 # The mate_in_one directory includes games from the month: lichess_db_standard_rated_2015-08.pgn + "phase": phase, + "phase_definition": phase_definition, + "default_dir": default_dir, + # The pgn directories contain all files which are converted to plane representation - "pgn_train_dir": "/home/demo_user/datasets/lichess/Crazyhouse/pgn/train/", - "pgn_val_dir": "/home/demo_user/datasets/lichess/Crazyhouse/pgn/val/", - "pgn_test_dir": "/home/demo_user/datasets/lichess/Crazyhouse/pgn/test/", - "pgn_mate_in_one_dir": "/home/demo_user/datasets/lichess/Crazyhouse/pgn/mate_in_one/", + "pgn_train_dir": default_dir + "pgn/train/", + "pgn_val_dir": default_dir + "pgn/val/", + "pgn_test_dir": default_dir + "pgn/test/", + "pgn_mate_in_one_dir": default_dir + "pgn/mate_in_one/", # The plane directories contain the plane representation of the converted board state # (.zip files which have been compressed by the python zarr library) - "planes_train_dir": default_dir + "train/", - "planes_val_dir": default_dir + "val/", - "planes_test_dir": default_dir + "test/", - "planes_mate_in_one_dir": default_dir + "mate_in_one/", + + "planes_train_dir": default_dir + f"planes/{phase_definition}/phase{phase}/train/", + "planes_val_dir": default_dir + f"planes/{phase_definition}/phase{phase}/val/", + "planes_test_dir": default_dir + 
f"planes/{phase_definition}/phase{phase}/test/", + "planes_mate_in_one_dir": default_dir + f"planes/{phase_definition}/phase{phase}/mate_in_one/", # The rec directory contains the plane representation which are used in the training loop of the network # use the notebook create_rec_dataset to generate the .rec files: # (Unfortunately when trying to start training with the big dataset a memory overflow occurred. # therefore the old working solution was used to train the latest model by loading the dataset via batch files) # "train.idx", "val.idx", "test.idx", "mate_in_one.idx", "train.rec", "val.rec", "test.rec", "mate_in_one.rec" - "rec_dir": "/home/demo_user/datasets/lichess/Crazyhouse/rec/", + + "rec_dir": default_dir + "rec/", # The architecture dir contains the architecture definition of the network in mxnet .symbol format # These directories are used for inference - "model_architecture_dir": "/home/demo_user/models/Crazyhouse/symbol/", + #"model_architecture_dir": "/home/demo_user/models/Crazyhouse/symbol/", + "model_architecture_dir": "/DeepCrazyhouse/models/Classic/symbol/", + # the weight directory contains the of the network in mxnet .params format - "model_weights_dir": "/home/demo_user/models/Crazyhouse/params/", + #"model_weights_dir": "/home/demo_user/models/Crazyhouse/params/", + "model_weights_dir": "/DeepCrazyhouse/models/Classic/params/", # layer name of the value output layer (e.g. value_tanh0 for legacy crazyhouse networks and value_out for newer # networks) diff --git a/DeepCrazyhouse/configs/train_config.py b/DeepCrazyhouse/configs/train_config.py index 70bbc7a8d..c0ae8ec96 100644 --- a/DeepCrazyhouse/configs/train_config.py +++ b/DeepCrazyhouse/configs/train_config.py @@ -79,12 +79,8 @@ class TrainConfig: " pytorch training loop.)" k_steps_initial: int = 0 - info_symbol_file: str = "symbol_file is the neural network architecture file to continue training with (deprecated)" \ - "(e.g. 
'model_init-symbol.json', model-1.19246-0.603-symbol.json')" - symbol_file: str = '' - info_params_file: str = "params_file is the neural network weight file to continue training with (deprecated)" \ - "(e.g. 'model_init-0000.params' # model-1.19246-0.603-0223.params')" - params_file: str = '' + info_tar_file: str = "tar_file is the neural network weight file to continue training with" \ + tar_file: str = '' info_optimizer_name: str = "optimizer_name is the optimizer that used in the training loop to update the weights." \ "(e.g. nag, sgd, adam, adamw)" @@ -214,4 +210,5 @@ class TrainObjects: momentum_schedule = None metrics = None variant_metrics = None + phase_weights = {0: 1., 1: 1., 2: 1.} diff --git a/DeepCrazyhouse/src/domain/util.py b/DeepCrazyhouse/src/domain/util.py index e6d41bc5d..489193ea2 100755 --- a/DeepCrazyhouse/src/domain/util.py +++ b/DeepCrazyhouse/src/domain/util.py @@ -165,7 +165,7 @@ def get_numpy_arrays(pgn_dataset): Loads the content of the dataset file into numpy arrays :param pgn_dataset: dataset file handle - :return: numpy-arrays: + :return: pgn_dataset_arrays_dict: dict of {specific dataset part: numpy-array} with the following keys starting_idx - defines the index where each game starts x - the board representation for all games y_value - the game outcome (-1,0,1) for each board position @@ -174,6 +174,7 @@ def get_numpy_arrays(pgn_dataset): This can be used to apply discounting y_best_move_q - Q-value for the position of the selected move (this information is only available for generated data during selfplay) + phase_vector - array of the game phase of each position """ # Get the data start_indices = np.array(pgn_dataset["start_indices"]) @@ -184,14 +185,22 @@ def get_numpy_arrays(pgn_dataset): except Exception: y_policy = np.array(pgn_dataset["y_policy"]) - possible_entries = ["plys_to_end", "y_best_move_q"] - entries = [None] * 2 + possible_entries = ["plys_to_end", "y_best_move_q", "phase_vector"] + entries = [None] * 3 for idx, 
entry in enumerate(possible_entries): try: entries[idx] = np.array(pgn_dataset[entry]) except KeyError: pass - return start_indices, x, y_value, y_policy, entries[0], entries[1] + + pgn_dataset_arrays_dict = {"start_indices": start_indices, + "x": x, + "y_value": y_value, + "y_policy": y_policy, + "plys_to_end": entries[0], + "y_best_move_q": entries[1], + "phase_vector": entries[2]} + return pgn_dataset_arrays_dict def get_x_y_and_indices(dataset): diff --git a/DeepCrazyhouse/src/preprocessing/dataset_loader.py b/DeepCrazyhouse/src/preprocessing/dataset_loader.py index 993f5db6c..3e033a343 100644 --- a/DeepCrazyhouse/src/preprocessing/dataset_loader.py +++ b/DeepCrazyhouse/src/preprocessing/dataset_loader.py @@ -19,19 +19,23 @@ def _load_dataset_file(dataset_filepath): """ Loads a single dataset file give by its path :param dataset_filepath: path where the file is located - :return:starting_idx: [int] - List of indices where ech game starts + :return: pgn_dataset_arrays_dict: dict of {specific dataset part: numpy-array} with the following keys + starting_idx: [int] - List of indices where ech game starts x: nd.array - Numpy array which contains the game positions y_value: nd.array - Numpy array which describes the winner for each board position y_policy: nd.array - Numpy array which describes the policy distribution for each board state (in case of a pgn dataset the move is one hot encoded) plys_to_end - array of how many plys to the end of the game for each position. 
This can be used to apply discounting + y_best_move_q - Q-value for the position of the selected move + (this information is only available for generated data during selfplay) + phase_vector - array of the game phase of each position """ return get_numpy_arrays(zarr.group(store=zarr.ZipStore(dataset_filepath, mode="r"))) def load_pgn_dataset( - dataset_type="train", part_id=0, verbose=True, normalize=False, q_value_ratio=0, + dataset_type="train", part_id=0, verbose=True, normalize=False, q_value_ratio=0, phase=None ): """ Loads one part of the pgn dataset in form of planes / multidimensional numpy array. @@ -43,8 +47,9 @@ def load_pgn_dataset( :param normalize: True if the inputs shall be normalized to 0-1 ! Note this only supported for hist-length=1 at the moment :param q_value_ratio: Ratio for mixing the value return with the corresponding q-value + :param phase: if specified use planes dataset of this phase. If None, the phase specified in main_config is used For a ratio of 0 no q-value information will be used. Value must be in [0, 1] - :return: numpy-arrays: + :return: pgn_dataset_arrays_dict: dict of {specific dataset part: numpy-array} with the following keys start_indices - defines the index where each game starts x - the board representation for all games y_value - the game outcome (-1,0,1) for each board position @@ -52,16 +57,16 @@ def load_pgn_dataset( plys_to_end - array of how many plys to the end of the game for each position. 
This can be used to apply discounting pgn_datasets - the dataset file handle (you can use .tree() to show the file structure) + phase_vector - array of the game phase of each position """ - if dataset_type == "train": - zarr_filepaths = glob.glob(main_config["planes_train_dir"] + "**/*.zip") - elif dataset_type == "val": - zarr_filepaths = glob.glob(main_config["planes_val_dir"] + "**/*.zip") - elif dataset_type == "test": - zarr_filepaths = glob.glob(main_config["planes_test_dir"] + "**/*.zip") - elif dataset_type == "mate_in_one": - zarr_filepaths = glob.glob(main_config["planes_mate_in_one_dir"] + "**/*.zip") + if dataset_type in ["train", "val", "test", "mate_in_one"]: + if phase is None: + zarr_filepaths = glob.glob(main_config[f"planes_{dataset_type}_dir"] + "**/*.zip") + else: + zarr_filepaths = glob.glob(main_config["default_dir"] + + f"planes/{main_config['phase_definition']}/phase{phase}/{dataset_type}/" + + "**/*.zip") else: raise Exception( 'Invalid dataset type "%s" given. It must be either "train", "val", "test" or "mate_in_one"' % dataset_type @@ -78,7 +83,15 @@ def load_pgn_dataset( logging.debug("") pgn_dataset = zarr.group(store=zarr.ZipStore(pgn_datasets[part_id], mode="r")) - start_indices, x, y_value, y_policy, plys_to_end, y_best_move_q = get_numpy_arrays(pgn_dataset) # Get the data + # Get the data + pgn_dataset_arrays_dict = get_numpy_arrays(pgn_dataset) + start_indices = pgn_dataset_arrays_dict["start_indices"] + x = pgn_dataset_arrays_dict["x"] + y_value = pgn_dataset_arrays_dict["y_value"] + y_policy = pgn_dataset_arrays_dict["y_policy"] + plys_to_end = pgn_dataset_arrays_dict["plys_to_end"] + y_best_move_q = pgn_dataset_arrays_dict["y_best_move_q"] + phase_vector = pgn_dataset_arrays_dict["phase_vector"] if verbose: logging.info("STATISTICS:") @@ -87,7 +100,7 @@ def load_pgn_dataset( print(member, list(pgn_dataset["statistics"][member])) except KeyError: logging.warning("no statistics found") - + print("total_positions", 
f"[{len(y_value)}]") logging.info("PARAMETERS:") try: for member in pgn_dataset["parameters"]: @@ -105,7 +118,15 @@ def load_pgn_dataset( y_policy = y_policy.astype(np.float32) # apply rescaling using a predefined scaling constant (this makes use of vectorized operations) x *= MATRIX_NORMALIZER - return start_indices, x, y_value, y_policy, plys_to_end, pgn_dataset + + pgn_dataset_arrays_dict = {"start_indices": start_indices, + "x": x, + "y_value": y_value, + "y_policy": y_policy, + "plys_to_end": plys_to_end, + "pgn_dataset": pgn_dataset, + "phase_vector": phase_vector} + return pgn_dataset_arrays_dict def load_xiangqi_dataset(dataset_type="train", part_id=0, verbose=True, normalize=False): diff --git a/DeepCrazyhouse/src/preprocessing/game_phase_detector.py b/DeepCrazyhouse/src/preprocessing/game_phase_detector.py new file mode 100644 index 000000000..c93f291cd --- /dev/null +++ b/DeepCrazyhouse/src/preprocessing/game_phase_detector.py @@ -0,0 +1,160 @@ +""" +@file: game_phase_detector.py +Created on 08.06.2023 +@project: CrazyAra +@author: HelpstoneX + +Analyses a given board state defined by a python-chess object and outputs the game phase according to a given definition +""" + + +import chess +import chess.pgn +import numpy as np +import matplotlib.pyplot as plt +import io +from DeepCrazyhouse.configs.main_config import main_config +import os +import re + + +def get_majors_and_minors_count(board): + """ + Returns the number of major and minor pieces (not including king) currently present on the board (either color) + + :param board: python-chess board object + :return: pieces_left - integer representing how many pieces are left + """ + pieces_left = bin(board.queens | board.rooks | board.knights | board.bishops).count("1") + return pieces_left + + +def is_backrank_sparse(board, max_pieces_allowed=3): + """ + Determines whether the backrank of either player is sparse + where sparseness is defined by the amount of pieces on the first (for white) or last (for 
black) rank + + :param board: python-chess board object + :param max_pieces_allowed: integer representing the maximum pieces (including the king) allowed on the backrank + for it to be considered sparse + :return: backrank_sparseness - boolean representing whether either backrank is currently sparse + """ + white_backrank_sparse = bin(board.occupied_co[chess.WHITE] & chess.BB_RANK_1).count("1") <= max_pieces_allowed + black_backrank_sparse = bin(board.occupied_co[chess.BLACK] & chess.BB_RANK_8).count("1") <= max_pieces_allowed + return white_backrank_sparse or black_backrank_sparse + + +def score(num_white_pieces_in_region, num_black_pieces_in_region, rank): + """ + Calculates the mixedness contribution of a particular 2x2 square/region + + :param num_white_pieces_in_region: integer representing the amount of white pieces in the current 2x2 region + :param num_black_pieces_in_region: integer representing the amount of black pieces in the current 2x2 region + :param rank: rank of the current 2x2 region + :return: mixedness_score - integer representing the mixedness score of the current 2x2 square + """ + score_map = { + (0, 0): 0, + (1, 0): 1 + (8 - rank), + (2, 0): 2 + (rank - 2) if rank > 2 else 0, + (3, 0): 3 + (rank - 1) if rank > 1 else 0, + (4, 0): 3 + (rank - 1) if rank > 1 else 0, + (0, 1): 1 + rank, + (1, 1): 5 + abs(3 - rank), + (2, 1): 4 + rank, + (3, 1): 5 + rank, + (0, 2): 2 + (6 - rank) if rank < 6 else 0, + (1, 2): 4 + (6 - rank), + (2, 2): 7, + (0, 3): 3 + (7 - rank) if rank < 7 else 0, + (1, 3): 5 + (6 - rank), + (0, 4): 3 + (7 - rank) if rank < 7 else 0 + } + return score_map.get((num_white_pieces_in_region, num_black_pieces_in_region), 0) + + +def get_mixedness(board): + """ + Calculates the mixedness of a position based on the lichess definition of mixedness, + which is roughly speaking the amount of intertwining of black and white pieces in all 2x2 squares of the board + more info: 
https://github.com/lichess-org/scalachess/blob/master/src/main/scala/Divider.scala + + :param board: python-chess board object + :return: mixedness_score - integer representing the current mixedness score of the position + (according to the lichess definition) + """ + mix = 0 + + for rank_idx in range(7): # use ranks 1 to 7 (indices 0 to 6) + for file_idx in range(7): # use files A to G (indices 0 to 6) + num_white_pieces_in_region = 0 + num_black_pieces_in_region = 0 + for dx in [0, 1]: + for dy in [0, 1]: + square = chess.square(file_idx+dx, rank_idx+dy) + if board.piece_at(square): + if board.piece_at(square).color == chess.WHITE: + num_white_pieces_in_region += 1 + else: + num_black_pieces_in_region += 1 + mix += score(num_white_pieces_in_region, num_black_pieces_in_region, rank_idx + 1) + + return mix + + +def get_game_phase(board, phase_definition="lichess", average_movecount_per_game=42.85): + """ + Determines the game phase based on the current board state and the given phase definition type + + :param board: python-chess board object + :param phase_definition: determines, which phase definition type to use, + either "lichess" + or "movecountX" where X describes the amount of phases + (separated by equidistant move count buckets) + :param average_movecount_per_game: specifies the average movecount per game + (used to determine phase borders when using phases by movecount) + :return: str - str representation of the phase (for lichess definition) or empty str + num_majors_and_minors - the amount of major and minor pieces left (for lichess phase EDA purposes) + backrank_sparse - whether the backrank of either player is sparse (for lichess phase EDA purposes) + mixedness_score - current mixedness score of the position (for lichess phase EDA purposes) + phase - integer from 0 to num_phases-1 representing the phase the current position belongs to + """ + + if phase_definition == "lichess": + # returns the game phase based on the lichess definition implemented in: 
+ # https://github.com/lichess-org/scalachess/blob/master/src/main/scala/Divider.scala + + num_majors_and_minors = get_majors_and_minors_count(board) + backrank_sparse = is_backrank_sparse(board) + mixedness_score = get_mixedness(board) + + if num_majors_and_minors <= 6: + return "endgame", num_majors_and_minors, backrank_sparse, mixedness_score, 2 + elif num_majors_and_minors <= 10 or backrank_sparse or (mixedness_score > 150): + return "midgame", num_majors_and_minors, backrank_sparse, mixedness_score, 1 + else: + return "opening", num_majors_and_minors, backrank_sparse, mixedness_score, 0 + + # matches "movecount" directly followed by a number + pattern_match_result = re.match(r"\bmovecount(\d+)", phase_definition) + + if pattern_match_result: # if it is a valid match + # use number at the end of the string to determine the number of phases to be used + num_phases = int(pattern_match_result.group(1)) + phase_length = round(average_movecount_per_game/num_phases) + + # board.fullmove_number describes the move number of the next move that happens in the game, + # e.g., after 8 half moves board.fullmove_number is 5 + # so we use board.fullmove_number -1 to get the current full moves played + moves_completed = board.fullmove_number - 1 + phase = int(moves_completed/phase_length) # determine phase by rounding down to the next integer + phase = min(phase, num_phases-1) # ensure that all higher results are attributed to the last phase + return "", 0, 0, 0, phase + + else: + return "Phase definition not supported or wrongly formatted. 
Should be 'movecountX' or 'lichess'" + + +if __name__ == "__main__": + print(get_game_phase(chess.Board("q6k/P1P5/3p2Q1/5p1p/3N4/3b3P/5KP1/R3R3 w - - 1 36"), "movecount4")) + print("done") diff --git a/DeepCrazyhouse/src/preprocessing/pgn_converter_util.py b/DeepCrazyhouse/src/preprocessing/pgn_converter_util.py index cca426c19..e312a2d8b 100644 --- a/DeepCrazyhouse/src/preprocessing/pgn_converter_util.py +++ b/DeepCrazyhouse/src/preprocessing/pgn_converter_util.py @@ -15,6 +15,7 @@ from DeepCrazyhouse.src.domain.variants.input_representation import board_to_planes from DeepCrazyhouse.configs.main_config import main_config from DeepCrazyhouse.src.domain.variants.game_state import mirror_policy +from DeepCrazyhouse.src.preprocessing.game_phase_detector import get_game_phase NB_ITEMS_METADATA = 18 # constant which defines how many meta data items will be stored in a matrix @@ -33,6 +34,7 @@ def get_planes_from_pgn(params): y_policy: nd.array - Numpy matrix defining the policy distribution for each board state plys_to_end - array of how many plys to the end of the game for each position. This can be used to apply discounting + phase_vector - array of the game phase of each position """ (pgn, game_idx, mate_in_one) = params @@ -64,7 +66,7 @@ def get_planes_from_pgn(params): results = get_planes_from_game(game, mate_in_one) - return metadata, game_idx, results[0], results[1], results[2], results[3] + return metadata, game_idx, results[0], results[1], results[2], results[3], results[4] def get_planes_from_game(game, mate_in_one=False): @@ -84,12 +86,15 @@ def get_planes_from_game(game, mate_in_one=False): in this position plys_to_end - array of how many plys to the end of the game for each position. 
This can be used to apply discounting + phase_vector - array of the game phase of each position """ fen_dic = {} # A dictionary which maps the fen description to its number of occurrences x = [] y_value = [] y_policy = [] + plys_to_end = [] # save the number of plys until the end of the game for each position that was considered + phase_vector = [] # save all phases that occurred during the game board = game.board() # get the initial board state # update the y value accordingly if board.turn == chess.WHITE: @@ -108,8 +113,7 @@ def get_planes_from_game(game, mate_in_one=False): # you don't want to push the last move on the board because you had no movement policy to learn from in this case # The moves get pushed at the end of the for-loop and is only used in the next loop. # Therefore we can iterate over 'all' moves - plys = 0 - for move in all_moves: + for plys, move in enumerate(all_moves): board_occ = 0 # by default the positions hasn't occurred before fen = board.fen() # remove the halfmove counter & move counter from this fen to make repetitions possible @@ -125,39 +129,40 @@ def get_planes_from_game(game, mate_in_one=False): # check if you need to export a mate_in_one_scenario if not mate_in_one or plys == len(all_moves) - 1: - # build the last move vector by putting the most recent move on top followed by the remaining past moves - last_moves = [None] * NB_LAST_MOVES - if plys != 0: - last_moves[0:min(plys, NB_LAST_MOVES)] = all_moves[max(plys-NB_LAST_MOVES, 0):plys][::-1] - - # receive the board and the evaluation of the current position in plane representation - # We don't want to store float values because the integer datatype is cheaper, - # that's why normalize is set to false - x_cur = board_to_planes(board, board_occ, normalize=False, mode=main_config["mode"], last_moves=last_moves) - - # add the evaluation of 1 position to the list - x.append(x_cur) - y_value.append(y_init) - # add the next move defined in policy vector notation to the policy list - # 
the network always sees the board as if he's the white player, that's the move is mirrored fro black - y_policy.append(move_to_policy(next_move, mirror_policy=mirror_policy(board))) + + # if specified phase is not None + # check if the current game phase is the phase the dataset is created for + + curr_phase = get_game_phase(board, phase_definition=main_config["phase_definition"])[4] + + if main_config["phase"] is None or curr_phase == main_config["phase"]: + # build the last move vector by putting the most recent move on top followed by the remaining past moves + last_moves = [None] * NB_LAST_MOVES + if plys != 0: + last_moves[0:min(plys, NB_LAST_MOVES)] = all_moves[max(plys-NB_LAST_MOVES, 0):plys][::-1] + + # receive the board and the evaluation of the current position in plane representation + # We don't want to store float values because the integer datatype is cheaper, + # that's why normalize is set to false + x_cur = board_to_planes(board, board_occ, normalize=False, mode=main_config["mode"], last_moves=last_moves) + + # add the evaluation of 1 position to the list + x.append(x_cur) + y_value.append(y_init) + # add the next move defined in policy vector notation to the policy list + # the network always sees the board as if he's the white player, that's the move is mirrored fro black + y_policy.append(move_to_policy(next_move, mirror_policy=mirror_policy(board))) + plys_to_end.append(len(all_moves) - 1 - plys) + + phase_vector.append(curr_phase) y_init *= -1 # flip the y_init value after each move board.push(move) # push the next move on the board - plys += 1 - plys_to_end = np.arange(plys)[::-1] - - # check if there has been any moves + # check if there has been any moves and stack the lists if x and y_value and y_policy: x = np.stack(x, axis=0) y_value = np.stack(y_value, axis=0) y_policy = np.stack(y_policy, axis=0) - else: - print("game.headers:") - print(game.headers) - print("len(all_moves)", len(all_moves)) - print("game", game) - raise Exception("The 
given pgn file's mainline is empty!") - return x, y_value, y_policy, plys_to_end + return x, y_value, y_policy, plys_to_end, phase_vector diff --git a/DeepCrazyhouse/src/preprocessing/pgn_to_planes_converter.py b/DeepCrazyhouse/src/preprocessing/pgn_to_planes_converter.py index c4da8a696..4930a6815 100644 --- a/DeepCrazyhouse/src/preprocessing/pgn_to_planes_converter.py +++ b/DeepCrazyhouse/src/preprocessing/pgn_to_planes_converter.py @@ -493,12 +493,14 @@ def export_pgn_batch(self, cur_part, game_idx_start, game_idx_end, pgn_sel, nb_w logging.info("starting conversion to planes...") t_s = time() - pool = Pool() + pool = Pool(processes=None) # If processes is None then the number returned by os.cpu_count() is used x_dic = {} y_value_dic = {} y_policy_dic = {} plys_to_end_dic = {} + phase_vector_dic = {} metadata_dic = {} + num_games_without_fitting_moves = 0 if not os.path.exists(self._export_dir): os.makedirs(self._export_dir) @@ -513,12 +515,16 @@ def export_pgn_batch(self, cur_part, game_idx_start, game_idx_end, pgn_sel, nb_w zarr_file = zarr.group(store=store, overwrite=True) # the games occur in random order due to multiprocessing # in order to keep structure we store the result in a dictionary first - for metadata, game_idx, x, y_value, y_policy, plys_to_end in pool.map(get_planes_from_pgn, params_inp): - metadata_dic[game_idx] = metadata - x_dic[game_idx] = x - y_value_dic[game_idx] = y_value - y_policy_dic[game_idx] = y_policy - plys_to_end_dic[game_idx] = plys_to_end + for metadata, game_idx, x, y_value, y_policy, plys_to_end, phase_vector in pool.map(get_planes_from_pgn, params_inp): + if len(y_value) > 0: # only add games that had at least one valid move + metadata_dic[game_idx] = metadata + x_dic[game_idx] = x + y_value_dic[game_idx] = y_value + y_policy_dic[game_idx] = y_policy + plys_to_end_dic[game_idx] = plys_to_end + phase_vector_dic[game_idx] = phase_vector + else: + num_games_without_fitting_moves += 1 pool.close() pool.join() t_e = time() - t_s 
@@ -531,6 +537,7 @@ def export_pgn_batch(self, cur_part, game_idx_start, game_idx_end, pgn_sel, nb_w y_value = get_dic_sorted_by_key(y_value_dic) y_policy = get_dic_sorted_by_key(y_policy_dic) plys_to_end = get_dic_sorted_by_key(plys_to_end_dic) + phase_vector = get_dic_sorted_by_key(phase_vector_dic) start_indices = np.zeros(len(x)) # create a list which describes where each game starts for i, x_cur in enumerate(x[:-1]): @@ -542,10 +549,15 @@ def export_pgn_batch(self, cur_part, game_idx_start, game_idx_end, pgn_sel, nb_w y_value = np.concatenate(y_value, axis=0) y_policy = np.concatenate(y_policy, axis=0) plys_to_end = np.concatenate(plys_to_end, axis=0) + phase_vector = np.concatenate(phase_vector, axis=0) logging.debug("metadata.shape %s", metadata.shape) logging.debug("x.shape %s", x.shape) logging.debug("y_value.shape %s", y_value.shape) logging.debug("y_policy.shape %s", y_policy.shape) + logging.debug("plys_to_end.shape %s", plys_to_end.shape) + logging.debug("phase_vector.shape %s", phase_vector.shape) + logging.debug("num_games_without_fitting_moves %s", num_games_without_fitting_moves) + assert x.shape[0] == y_value.shape[0] == y_policy.shape[0] == plys_to_end.shape[0] == phase_vector.shape[0] # Save the dataset to a file logging.info("saving the dataset to a file...") # define the compressor object @@ -589,6 +601,13 @@ def export_pgn_batch(self, cur_part, game_idx_start, game_idx_end, pgn_sel, nb_w data=plys_to_end, synchronizer=zarr.ThreadSynchronizer() ) + zarr_file.create_dataset( + name="phase_vector", + shape=phase_vector.shape, + dtype=np.int16, + data=phase_vector, + synchronizer=zarr.ThreadSynchronizer() + ) zarr_file.create_dataset( name="start_indices", shape=start_indices.shape, @@ -704,7 +723,52 @@ def export_mate_in_one_scenarios(): if __name__ == "__main__": - ROOT = logging.getLogger() - ROOT.setLevel(logging.INFO) - # export_mate_in_one_scenarios() - export_pgn_to_datasetfile() + + import sys, os + + sys.path.insert(0, '../../../') + 
import os + import sys + #from DeepCrazyhouse.src.preprocessing.pgn_to_planes_converter import PGN2PlanesConverter + from DeepCrazyhouse.src.runtime.color_logger import enable_color_logging + + enable_color_logging() + import logging + nb_games_per_file = 1000 + # Rating cap at 90% cumulative rating for all varaints + min_elo_both = { + "Chess": 1000, + # "Crazyhouse": 2000, + # "Chess960": 1950, + # "King of the Hill": 1925, + # "Three-check": 1900, + #"Atomic": 1900, + # "Horde": 1900, + # "Racing Kings": 1900 + } # is ignored if "use_all_games" is True + use_all_games = True + + PGN2PlanesConverter(limit_nb_games_to_analyze=0, nb_games_per_file=nb_games_per_file, + max_nb_files=0, min_elo_both=min_elo_both, termination_conditions=["Normal"], + log_lvl=logging.DEBUG, + compression='lz4', clevel=5, dataset_type='train', + use_all_games=use_all_games).convert_all_pgns_to_planes() + + + PGN2PlanesConverter(limit_nb_games_to_analyze=0, nb_games_per_file=nb_games_per_file, + max_nb_files=1, min_elo_both=min_elo_both, termination_conditions=["Normal"], log_lvl=logging.DEBUG, + compression='lz4', clevel=5, dataset_type='val', use_all_games=use_all_games).convert_all_pgns_to_planes() + + PGN2PlanesConverter(limit_nb_games_to_analyze=0, nb_games_per_file=nb_games_per_file, + max_nb_files=1, min_elo_both=min_elo_both, termination_conditions=["Normal"], log_lvl=logging.DEBUG, + compression='lz4', clevel=5, dataset_type='test', use_all_games=use_all_games).convert_all_pgns_to_planes() + + #PGN2PlanesConverter(limit_nb_games_to_analyze=0, nb_games_per_file=nb_games_per_file, + # max_nb_files=1, min_elo_both=min_elo_both, termination_conditions=["Normal"], log_lvl=logging.DEBUG, + # compression='lz4', clevel=5, dataset_type='mate_in_one').convert_all_pgns_to_planes() + + + #ROOT = logging.getLogger() + #ROOT.setLevel(logging.INFO) + ## export_mate_in_one_scenarios() + #export_pgn_to_datasetfile() diff --git a/DeepCrazyhouse/src/training/metrics_pytorch.py 
b/DeepCrazyhouse/src/training/metrics_pytorch.py index f019949fd..e49ab3600 100644 --- a/DeepCrazyhouse/src/training/metrics_pytorch.py +++ b/DeepCrazyhouse/src/training/metrics_pytorch.py @@ -7,7 +7,7 @@ Metric definitions for Pytorch """ import torch -from DeepCrazyhouse.src.training.trainer_agent_pytorch import SoftCrossEntropyLoss +from DeepCrazyhouse.src.training.trainer_agent_pytorch import SoftCrossEntropyLoss, SampleWeightedLoss class Metric: @@ -17,7 +17,7 @@ def __init__(self): def reset(self) -> None: pass - def update(self, preds: torch.Tensor, labels: torch.Tensor) -> None: + def update(self, preds: torch.Tensor, labels: torch.Tensor, sample_weights: torch.Tensor) -> None: pass def compute(self) -> float: @@ -35,7 +35,7 @@ def reset(self) -> None: self.correct_cnt = 0 self.total_cnt = 0 - def update(self, preds: torch.Tensor, labels: torch.Tensor) -> None: + def update(self, preds: torch.Tensor, labels: torch.Tensor, sample_weights: torch.Tensor = None) -> None: if self.sparse_policy_label: self.correct_cnt += float((preds == labels.data).sum()) else: @@ -49,7 +49,7 @@ def compute(self) -> float: class MSE(Metric): def __init__(self): super().__init__() - self.loss = torch.nn.MSELoss() + self.loss = SampleWeightedLoss(torch.nn.MSELoss) self.loss_sum = 0 self.nb_batches = 0 @@ -57,8 +57,8 @@ def reset(self) -> None: self.loss_sum = 0 self.nb_batches = 0 - def update(self, preds: torch.Tensor, labels: torch.Tensor) -> None: - self.loss_sum += self.loss(preds, labels) + def update(self, preds: torch.Tensor, labels: torch.Tensor, sample_weights: torch.Tensor) -> None: + self.loss_sum += self.loss(preds, labels, sample_weights) self.nb_batches += 1 def compute(self) -> float: @@ -72,9 +72,9 @@ def __init__(self, sparse_policy_label): """ super().__init__() if sparse_policy_label: - self.loss = torch.nn.CrossEntropyLoss() + self.loss = SampleWeightedLoss(torch.nn.CrossEntropyLoss) else: - self.loss = SoftCrossEntropyLoss() + self.loss = 
SampleWeightedLoss(SoftCrossEntropyLoss) self.loss_sum = 0 self.nb_batches = 0 self.sparse_policy_label = sparse_policy_label @@ -83,11 +83,11 @@ def reset(self) -> None: self.loss_sum = 0 self.nb_batches = 0 - def update(self, preds: torch.Tensor, labels: torch.Tensor) -> None: + def update(self, preds: torch.Tensor, labels: torch.Tensor, sample_weights: torch.Tensor) -> None: if self.sparse_policy_label: - self.loss_sum += self.loss(preds, labels.long()) + self.loss_sum += self.loss(preds, labels.long(), sample_weights) else: - self.loss_sum += self.loss(preds, labels) + self.loss_sum += self.loss(preds, labels, sample_weights) self.nb_batches += 1 def compute(self) -> float: @@ -104,7 +104,7 @@ def reset(self) -> None: self.correct_cnt = 0 self.denominator = 0 - def update(self, preds: torch.Tensor, labels: torch.Tensor) -> None: + def update(self, preds: torch.Tensor, labels: torch.Tensor, sample_weights: torch.Tensor = None) -> None: self.correct_cnt += float((preds.sign() == labels.data.sign()).sum()) self.denominator += labels.shape[0] - (labels == 0).sum() diff --git a/DeepCrazyhouse/src/training/train_cli_util.py b/DeepCrazyhouse/src/training/train_cli_util.py index f97ada4c4..94d812ccd 100644 --- a/DeepCrazyhouse/src/training/train_cli_util.py +++ b/DeepCrazyhouse/src/training/train_cli_util.py @@ -276,10 +276,8 @@ def get_validation_data(train_config: TrainConfig): """ Returns the validation loader, x-Data and target-Policy object. 
""" - s_idcs_val, x_val, yv_val, yp_val, plys_to_end, pgn_datasets_val = load_pgn_dataset(dataset_type='val', part_id=0, - verbose=True, - normalize=train_config.normalize) - val_data = get_data_loader(x_val, yv_val, yp_val, plys_to_end, train_config, shuffle=False) + pgn_dataset_arrays_dict = load_pgn_dataset(dataset_type='val', part_id=0, verbose=True, normalize=train_config.normalize) + val_data = get_data_loader(pgn_dataset_arrays_dict, train_config, shuffle=False) return val_data, x_val, yp_val diff --git a/DeepCrazyhouse/src/training/train_cnn.ipynb b/DeepCrazyhouse/src/training/train_cnn.ipynb index 091872fb4..46952807b 100644 --- a/DeepCrazyhouse/src/training/train_cnn.ipynb +++ b/DeepCrazyhouse/src/training/train_cnn.ipynb @@ -14,9 +14,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", @@ -26,9 +24,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload" @@ -37,9 +33,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "from __future__ import print_function\n", @@ -156,9 +150,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "# set the context on CPU, switch to GPU if there is one available (strongly recommended for training)\n", @@ -166,16 +158,21 @@ "tc.device_id = 0\n", "\n", "# set a specific seed value for reproducibility\n", - "tc.seed = 7 # 42\n", + "tc.seed = 9 # 42\n", "\n", "tc.export_weights = True\n", "tc.log_metrics_to_tensorboard = True\n", "tc.export_grad_histograms = False\n", "\n", + "phase_weights = {0: 1.0, 1: 1.0, 2: 1.0} # specify the sample weight for each phase (will be normalized afterwards)\n", + "if \"movecount\" in 
main_config[\"phase_definition\"]:\n", + " assert len(phase_weights) == int(main_config[\"phase_definition\"][-1])\n", + "\n", "# directory to write and read weights, logs, onnx and other export files\n", - "tc.export_dir = \"./\"\n", + "#tc.export_dir = \"C:/workspace/Python/CrazyAra/data/train_phase2/\"\n", + "tc.export_dir = f\"/data/run_model_exports_movecount/movecount4_train_phase_0/\"\n", "\n", - "tc.div_factor = 1 # div factor is a constant which can be used to reduce the batch size and learning rate respectively\n", + "tc.div_factor = 0.5 # div factor is a constant which can be used to reduce the batch size and learning rate respectively\n", "# use a value greater 1 if you encounter memory allocation errors\n", "\n", "# batch_steps = 1000 means for example that every 1000 batches the validation set gets processed\n", @@ -185,7 +182,7 @@ "tc.k_steps_initial = 0\n", "# these are the weights to continue training with\n", "tc.symbol_file = None # 'model-0.81901-0.713-symbol.json'\n", - "tc.params_file = None #'model-0.81901-0.713-0498.params'\n", + "tc.tar_file = None # f\"/data/run_model_exports/train_phase_None_0_25_0_25_1_0/best-model/model-1.25307-0.567-0529.tar\" #'model-0.81901-0.713-0498.params' # used to continue training from model params checkpoint\n", "\n", "tc.batch_size = int(1024 / tc.div_factor) # 1024 # the batch_size needed to be reduced to 1024 in order to fit in the GPU 1080Ti\n", "#4096 was originally used in the paper -> works slower for current GPU\n", @@ -247,17 +244,17 @@ "tc.sparse_policy_label = True\n", "# define if the policy data is also defined in \"select_policy_from_plane\" representation\n", "tc.is_policy_from_plane_data = False\n", - "tc.name_initials = \"JC\"" + "tc.name_initials = \"FH\"" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ + "phase_weights_sum = sum(phase_weights.values())\n", + "to.phase_weights = {k: 
v/phase_weights_sum*len(phase_weights) for k, v in phase_weights.items()} # normalize so that the average weight is 1.0 (assuming each phase occurs approximately equally often)\n", "mode = main_config[\"mode\"]\n", "ctx = get_context(tc.context, tc.device_id)\n", "# concatenated at the end of the final feature representation\n", @@ -268,9 +265,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "if tc.framework == 'mxnet' or tc.framework == 'gluon':\n", @@ -289,9 +284,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "if not os.path.exists(tc.export_dir + \"logs\"):\n", @@ -310,9 +303,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "print(main_config)" @@ -321,9 +312,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "print(tc)" @@ -332,9 +321,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "print(to)" @@ -357,13 +344,19 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ - "s_idcs_val, x_val, yv_val, yp_val, plys_to_end, pgn_datasets_val = load_pgn_dataset(dataset_type='val', part_id=0,\n", - " verbose=True, normalize=tc.normalize)\n", + "pgn_dataset_arrays_dict = load_pgn_dataset(dataset_type='val', part_id=0,\n", + " verbose=True, normalize=tc.normalize)\n", + "s_idcs_val = pgn_dataset_arrays_dict[\"start_indices\"]\n", + "x_val = pgn_dataset_arrays_dict[\"x\"]\n", + "yv_val = pgn_dataset_arrays_dict[\"y_value\"]\n", + "yp_val = pgn_dataset_arrays_dict[\"y_policy\"]\n", + "plys_to_end = pgn_dataset_arrays_dict[\"plys_to_end\"]\n", + 
"pgn_datasets_val = pgn_dataset_arrays_dict[\"pgn_dataset\"]\n", + "phase_vector = pgn_dataset_arrays_dict[\"phase_vector\"]\n", + "\n", "if tc.discount != 1:\n", " yv_val *= tc.discount**plys_to_end\n", " \n", @@ -384,15 +377,47 @@ " else:\n", " val_iter = mx.io.NDArrayIter({'data': x_val}, {'value_label': yv_val, 'policy_label': yp_val.argmax(axis=1)}, tc.batch_size)\n", "elif tc.framework == 'gluon' or tc.framework == 'pytorch':\n", - " val_data = get_data_loader(x_val, yv_val, yp_val, plys_to_end, tc, shuffle=False)" + " val_data = get_data_loader(pgn_dataset_arrays_dict, tc, shuffle=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# additional eval sets" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, + "outputs": [], + "source": [ + "# fill additional loaders that should be used for additional evaluations during training\n", + "if tc.framework == 'pytorch':\n", + " additional_data_loaders = dict()\n", + " for phase in [str(phase) for phase in to.phase_weights.keys()] + [\"None\"]:\n", + " pgn_dataset_arrays_dict = load_pgn_dataset(dataset_type='test', part_id=0,\n", + " verbose=True, normalize=tc.normalize, phase=phase)\n", + " s_idcs_val_tmp = pgn_dataset_arrays_dict[\"start_indices\"]\n", + " x_val_tmp = pgn_dataset_arrays_dict[\"x\"]\n", + " yv_val_tmp = pgn_dataset_arrays_dict[\"y_value\"]\n", + " yp_val_tmp = pgn_dataset_arrays_dict[\"y_policy\"]\n", + " plys_to_end_tmp = pgn_dataset_arrays_dict[\"plys_to_end\"]\n", + " pgn_datasets_val_tmp = pgn_dataset_arrays_dict[\"pgn_dataset\"]\n", + " phase_vector_tmp = pgn_dataset_arrays_dict[\"phase_vector\"]\n", + "\n", + " if tc.discount != 1:\n", + " yv_val_tmp *= tc.discount**plys_to_end_tmp\n", + "\n", + " data_loader = get_data_loader(x_val_tmp, yv_val_tmp, yp_val_tmp, plys_to_end_tmp, phase_vector_tmp, tc, shuffle=False)\n", + " additional_data_loaders[f\"Phase{phase}Test\"] = data_loader" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "tc.nb_parts = len(glob.glob(main_config['planes_train_dir'] + '**/*'))" @@ -401,9 +426,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "nb_it_per_epoch = (len(x_val) * tc.nb_parts) // tc.batch_size # calculate how many iterations per epoch exist\n", @@ -422,9 +445,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "if \"adam\" in tc.optimizer_name:\n", @@ -448,9 +469,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "to.momentum_schedule = MomentumSchedule(to.lr_schedule, tc.min_lr, tc.max_lr, tc.min_momentum, tc.max_momentum)\n", @@ -467,9 +486,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "input_shape = x_val[0].shape\n", @@ -479,9 +496,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "try:\n", @@ -507,9 +522,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "symbol = None" @@ -518,9 +531,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "#net = AlphaZeroResnet(n_labels=2272, channels=256, channels_value_head=8, channels_policy_head=81, num_res_blocks=19, value_fc_size=256, bn_mom=0.9, act_type='relu', select_policy_from_plane=select_policy_from_plane)" @@ -529,9 +540,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "#net = alpha_zero_resnet(n_labels=2272, channels=256, 
channels_value_head=1, channels_policy_head=81, num_res_blocks=19, value_fc_size=256, bn_mom=0.9, act_type='relu')" @@ -540,9 +549,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "#symbol = alpha_zero_symbol(num_filter=256, channels_value_head=4, channels_policy_head=81, workspace=1024, value_fc_size=256, num_res_blocks=19, bn_mom=0.9, act_type='relu',\n", @@ -595,9 +602,7 @@ }, { "cell_type": "markdown", - "metadata": { - "scrolled": true - }, + "metadata": {}, "source": [ "symbol = rise_mobile_v3_symbol(channels=256, channels_operating_init=224, channel_expansion=32, act_type='relu',\n", " channels_value_head=8, value_fc_size=256,\n", @@ -801,9 +806,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "if tc.framework == 'gluon' and symbol is not None:\n", @@ -824,9 +827,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "if tc.framework == 'gluon':\n", @@ -838,9 +839,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "if tc.framework != 'pytorch' and symbol is not None:\n", @@ -860,9 +859,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "if tc.framework == 'mxnet':\n", @@ -911,9 +908,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "if tc.framework == 'mxnet':\n", @@ -927,8 +922,8 @@ " model.bind(for_training=True, data_shapes=[('data', (tc.batch_size, input_shape[0], input_shape[1], input_shape[2]))],\n", " label_shapes=val_iter.provide_label)\n", " model.init_params(mx.initializer.Xavier(rnd_type='uniform', factor_type='avg', 
magnitude=2.24))\n", - " if tc.params_file:\n", - " model.load_params(tc.export_dir + \"weights/\" + tc.params_file)\n", + " if tc.tar_file:\n", + " model.load_params(tc.export_dir + \"weights/\" + tc.tar_file)\n", "elif tc.framework == 'gluon': \n", " # Initializing the parameters\n", " for param in net.collect_params('.*gamma|.*moving_mean|.*moving_var'):\n", @@ -938,8 +933,8 @@ " for param in net.collect_params('.*weight'):\n", " net.params[param].initialize(mx.init.Xavier(rnd_type='uniform', factor_type='avg', magnitude=2.24), ctx=ctx)\n", "\n", - " if tc.params_file:\n", - " net.collect_params().load(tc.export_dir + \"weights/\" + tc.params_file, ctx)\n", + " if tc.tar_file:\n", + " net.collect_params().load(tc.export_dir + \"weights/\" + tc.tar_file, ctx)\n", " net.hybridize()\n", "elif tc.framework == 'pytorch':\n", " def init_weights(m):\n", @@ -948,6 +943,9 @@ " if isinstance(m, torch.nn.Linear):\n", " m.bias.data.fill_(0.01)\n", " #model.apply(init_weights)\n", + " if tc.tar_file:\n", + " print('load model params from file:', tc.tar_file)\n", + " load_torch_state(model, torch.optim.SGD(model.parameters(), lr=tc.max_lr), tc.tar_file, tc.device_id)\n", " if torch.cuda.is_available():\n", " model.cuda(torch.device(f\"cuda:{tc.device_id}\"))" ] @@ -962,9 +960,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "to.metrics = get_metrics(tc)" @@ -980,9 +976,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "if tc.framework == 'mxnet':\n", @@ -990,7 +984,7 @@ "elif tc.framework == 'gluon':\n", " train_agent = TrainerAgentGluon(net, val_data, tc, to, use_rtpt=True)\n", "elif tc.framework == 'pytorch':\n", - " train_agent = TrainerAgentPytorch(model, val_data, tc, to, use_rtpt=True)" + " train_agent = TrainerAgentPytorch(model, val_data, tc, to, use_rtpt=True, 
additional_loaders=additional_data_loaders)" ] }, { @@ -1003,9 +997,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "if tc.framework == 'mxnet':\n", @@ -1022,9 +1014,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "(k_steps_final, value_loss_final, policy_loss_final, value_acc_sign_final, val_p_acc_final), \\\n", @@ -1041,9 +1031,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "prefix = tc.export_dir + \"weights/model-%.5f-%.3f\" % (policy_loss_final, val_p_acc_final)\n", @@ -1070,9 +1058,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "print(val_metric_values_best)" @@ -1125,9 +1111,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "# delete the current net object form memory\n", @@ -1140,9 +1124,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "print('load current best model:', model_params_path)\n", @@ -1168,9 +1150,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "print('best val_loss: %.5f with v_policy_acc: %.5f at k_steps_best %d' % (val_loss_best, val_p_acc_best, k_steps_best))" @@ -1186,9 +1166,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "if tc.use_wdl and tc.use_plys_to_end:\n", @@ -1216,9 +1194,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "print(\"Saved 
json, weight & onnx files of the best model to %s\" % (tc.export_dir + \"best-model\"))" @@ -1234,9 +1210,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "idx = 0" @@ -1245,9 +1219,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "if mode == MODE_CHESS:\n", @@ -1266,9 +1238,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "def predict_single(net, x, select_policy_from_plane=False):\n", @@ -1314,9 +1284,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "policy_to_best_move(board, yp_val[idx])" @@ -1325,9 +1293,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "opts = 5\n", @@ -1338,9 +1304,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "plt.barh(range(opts)[::-1], probs[:opts])\n", @@ -1352,9 +1316,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "board = start_board\n", @@ -1370,9 +1332,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "pred = predict_single(net, x_scholar_atck, tc.select_policy_from_plane)\n", @@ -1387,9 +1347,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "board.push(selected_moves[0])\n", @@ -1406,30 +1364,34 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ - 
"s_idcs_test, x_test, yv_test, yp_test, yplys_test, pgn_datasets_test = load_pgn_dataset(dataset_type='test', part_id=0,\n", - " verbose=True, normalize=True)\n", - "test_data = get_data_loader(x_test, yv_test, yp_test, yplys_test, tc, shuffle=False)" + "pgn_dataset_arrays_dict = load_pgn_dataset(dataset_type='test', part_id=0,\n", + " verbose=True, normalize=True)\n", + "s_idcs_test = pgn_dataset_arrays_dict[\"start_indices\"]\n", + "x_test = pgn_dataset_arrays_dict[\"x\"]\n", + "yv_test = pgn_dataset_arrays_dict[\"y_value\"]\n", + "yp_test = pgn_dataset_arrays_dict[\"y_policy\"]\n", + "yplys_test = pgn_dataset_arrays_dict[\"plys_to_end\"]\n", + "pgn_datasets_test = pgn_dataset_arrays_dict[\"pgn_dataset\"]\n", + "phase_vector_test = pgn_dataset_arrays_dict[\"phase_vector\"]\n", + "\n", + "test_data = get_data_loader(x_test, yv_test, yp_test, yplys_test, phase_vector_test, tc, shuffle=False)" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "if tc.framework == 'mxnet':\n", " metrics = metrics_gluon\n", "\n", "evaluate_metrics(to.metrics, test_data, net, nb_batches=None, sparse_policy_label=tc.sparse_policy_label, ctx=ctx,\n", - " apply_select_policy_from_plane=tc.select_policy_from_plane, use_wdl=tc.use_wdl,\n", - " use_plys_to_end=tc.use_plys_to_end)" + " phase_weights=to.phase_weights, apply_select_policy_from_plane=tc.select_policy_from_plane,\n", + " use_wdl=tc.use_wdl, use_plys_to_end=tc.use_plys_to_end)" ] }, { @@ -1442,15 +1404,22 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ - "s_idcs_mate, x_mate, yv_mate, yp_mate, yplys_mate, pgn_dataset_mate = load_pgn_dataset(dataset_type='mate_in_one', part_id=0,\n", - " verbose=True, normalize=tc.normalize)\n", + "pgn_dataset_arrays_dict = load_pgn_dataset(dataset_type='mate_in_one', part_id=0,\n", + " verbose=True, 
normalize=tc.normalize)\n", + "\n", + "s_idcs_mate = pgn_dataset_arrays_dict[\"start_indices\"]\n", + "x_mate = pgn_dataset_arrays_dict[\"x\"]\n", + "yv_mate = pgn_dataset_arrays_dict[\"y_value\"]\n", + "yp_mate = pgn_dataset_arrays_dict[\"y_policy\"]\n", + "yplys_mate = pgn_dataset_arrays_dict[\"plys_to_end\"]\n", + "pgn_dataset_mate = pgn_dataset_arrays_dict[\"pgn_dataset\"]\n", + "phase_vector_mate = pgn_dataset_arrays_dict[\"phase_vector\"]\n", + "\n", "yplys_mate = np.ones(len(yv_mate))\n", - "mate_data = get_data_loader(x_mate, yv_mate, yp_mate, yplys_mate, tc, shuffle=False)" + "mate_data = get_data_loader(x_mate, yv_mate, yp_mate, yplys_mate, phase_vector_mate, tc, shuffle=False)" ] }, { @@ -1463,14 +1432,12 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "evaluate_metrics(to.metrics, mate_data, net, nb_batches=None, sparse_policy_label=tc.sparse_policy_label, ctx=ctx,\n", - " apply_select_policy_from_plane=tc.select_policy_from_plane, use_wdl=tc.use_wdl,\n", - " use_plys_to_end=tc.use_plys_to_end)" + " phase_weights=to.phase_weights, apply_select_policy_from_plane=tc.select_policy_from_plane,\n", + " use_wdl=tc.use_wdl, use_plys_to_end=tc.use_plys_to_end)" ] }, { @@ -1483,9 +1450,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "from IPython.core.interactiveshell import InteractiveShell\n", @@ -1502,9 +1467,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "def eval_pos(net, x_mate, yp_mate, verbose=False, select_policy_from_plane=False):\n", @@ -1557,9 +1520,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "nb_pos = len(x_mate)\n", @@ -1579,9 +1540,7 @@ { "cell_type": "code", "execution_count": null, - 
"metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "np.array(mate_mv_cnts).mean()" @@ -1590,9 +1549,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "np.array(legal_mv_cnts).mean()" @@ -1608,9 +1565,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "np.array(mate_mv_cnts).mean() / np.array(legal_mv_cnts).mean()" @@ -1626,9 +1581,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "print('mate_in_one_acc:', sum(mates_found) / nb_pos)" @@ -1637,9 +1590,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "sum(mates_5_top_found) / nb_pos" @@ -1648,9 +1599,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "pgn_dataset_mate.tree()" @@ -1659,9 +1608,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "metadata = np.array(pgn_dataset_mate['metadata'])\n", @@ -1672,9 +1619,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "site_mate = metadata[1:, 1]" @@ -1683,9 +1628,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "def clean_string(np_string):\n", @@ -1699,9 +1642,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "import chess.svg\n", @@ -1718,9 +1659,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, 
"outputs": [], "source": [ "for i in range(17):\n", @@ -1741,9 +1680,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "mate_missed = 0\n", @@ -1763,9 +1700,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [] } @@ -1786,18 +1721,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "pycharm": { - "stem_cell": { - "cell_type": "raw", - "metadata": { - "collapsed": false - }, - "source": [] - } + "version": "3.8.16" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/DeepCrazyhouse/src/training/trainer_agent_pytorch.py b/DeepCrazyhouse/src/training/trainer_agent_pytorch.py index ccaab089a..26dff358a 100644 --- a/DeepCrazyhouse/src/training/trainer_agent_pytorch.py +++ b/DeepCrazyhouse/src/training/trainer_agent_pytorch.py @@ -44,6 +44,7 @@ def __init__( train_config: TrainConfig, train_objects: TrainObjects, use_rtpt: bool, + additional_loaders=None ): """ Class for training the neural network. @@ -52,14 +53,16 @@ def __init__( :param train_config: An instance of the TrainConfig data class. :param train_objects: Am instance pf the TrainObject data class. :param use_rtpt: If True, an RTPT object will be created and modified within this class. 
+ :param additional_loaders: optional dictionary of {dataset_name: DataLoader} whose dataloaders will also be + used for evaluation (only used for informative purposes) """ + self.additional_loaders = additional_loaders self.tc = train_config self.to = train_objects if self.to.metrics is None: self.to.metrics = {} self._model = model self._val_loader = val_loader - self.x_train = self.yv_train = self.yp_train = None self._ctx = get_context(train_config.context, train_config.device_id) # define a summary writer that logs data and flushes to the file every 5 seconds @@ -68,12 +71,12 @@ def __init__( self.sum_writer = SummaryWriter(log_dir=self.tc.export_dir+"logs", flush_secs=5) # Define the two loss functions if train_config.sparse_policy_label: - self.policy_loss = nn.CrossEntropyLoss() + self.policy_loss = SampleWeightedLoss(nn.CrossEntropyLoss) else: - self.policy_loss = SoftCrossEntropyLoss() - self.value_loss = nn.MSELoss() - self.wdl_loss = nn.CrossEntropyLoss() - self.ply_loss = nn.MSELoss() + self.policy_loss = SampleWeightedLoss(SoftCrossEntropyLoss) + self.value_loss = SampleWeightedLoss(nn.MSELoss) + self.wdl_loss = SampleWeightedLoss(nn.CrossEntropyLoss) + self.ply_loss = SampleWeightedLoss(nn.MSELoss) # Define the optimizer self.optimizer = create_optimizer(self._model, self.tc) @@ -131,7 +134,7 @@ def train(self, cur_it=None): self.graph_exported = True if self.batch_proc_tmp >= self.tc.batch_steps or self.cur_it >= self.tc.total_it: # show metrics every thousands steps - train_metric_values, val_metric_values = self.evaluate(train_loader) + train_metric_values, val_metric_values, additional_metric_values = self.evaluate(train_loader) if self.use_rtpt: # update process title according to loss @@ -183,6 +186,8 @@ def train(self, cur_it=None): # log the metric values to tensorboard self._log_metrics(train_metric_values, global_step=self.k_steps, prefix="train_") self._log_metrics(val_metric_values, global_step=self.k_steps, prefix="val_") + for 
dataset_name, metric_values in additional_metric_values.items(): + self._log_metrics(metric_values, global_step=self.k_steps, prefix=f"{dataset_name}_") if self.tc.log_metrics_to_tensorboard and self.tc.export_grad_histograms: grads = [] @@ -272,13 +277,15 @@ def delete_previous_weights(self): def _get_train_loader(self, part_id): # load one chunk of the dataset from memory - _, self.x_train, self.yv_train, self.yp_train, self.plys_to_end, _ = load_pgn_dataset(dataset_type="train", - part_id=part_id, - normalize=self.tc.normalize, - verbose=False, - q_value_ratio=self.tc.q_value_ratio) - train_loader = get_data_loader(self.x_train, self.yv_train, self.yp_train, self.plys_to_end, self.tc, - shuffle=True) + + pgn_dataset_arrays_dict = load_pgn_dataset( + dataset_type="train", + part_id=part_id, + normalize=self.tc.normalize, + verbose=False, + q_value_ratio=self.tc.q_value_ratio) + + train_loader = get_data_loader(pgn_dataset_arrays_dict, self.tc, shuffle=True) return train_loader @@ -295,42 +302,68 @@ def evaluate(self, train_loader): logging.debug("Iteration %d/%d", self.cur_it, self.tc.total_it) logging.debug("lr: %.7f - momentum: %.7f", self.to.lr_schedule(self.cur_it), self.to.momentum_schedule(self.cur_it)) + print("starting train eval") train_metric_values = evaluate_metrics( self.to.metrics, train_loader, self._model, nb_batches=25, ctx=self._ctx, + phase_weights=self.to.phase_weights, sparse_policy_label=self.tc.sparse_policy_label, apply_select_policy_from_plane=self.tc.select_policy_from_plane and not self.tc.is_policy_from_plane_data, use_wdl=self.tc.use_wdl, use_plys_to_end=self.tc.use_plys_to_end, ) + + print("starting val eval") val_metric_values = evaluate_metrics( self.to.metrics, self._val_loader, self._model, nb_batches=None, ctx=self._ctx, + phase_weights=self.to.phase_weights, sparse_policy_label=self.tc.sparse_policy_label, apply_select_policy_from_plane=self.tc.select_policy_from_plane and not self.tc.is_policy_from_plane_data, 
use_wdl=self.tc.use_wdl, use_plys_to_end=self.tc.use_plys_to_end, ) + + # do additional evaluations based on self.additional_loaders + additional_metric_values = dict() + for dataset_name, dataloader in self.additional_loaders.items(): + print(f"starting {dataset_name} eval") + metric_values = evaluate_metrics( + self.to.metrics, + dataloader, + self._model, + nb_batches=None, + ctx=self._ctx, + phase_weights={k: 1.0 for k, v in self.to.phase_weights.items()}, # use no weighting + sparse_policy_label=self.tc.sparse_policy_label, + apply_select_policy_from_plane=self.tc.select_policy_from_plane and not self.tc.is_policy_from_plane_data, + use_wdl=self.tc.use_wdl, + use_plys_to_end=self.tc.use_plys_to_end, + ) + additional_metric_values[dataset_name] = metric_values + self._model.train() # return back to training mode - return train_metric_values, val_metric_values + return train_metric_values, val_metric_values, additional_metric_values def train_update(self, batch): self.optimizer.zero_grad() if self.tc.use_wdl and self.tc.use_plys_to_end: - data, value_label, policy_label, wdl_label, plys_label = batch + data, value_label, policy_label, wdl_label, plys_label, phase_vector = batch plys_label = plys_label.to(self._ctx) wdl_label = wdl_label.to(self._ctx).long() else: - data, value_label, policy_label = batch + data, value_label, policy_label, phase_vector = batch data = data.to(self._ctx) value_label = value_label.to(self._ctx) policy_label = policy_label.to(self._ctx) + sample_weights = torch.Tensor([self.to.phase_weights[phase.item()] for phase in phase_vector]) + sample_weights = sample_weights.to(self._ctx) if self.tc.sparse_policy_label: policy_label = policy_label.long() # update a dummy metric to see a proper progress bar @@ -340,13 +373,13 @@ def train_update(self, batch): self.old_label = value_label if self.tc.use_wdl and self.tc.use_plys_to_end: value_out, policy_out, _, wdl_out, plys_out = self._model(data) - wdl_loss = self.wdl_loss(wdl_out, wdl_label) 
- ply_loss = self.ply_loss(torch.flatten(plys_out), plys_label) + wdl_loss = self.wdl_loss(wdl_out, wdl_label, sample_weights) + ply_loss = self.ply_loss(torch.flatten(plys_out), plys_label, sample_weights) else: value_out, policy_out = self._model(data) # policy_out = policy_out.softmax(dim=1) - value_loss = self.value_loss(torch.flatten(value_out), value_label) - policy_loss = self.policy_loss(policy_out, policy_label) + value_loss = self.value_loss(torch.flatten(value_out), value_label, sample_weights) + policy_loss = self.policy_loss(policy_out, policy_label, sample_weights) # weight the components of the combined loss if self.tc.use_wdl and self.tc.use_wdl: combined_loss = ( @@ -437,6 +470,17 @@ def forward(self, input: Tensor, target: Tensor) -> Tensor: return torch.mean(torch.sum(-target * log_softmax(input), 1)) +class SampleWeightedLoss(nn.Module): + def __init__(self, loss_module): + super(SampleWeightedLoss, self).__init__() + self.criterion = loss_module(reduction="none") + + def forward(self, input: Tensor, target: Tensor, sample_weights: Tensor) -> Tensor: + losses = self.criterion(input, target) + weighted_losses = losses * sample_weights + return torch.mean(weighted_losses) + + def get_context(context: str, device_id: int): """ Returns the computation context as Pytorch device object. @@ -629,7 +673,7 @@ def reset_metrics(metrics): metric.reset() -def evaluate_metrics(metrics, data_iterator, model, nb_batches, ctx, sparse_policy_label=False, +def evaluate_metrics(metrics, data_iterator, model, nb_batches, ctx, phase_weights, sparse_policy_label=False, apply_select_policy_from_plane=True, use_wdl=False, use_plys_to_end=False): """ Runs inference of the network on a data_iterator object and evaluates the given metrics. @@ -642,6 +686,7 @@ def evaluate_metrics(metrics, data_iterator, model, nb_batches, ctx, sparse_poli :param nb_batches: Number of batches to evaluate (early stopping). 
If set to None all batches of the data_iterator will be evaluated :param ctx: Pytorch data context + :param phase_weights: dictionary with the weights of each phase as phase: weight kv pairs :param sparse_policy_label: Should be set to true if the policy uses one-hot encoded targets (e.g. supervised learning) :param apply_select_policy_from_plane: If true, given policy label is converted to policy map index @@ -650,32 +695,38 @@ def evaluate_metrics(metrics, data_iterator, model, nb_batches, ctx, sparse_poli reset_metrics(metrics) model.eval() # set model to evaluation mode with torch.no_grad(): # operations inside don't track history + print("eval iterator length:", len(data_iterator), "eval phase weights:", phase_weights) for i, batch in enumerate(data_iterator): if use_wdl and use_plys_to_end: - data, value_label, policy_label, wdl_label, plys_label = batch + data, value_label, policy_label, wdl_label, plys_label, phase_vector = batch plys_label = plys_label.to(ctx) wdl_label = wdl_label.to(ctx).long() else: - data, value_label, policy_label = batch + data, value_label, policy_label, phase_vector = batch data = data.to(ctx) value_label = value_label.to(ctx) policy_label = policy_label.to(ctx) + sample_weights = torch.Tensor([phase_weights.get(phase.item(), 1.0) for phase in phase_vector]) + sample_weights = sample_weights.to(ctx) if use_wdl and use_plys_to_end: value_out, policy_out, _, wdl_out, plys_out = model(data) - metrics["wdl_loss"].update(preds=wdl_out, labels=wdl_label) - metrics["wdl_acc"].update(preds=wdl_out.argmax(axis=1), labels=wdl_label) - metrics["plys_to_end_loss"].update(preds=torch.flatten(plys_out), labels=plys_label) + metrics["wdl_loss"].update(preds=wdl_out, labels=wdl_label, sample_weights=sample_weights) + metrics["wdl_acc"].update(preds=wdl_out.argmax(axis=1), labels=wdl_label, sample_weights=sample_weights) + metrics["plys_to_end_loss"].update(preds=torch.flatten(plys_out), labels=plys_label, + sample_weights=sample_weights) else: 
value_out, policy_out = model(data) # update the metrics - metrics["value_loss"].update(preds=torch.flatten(value_out), labels=value_label) + metrics["value_loss"].update(preds=torch.flatten(value_out), labels=value_label, + sample_weights=sample_weights) metrics["policy_loss"].update(preds=policy_out, #.softmax(dim=1), - labels=policy_label) - metrics["value_acc_sign"].update(preds=torch.flatten(value_out), labels=value_label) + labels=policy_label, sample_weights=sample_weights) + metrics["value_acc_sign"].update(preds=torch.flatten(value_out), labels=value_label, + sample_weights=sample_weights) metrics["policy_acc"].update(preds=policy_out.argmax(axis=1), - labels=policy_label) + labels=policy_label, sample_weights=sample_weights) # stop after evaluating x batches (only recommended to use this for the train set evaluation) if nb_batches and i+1 == nb_batches: @@ -688,30 +739,29 @@ def evaluate_metrics(metrics, data_iterator, model, nb_batches, ctx, sparse_poli return metric_values -def get_data_loader(x, y_value, y_policy, plys_to_end, tc: TrainConfig, shuffle=True): +def get_data_loader(pgn_dataset_arrays_dict: dict, tc: TrainConfig, shuffle=True): """ Returns a DataLoader object for the given numpy arrays. !Note: This function modifies the y_policy! 
- :param x: Input planes - :param y_value: Value target - :param y_policy: Policy target - :param plys_to_end: Plys until the game ends + :param pgn_dataset_arrays_dict: Dict object containing the numpy arrays of load_pgn_dataset :param tc: Training config object :param shuffle: Decide whether to shuffle the dataset or not :return: Returns the data loader object """ - y_policy_prep = prepare_policy(y_policy=y_policy, select_policy_from_plane=tc.select_policy_from_plane, + d = pgn_dataset_arrays_dict + y_policy_prep = prepare_policy(y_policy=d['y_policy'], select_policy_from_plane=tc.select_policy_from_plane, sparse_policy_label=tc.sparse_policy_label, is_policy_from_plane_data=tc.is_policy_from_plane_data) # update the train_data object if tc.use_wdl and tc.use_plys_to_end: - dataset = TensorDataset(torch.Tensor(x), torch.Tensor(y_value), - torch.Tensor(y_policy_prep), - torch.Tensor(value_to_wdl_label(y_value)), - torch.Tensor(prepare_plys_label(plys_to_end))) + dataset = TensorDataset(torch.Tensor(d['x']), torch.Tensor(d['y_value']), + torch.Tensor(y_policy_prep), + torch.Tensor(value_to_wdl_label(d['y_value'])), + torch.Tensor(prepare_plys_label(d['plys_to_end'])), + torch.Tensor(d['phase_vector'])) else: - dataset = TensorDataset(torch.Tensor(x), torch.Tensor(y_value), - torch.Tensor(y_policy_prep)) + dataset = TensorDataset(torch.Tensor(d['x']), torch.Tensor(d['y_value']), + torch.Tensor(y_policy_prep), torch.Tensor(d['phase_vector'])) train_loader = DataLoader(dataset, shuffle=shuffle, batch_size=tc.batch_size, num_workers=tc.cpu_count) return train_loader diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index 1762a8eb7..8ed29cb0d 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -12,8 +12,8 @@ option(BACKEND_OPENVINO "Build with OpenVino backend (CPU/GPU) support" option(BUILD_TESTS "Build and run tests" OFF) option(USE_DYNAMIC_NN_ARCH "Build with dynamic neural network architektur support" ON) # enable a single mode for different 
model input / outputs -option(MODE_CRAZYHOUSE "Build with crazyhouse only support" ON) -option(MODE_CHESS "Build with chess + chess960 only support" OFF) +option(MODE_CRAZYHOUSE "Build with crazyhouse only support" OFF) +option(MODE_CHESS "Build with chess + chess960 only support" ON) option(MODE_LICHESS "Build with lichess variants support" OFF) option(MODE_OPEN_SPIEL "Build with open_spiel environment support" OFF) option(MODE_BOARDGAMES "Build with Fairy-Stockfish environment support for board games" OFF) diff --git a/engine/src/agents/agent.cpp b/engine/src/agents/agent.cpp index 9f5007192..51e5c17a0 100644 --- a/engine/src/agents/agent.cpp +++ b/engine/src/agents/agent.cpp @@ -57,8 +57,8 @@ void Agent::set_must_wait(bool value) mustWait = value; } -Agent::Agent(NeuralNetAPI* net, PlaySettings* playSettings, bool verbose): - NeuralNetAPIUser(net), +Agent::Agent(vector>& nets, PlaySettings* playSettings, bool verbose): + NeuralNetAPIUser(nets), playSettings(playSettings), mustWait(true), verbose(verbose), isRunning(false) { } diff --git a/engine/src/agents/agent.h b/engine/src/agents/agent.h index eda0c0072..8b2cdea0c 100644 --- a/engine/src/agents/agent.h +++ b/engine/src/agents/agent.h @@ -72,7 +72,7 @@ class Agent : public NeuralNetAPIUser bool isRunning; public: - Agent(NeuralNetAPI* net, PlaySettings* playSettings, bool verbose); + Agent(vector>& nets, PlaySettings* playSettings, bool verbose); /** * @brief perform_action Selects an action based on the evaluation result diff --git a/engine/src/agents/config/searchsettings.cpp b/engine/src/agents/config/searchsettings.cpp index 59fb6c463..db44c5dab 100644 --- a/engine/src/agents/config/searchsettings.cpp +++ b/engine/src/agents/config/searchsettings.cpp @@ -51,7 +51,8 @@ SearchSettings::SearchSettings(): searchPlayerMode(MODE_TWO_PLAYER), virtualStyle(VIRTUAL_VISIT), virtualMixThreshold(1000), - virtualOffsetStrenght(0.001) + virtualOffsetStrenght(0.001), + gamePhaseDefinition(MOVECOUNT) { } diff --git 
a/engine/src/agents/config/searchsettings.h b/engine/src/agents/config/searchsettings.h index fc3c6abdf..ed7499dba 100644 --- a/engine/src/agents/config/searchsettings.h +++ b/engine/src/agents/config/searchsettings.h @@ -43,6 +43,11 @@ enum VirtualStyle { VIRTUAL_MIX }; +enum GamePhaseDefinition { + LICHESS, + MOVECOUNT +}; + struct SearchSettings { uint16_t multiPV; @@ -87,6 +92,8 @@ struct SearchSettings uint_fast32_t virtualMixThreshold; // Defines the strength of the virtual offset double virtualOffsetStrenght; + // Defines the type of game phase definition to be used + GamePhaseDefinition gamePhaseDefinition; SearchSettings(); }; diff --git a/engine/src/agents/mctsagent.cpp b/engine/src/agents/mctsagent.cpp index e72ca061a..bef2d68c4 100644 --- a/engine/src/agents/mctsagent.cpp +++ b/engine/src/agents/mctsagent.cpp @@ -34,10 +34,9 @@ #include "../node.h" #include "../util/communication.h" - -MCTSAgent::MCTSAgent(NeuralNetAPI *netSingle, vector>& netBatches, +MCTSAgent::MCTSAgent(vector>& netSingleVector, vector>>& netBatchesVector, SearchSettings* searchSettings, PlaySettings* playSettings): - Agent(netSingle, playSettings, true), + Agent(netSingleVector, playSettings, true), searchSettings(searchSettings), rootNode(nullptr), rootState(nullptr), @@ -53,7 +52,11 @@ MCTSAgent::MCTSAgent(NeuralNetAPI *netSingle, vector>& mapWithMutex.hashTable.reserve(1e6); for (auto i = 0; i < searchSettings->threads; ++i) { - searchThreads.emplace_back(new SearchThread(netBatches[i].get(), searchSettings, &mapWithMutex)); + vector> netBatchVector; // stores the ith element of all netBatches in netBatchesVector + for (auto& netBatches : netBatchesVector) { + netBatchVector.push_back(std::move(netBatches[i])); + } + searchThreads.emplace_back(new SearchThread(netBatchVector, searchSettings, &mapWithMutex)); } timeManager = make_unique(searchSettings->randomMoveFactor); generator = default_random_engine(r()); @@ -78,7 +81,7 @@ Node* MCTSAgent::get_root_node() const string 
MCTSAgent::get_device_name() const { - return net->get_device_name(); + return nets.front()->get_device_name(); } float MCTSAgent::get_dirichlet_noise() const @@ -166,10 +169,11 @@ shared_ptr MCTSAgent::get_root_node_from_tree(StateObj *state) void MCTSAgent::set_root_node_predictions() { - state->get_state_planes(true, inputPlanes, net->get_version()); - net->predict(inputPlanes, valueOutputs, probOutputs, auxiliaryOutputs); + state->get_state_planes(true, inputPlanes, nets.front()->get_version()); + GamePhase currentPhase = state->get_phase(numPhases, searchSettings->gamePhaseDefinition); + nets[phaseToNetsIndex.at(currentPhase)]->predict(inputPlanes, valueOutputs, probOutputs, auxiliaryOutputs); size_t tbHits = 0; - fill_nn_results(0, net->is_policy_map(), valueOutputs, probOutputs, auxiliaryOutputs, rootNode.get(), tbHits, + fill_nn_results(0, nets[phaseToNetsIndex.at(currentPhase)]->is_policy_map(), valueOutputs, probOutputs, auxiliaryOutputs, rootNode.get(), tbHits, rootState->mirror_policy(state->side_to_move()), searchSettings, rootNode->is_tablebase()); } @@ -256,12 +260,12 @@ void MCTSAgent::clear_game_history() bool MCTSAgent::is_policy_map() { - return net->is_policy_map(); + return nets.front()->is_policy_map(); } string MCTSAgent::get_name() const { - return engineName + "-" + engineVersion + "-" + net->get_model_name(); + return engineName + "-" + engineVersion + "-" + nets.front()->get_model_name(); } void MCTSAgent::update_stats() diff --git a/engine/src/agents/mctsagent.h b/engine/src/agents/mctsagent.h index e31ba41e7..3cf9ff238 100644 --- a/engine/src/agents/mctsagent.h +++ b/engine/src/agents/mctsagent.h @@ -82,8 +82,8 @@ class MCTSAgent : public Agent unique_ptr threadManager; bool reachedTablebases; public: - MCTSAgent(NeuralNetAPI* netSingle, - vector>& netBatches, + MCTSAgent(vector>& netSingleVector, + vector>>& netBatchesVector, SearchSettings* searchSettings, PlaySettings* playSettings); ~MCTSAgent(); diff --git 
a/engine/src/agents/mctsagentbatch.cpp b/engine/src/agents/mctsagentbatch.cpp index d75124d62..3b5cd6fe2 100644 --- a/engine/src/agents/mctsagentbatch.cpp +++ b/engine/src/agents/mctsagentbatch.cpp @@ -38,9 +38,9 @@ #include "util/gcthread.h" -MCTSAgentBatch::MCTSAgentBatch(NeuralNetAPI *netSingle, vector>& netBatches, +MCTSAgentBatch::MCTSAgentBatch(vector>& netSingleVector, vector>>& netBatchesVector, SearchSettings* searchSettings, PlaySettings* playSettings, int noa, bool sN): - MCTSAgent(netSingle, netBatches, searchSettings, playSettings) + MCTSAgent(netSingleVector, netBatchesVector, searchSettings, playSettings) { numberOfAgents = noa; splitNodes = sN; @@ -55,9 +55,9 @@ MCTSAgentBatch::~MCTSAgentBatch() string MCTSAgentBatch::get_name() const { - string ret = "MCTSBatch-" + std::to_string(numberOfAgents) + "-" + engineVersion + "-" + net->get_model_name(); + string ret = "MCTSBatch-" + std::to_string(numberOfAgents) + "-" + engineVersion + "-" + nets.front()->get_model_name(); if(splitNodes){ - ret = "MCTSBatch-Split-" + std::to_string(numberOfAgents) + "-" + engineVersion + "-" + net->get_model_name(); + ret = "MCTSBatch-Split-" + std::to_string(numberOfAgents) + "-" + engineVersion + "-" + nets.front()->get_model_name(); } return ret; } diff --git a/engine/src/agents/mctsagentbatch.h b/engine/src/agents/mctsagentbatch.h index 1d7fbea35..f0474463d 100644 --- a/engine/src/agents/mctsagentbatch.h +++ b/engine/src/agents/mctsagentbatch.h @@ -53,8 +53,8 @@ class MCTSAgentBatch : public MCTSAgent bool splitNodes; public: - MCTSAgentBatch(NeuralNetAPI* netSingle, - vector>& netBatches, + MCTSAgentBatch(vector>& netSingleVector, + vector>>& netBatchesVector, SearchSettings* searchSettings, PlaySettings* playSettings, int iterations, diff --git a/engine/src/agents/mctsagenttruesight.cpp b/engine/src/agents/mctsagenttruesight.cpp index c1d6f80e8..c2fbed398 100644 --- a/engine/src/agents/mctsagenttruesight.cpp +++ b/engine/src/agents/mctsagenttruesight.cpp @@ -37,9 
+37,9 @@ #include "util/gcthread.h" -MCTSAgentTrueSight::MCTSAgentTrueSight(NeuralNetAPI *netSingle, vector>& netBatches, +MCTSAgentTrueSight::MCTSAgentTrueSight(vector>& netSingleVector, vector>>& netBatchesVector, SearchSettings* searchSettings, PlaySettings* playSettings): - MCTSAgent(netSingle, netBatches, searchSettings, playSettings) + MCTSAgent(netSingleVector, netBatchesVector, searchSettings, playSettings) { } @@ -53,7 +53,7 @@ MCTSAgentTrueSight::~MCTSAgentTrueSight() string MCTSAgentTrueSight::get_name() const { - return "MCTSTrueSight-" + engineVersion + "-" + net->get_model_name(); + return "MCTSTrueSight-" + engineVersion + "-" + nets.front()->get_model_name(); } void MCTSAgentTrueSight::evaluate_board_state() diff --git a/engine/src/agents/mctsagenttruesight.h b/engine/src/agents/mctsagenttruesight.h index 4de005306..c61775130 100644 --- a/engine/src/agents/mctsagenttruesight.h +++ b/engine/src/agents/mctsagenttruesight.h @@ -48,8 +48,8 @@ using namespace crazyara; class MCTSAgentTrueSight : public MCTSAgent { public: - MCTSAgentTrueSight(NeuralNetAPI* netSingle, - vector>& netBatches, + MCTSAgentTrueSight(vector>& netSingleVector, + vector>>& netBatchesVector, SearchSettings* searchSettings, PlaySettings* playSettings ); diff --git a/engine/src/agents/randomagent.cpp b/engine/src/agents/randomagent.cpp index 20b059368..4fe8aba2d 100644 --- a/engine/src/agents/randomagent.cpp +++ b/engine/src/agents/randomagent.cpp @@ -37,9 +37,9 @@ #include "util/gcthread.h" -MCTSAgentRandom::MCTSAgentRandom(NeuralNetAPI *netSingle, vector>& netBatches, +MCTSAgentRandom::MCTSAgentRandom(vector>& netSingleVector, vector>>& netBatchesVector, SearchSettings* searchSettings, PlaySettings* playSettings): - MCTSAgent(netSingle, netBatches, searchSettings, playSettings) + MCTSAgent(netSingleVector, netBatchesVector, searchSettings, playSettings) { } diff --git a/engine/src/agents/randomagent.h b/engine/src/agents/randomagent.h index e5c912f8d..bd6041c21 100644 --- 
a/engine/src/agents/randomagent.h +++ b/engine/src/agents/randomagent.h @@ -50,8 +50,8 @@ class MCTSAgentRandom : public MCTSAgent public: public: - MCTSAgentRandom(NeuralNetAPI* netSingle, - vector>& netBatches, + MCTSAgentRandom(vector>& netSingleVector, + vector>>& netBatchesVector, SearchSettings* searchSettings, PlaySettings* playSettings); ~MCTSAgentRandom(); diff --git a/engine/src/agents/rawnetagent.cpp b/engine/src/agents/rawnetagent.cpp index 23c1369db..f37ee6f9b 100644 --- a/engine/src/agents/rawnetagent.cpp +++ b/engine/src/agents/rawnetagent.cpp @@ -29,8 +29,9 @@ using blaze::HybridVector; -RawNetAgent::RawNetAgent(NeuralNetAPI* net, PlaySettings* playSettings, bool verbose): - Agent(net, playSettings, verbose) +RawNetAgent::RawNetAgent(vector>& nets, PlaySettings* playSettings, bool verbose, SearchSettings* searchSettings): + Agent(nets, playSettings, verbose), + searchSettings(searchSettings) { } @@ -53,13 +54,13 @@ void RawNetAgent::evaluate_board_state() evalInfo->pv[0] = {evalInfo->legalMoves[0]}; return; } - state->get_state_planes(true, inputPlanes, net->get_version()); - net->predict(inputPlanes, valueOutputs, probOutputs, auxiliaryOutputs); + state->get_state_planes(true, inputPlanes, nets.front()->get_version()); + nets[phaseToNetsIndex.at(state->get_phase(numPhases, searchSettings->gamePhaseDefinition))]->predict(inputPlanes, valueOutputs, probOutputs, auxiliaryOutputs); state->set_auxiliary_outputs(auxiliaryOutputs); evalInfo->policyProbSmall.resize(evalInfo->legalMoves.size()); get_probs_of_move_list(0, probOutputs, evalInfo->legalMoves, state->mirror_policy(state->side_to_move()), - !net->is_policy_map(), evalInfo->policyProbSmall, net->is_policy_map()); + !nets.front()->is_policy_map(), evalInfo->policyProbSmall, nets.front()->is_policy_map()); size_t selIdx = argmax(evalInfo->policyProbSmall); Action bestmove = evalInfo->legalMoves[selIdx]; diff --git a/engine/src/agents/rawnetagent.h b/engine/src/agents/rawnetagent.h index 
2c443ddee..92aee49dc 100644 --- a/engine/src/agents/rawnetagent.h +++ b/engine/src/agents/rawnetagent.h @@ -42,7 +42,9 @@ using namespace crazyara; class RawNetAgent: public Agent { public: - RawNetAgent(NeuralNetAPI* net, PlaySettings* playSettings, bool verbose); + SearchSettings* searchSettings; + + RawNetAgent(vector>& nets, PlaySettings* playSettings, bool verbose, SearchSettings* searchSettings); RawNetAgent(const RawNetAgent&) = delete; RawNetAgent& operator=(RawNetAgent const&) = delete; diff --git a/engine/src/environments/chess_related/board.cpp b/engine/src/environments/chess_related/board.cpp index c858a3fdd..f5f71afce 100644 --- a/engine/src/environments/chess_related/board.cpp +++ b/engine/src/environments/chess_related/board.cpp @@ -442,3 +442,146 @@ bool is_capture(const Board* pos, Move move) } #endif + +unsigned int get_majors_and_minors_count(const Board& pos) +{ + return pos.count() + pos.count() + pos.count() + pos.count(); +} + +bool is_backrank_sparse(const Board& pos) +{ + Bitboard backrankPiecesWhiteBb = pos.pieces(WHITE, ALL_PIECES) & rank_bb(Rank(0)); + Bitboard backrankPiecesBlackBb = pos.pieces(BLACK, ALL_PIECES) & rank_bb(Rank(7)); + + // True if either white or black backrank is sparse (three or less pieces) + return (popcount(backrankPiecesWhiteBb) <= 3) || (popcount(backrankPiecesBlackBb) <= 3); +} + +int score_region(int numWhitePiecesInRegion, int numBlackPiecesInRegion, int rank) +{ + if (numWhitePiecesInRegion == 1 && numBlackPiecesInRegion == 0) { + return 1 + (8 - rank); + } + else if (numWhitePiecesInRegion == 2 && numBlackPiecesInRegion == 0) { + return 2 + ((rank > 2) ? (rank - 2) : 0); + } + else if (numWhitePiecesInRegion == 3 && numBlackPiecesInRegion == 0) { + return 3 + ((rank > 1) ? (rank - 1) : 0); + } + else if (numWhitePiecesInRegion == 4 && numBlackPiecesInRegion == 0) { + return 3 + ((rank > 1) ? 
(rank - 1) : 0); + } + else if (numWhitePiecesInRegion == 0 && numBlackPiecesInRegion == 1) { + return 1 + rank; + } + else if (numWhitePiecesInRegion == 1 && numBlackPiecesInRegion == 1) { + return 5 + abs(3 - rank); + } + else if (numWhitePiecesInRegion == 2 && numBlackPiecesInRegion == 1) { + return 4 + rank; + } + else if (numWhitePiecesInRegion == 3 && numBlackPiecesInRegion == 1) { + return 5 + rank; + } + else if (numWhitePiecesInRegion == 0 && numBlackPiecesInRegion == 2) { + return 2 + ((rank < 6) ? (6 - rank) : 0); + } + else if (numWhitePiecesInRegion == 1 && numBlackPiecesInRegion == 2) { + return 4 + (6 - rank); + } + else if (numWhitePiecesInRegion == 2 && numBlackPiecesInRegion == 2) { + return 7; + } + else if (numWhitePiecesInRegion == 0 && numBlackPiecesInRegion == 3) { + return 3 + ((rank < 7) ? (7 - rank) : 0); + } + else if (numWhitePiecesInRegion == 1 && numBlackPiecesInRegion == 3) { + return 5 + (6 - rank); + } + else if (numWhitePiecesInRegion == 0 && numBlackPiecesInRegion == 4) { + return 3 + ((rank < 7) ? 
(7 - rank) : 0); + } + + return 0; // for 0 white and 0 black and all other (incorrect) options with a sum that is bigger than 4 + +} + +int get_mixedness(const Board& pos) +{ + int mix = 0; + + for (int rankIdx = 0; rankIdx < 7; ++rankIdx) { // use ranks 1 to 7 (indices 0 to 6) + for (int fileIdx = 0; fileIdx < 7; ++fileIdx) { // use files A to G (indices 0 to 6) + int numWhitePiecesInRegion = 0; + int numBlackPiecesInRegion = 0; + for (int dx = 0; dx < 2; ++dx) { + for (int dy = 0; dy < 2; ++dy) { + Square currSquare = make_square(File(fileIdx + dx), Rank(rankIdx + dy)); + Piece currPiece = pos.piece_on(currSquare); + + if (currPiece != NO_PIECE) { + if (color_of(currPiece) == WHITE) + { + numWhitePiecesInRegion++; + } + else { + numBlackPiecesInRegion++; + } + } + } + } + mix += score_region(numWhitePiecesInRegion, numBlackPiecesInRegion, rankIdx + 1); + } + } + + return mix; +} + +GamePhase Board::get_phase(unsigned int numPhases, GamePhaseDefinition gamePhaseDefinition) const +{ + if (gamePhaseDefinition == LICHESS) { + + assert(numPhases == 3); // lichess definition requires three models to be loaded + + // returns the game phase based on the lichess definition implemented in: + // https://github.com/lichess-org/scalachess/blob/master/src/main/scala/Divider.scala + unsigned int numMajorsAndMinors = get_majors_and_minors_count(*this); + + if (numMajorsAndMinors <= 6) + { + return GamePhase(2); + } + else + { + bool backrankSparse = is_backrank_sparse(*this); + int mixednessScore = get_mixedness(*this); + + if (numMajorsAndMinors <= 10 || backrankSparse || mixednessScore > 150) + { + return GamePhase(1); + } + else + { + return GamePhase(0); + } + } + } + else if (gamePhaseDefinition == MOVECOUNT) { + if (numPhases == 1) { // directly return phase 0 if there is only a single network loaded + return GamePhase(0); + } + else { // use naive phases by move count + double averageMovecountPerGame = 42.85; + double phaseLength = std::round(averageMovecountPerGame / 
numPhases); + size_t movesCompleted = this->total_move_cout(); + double gamePhaseDouble = movesCompleted / phaseLength; + if (gamePhaseDouble > numPhases - 1) { // ensure that all higher results are attributed to the last phase + return GamePhase(numPhases - 1); + } + else { + return GamePhase(gamePhaseDouble); // truncated to Integer value + } + } + } + return GamePhase(0); +} diff --git a/engine/src/environments/chess_related/board.h b/engine/src/environments/chess_related/board.h index f8f42f011..b13c367a2 100644 --- a/engine/src/environments/chess_related/board.h +++ b/engine/src/environments/chess_related/board.h @@ -118,6 +118,15 @@ class Board : public Position */ bool draw_by_insufficient_material() const; + /** + * @brief get_phase Returns the game phase of the current board state based on the total amount of phases and the chosen GamePhaseDefinition + * Possible returned values are all integers from 0 to numPhases - 1 + * @param unsigned int numPhases + * @param GamePhaseDefinition gamePhaseDefinition + * @return Game phase as unsigned int + */ + GamePhase get_phase(unsigned int numPhases, GamePhaseDefinition gamePhaseDefinition) const; + // overloaded function which include a last move list update void do_move(Move m, StateInfo& newSt); void do_move(Move m, StateInfo& newSt, bool givesCheck); @@ -216,6 +225,35 @@ inline bool is_capture(const Board* pos, Move move); */ inline bool enhance_move_type(float increment, float thresh, const vector& legalMoves, const DynamicVector& moveType, DynamicVector& policyProbSmall); +/** + * @brief get_majors_and_minors_count Returns the amount of majors and minors currently still on the board (both sides) + * @param pos Given board position + * @return Unsigned integer representing the amount of majors and minors left + */ +unsigned int get_majors_and_minors_count(const Board& pos); + +/** + * @brief is_backrank_sparse Checks whether the backrank of either side is sparse (three or less pieces) + * @param pos Given board 
position + * @return True if either the white or the black backrank is sparse + */ +bool is_backrank_sparse(const Board& pos); + +/** + * @brief score_region Calculates a mixedness score for a 2x2 subregion of the board + * @param numWhitePiecesInRegion Amount of white pieces in the current region + * @param numBlackPiecesInRegion Amount of black pieces in the current region + * @param rank Rank of the current region + * @return Integer representing the mixedness of the region + */ +int score_region(int numWhitePiecesInRegion, int numBlackPiecesInRegion, int rank); + +/** + * @brief get_mixedness Returns the mixedness of the given position as defined in https://github.com/lichess-org/scalachess/blob/master/src/main/scala/Divider.scala + * @param pos Given board position + * @return Integer representing the mixedness of the given position + */ +int get_mixedness(const Board& pos); #endif // BOARD_H #endif diff --git a/engine/src/environments/chess_related/boardstate.cpp b/engine/src/environments/chess_related/boardstate.cpp index c6a5791c1..606e18c68 100644 --- a/engine/src/environments/chess_related/boardstate.cpp +++ b/engine/src/environments/chess_related/boardstate.cpp @@ -271,3 +271,8 @@ void BoardState::init(int variant, bool is960) } #endif + +GamePhase BoardState::get_phase(unsigned int numPhases, GamePhaseDefinition gamePhaseDefinition) const +{ + return board.get_phase(numPhases, gamePhaseDefinition); +} \ No newline at end of file diff --git a/engine/src/environments/chess_related/boardstate.h b/engine/src/environments/chess_related/boardstate.h index 1f5b28ff9..090eeeb77 100644 --- a/engine/src/environments/chess_related/boardstate.h +++ b/engine/src/environments/chess_related/boardstate.h @@ -417,6 +417,7 @@ class BoardState : public State void set_auxiliary_outputs(const float* auxiliaryOutputs) override; BoardState* clone() const override; void init(int variant, bool isChess960) override; + GamePhase get_phase(unsigned int numPhases, 
GamePhaseDefinition gamePhaseDefinition) const override; }; #endif // BOARTSTATE_H diff --git a/engine/src/environments/chess_related/inputrepresentation.cpp b/engine/src/environments/chess_related/inputrepresentation.cpp index e0d5d89da..f67a31823 100644 --- a/engine/src/environments/chess_related/inputrepresentation.cpp +++ b/engine/src/environments/chess_related/inputrepresentation.cpp @@ -532,6 +532,7 @@ inline void board_to_planes_chess_v_2_8(PlaneData& planeData, const vectorsecond, variants.find(StateConstantsFairy::available_variants()[variant])->second->startFen, isChess960, &states->back(), nullptr, false); variantNumber = variant; } + +GamePhase FairyState::get_phase(unsigned int numPhases, GamePhaseDefinition gamePhaseDefinition) const +{ + // TODO: Implement phase definition here + return GamePhase(0); +} diff --git a/engine/src/environments/fairy_state/fairystate.h b/engine/src/environments/fairy_state/fairystate.h index 0a69aac20..7c93f33e2 100644 --- a/engine/src/environments/fairy_state/fairystate.h +++ b/engine/src/environments/fairy_state/fairystate.h @@ -227,6 +227,7 @@ class FairyState : public State Tablebase::WDLScore check_for_tablebase_wdl(Tablebase::ProbeState &result) override; void set_auxiliary_outputs(const float* auxiliaryOutputs) override; void init(int variant, bool isChess960); + GamePhase get_phase(unsigned int numPhases, GamePhaseDefinition gamePhaseDefinition) const override; }; #endif // FAIRYSTATE_H diff --git a/engine/src/environments/open_spiel/openspielstate.cpp b/engine/src/environments/open_spiel/openspielstate.cpp index 8835ff74b..ba4d3e757 100644 --- a/engine/src/environments/open_spiel/openspielstate.cpp +++ b/engine/src/environments/open_spiel/openspielstate.cpp @@ -193,3 +193,9 @@ void OpenSpielState::init(int variant, bool isChess960) { check_variant(variant); spielState = spielGame->NewInitialState(); } + +GamePhase OpenSpielState::get_phase(unsigned int numPhases, GamePhaseDefinition gamePhaseDefinition) const { + 
// TODO: Implement game phases + return GamePhase(0); +} + diff --git a/engine/src/environments/open_spiel/openspielstate.h b/engine/src/environments/open_spiel/openspielstate.h index 3f2073a0e..f7825ab9d 100644 --- a/engine/src/environments/open_spiel/openspielstate.h +++ b/engine/src/environments/open_spiel/openspielstate.h @@ -143,6 +143,7 @@ class OpenSpielState : public State void set_auxiliary_outputs(const float* auxiliaryOutputs); OpenSpielState *clone() const; void init(int variant, bool isChess960); + GamePhase get_phase(unsigned int numPhases, GamePhaseDefinition gamePhaseDefinition) const; }; #endif // OPENSPIELSTATE_H diff --git a/engine/src/environments/stratego_related/strategostate.cpp b/engine/src/environments/stratego_related/strategostate.cpp index 314b8e72f..d1e74ac27 100755 --- a/engine/src/environments/stratego_related/strategostate.cpp +++ b/engine/src/environments/stratego_related/strategostate.cpp @@ -219,3 +219,9 @@ void StrategoState::init(int variant, bool isChess960) { spielState = spielGame->NewInitialState(); } } + +GamePhase StrategoState::get_phase(unsigned int numPhases, GamePhaseDefinition gamePhaseDefinition) const +{ + // TODO: Implement phase definition here + return GamePhase(0); +} diff --git a/engine/src/environments/stratego_related/strategostate.h b/engine/src/environments/stratego_related/strategostate.h index fe18c42f8..9737650d1 100755 --- a/engine/src/environments/stratego_related/strategostate.h +++ b/engine/src/environments/stratego_related/strategostate.h @@ -117,6 +117,7 @@ class StrategoState : public State StrategoState *clone() const; StrategoState *openBoard() const; void init(int variant, bool isChess960) override; + GamePhase get_phase(unsigned int numPhases, GamePhaseDefinition gamePhaseDefinition) const override; }; #endif // STRATEGOSTATE_H diff --git a/engine/src/nn/neuralnetapi.cpp b/engine/src/nn/neuralnetapi.cpp index 2990866f4..a7ebb64b7 100644 --- a/engine/src/nn/neuralnetapi.cpp +++ 
b/engine/src/nn/neuralnetapi.cpp @@ -26,7 +26,6 @@ #include "neuralnetapi.h" #include #include -#include "../stateobj.h" string get_string_ending_with(const vector& stringVector, const string& suffix) { @@ -86,7 +85,9 @@ void NeuralNetAPI::initialize_nn_design() nbNNAuxiliaryOutputs = nnDesign.auxiliaryOutputShape.flatten() / batchSize; nbPolicyValues = nnDesign.policyOutputShape.v[1]; version = read_version_from_string(modelName); + gamePhase = read_game_phase_from_string(modelDir); info_string("Input representation: ", version_to_string(version)); + info_string("Game Phase: ", std::to_string(gamePhase)); } void NeuralNetAPI::initialize() @@ -105,7 +106,8 @@ NeuralNetAPI::NeuralNetAPI(const string& ctx, int deviceID, unsigned int batchSi nbNNInputValues(0), // will be set dynamically in initialize_nn_design() nbNNAuxiliaryOutputs(0), // will be set dynamically in initialize_nn_design() nbPolicyValues(0), // will be set dynamically in initialize_nn_design() - version(make_version<0,0,0>()) + version(make_version<0,0,0>()), + gamePhase(0) { modelDir = parse_directory(modelDirectory); deviceName = ctx + string("_") + to_string(deviceID); @@ -116,6 +118,12 @@ bool NeuralNetAPI::is_policy_map() const return nnDesign.isPolicyMap; } + +GamePhase NeuralNetAPI::get_game_phase() const +{ + return gamePhase; +} + string NeuralNetAPI::get_model_name() const { return modelName; @@ -218,6 +226,15 @@ Version read_version_from_string(const string &modelFileName) return make_version<0,0,0>(); } +GamePhase read_game_phase_from_string(const string& modelDir) +{ + // use last char of modelDir and convert to int by subtracting '0' + // TODO throw errors if necessary (if last letter is not a digit) + + int gamePhase = (modelDir[modelDir.length() - 2]) - '0'; + return GamePhase(gamePhase); +} + void apply_softmax(float* input, size_t size) { size_t idx; diff --git a/engine/src/nn/neuralnetapi.h b/engine/src/nn/neuralnetapi.h index b9425cb5d..211605c06 100644 --- 
a/engine/src/nn/neuralnetapi.h +++ b/engine/src/nn/neuralnetapi.h @@ -38,6 +38,7 @@ #include "../util/communication.h" #include "neuralnetdesign.h" #include "version.h" +#include "../stateobj.h" // http://www.codebind.com/cpp-tutorial/cpp-program-list-files-directory-windows-linux/ namespace { @@ -101,6 +102,14 @@ string get_file_ending_with(const string& dir, const string& suffix); */ Version read_version_from_string(const string& modelFileName); +/** + * @brief read_game_phase_from_string Returns the GamePhase a given model directory belongs to based on its last character + * e.g. "/model/ClassicAra/chess/separated_learning/phase0" indicates that the model in this directory belongs to phase 0 + * @param modelDir Model directory + * @return GamePhase + */ +GamePhase read_game_phase_from_string(const string& modelDir); + template /** @@ -159,6 +168,7 @@ class NeuralNetAPI uint_fast32_t nbPolicyValues; Version version; + GamePhase gamePhase; private: /** * @brief init_nn_design Infers the input and output shapes of the loaded neural network architectures and @@ -210,6 +220,13 @@ class NeuralNetAPI */ string get_device_name() const; + + /** + * @brief get_game_phase Returns the game phase of this NeuralNetAPI + * @return GamePhase + */ + GamePhase get_game_phase() const; + /** * @brief predict Runs a prediction on the given inputPlanes and returns the policy vector in form of a NDArray and the value as a float number * @param inputPlanes Pointer to the input planes of a single board position diff --git a/engine/src/nn/neuralnetapiuser.cpp b/engine/src/nn/neuralnetapiuser.cpp index e22b0f293..b3596d844 100644 --- a/engine/src/nn/neuralnetapiuser.cpp +++ b/engine/src/nn/neuralnetapiuser.cpp @@ -31,33 +31,43 @@ #include "common.h" #endif -NeuralNetAPIUser::NeuralNetAPIUser(NeuralNetAPI *net): - net(net), +NeuralNetAPIUser::NeuralNetAPIUser(vector>& netsNew) : auxiliaryOutputs(nullptr) { + nets = std::move(netsNew); + numPhases = nets.size(); + + for (unsigned int i = 0; 
i < numPhases; i++) + { + GamePhase phaseOfNetI = nets[i]->get_game_phase(); + assert(phaseOfNetI < numPhases); // no net should have a phase greater or equal to the total amount of nets (assumes that only phases from 0 to numPhases -1 are possible) + assert(phaseToNetsIndex.count(phaseOfNetI) == 0); // no net should have the same phase as another net + phaseToNetsIndex[phaseOfNetI] = i; + } + // allocate memory for all predictions and results #ifdef TENSORRT #ifdef DYNAMIC_NN_ARCH - CHECK(cudaMallocHost((void**) &inputPlanes, net->get_batch_size() * net->get_nb_input_values_total() * sizeof(float))); + CHECK(cudaMallocHost((void**) &inputPlanes, nets.front()->get_batch_size() * nets.front()->get_nb_input_values_total() * sizeof(float))); #else - CHECK(cudaMallocHost((void**) &inputPlanes, net->get_batch_size() * StateConstants::NB_VALUES_TOTAL() * sizeof(float))); + CHECK(cudaMallocHost((void**) &inputPlanes, nets.front()->get_batch_size() * StateConstants::NB_VALUES_TOTAL() * sizeof(float))); #endif - CHECK(cudaMallocHost((void**) &valueOutputs, net->get_batch_size() * sizeof(float))); - CHECK(cudaMallocHost((void**) &probOutputs, net->get_batch_size() * net->get_nb_policy_values() * sizeof(float))); - if (net->has_auxiliary_outputs()) { - CHECK(cudaMallocHost((void**) &auxiliaryOutputs, net->get_batch_size() * net->get_nb_auxiliary_outputs() * sizeof(float))); + CHECK(cudaMallocHost((void**) &valueOutputs, nets.front()->get_batch_size() * sizeof(float))); + CHECK(cudaMallocHost((void**) &probOutputs, nets.front()->get_batch_size() * nets.front()->get_nb_policy_values() * sizeof(float))); + if (nets.front()->has_auxiliary_outputs()) { + CHECK(cudaMallocHost((void**) &auxiliaryOutputs, nets.front()->get_batch_size() * nets.front()->get_nb_auxiliary_outputs() * sizeof(float))); } #else - inputPlanes = new float[net->get_batch_size() * net->get_nb_input_values_total()]; - valueOutputs = new float[net->get_batch_size()]; - probOutputs = new 
float[net->get_batch_size() * net->get_nb_policy_values()]; + inputPlanes = new float[nets.front()->get_batch_size() * nets.front()->get_nb_input_values_total()]; + valueOutputs = new float[nets.front()->get_batch_size()]; + probOutputs = new float[nets.front()->get_batch_size() * nets.front()->get_nb_policy_values()]; #ifdef DYNAMIC_NN_ARCH - if (net->has_auxiliary_outputs()) { - auxiliaryOutputs = new float[net->get_batch_size() * net->get_nb_auxiliary_outputs()]; + if (nets.front()->has_auxiliary_outputs()) { + auxiliaryOutputs = new float[nets.front()->get_batch_size() * nets.front()->get_nb_auxiliary_outputs()]; } #else if (StateConstants::NB_AUXILIARY_OUTPUTS()) { - auxiliaryOutputs = new float[net->get_batch_size() * StateConstants::NB_AUXILIARY_OUTPUTS()]; + auxiliaryOutputs = new float[nets.front()->get_batch_size() * StateConstants::NB_AUXILIARY_OUTPUTS()]; } #endif #endif @@ -70,7 +80,7 @@ NeuralNetAPIUser::~NeuralNetAPIUser() CHECK(cudaFreeHost(valueOutputs)); CHECK(cudaFreeHost(probOutputs)); #ifdef DYNAMIC_NN_ARCH - if (net->has_auxiliary_outputs()) { + if (nets.front()->has_auxiliary_outputs()) { #else if (StateConstants::NB_AUXILIARY_OUTPUTS()) { #endif @@ -81,7 +91,7 @@ NeuralNetAPIUser::~NeuralNetAPIUser() delete [] valueOutputs; delete [] probOutputs; #ifdef DYNAMIC_NN_ARCH - if (net->has_auxiliary_outputs()) { + if (nets.front()->has_auxiliary_outputs()) { #else if (StateConstants::NB_AUXILIARY_OUTPUTS()) { #endif @@ -93,7 +103,7 @@ NeuralNetAPIUser::~NeuralNetAPIUser() void NeuralNetAPIUser::run_inference(uint_fast16_t iterations) { for (uint_fast16_t it = 0; it < iterations; ++it) { - net->predict(inputPlanes, valueOutputs, probOutputs, auxiliaryOutputs); + nets.front()->predict(inputPlanes, valueOutputs, probOutputs, auxiliaryOutputs); } } diff --git a/engine/src/nn/neuralnetapiuser.h b/engine/src/nn/neuralnetapiuser.h index d2b19f388..07916ab15 100644 --- a/engine/src/nn/neuralnetapiuser.h +++ b/engine/src/nn/neuralnetapiuser.h @@ -37,7 
+37,9 @@ class NeuralNetAPIUser { protected: - NeuralNetAPI* net; + vector> nets; // vector of net objects + unsigned int numPhases; + std::map phaseToNetsIndex; // maps a GamePhase to the index of the net that should be used // inputPlanes stores the plane representation of all newly expanded nodes of a single mini-batch float* inputPlanes; @@ -48,7 +50,7 @@ class NeuralNetAPIUser float* auxiliaryOutputs; public: - NeuralNetAPIUser(NeuralNetAPI* net); + NeuralNetAPIUser(vector>& netsNew); ~NeuralNetAPIUser(); NeuralNetAPIUser(NeuralNetAPIUser&) = delete; diff --git a/engine/src/searchthread.cpp b/engine/src/searchthread.cpp index be2a86664..a58f64eae 100644 --- a/engine/src/searchthread.cpp +++ b/engine/src/searchthread.cpp @@ -33,6 +33,7 @@ #include #include #include "util/blazeutil.h" +#include size_t SearchThread::get_max_depth() const @@ -40,8 +41,8 @@ size_t SearchThread::get_max_depth() const return depthMax; } -SearchThread::SearchThread(NeuralNetAPI *netBatch, const SearchSettings* searchSettings, MapWithMutex* mapWithMutex): - NeuralNetAPIUser(netBatch), +SearchThread::SearchThread(vector>& netBatchVector, const SearchSettings* searchSettings, MapWithMutex* mapWithMutex): + NeuralNetAPIUser(netBatchVector), rootNode(nullptr), rootState(nullptr), newState(nullptr), // will be be set via setter methods newNodes(make_unique>(searchSettings->batchSize)), newNodeSideToMove(make_unique>(searchSettings->batchSize)), @@ -225,7 +226,9 @@ Node* SearchThread::get_new_child_to_evaluate(NodeDescription& description) #else // fill a new board in the input_planes vector // we shift the index by nbNNInputValues each time - newState->get_state_planes(true, inputPlanes + newNodes->size() * net->get_nb_input_values_total(), net->get_version()); + newState->get_state_planes(true, inputPlanes + newNodes->size() * nets.front()->get_nb_input_values_total(), nets.front()->get_version()); + GamePhase currPhase = newState->get_phase(numPhases, searchSettings->gamePhaseDefinition); 
+ phaseCountMap[currPhase]++; // save a reference newly created list in the temporary list for node creation // it will later be updated with the evaluation of the NN newNodeSideToMove->add_element(newState->side_to_move()); @@ -299,7 +302,7 @@ void SearchThread::set_nn_results_to_child_nodes() { size_t batchIdx = 0; for (auto node: *newNodes) { - fill_nn_results(batchIdx, net->is_policy_map(), valueOutputs, probOutputs, auxiliaryOutputs, node, + fill_nn_results(batchIdx, nets.front()->is_policy_map(), valueOutputs, probOutputs, auxiliaryOutputs, node, tbHits, rootState->mirror_policy(newNodeSideToMove->get_element(batchIdx)), searchSettings, rootNode->is_tablebase()); ++batchIdx; @@ -381,7 +384,23 @@ void SearchThread::thread_iteration() create_mini_batch(); #ifndef SEARCH_UCT if (newNodes->size() != 0) { - net->predict(inputPlanes, valueOutputs, probOutputs, auxiliaryOutputs); + + // determine majority class in current batch + using pair_type = decltype(phaseCountMap)::value_type; + auto pr = std::max_element + ( + std::begin(phaseCountMap), std::end(phaseCountMap), + [](const pair_type& p1, const pair_type& p2) { + return p1.second < p2.second; + } + ); + + GamePhase majorityPhase = pr->first; + + phaseCountMap.clear(); + + // query the network that corresponds to the majority phase + nets[phaseToNetsIndex.at(majorityPhase)]->predict(inputPlanes, valueOutputs, probOutputs, auxiliaryOutputs); set_nn_results_to_child_nodes(); } #endif diff --git a/engine/src/searchthread.h b/engine/src/searchthread.h index 11cde23e3..99205942d 100644 --- a/engine/src/searchthread.h +++ b/engine/src/searchthread.h @@ -61,6 +61,7 @@ class SearchThread : public NeuralNetAPIUser // list of all node objects which have been selected for expansion unique_ptr> newNodes; unique_ptr> newNodeSideToMove; + std::map phaseCountMap; // saves counts of all phases in current batch unique_ptr> transpositionValues; vector newTrajectories; @@ -84,11 +85,11 @@ class SearchThread : public 
NeuralNetAPIUser public: /** * @brief SearchThread - * @param netBatch Network API object which provides the prediction of the neural network + * @param netBatchVector vector of Network API objects which provide the prediction of the neural network * @param searchSettings Given settings for this search run * @param MapWithMutex Handle to the hash table */ - SearchThread(NeuralNetAPI* netBatch, const SearchSettings* searchSettings, MapWithMutex* mapWithMutex); + SearchThread(vector>& netBatchVector, const SearchSettings* searchSettings, MapWithMutex* mapWithMutex); /** * @brief create_mini_batch Creates a mini-batch of new unexplored nodes. diff --git a/engine/src/state.h b/engine/src/state.h index 48091ecca..61f40fdbc 100644 --- a/engine/src/state.h +++ b/engine/src/state.h @@ -35,6 +35,7 @@ #include #include "version.h" #include "util/communication.h" +#include "agents/config/searchsettings.h" typedef uint64_t Key; #ifdef ACTION_64_BIT @@ -45,6 +46,7 @@ typedef int32_t Action; typedef uint16_t MoveIdx; typedef unsigned int uint; typedef int SideToMove; +typedef unsigned int GamePhase; #define FIRST_PLAYER_IDX 0 const int ACTION_NONE = 0; @@ -495,6 +497,15 @@ class State * @param variant Variant which the position corresponds to */ virtual void init(int variant, bool isChess960) = 0; + + /** + * @brief get_phase Returns the current game phase of the state + * @param numPhases Number of phases in total + * @param gamePhaseDefinition Game phase definition to use (e.g. 
MOVECOUNT, LICHESS) + * @return Game phase (uint) + */ + virtual GamePhase get_phase(unsigned int numPhases, GamePhaseDefinition gamePhaseDefinition) const = 0; + }; #endif // GAMESTATE_H diff --git a/engine/src/uci/crazyara.cpp b/engine/src/uci/crazyara.cpp index b2ea8d2c3..70fb94215 100644 --- a/engine/src/uci/crazyara.cpp +++ b/engine/src/uci/crazyara.cpp @@ -50,7 +50,6 @@ CrazyAra::CrazyAra(): rawAgent(nullptr), mctsAgent(nullptr), - netSingle(nullptr), // will be initialized in is_ready() #ifdef USE_RL netSingleContender(nullptr), mctsAgentContender(nullptr), @@ -555,6 +554,8 @@ bool CrazyAra::is_ready() { bool hasReplied = false; if (!networkLoaded) { + netSingleVector.clear(); + netBatchesVector.clear(); const size_t timeoutMS = Options["Timeout_MS"]; TimeOutReadyThread timeoutThread(timeoutMS); thread tTimeoutThread; @@ -566,13 +567,25 @@ bool CrazyAra::is_ready() #ifdef USE_RL init_rl_settings(); #endif - netSingle = create_new_net_single(string(Options["Model_Directory"])); - netSingle->validate_neural_network(); - netBatches = create_new_net_batches(string(Options["Model_Directory"])); - netBatches.front()->validate_neural_network(); - mctsAgent = create_new_mcts_agent(netSingle.get(), netBatches, &searchSettings); - rawAgent = make_unique(netSingle.get(), &playSettings, false); + + // analyse directory to get num phases + for (const auto& entry : fs::directory_iterator(string(Options["Model_Directory"]))) { + std::cout << entry.path().generic_string() << std::endl; + + unique_ptr netSingleTmp = create_new_net_single(entry.path().generic_string()); + netSingleTmp->validate_neural_network(); + vector> netBatchesTmp = create_new_net_batches(entry.path().generic_string()); + netBatchesTmp.front()->validate_neural_network(); + + netSingleVector.push_back(std::move(netSingleTmp)); + netBatchesVector.push_back(std::move(netBatchesTmp)); + } + + mctsAgent = create_new_mcts_agent(netSingleVector, netBatchesVector, &searchSettings); + //rawAgent = 
make_unique(netSingleVector, &playSettings, false, &searchSettings); + // TODO: rawAgent currently doesn't work (netSingleVector somehow doesn't contain any nets) StateConstants::init(mctsAgent->is_policy_map(), Options["UCI_Chess960"]); + timeoutThread.kill(); if (timeoutMS != 0) { tTimeoutThread.join(); @@ -670,32 +683,33 @@ void CrazyAra::set_uci_option(istringstream &is, StateObj& state) } } -unique_ptr CrazyAra::create_new_mcts_agent(NeuralNetAPI* netSingle, vector>& netBatches, SearchSettings* searchSettings, MCTSAgentType type) +unique_ptr CrazyAra::create_new_mcts_agent(vector>& netSingleVector, vector>>& netBatchesVector, SearchSettings* searchSettings, MCTSAgentType type) { switch (type) { case MCTSAgentType::kDefault: - return make_unique(netSingle, netBatches, searchSettings, &playSettings); + return make_unique(netSingleVector, netBatchesVector, searchSettings, &playSettings); case MCTSAgentType::kBatch1: info_string("TYP 1 -> Batch 1"); - return make_unique(netSingle, netBatches, searchSettings, &playSettings , 1, false); + return make_unique(netSingleVector, netBatchesVector, searchSettings, &playSettings , 1, false); case MCTSAgentType::kBatch3: info_string("TYP 2 -> Batch 3"); - return make_unique(netSingle, netBatches, searchSettings, &playSettings , 3, false); + return make_unique(netSingleVector, netBatchesVector, searchSettings, &playSettings , 3, false); case MCTSAgentType::kBatch5: info_string("TYP 3 -> Batch 5"); - return make_unique(netSingle, netBatches, searchSettings, &playSettings , 5, false); + return make_unique(netSingleVector, netBatchesVector, searchSettings, &playSettings , 5, false); case MCTSAgentType::kBatch3_reducedNodes: info_string("TYP 4 -> Batch 3 Split"); - return make_unique(netSingle, netBatches, searchSettings, &playSettings , 3, true); + return make_unique(netSingleVector, netBatchesVector, searchSettings, &playSettings , 3, true); case MCTSAgentType::kBatch5_reducedNodes: info_string("TYP 5 -> Batch 5 Split"); - 
return make_unique(netSingle, netBatches, searchSettings, &playSettings , 5, true); + return make_unique(netSingleVector, netBatchesVector, searchSettings, &playSettings , 5, true); case MCTSAgentType::kTrueSight: info_string("TYP 6 -> TrueSight"); - return make_unique(netSingle, netBatches, searchSettings, &playSettings); + return make_unique(netSingleVector, netBatchesVector, searchSettings, &playSettings); case MCTSAgentType::kRandom: info_string("TYP 7 -> Random"); - return make_unique(netSingle, netBatches, searchSettings, &playSettings); + return make_unique(netSingleVector, netBatchesVector, searchSettings, &playSettings); + default: info_string("Unknown MCTSAgentType"); return nullptr; @@ -756,6 +770,15 @@ void CrazyAra::init_search_settings() info_string_important("Unknown option", Options["Virtual_Style"], "for Virtual_Style"); } searchSettings.virtualMixThreshold = Options["Virtual_Mix_Threshold"]; + if (Options["Game_Phase_Definition"] == "lichess") { + searchSettings.gamePhaseDefinition = LICHESS; + } + else if (Options["Game_Phase_Definition"] == "movecount") { + searchSettings.gamePhaseDefinition = MOVECOUNT; + } + else { + info_string_important("Unknown option", Options["Game_Phase_Definition"], "for Game_Phase_Definition"); + } } void CrazyAra::init_play_settings() diff --git a/engine/src/uci/crazyara.h b/engine/src/uci/crazyara.h index 4247ee8cf..55f4d4ba2 100644 --- a/engine/src/uci/crazyara.h +++ b/engine/src/uci/crazyara.h @@ -41,6 +41,8 @@ #include "agents/config/playsettings.h" #include "node.h" #include "timeoutreadythread.h" +#include +namespace fs = std::filesystem; #ifdef USE_RL #include "rl/selfplay.h" #include "agents/config/rlsettings.h" @@ -80,8 +82,8 @@ class CrazyAra string(" ASCII-Art: Joan G. 
Stark, Chappell, Burton \n"); unique_ptr rawAgent; unique_ptr mctsAgent; - unique_ptr netSingle; - vector> netBatches; + vector> netSingleVector; + vector>> netBatchesVector; #ifdef USE_RL unique_ptr netSingleContender; unique_ptr mctsAgentContender; @@ -269,13 +271,13 @@ class CrazyAra * @brief create_new_mcts_agent Factory method to create a new MCTSAgent when loading new neural network weights * @param modelDirectory Directory where the .params and .json files are stored * @param states State-Manager, needed to keep track of 3-fold-repetition - * @param netSingle Neural net with batch-size 1. It will be loaded from file. - * @param netBatches Neural net handes with a batch-size defined by the uci options. It will be loaded from file. + * @param netSingleVector Neural networks with batch-size 1. They will be loaded from file. + * @param netBatchesVector Neural networks handes with a batch-size defined by the uci options. They will be loaded from file. * @param searchSettings Search settings object * @param type Which type of agent should be used, default is 0. 
"""
@file: run_cutechess_experiments
Created on 03.10.2023
@project: CrazyAra
@author: Felix

Executes cutechess commands in the shell
"""

import subprocess
from datetime import datetime

device = 0
player_a = "correct_phases"
player_b = "no_phases"

batch_size_options = [64]
use960 = False
movetime_options = [100, 200, 400, 800, 1600]
nodes_options = [100, 200, 400, 800, 1600, 3200]
stockfish_nodes_options = [None]  # [3200, 6400, 12800]

variant = "fischerandom" if use960 else "standard"
openings_file = "960_openings.epd" if use960 else "chess.epd"
out_mode = "960_" if use960 else ""

_TIME_FORMAT = '%m/%d/%Y, %H:%M:%S'


def build_cutechess_command(batch_size, movetime, nodes, stockfish_nodes=None):
    """Build the cutechess-cli invocation for one experiment.

    The module-level settings (``variant``, ``openings_file``, ``out_mode``,
    ``player_a``, ``player_b``, ``device``) are read when the function is called.

    :param batch_size: Value passed as ``option.Batch_Size``
    :param movetime: Value passed as ``option.Fixed_Movetime``
    :param nodes: Value passed as ``option.Nodes`` (``option.Simulations`` is twice this value)
    :param stockfish_nodes: If not None, the first engine is Stockfish limited to this many nodes;
        otherwise both engines are ClassicAra builds using the ``player_a`` / ``player_b`` models
    :return: Tuple ``(pgnout, command)`` with the PGN output path and the shell command string
    """
    pgn_stem = (
        f"/data/cutechess_results/{out_mode}{player_a}_vs_{player_b}"
        f"_movetime{movetime}_nodes{nodes}_bs{batch_size}"
    )
    if stockfish_nodes is None:
        pgnout = f"{pgn_stem}.pgn"
    else:
        pgnout = f"{pgn_stem}_sfnodes{stockfish_nodes}.pgn"

    match_setup = (
        f"./cutechess-cli -variant {variant} -openings file={openings_file} format=epd order=random "
        f"-pgnout {pgnout} -resign movecount=5 score=600 -draw movenumber=30 movecount=4 score=20 "
        f"-concurrency 1"
    )

    if stockfish_nodes is None:
        parts = [
            match_setup,
            f"-engine name={out_mode}ClassicAra_{player_a} cmd=./FH_ClassicAra dir=~/CrazyAra/engine/build "
            f"option.Model_Directory=/data/model/ClassicAra/chess/{player_a} proto=uci",
            f"-engine name={out_mode}ClassicAra_{player_b} cmd=./FH_ClassicAra dir=~/CrazyAra/engine/build "
            f"option.Model_Directory=/data/model/ClassicAra/chess/{player_b} proto=uci",
            f"-each option.First_Device_ID={device} option.Batch_Size={batch_size} "
            f"option.Fixed_Movetime={movetime} tc=0/6000+0.1 option.Nodes={nodes} "
            f"option.Simulations={nodes * 2} option.Search_Type=mcts -games 2 -rounds 500 -repeat",
        ]
    else:
        parts = [
            match_setup,
            f"-engine name={player_a} cmd=./FH_Stockfish dir=/data proto=uci nodes={stockfish_nodes}",
            f"-engine name={out_mode}ClassicAra_{player_b} cmd=./FH_ClassicAra dir=~/CrazyAra/engine/build "
            f"option.Model_Directory=/data/model/ClassicAra/chess/{player_b} proto=uci "
            f"option.Batch_Size={batch_size} option.Fixed_Movetime={movetime} option.Nodes={nodes} "
            f"option.Simulations={nodes * 2} option.Search_Type=mcts option.First_Device_ID={device}",
            "-each tc=0/6000+0.1 -games 2 -rounds 500 -repeat",
        ]

    # Joining with a single space guarantees that neighbouring argument groups never fuse
    # (e.g. "nodes=3200-engine"); the trailing space keeps the historic command format.
    return pgnout, " ".join(parts) + " "


def elapsed_hours(start_time, end_time):
    """Return the time between two datetimes as a float number of hours."""
    return (end_time - start_time).total_seconds() / 3600


def generate_and_run_command(batch_size, movetime, nodes, stockfish_nodes=None):
    """Run one cutechess-cli experiment and log it next to the PGN output.

    The command, start/end time and runtime are printed and written to
    ``<pgnout without .pgn>_output.txt``; the output of cutechess-cli itself is
    redirected into the same file.

    :param batch_size: Batch size used by the ClassicAra engine(s)
    :param movetime: Fixed move time option for the ClassicAra engine(s)
    :param nodes: Node limit option for the ClassicAra engine(s)
    :param stockfish_nodes: Node limit for Stockfish; None runs ClassicAra vs. ClassicAra
    :raises subprocess.CalledProcessError: If cutechess-cli exits with a non-zero status
    """
    start_time = datetime.now()
    pgnout, command = build_cutechess_command(batch_size, movetime, nodes, stockfish_nodes)

    with open(f'{pgnout[:-4]}_output.txt', 'w') as f:
        print("=====================================")
        new_experiment_info = (
            f"New Experiment: BS: {batch_size}, Movetime: {movetime}, "
            f"Nodes: {nodes}, Stockfish Nodes: {stockfish_nodes}\n"
        )
        f.write(new_experiment_info)
        print(new_experiment_info)

        start_info = f"Start Time: {start_time.strftime(_TIME_FORMAT)}"
        f.write(f"{start_info}\n")
        print(start_info)

        f.write(f"{command}\n")
        print(command)
        # flush our own header before the child process starts appending to the same file
        f.flush()

        # NOTE: the command is a shell string built only from the constants in this script
        subprocess.run(command, text=True, check=True, shell=True, stdout=f, stderr=subprocess.STDOUT)

        end_time = datetime.now()
        end_info = f"End Time: {end_time.strftime(_TIME_FORMAT)}"
        f.write(f"{end_info}\n")
        print(end_info)

        runtime_hours = elapsed_hours(start_time, end_time)
        print(f"Runtime in hours: {runtime_hours}")
        f.write(f"Runtime in hours: {runtime_hours}\n")


def main():
    """Run the node-limited experiments first, then the movetime-limited ones, for every batch size."""
    for batch_size in batch_size_options:
        for nodes in nodes_options:
            for stockfish_nodes in stockfish_nodes_options:
                generate_and_run_command(batch_size, 0, nodes, stockfish_nodes=stockfish_nodes)
        for movetime in movetime_options:
            for stockfish_nodes in stockfish_nodes_options:
                generate_and_run_command(batch_size, movetime, 0, stockfish_nodes=stockfish_nodes)


if __name__ == "__main__":
    main()