
Commit

updates for version 0.3.1: Higher NPS 250->300, enabled transposition table, added time management regime by spending less time on obvious moves, added opening guard moves to avoid exploration of moves < 5% for a given number of moves in the opening, added increasing cpuct value as described by recent DeepMind publication
QueensGambit committed Dec 26, 2018
1 parent 8c705dc commit ddbc816
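
The growing cpuct value mentioned in the commit message follows the schedule described in DeepMind's AlphaZero publication; below is a minimal sketch, assuming the constants from the published pseudocode (the values actually used by this engine may differ):

import math

# constants as given in the AlphaZero pseudocode; assumptions, not taken from this diff
CPUCT_INIT = 1.25
CPUCT_BASE = 19652

def current_cpuct(parent_visits: int) -> float:
    # the exploration constant grows logarithmically with the parent node's visit count
    return math.log((parent_visits + CPUCT_BASE + 1) / CPUCT_BASE) + CPUCT_INIT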
Showing 30 changed files with 1,135 additions and 473 deletions.
Empty file modified DeepCrazyhouse/src/domain/__init__.py
100644 → 100755
Empty file.
16 changes: 15 additions & 1 deletion DeepCrazyhouse/src/domain/abstract_cls/_GameState.py
100644 → 100755
@@ -17,7 +17,7 @@ def __init__(self, board):
self.board = board
self._fen_dic = {}

def apply_move(self, move: chess.Move, remember_state=False):
def apply_move(self, move: chess.Move): #, remember_state=False):
self.board.push(move)

def get_state_planes(self):
@@ -52,5 +52,19 @@ def get_board_fen(self):
return self.board.fen()
#return self.board.fen().rsplit(' ', 1)[0]

def get_transposition_key(self):
"""
Returns an identifier key for the current board state excluding move counters.
Calling ._transposition_key() is faster than .fen()
:return:
"""
return self.board._transposition_key()

def new_game(self):
raise NotImplementedError

def get_halfmove_counter(self):
return self.board.halfmove_clock

def get_fullmove_number(self):
return self.board.fullmove_number
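
A minimal sketch (not part of this commit) of how such a key can back a transposition table during search, assuming a plain dict shared across the tree; node_factory is an illustrative callable:

transposition_table = {}

def get_or_create_node(state, node_factory):
    # identical positions reached through different move orders share one entry
    key = state.get_transposition_key()
    node = transposition_table.get(key)
    if node is None:
        node = node_factory(state)
        transposition_table[key] = node
    return node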
Empty file modified DeepCrazyhouse/src/domain/abstract_cls/__init__.py
100644 → 100755
Empty file.
8 changes: 7 additions & 1 deletion DeepCrazyhouse/src/domain/agent/NeuralNetAPI.py
100644 → 100755
@@ -1,6 +1,5 @@
import logging
import numpy as np
import DeepCrazyhouse.src.runtime.Colorer
import time
import json
import glob
@@ -79,6 +78,13 @@ def __init__(self, ctx='cpu', batch_size=1):
grad_req='null', force_rebind=True)
self.executor.copy_params_from(arg_params, aux_params)

self.executors = []
for i in range(batch_size):
executor = sym.simple_bind(ctx=self.ctx, data=(i+1, NB_CHANNELS_FULL, BOARD_HEIGHT, BOARD_WIDTH),
grad_req='null', force_rebind=True)
executor.copy_params_from(arg_params, aux_params)
self.executors.append(executor)

def get_executor(self):
"""
Returns the executor object used for inference
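
The loop above binds one executor per possible batch size (1 to batch_size), so a partially filled batch can be evaluated without zero-padding. A minimal sketch of the intended lookup, under the assumption that the list is indexed by batch size minus one (as the NetPredService diff further below suggests):

def predict_batch(net, planes_batch):
    # planes_batch: mx.nd.array with n <= batch_size samples
    executor = net.executors[len(planes_batch) - 1]  # executor bound to exactly n samples
    pred = executor.forward(is_train=False, data=planes_batch)
    # value head output and softmax-activated policy head output
    return pred[0].asnumpy(), pred[1].softmax().asnumpy()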
Empty file modified DeepCrazyhouse/src/domain/agent/README.md
100644 → 100755
Empty file.
Empty file modified DeepCrazyhouse/src/domain/agent/__init__.py
100644 → 100755
Empty file.
720 changes: 552 additions & 168 deletions DeepCrazyhouse/src/domain/agent/player/MCTSAgent.py
100644 → 100755

Large diffs are not rendered by default.

28 changes: 17 additions & 11 deletions DeepCrazyhouse/src/domain/agent/player/RawNetAgent.py
100644 → 100755
@@ -13,32 +13,38 @@
from DeepCrazyhouse.src.domain.agent.NeuralNetAPI import NeuralNetAPI
from DeepCrazyhouse.src.domain.crazyhouse.output_representation import get_probs_of_move_list, value_to_centipawn
from time import time

import sys

class RawNetAgent(_Agent):

def __init__(self, net: NeuralNetAPI, temperature=0., clip_quantil=0., verbose=True):
super().__init__(temperature, clip_quantil, verbose)
def __init__(self, net: NeuralNetAPI, temperature=0., temperature_moves=4, verbose=True):
super().__init__(temperature, temperature_moves, verbose)
self._net = net

def evaluate_board_state(self, state: _GameState, verbose=True):
def evaluate_board_state(self, state: _GameState):
"""
:param state:
:return:
"""

t_start_eval = time()
pred_value, pred_policy = self._net.predict_single(state.get_state_planes())

legal_moves = list(state.get_legal_moves())

p_vec_small = get_probs_of_move_list(pred_policy, legal_moves, state.is_white_to_move())

if verbose is True:
# use the move with the highest probability as the best move for logging
instinct_move = legal_moves[p_vec_small.argmax()]
# use the move with the highest probability as the best move for logging
instinct_move = legal_moves[p_vec_small.argmax()]

# show the best calculated line
print('info score cp %d depth %d nodes %d time %d pv %s' % (
value_to_centipawn(pred_value), 1, 1, (time() - t_start_eval) * 1000, instinct_move.uci()))
# define the remaining return variables
time_e = (time() - t_start_eval)
cp = value_to_centipawn(pred_value)
depth = 1
nodes = 1
time_elapsed_s = time_e * 1000
nps = nodes/time_e
pv = instinct_move.uci()

return pred_value, legal_moves, p_vec_small
return pred_value, legal_moves, p_vec_small, cp, depth, nodes, time_elapsed_s, nps, pv
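
A minimal sketch (not part of this diff) of how a caller could turn the extra return values into the UCI info line that the removed print statement used to emit; the helper name is illustrative:

def format_uci_info(cp, depth, nodes, time_elapsed_s, nps, pv):
    # mirrors the 'info score cp ... pv ...' string previously printed inside the agent
    return 'info score cp %d depth %d nodes %d time %d nps %d pv %s' % (
        cp, depth, nodes, time_elapsed_s, nps, pv)

Note that despite its name, time_elapsed_s already holds milliseconds (time_e * 1000), which matches what the UCI time field expects.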
92 changes: 37 additions & 55 deletions DeepCrazyhouse/src/domain/agent/player/_Agent.py
100644 → 100755
@@ -17,10 +17,11 @@ class _Agent:
The greedy agent always performs the first legal move with the highest move probability
"""

def __init__(self, temperature=0., clip_quantil=0., verbose=True):
def __init__(self, temperature=0, temperature_moves=4, verbose=True):
self.temperature = temperature
self.p_vec_small = None
self.clip_quantil = clip_quantil
self.temperature_current = temperature
self.temperature_moves = temperature_moves
#self.p_vec_small = None
self.verbose = verbose

def evaluate_board_state(self, state: _GameState):
@@ -29,76 +30,57 @@ def evaluate_board_state(self, state: _GameState):
def perform_action(self, state: _GameState):

# the first step is to call your policy agent to evaluate the given position
value, legal_moves, self.p_vec_small = self.evaluate_board_state(state)
value, legal_moves, p_vec_small, cp, depth, nodes, time_elapsed_s, nps, pv = self.evaluate_board_state(state)

if len(legal_moves) != len(self.p_vec_small):
raise Exception('Legal move list %s is incompatible with policy vector %s' % (legal_moves, self.p_vec_small))
if len(legal_moves) != len(p_vec_small):
raise Exception('Legal move list %s is incompatible with policy vector %s' % (legal_moves, p_vec_small))

if state.get_fullmove_number() <= self.temperature_moves:
self.temperature_current = self.temperature
else:
self.temperature_current = 0

if len(legal_moves) == 1:
selected_move = legal_moves[0]
confidence = 1.
idx = 0
else:
if self.temperature <= 0.01:
idx = self.p_vec_small.argmax()
if self.temperature_current <= 0.01:
idx = p_vec_small.argmax()
else:
self._apply_temperature_to_policy()
self._apply_quantil_clipping()
idx = np.random.choice(range(len(legal_moves)), p=self.p_vec_small)
p_vec_small = self._apply_temperature_to_policy(p_vec_small)
idx = np.random.choice(range(len(legal_moves)), p=p_vec_small)

selected_move = legal_moves[idx]
confidence = self.p_vec_small[idx]
confidence = p_vec_small[idx]

return value, selected_move, confidence, idx

def _apply_quantil_clipping(self):
"""
if value > 0:
# check for draw and decline if value is greater 0
state_future = deepcopy(state)
state_future.apply_move(selected_move)
if state_future.get_pythonchess_board().can_claim_threefold_repetition() is True:
p_vec_small[idx] = 0
idx = p_vec_small.argmax()
selected_move = legal_moves[idx]
confidence = p_vec_small[idx]

:param p_vec_small:
:param clip_quantil:
:return:
"""
return value, selected_move, confidence, idx, cp, depth, nodes, time_elapsed_s, nps, pv

if self.clip_quantil > 0:
# remove the lower percentage values in order to avoid strange blunders for moves with low confidence
p_vec_small_clipped = deepcopy(self.p_vec_small)

# get the sorted indices in ascending order
idx_order = np.argsort(self.p_vec_small)
# create a quantil tank which measures how much quantil power is left
quantil_tank = self.clip_quantil

# iterate over the indices (ascending) and apply the quantil clipping to it
for idx in idx_order:
if quantil_tank >= p_vec_small_clipped[idx]:
# remove the prob from the quantil tank
quantil_tank -= p_vec_small_clipped[idx]
# clip the index to 0
p_vec_small_clipped[idx] = 0
else:
# the target prob is greater than the current quantil tank
p_vec_small_clipped[idx] -= quantil_tank
# stop the for loop
break

# renormalize the policy
p_vec_small_clipped /= p_vec_small_clipped.sum()

# apply the changes
self.p_vec_small = p_vec_small_clipped

def _apply_temperature_to_policy(self):
def _apply_temperature_to_policy(self, p_vec_small):
"""
:return:
"""
# treat a very small temperature value as a deterministic policy
if self.temperature <= 0.01:
p_vec_one_hot = np.zeros_like(self.p_vec_small)
p_vec_one_hot[np.argmax(self.p_vec_small)] = 1.
self.p_vec_small = p_vec_one_hot
if self.temperature_current <= 0.01:
p_vec_one_hot = np.zeros_like(p_vec_small)
p_vec_one_hot[np.argmax(p_vec_small)] = 1.
p_vec_small = p_vec_one_hot
else:
# apply exponential scaling
self.p_vec_small = np.power(self.p_vec_small, 1/self.temperature)
p_vec_small = p_vec_small ** (1/self.temperature_current)
# renormalize the values to probabilities again
self.p_vec_small /= self.p_vec_small.sum()
p_vec_small /= p_vec_small.sum()

return p_vec_small
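
As a concrete illustration of the exponential temperature scaling above (numbers chosen purely for illustration): with p_vec_small = [0.5, 0.3, 0.2] and temperature_current = 0.5, the exponent 1/T = 2 yields [0.25, 0.09, 0.04], which renormalizes to roughly [0.658, 0.237, 0.105], sharpening the distribution toward the strongest move; a temperature above 1 would flatten it instead.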

Empty file modified DeepCrazyhouse/src/domain/agent/player/__init__.py
100644 → 100755
Empty file.
89 changes: 69 additions & 20 deletions DeepCrazyhouse/src/domain/agent/player/util/NetPredService.py
100644 → 100755
@@ -15,17 +15,25 @@
import numpy as np
from DeepCrazyhouse.src.domain.crazyhouse.output_representation import NB_LABELS, LABELS
from time import time
import cython


class NetPredService:

def __init__(self, pipe_endings: [connection], net: NeuralNetAPI, batch_size, enable_timeout=False):
def __init__(self, pipe_endings: [connection], net: NeuralNetAPI, batch_size, batch_state_planes: np.ndarray,
batch_value_results: np.ndarray, batch_policy_results: np.ndarray):
"""
:param pipe_endings: List of pipe endings used for communicating with the thread workers.
:param net: Neural Network API object which provides the reference for the neural network.
:param batch_size: Constant batch_size used for inference.
:param enable_timeout: Decides whether to enable a timeout if a batch didn't occur within 1 second.
:param batch_state_planes: Shared numpy memory in which all threads set their state plane request for the
prediction service. Each thread has its own channel.
:param batch_value_results: Shared numpy memory in which the value results of all threads are stored.
Each thread has its own channel.
:param batch_policy_results: Shared numpy memory in which the policy results of all threads are stored.
Each thread has its own channel.
#:param enable_timeout: Decides whether to enable a timeout if a batch didn't occur within 1 second.
"""
self.net = net
self.my_pipe_endings = pipe_endings
@@ -34,41 +42,72 @@ def __init__(self, pipe_endings: [connection], net: NeuralNetAPI, batch_size, en
self.thread_inference = Thread(target=self._provide_inference, args=(pipe_endings,), daemon=True)
self.batch_size = batch_size

self.time_start = None
self.timeout_second = 1
#self.enable_timeout = enable_timeout
self.batch_state_planes = batch_state_planes
self.batch_value_results = batch_value_results
self.batch_policy_results = batch_policy_results


#@cython.boundscheck(False)
#@cython.wraparound(False)
def _provide_inference(self, pipe_endings):

print('provide inference...')
#use_random = False
#use_random = True

#cdef double[:, :, :, ::1] batch_state_planes_view = self.batch_state_planes
#cdef double[::1] batch_value_results_view = self.batch_value_results
#cdef double[:, ::1] batch_policy_results = self.batch_policy_results

send_batches = False #True

while self.running is True:

filled_pipes = connection.wait(pipe_endings)

if filled_pipes:

if True or len(filled_pipes) >= self.batch_size:
if True or len(filled_pipes) >= self.batch_size: # 1

if send_batches is True:
planes_batch = []
pipes_pred_output = []

for pipe in filled_pipes[:self.batch_size]:
while pipe.poll():
planes_batch.append(pipe.recv())
pipes_pred_output.append(pipe)

planes_batch = []
pipes_pred_output = []
# logging.debug('planes_batch length: %d %d' % (len(planes_batch), len(filled_pipes)))
state_planes_mxnet = mx.nd.array(planes_batch, ctx=self.net.get_ctx())
else:
planes_ids = []
pipes_pred_output = []

for pipe in filled_pipes[:self.batch_size]:
while pipe.poll():
planes_batch.append(pipe.recv())
pipes_pred_output.append(pipe)
for pipe in filled_pipes[:self.batch_size]:
while pipe.poll():
planes_ids.append(pipe.recv())
pipes_pred_output.append(pipe)

#logging.debug('planes_batch length: %d %d' % (len(planes_batch), len(filled_pipes)))
planes_batch = mx.nd.array(planes_batch, ctx=self.net.get_ctx())
#logging.debug('planes_batch length: %d %d' % (len(planes_batch), len(filled_pipes)))
state_planes_mxnet = mx.nd.array(self.batch_state_planes[planes_ids], ctx=self.net.get_ctx())

#pred = self.net.get_executor().forward(is_train=False, data=planes_batch)
pred = self.net.get_net()(planes_batch)

#print(len(state_planes_mxnet))
executor = self.net.executors[len(state_planes_mxnet)-1]
pred = executor.forward(is_train=False, data=state_planes_mxnet)
#pred = self.net.get_net()(state_planes_mxnet)
#print('pred: %.3f' % (time()-t_s)*1000)
#t_s = time()

value_preds = pred[0].asnumpy()

# renormalize to [0,1]
#value_preds += 1
#value_preds /= 2

# for the policy prediction we still have to apply the softmax activation
# because it's not done by the neural net
#policy_preds = pred[1].softmax().asnumpy()
policy_preds = pred[1].softmax().asnumpy()

#if use_random is True:
Expand All @@ -77,10 +116,20 @@ def _provide_inference(self, pipe_endings):

# send the predictions back to the according workers
for i, pipe in enumerate(pipes_pred_output):
pipe.send([value_preds[i], policy_preds[i]])

# reset the timer
self.time_start = time()
if send_batches is True:
pipe.send([value_preds[i], policy_preds[i]])
else:
# get the according channel index for setting the result
channel_idx = planes_ids[i]

# set the value result
self.batch_value_results[channel_idx] = value_preds[i]
self.batch_policy_results[channel_idx] = policy_preds[i]
# give the thread the signal that the result has been set by sending back its channel_idx
pipe.send(channel_idx)

#print('send back res: %.3f' % (time()-t_s)*1000)

def start(self):
print('start inference thread...')
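
A minimal sketch of the worker-thread side of this shared-memory protocol, assuming each search thread owns one fixed channel index and the arrays described in the constructor docstring (the helper name is illustrative, not part of the diff):

def request_prediction(pipe, channel_idx, state_planes,
                       batch_state_planes, batch_value_results, batch_policy_results):
    # write the request into this thread's channel of the shared planes array
    batch_state_planes[channel_idx] = state_planes
    # announce the request by sending the channel index to the prediction service
    pipe.send(channel_idx)
    # block until the service signals that the results for this channel were written back
    pipe.recv()
    return batch_value_results[channel_idx], batch_policy_results[channel_idx]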
