forked from 4belito/Connect4
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmyplayer.py
190 lines (166 loc) · 8.36 KB
/
myplayer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import numpy as np
import random
import time
from player import Player
class MyPlayer(Player):
def __init__(self, rows, cols, connect_number, timeout_setup, timeout_move, max_invalid_moves, cylinder):
self.rows = rows
self.cols = cols
self.connect_number = connect_number
self.timeout_setup = timeout_setup
self.timeout_move = timeout_move
self.max_invalid_moves = max_invalid_moves
self.cylinder = cylinder
self.start_time = 0
def setup(self, piece_color):
self.piece_color = piece_color
def get_valid_moves(self, board):
# Return a list of column indices where a new piece can be placed
return [col for col in range(self.cols) if board[0, col] == 0]
# Method utilizied from connect4.py
def check_config(self, board, config):
b1, b2 = board.shape
c1, c2 = config.shape
for i in range(b1 - c1 + 1):
for j in range(b2 - c2 + 1):
if np.sum(board[i:i+c1, j:j+c2] * config) == self.connect_number:
board[i:i+c1, j:j+c2][config == 1] = 2
if self.cylinder:
board[:, :self.connect_number-1][board[:, -self.connect_number+1:] == 2] = 2
board = board[:, :-self.connect_number+1]
return True, board
return False, board
# Method utilizied from connect4.py
def check_if_winner(self, board):
if self.cylinder:
board = np.concatenate((board, board[:, :self.connect_number-1]), axis=1)
# Horizontal checking
config = np.ones((1, self.connect_number), dtype=int)
end, board = self.check_config(board, config)
if end:
return board
# Vertical checking
config = np.ones((self.connect_number, 1), dtype=int)
end, board = self.check_config(board, config)
if end:
return board
# Diagonal checking
config = np.eye(self.connect_number, dtype=int)
end, board = self.check_config(board, config)
if end:
return board
# Anti-diagonal checking
config = np.fliplr(np.eye(self.connect_number, dtype=int))
end, board = self.check_config(board, config)
if end:
return board
return None
def simulate_move(self, board, move, maximizingPlayer):
# print(f'Maximizing Player: {maximizingPlayer}')
# Create a copy of the board to simulate the move without altering the original
new_board = board.copy()
# Check if the move is within the column range and the column is not full
if isinstance(move, (int, np.int32, np.int64)) and 0 <= move < self.cols:
n_spots = sum(new_board[:, move] == 0) # Count empty spots in the column
if n_spots:
# Place the piece for the current player at the lowest empty spot
new_board[n_spots - 1, move] = 1 if maximizingPlayer else -1
return new_board
def heuristic_evaluation(self, board):
weights = {2: 10, 3: 30, 4: 90} # Weights for sequences of length 2, 3, and 4
blocked_sequence_penalty = -50 # Penalty for blocked sequences
score = 0
board_extended = np.concatenate((board, board[:, :self.connect_number-1]), axis=1) if self.cylinder else board
directions = [(0, 1), (1, 0), (1, 1), (1, -1), (0, -1), (-1, 0), (-1, -1), (-1, 1)] # All 8 directions
for row in range(self.rows):
for col in range(self.cols * (2 if self.cylinder else 1) - (self.connect_number - 1)):
for d_row, d_col in directions:
for length, weight in weights.items():
for player in [1, -1]: # Assuming 1 is the player's piece, -1 is the opponent's
extendable, blocked = self.count_sequence(board_extended, row, col, d_row, d_col, player, length)
if player == 1: # Adjusting score for the player
score += extendable * weight
score += blocked * blocked_sequence_penalty
else: # Adjusting score for the opponent
score -= extendable * weight
score -= blocked * blocked_sequence_penalty
return score
def count_sequence(self, board, row, col, d_row, d_col, player, length):
extendable_seq = 0
blocked_seq = 0
for l in range(length):
current_row = row + d_row * l
current_col = (col + d_col * l) % self.cols # Handle cylindrical wrap for columns
if not (0 <= current_row < self.rows) or board[current_row, current_col] != player:
return (0, 0) # If out of bounds or not matching piece, not a valid sequence
# Check for an open or blocked end after the sequence
next_row = current_row + d_row
next_col = (current_col + d_col) % self.cols
if 0 <= next_row < self.rows:
if board[next_row, next_col] == 0:
extendable_seq = 1
elif board[next_row, next_col] == -1*(player): # represents the opposite player
blocked_seq = 1
# Similarly, check the start of the sequence for an open or blocked end
prev_row = row - d_row
prev_col = (col - d_col) % self.cols
if 0 <= prev_row < self.rows:
if board[prev_row, prev_col] == 0:
extendable_seq = 1
elif board[prev_row, prev_col] == -1*(player):
blocked_seq = 1
return (extendable_seq, blocked_seq)
def minimax(self, board, depth, alpha, beta, maximizingPlayer):
# Added a timeout functionality to help with timeout errors
if time.time() - self.start_time > self.timeout_move - 0.1:
return 0, None # Return a neutral value and no move if timeout is near
# Get all valid moves for the current board state
valid_moves = self.get_valid_moves(board)
# Base case: no valid moves or maximum depth reached
if not valid_moves or depth == 0:
# Evaluate the heuristic value of the board
return self.heuristic_evaluation(board), None
if maximizingPlayer:
# Initialize the maximum evaluation score and best move
maxEval = float('-inf')
best_move = random.choice(valid_moves) # Choose a random move as the best move initially
for move in valid_moves:
# Simulate the move for the maximizing player
new_board = self.simulate_move(board, move, maximizingPlayer)
# Recursively call minimax for the minimizing player
eval, _ = self.minimax(new_board, depth - 1, alpha, beta, False)
# Update maxEval and best_move if a better evaluation is found
if eval > maxEval:
maxEval = eval
best_move = move
# Update alpha
alpha = max(alpha, eval)
# Alpha-beta pruning
if beta <= alpha:
break
return maxEval, best_move
else:
# Initialize the minimum evaluation score and best move for minimizing player
minEval = float('inf')
best_move = random.choice(valid_moves)
for move in valid_moves:
# Simulate the move for the minimizing player
new_board = self.simulate_move(board, move, maximizingPlayer)
# Recursively call minimax for the maximizing player
eval, _ = self.minimax(new_board, depth - 1, alpha, beta, True)
# Update minEval and best_move if a better evaluation is found
if eval < minEval:
minEval = eval
best_move = move
# Update beta
beta = min(beta, eval)
# Alpha-beta pruning
if beta <= alpha:
break
return minEval, best_move
def play(self, board):
self.start_time = time.time() # Record the start time
# Reduced depth for minimax to avoid timeouts. Quick fix solution.
# TODO - before competition implment more robust time method.
_, best_move = self.minimax(board, 2, float('-inf'), float('inf'), True)
return best_move