Skip to content

Commit 9dc3673

Browse files
committed
Some code documentation.
refs #1.
1 parent ec25d96 commit 9dc3673

File tree

1 file changed

+54
-17
lines changed

1 file changed

+54
-17
lines changed

src/AI/AlphaBeta.hs

+54-17
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,54 @@ type DepthIterationInput = (AlphaBetaParams, [PossibleMove], Maybe DepthIteratio
9090
type DepthIterationOutput = [(PossibleMove, Score)]
9191
type AiOutput = ([PossibleMove], Score)
9292

93+
-- | General driver / controller for Alpha-Beta prunning algorithm.
94+
-- This method is responsible in running scoreAB method on all possible moves
95+
-- and selecting the best move.
96+
--
97+
-- This is done, in general, in three stages:
98+
--
99+
-- 1. Preselect. From all possible moves, select ones that look good at a first glance.
100+
-- This logic can be used to make AI work faster, but it obviously can miss some moves
101+
-- that are not so good from a first glance, but are very good from the second glance.
102+
--
103+
-- 2. Depth-wise loop. Score all moves with specified depth. If there is still time, then
104+
-- score them again with better depth. Repeat until there is still time.
105+
-- Each iteration can be interrupted by TimeExhaused exception.
106+
-- If last iteration was not interrupted, then use results of last iteration.
107+
-- If last iteration was interrputed, then merge results of last iteration with results
108+
-- of previous one: for moves that we was not able to calculate with better depth,
109+
-- use results with previous depth.
110+
-- If timeout is not specified, then only one iteration is executed, without timeout.
111+
-- The depth to start with should not be very big, so that we should be always able to
112+
-- calculate all moves with at least start depth. Neither should it be too small,
113+
-- otherwise we would re-calculate the same for many times.
114+
--
115+
-- 3. Width-wise loop. This is performed within each depth iteration.
116+
-- Specifics of alpha-beta prunning algorithm is so that the lesser
117+
-- (alpha, beta) range is provided at start, the faster algorithm works; however,
118+
-- in case real score is outside of these bounds, it will return eiter alpha or beta
119+
-- value instead of real score value. So, we do the following:
120+
--
121+
-- * Select initial "width range", which is range of scores (alpha, beta). This range
122+
-- is selected based on evaluation of current board with zero depth, plus-minus some
123+
-- small delta.
124+
-- Run scoreAB in that range.
125+
-- * If values returned by scoreAB are within selected initial range, then everything is
126+
-- okay: we just select the best of returned values.
127+
-- * If exactly one move seems to bee "too good", i.e. corresponding result of scoreAB
128+
-- equals to alpha/beta (depending on side), then we do not bother about it's exact
129+
-- score: we should do that move anyway.
130+
-- * If there are more than one "too good" moves, then we should select the next interval
131+
-- (alpha, beta), and run the next iteration only on that moves that seem to be "too good".
132+
-- * If all moves seem to be "too bad", then we should select the previous interval of
133+
-- (alpha, beta), and run the next iteration on all moves in that interval.
134+
-- * It is possible (not very likely, but possible) that real score of some moves equals
135+
-- exactly to alpha or beta bound that we selected on some iteration. To prevent switching
136+
-- between "better" and "worther" intervals forwards and backwards indefinitely, we
137+
-- introduce a restriction: if we see that scoreAB returned the bound value, but we have
138+
-- already considered the interval on that side, then we know that the real score equals
139+
-- exactly to the bound.
140+
--
93141
runAI :: (GameRules rules, Evaluator eval) => AlphaBeta rules eval -> AICacheHandle rules eval -> Side -> Board -> Checkers AiOutput
94142
runAI ai@(AlphaBeta params rules eval) handle side board = do
95143
preOptions <- preselect
@@ -302,22 +350,6 @@ runAI ai@(AlphaBeta params rules eval) handle side board = do
302350
goodMoves = [move | (move, score) <- pairs, score == maxScore]
303351
return (goodMoves, maxScore)
304352

305-
-- | Cache of possible moves per board
306-
data MovesMemo = MovesMemo {
307-
mmFirst :: BoardMap [PossibleMove]
308-
, mmSecond :: BoardMap [PossibleMove]
309-
}
310-
311-
lookupMoves :: Side -> Board -> MovesMemo -> Maybe [PossibleMove]
312-
lookupMoves First board memo = lookupBoardMap (boardCounts board, boardKey board) (mmFirst memo)
313-
lookupMoves Second board memo = lookupBoardMap (boardCounts board, boardKey board) (mmSecond memo)
314-
315-
putMoves :: Side -> Board -> [PossibleMove] -> MovesMemo -> MovesMemo
316-
putMoves First board moves memo =
317-
memo {mmFirst = putBoardMap board moves (mmFirst memo)}
318-
putMoves Second board moves memo =
319-
memo {mmSecond = putBoardMap board moves (mmSecond memo)}
320-
321353
-- | Calculate score of the board
322354
doScore :: (GameRules rules, Evaluator eval)
323355
=> rules
@@ -341,6 +373,7 @@ doScore rules eval var params side dp board alpha beta =
341373
Just sec -> Just $ TimeSpec (fromIntegral sec) 0
342374
return $ ScoreState rules eval [loose] now timeout
343375

376+
-- | State of ScoreM monad.
344377
data ScoreState rules eval = ScoreState {
345378
ssRules :: rules
346379
, ssEvaluator :: eval
@@ -349,6 +382,7 @@ data ScoreState rules eval = ScoreState {
349382
, ssTimeout :: Maybe TimeSpec -- ^ Nothing for "no timeout"
350383
}
351384

385+
-- | Input data for scoreAB method.
352386
data ScoreInput = ScoreInput {
353387
siSide :: Side
354388
, siDepth :: DepthParams
@@ -358,6 +392,7 @@ data ScoreInput = ScoreInput {
358392
, siPrevMove :: Maybe PossibleMove
359393
}
360394

395+
-- | ScoreM monad.
361396
type ScoreM rules eval a = StateT (ScoreState rules eval) Checkers a
362397

363398
instance HasLogger (StateT (ScoreState rules eval) Checkers) where
@@ -451,6 +486,7 @@ updateDepth params nMoves dp
451486
return $ dp {dpCurrent = dpCurrent dp + 1, dpTarget = target}
452487
| otherwise = return $ dp {dpCurrent = dpCurrent dp + 1}
453488

489+
-- | Check if timeout is exhaused.
454490
isTimeExhaused :: ScoreM rules eval Bool
455491
isTimeExhaused = do
456492
check <- gets ssTimeout
@@ -461,7 +497,8 @@ isTimeExhaused = do
461497
now <- liftIO $ getTime Monotonic
462498
return $ start + delta <= now
463499

464-
-- | Calculate score for the board
500+
-- | Calculate score for the board.
501+
-- This implements the alpha-beta section algorithm itself.
465502
scoreAB :: forall rules eval. (GameRules rules, Evaluator eval)
466503
=> AICacheHandle rules eval
467504
-> AlphaBetaParams

0 commit comments

Comments
 (0)