@@ -90,6 +90,54 @@ type DepthIterationInput = (AlphaBetaParams, [PossibleMove], Maybe DepthIteratio
90
90
type DepthIterationOutput = [(PossibleMove , Score )]
91
91
type AiOutput = ([PossibleMove ], Score )
92
92
93
+ -- | General driver / controller for Alpha-Beta prunning algorithm.
94
+ -- This method is responsible in running scoreAB method on all possible moves
95
+ -- and selecting the best move.
96
+ --
97
+ -- This is done, in general, in three stages:
98
+ --
99
+ -- 1. Preselect. From all possible moves, select ones that look good at a first glance.
100
+ -- This logic can be used to make AI work faster, but it obviously can miss some moves
101
+ -- that are not so good from a first glance, but are very good from the second glance.
102
+ --
103
+ -- 2. Depth-wise loop. Score all moves with specified depth. If there is still time, then
104
+ -- score them again with better depth. Repeat until there is still time.
105
+ -- Each iteration can be interrupted by TimeExhaused exception.
106
+ -- If last iteration was not interrupted, then use results of last iteration.
107
+ -- If last iteration was interrputed, then merge results of last iteration with results
108
+ -- of previous one: for moves that we was not able to calculate with better depth,
109
+ -- use results with previous depth.
110
+ -- If timeout is not specified, then only one iteration is executed, without timeout.
111
+ -- The depth to start with should not be very big, so that we should be always able to
112
+ -- calculate all moves with at least start depth. Neither should it be too small,
113
+ -- otherwise we would re-calculate the same for many times.
114
+ --
115
+ -- 3. Width-wise loop. This is performed within each depth iteration.
116
+ -- Specifics of alpha-beta prunning algorithm is so that the lesser
117
+ -- (alpha, beta) range is provided at start, the faster algorithm works; however,
118
+ -- in case real score is outside of these bounds, it will return eiter alpha or beta
119
+ -- value instead of real score value. So, we do the following:
120
+ --
121
+ -- * Select initial "width range", which is range of scores (alpha, beta). This range
122
+ -- is selected based on evaluation of current board with zero depth, plus-minus some
123
+ -- small delta.
124
+ -- Run scoreAB in that range.
125
+ -- * If values returned by scoreAB are within selected initial range, then everything is
126
+ -- okay: we just select the best of returned values.
127
+ -- * If exactly one move seems to bee "too good", i.e. corresponding result of scoreAB
128
+ -- equals to alpha/beta (depending on side), then we do not bother about it's exact
129
+ -- score: we should do that move anyway.
130
+ -- * If there are more than one "too good" moves, then we should select the next interval
131
+ -- (alpha, beta), and run the next iteration only on that moves that seem to be "too good".
132
+ -- * If all moves seem to be "too bad", then we should select the previous interval of
133
+ -- (alpha, beta), and run the next iteration on all moves in that interval.
134
+ -- * It is possible (not very likely, but possible) that real score of some moves equals
135
+ -- exactly to alpha or beta bound that we selected on some iteration. To prevent switching
136
+ -- between "better" and "worther" intervals forwards and backwards indefinitely, we
137
+ -- introduce a restriction: if we see that scoreAB returned the bound value, but we have
138
+ -- already considered the interval on that side, then we know that the real score equals
139
+ -- exactly to the bound.
140
+ --
93
141
runAI :: (GameRules rules , Evaluator eval ) => AlphaBeta rules eval -> AICacheHandle rules eval -> Side -> Board -> Checkers AiOutput
94
142
runAI ai@ (AlphaBeta params rules eval) handle side board = do
95
143
preOptions <- preselect
@@ -302,22 +350,6 @@ runAI ai@(AlphaBeta params rules eval) handle side board = do
302
350
goodMoves = [move | (move, score) <- pairs, score == maxScore]
303
351
return (goodMoves, maxScore)
304
352
305
- -- | Cache of possible moves per board
306
- data MovesMemo = MovesMemo {
307
- mmFirst :: BoardMap [PossibleMove ]
308
- , mmSecond :: BoardMap [PossibleMove ]
309
- }
310
-
311
- lookupMoves :: Side -> Board -> MovesMemo -> Maybe [PossibleMove ]
312
- lookupMoves First board memo = lookupBoardMap (boardCounts board, boardKey board) (mmFirst memo)
313
- lookupMoves Second board memo = lookupBoardMap (boardCounts board, boardKey board) (mmSecond memo)
314
-
315
- putMoves :: Side -> Board -> [PossibleMove ] -> MovesMemo -> MovesMemo
316
- putMoves First board moves memo =
317
- memo {mmFirst = putBoardMap board moves (mmFirst memo)}
318
- putMoves Second board moves memo =
319
- memo {mmSecond = putBoardMap board moves (mmSecond memo)}
320
-
321
353
-- | Calculate score of the board
322
354
doScore :: (GameRules rules , Evaluator eval )
323
355
=> rules
@@ -341,6 +373,7 @@ doScore rules eval var params side dp board alpha beta =
341
373
Just sec -> Just $ TimeSpec (fromIntegral sec) 0
342
374
return $ ScoreState rules eval [loose] now timeout
343
375
376
+ -- | State of ScoreM monad.
344
377
data ScoreState rules eval = ScoreState {
345
378
ssRules :: rules
346
379
, ssEvaluator :: eval
@@ -349,6 +382,7 @@ data ScoreState rules eval = ScoreState {
349
382
, ssTimeout :: Maybe TimeSpec -- ^ Nothing for "no timeout"
350
383
}
351
384
385
+ -- | Input data for scoreAB method.
352
386
data ScoreInput = ScoreInput {
353
387
siSide :: Side
354
388
, siDepth :: DepthParams
@@ -358,6 +392,7 @@ data ScoreInput = ScoreInput {
358
392
, siPrevMove :: Maybe PossibleMove
359
393
}
360
394
395
+ -- | ScoreM monad.
361
396
type ScoreM rules eval a = StateT (ScoreState rules eval ) Checkers a
362
397
363
398
instance HasLogger (StateT (ScoreState rules eval ) Checkers ) where
@@ -451,6 +486,7 @@ updateDepth params nMoves dp
451
486
return $ dp {dpCurrent = dpCurrent dp + 1 , dpTarget = target}
452
487
| otherwise = return $ dp {dpCurrent = dpCurrent dp + 1 }
453
488
489
+ -- | Check if timeout is exhaused.
454
490
isTimeExhaused :: ScoreM rules eval Bool
455
491
isTimeExhaused = do
456
492
check <- gets ssTimeout
@@ -461,7 +497,8 @@ isTimeExhaused = do
461
497
now <- liftIO $ getTime Monotonic
462
498
return $ start + delta <= now
463
499
464
- -- | Calculate score for the board
500
+ -- | Calculate score for the board.
501
+ -- This implements the alpha-beta section algorithm itself.
465
502
scoreAB :: forall rules eval . (GameRules rules , Evaluator eval )
466
503
=> AICacheHandle rules eval
467
504
-> AlphaBetaParams
0 commit comments