-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgmkai.go
89 lines (78 loc) · 1.84 KB
/
gmkai.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
package gmk
import "math"
// Bot is a computer player attached to a game board.
type Bot struct {
	game *Board // board the bot inspects and plays its moves on
	Flag int    // marker handed to Board.Apply and MetaBoard.Flip for this bot's moves
	// temper is forwarded to the minimax search as its decay rate.
	// Values in (0,1] make the bot play smart; anything else makes it play poorly.
	temper float64
}
// BotInit builds a Bot that plays on game, marks its moves with flag and
// searches with the given temper. The Bot is returned by value.
func BotInit(game *Board, flag int, temper float64) Bot {
	return Bot{
		game:   game,
		Flag:   flag,
		temper: temper,
	}
}
// Play lets the bot take one turn: it asks MakeDecision for a move and then
// applies that move to the bot's game under the bot's Flag.
func (b *Bot) Play() {
	move := b.MakeDecision()
	b.game.Apply(move, b.Flag)
}
// MakeDecision chooses the bot's next move and returns it as whatever
// Board.Globalize produces for the selected local policy.
//
// It pulls one meta-board per iteration from the game via NextMeta, flips it
// with the bot's Flag, and scores it with BestLocalPolicyAndUtil searching as
// SELF with temper as the decay rate. The policy whose utility is strictly
// greater than every earlier one is kept, so ties go to the earliest
// meta-board.
//
// NOTE(review): the first value returned by NextMeta is discarded, so
// Globalize presumably relies on state kept inside Board to know which
// meta-board the local policy belongs to — confirm against Board.
func (b *Bot) MakeDecision() int {
	var (
		bestUtil  = math.Inf(-1)
		bestLocal int
	)
	for n := 0; n < b.game.NumOfMeta(); n++ {
		_, meta := b.game.NextMeta()
		meta.Flip(b.Flag)
		if policy, util := BestLocalPolicyAndUtil(meta, SELF, b.temper); util > bestUtil {
			bestUtil, bestLocal = util, policy
		}
	}
	return b.game.Globalize(bestLocal)
}
/*****************MinMax******************/
// BestLocalPolicyAndUtil is the maximizing half of the minimax search over a
// single meta-board.
//
// glance is the position to search, flag is the marker of the side to move,
// and decayRate scales the utility once per search level, so outcomes that
// lie deeper in the tree contribute less.
//
// If the position is terminal (no empty cell, or State reports a non-zero
// winner) it returns policy 0 and a utility of winner*10. Otherwise every
// empty position is tried in the order returned by EmptyPositions, the
// opponent's reply is scored with WorstLocalPolicyAndUtil, and the move with
// the greatest reply utility is returned together with decayRate times that
// utility. Ties keep the earliest move.
func BestLocalPolicyAndUtil(glance MetaBoard, flag int, decayRate float64) (int, float64) {
	hasEmpty, winner := glance.State()
	if !hasEmpty || winner != 0 {
		return 0, float64(winner * 10)
	}

	var (
		bestPolicy int
		maxUtil    = math.Inf(-1)
		found      bool
	)
	for _, policy := range glance.EmptyPositions() {
		// Try the move, score the opponent's best answer, then undo it.
		glance.Apply(policy, flag)
		_, u := WorstLocalPolicyAndUtil(glance, -flag, decayRate)
		glance.Cancel(policy)

		// The comparison must happen per candidate, and the move recorded is
		// the one played at this level, not the one returned from below.
		if !found || maxUtil < u {
			found = true
			bestPolicy = policy
			maxUtil = u
		}
	}
	if !found {
		// No candidate was offered although State reported empties; treat it
		// as a neutral position rather than returning an infinite utility.
		return 0, 0
	}
	return bestPolicy, decayRate * maxUtil
}

// WorstLocalPolicyAndUtil is the minimizing half of the minimax search over a
// single meta-board; it mirrors BestLocalPolicyAndUtil for the opposing side.
//
// If the position is terminal (no empty cell, or State reports a non-zero
// winner) it returns policy 0 and a utility of winner*10. Otherwise every
// empty position is tried for flag, the reply is scored with
// BestLocalPolicyAndUtil, and the move with the smallest reply utility is
// returned together with decayRate times that utility. Ties keep the
// earliest move.
func WorstLocalPolicyAndUtil(glance MetaBoard, flag int, decayRate float64) (int, float64) {
	hasEmpty, winner := glance.State()
	if !hasEmpty || winner != 0 {
		return 0, float64(winner * 10)
	}

	var (
		worstPolicy int
		minUtil     = math.Inf(1)
		found       bool
	)
	for _, policy := range glance.EmptyPositions() {
		// Try the move, score the other side's best answer, then undo it.
		glance.Apply(policy, flag)
		_, u := BestLocalPolicyAndUtil(glance, -flag, decayRate)
		glance.Cancel(policy)

		if !found || minUtil > u {
			found = true
			worstPolicy = policy
			minUtil = u
		}
	}
	if !found {
		// See BestLocalPolicyAndUtil: no candidates means a neutral result.
		return 0, 0
	}
	return worstPolicy, decayRate * minUtil
}