# train_it self.py (146 lines, 127 loc, 4.59 KB)
# Tic Tac Toe
import random
import Ai
import numpy as np
def drawBoard(board):
    """Print the board that was passed in as a 3x3 grid.

    "board" is a list of 10 strings representing the board (index 0 is
    ignored). Squares 7-8-9 are printed as the top row, 4-5-6 as the middle
    row and 1-2-3 as the bottom row.
    """
    # The bars of the spacer rows sit in the same columns (3 and 7) as the
    # bars of the cell rows, so the grid lines up vertically.
    spacer = '   |   |'
    divider = '-----------'
    for position, first in enumerate((7, 4, 1)):
        if position:
            # Dividers go between rows only, never above the first row.
            print(divider)
        print(spacer)
        print(' ' + board[first] + ' | ' + board[first + 1] + ' | ' + board[first + 2])
        print(spacer)
def inputPlayerLetter():
    """Let the player type which letter they want to be.

    Keeps asking until the answer is X or O; case and surrounding whitespace
    are ignored.

    Returns a list with the player's letter as the first item and the
    computer's letter as the second.
    """
    letter = ''
    while letter not in ('X', 'O'):
        print('Do you want to be X or O?')
        letter = input().strip().upper()
    if letter == 'X':
        return ['X', 'O']
    return ['O', 'X']
def whoGoesFirst():
    """Randomly choose who goes first.

    Returns the string 'computer' or 'player', each picked when
    random.randint(0, 1) yields 0 or 1 respectively.
    """
    return 'computer' if random.randint(0, 1) == 0 else 'player'
def playAgain():
    """Ask whether the player wants another game.

    Returns True if the answer starts with "y" (case-insensitive, leading
    whitespace ignored), otherwise False.
    """
    print('Do you want to play again? (yes or no)')
    return input().strip().lower().startswith('y')
def makeMove(board, letter, move):
    """Place "letter" on square "move" of "board", modifying the board in place."""
    board[move] = letter
def isWinner(bo, le):
    """Return True if letter "le" fills any complete line on board "bo".

    "bo" is indexed 1-9 (index 0 unused) and "le" is the letter to check.
    The comparison is purely by equality, so passing the blank marker as
    "le" reports a "win" on any line that is still empty.
    """
    winning_lines = (
        (7, 8, 9),  # across the top
        (4, 5, 6),  # across the middle
        (1, 2, 3),  # across the bottom
        (7, 4, 1),  # down the left side
        (8, 5, 2),  # down the middle
        (9, 6, 3),  # down the right side
        (7, 5, 3),  # diagonal
        (9, 5, 1),  # diagonal
    )
    return any(all(bo[square] == le for square in line) for line in winning_lines)
def getBoardCopy(board):
    """Return a new list holding the same items as "board" (a shallow copy)."""
    return list(board)
def isSpaceFree(board, move):
    """Return True if square "move" on "board" still holds the blank marker ' '."""
    return board[move] == ' '
def getPlayerMove(board):
    """Let the player type in their move.

    Keeps prompting until the answer is one of the digits 1-9 (surrounding
    whitespace ignored) and that square is free on "board".

    Returns the chosen square as an int.
    """
    # Built once; the original rebuilt this list on every loop test.
    valid_moves = '1 2 3 4 5 6 7 8 9'.split()
    move = ' '
    # The membership test comes first so int() only ever sees a valid digit.
    while move not in valid_moves or not isSpaceFree(board, int(move)):
        print('What is your next move? (1-9)')
        move = input().strip()
    return int(move)
def isBoardFull(board):
    """Return True if every square 1-9 on the board has been taken, else False."""
    return not any(isSpaceFree(board, square) for square in range(1, 10))
# Self-play training: two Ai agents play tic-tac-toe against each other.
# After every move the agent that did NOT move is asked to learn from the
# resulting board, and both agents are rewarded when a game is won.
#
# NOTE(review): ai1 is constructed with ('X', 'O') but places computerLetter
# ('O') below, while ai2 is constructed with ('O', 'X') and places
# playerLetter ('X'). Confirm the argument order of Ai.Ai matches this.
MAX_ITERATIONS = 5000000      # stop (after a final save) once this count is reached
PROGRESS_EVERY = 100          # print the iteration counter this often
CHECKPOINT_EVERY = 10000      # save ai1 and decay the learning rate this often
LEARNINGRATE_DECAY = 1.5      # the learning rate is divided by this at each checkpoint

ai1 = Ai.Ai(0.1, 'X', 'O')
ai2 = Ai.Ai(0.1, 'O', 'X')
learningrate = 1
ai1.set_learningrate(learningrate)
ai2.set_learningrate(learningrate)
iterations = 0

# The letters are the same in every game: ai2 moves on the 'player' turn
# with playerLetter, ai1 moves on the 'computer' turn with computerLetter.
playerLetter = 'X'
computerLetter = 'O'

while True:
    # Reset the board
    theBoard = [' '] * 10
    turn = whoGoesFirst()
    gameIsPlaying = True

    while gameIsPlaying:
        if turn == 'player':
            move = ai2.move(theBoard)
            makeMove(theBoard, playerLetter, move)
            ai1.learn(theBoard)
            if isWinner(theBoard, playerLetter):
                ai1.reward(-99)
                ai2.reward(101)
                gameIsPlaying = False
            elif isBoardFull(theBoard):
                # Draw: the game ends without any reward being issued.
                break
            else:
                turn = 'computer'
        else:
            move = ai1.move(theBoard)
            makeMove(theBoard, computerLetter, move)
            ai2.learn(theBoard)
            if isWinner(theBoard, computerLetter):
                ai1.reward(99)
                ai2.reward(-101)
                gameIsPlaying = False
            elif isBoardFull(theBoard):
                # Draw: the game ends without any reward being issued.
                break
            else:
                turn = 'player'

    # Per-game bookkeeping. Only ai1 is ever saved.
    if iterations >= MAX_ITERATIONS:
        ai1.save()
        break
    if iterations % PROGRESS_EVERY == 0:
        print('iteration', iterations)
    if iterations % CHECKPOINT_EVERY == 0:
        # This also fires on iteration 0, so the first decay happens right
        # after the very first game.
        ai1.save()
        learningrate = learningrate / LEARNINGRATE_DECAY
        print('Learning rate: ', learningrate)
        ai1.set_learningrate(learningrate)
        ai2.set_learningrate(learningrate)
    iterations += 1