-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHanoi.py
139 lines (110 loc) · 4.51 KB
/
Hanoi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import math
import numpy as np
import matplotlib.pyplot as plt
from config_manager import config_manager
class hanoi:
    """Tower of Hanoi environment exposing a step/reward interface.

    The board is stored in ``self.peg``, an array of shape
    ``(n_discs, n_pegs)``: each column is a peg, row 0 is the topmost slot
    and row ``n_discs - 1`` is the bottom slot.  A cell holds the size of the
    disc occupying it (1 is the smallest) or 0 when the slot is empty.  With
    four discs and three pegs the start position is::

        [1 0 0]
        [2 0 0]
        [3 0 0]
        [4 0 0]

    ``self.final`` holds the goal position: the same stack on the last peg.
    """

    def __init__(self):
        """Read the board size from the configuration and set up the boards."""
        config = config_manager()
        (self.n_pegs,
         self.n_discs) = config.get_hanoi_params()

        # Goal state: every disc stacked in order on the last peg.
        self.final = np.zeros((self.n_discs, self.n_pegs))
        for i in range(self.n_discs):
            self.final[i, self.n_pegs - 1] = i + 1

        # Start state, reward and step counter share one code path with
        # reset_problem() so the two can never drift apart.
        self.reset_problem()

    def step(self, action):
        """Apply one move and report the resulting situation.

        Args:
            action: A move ``[disc, from_peg, to_peg]`` as produced by
                :meth:`get_moves`.  Only ``disc`` and ``to_peg`` are read;
                the disc is located on the board by its value.  The move is
                not validated here.

        Returns:
            A tuple ``(board, moves, reward)``: the live board array (not a
            copy), the moves available from the new position, and the reward
            for the position reached by this move.
        """
        disc = action[0]    # Disc number to move
        to_peg = action[2]  # Destination peg

        self.remove_disc(disc)       # Take the disc off its current slot
        self.put_disc(to_peg, disc)  # Drop it on the first free slot of the target

        # The reward must describe the state *reached* by the action; computing
        # it before the move would penalise the very move that solves the puzzle.
        reward = self.get_reward(self.peg)

        # TODO: planned reward shaping that is not implemented yet:
        #   - punish every step by 1 (or a float),
        #   - end the game after a step limit (e.g. 300 steps),
        #   - give a large reward on completion,
        #   - reward proportional to disc size on the last peg.
        # NOTE: self.iterator is currently never advanced.
        self.print_problem()
        return self.peg, self.get_moves(), reward

    def put_disc(self, endPeg, disc):
        """Place ``disc`` in the lowest free slot of peg ``endPeg``.

        If the peg has no free slot the board is left unchanged.
        """
        for slot in reversed(range(self.n_discs)):  # Scan bottom-up
            if self.peg[slot, endPeg] == 0:
                self.peg[slot, endPeg] = disc
                break

    def remove_disc(self, disc):
        """Clear every board cell that currently holds ``disc``."""
        self.peg[self.peg == disc] = 0

    def get_reward(self, state):
        """Return 0 when ``state`` equals the goal board, otherwise -10."""
        return 0 if np.array_equal(state, self.final) else -10

    def is_final(self, state):
        """Return True (and print ``FINISHED``) when ``state`` is the goal board."""
        if np.array_equal(state, self.final):
            print("FINISHED")
            return True
        return False

    def _top_disc(self, peg_index):
        """Return the topmost disc on peg ``peg_index`` as an int, or 0 if empty."""
        column = self.peg[:, peg_index]
        occupied = np.flatnonzero(column)
        return int(column[occupied[0]]) if occupied.size else 0

    def get_moves(self):
        """List every legal move from the current board.

        Only the top disc of a peg can be moved, and it may go to any other
        peg that is empty or whose top disc is larger.

        Returns:
            A list of moves ``[disc, from_peg, to_peg]`` with 0-based peg
            indices, ordered by source peg and then by destination peg.
        """
        tops = [self._top_disc(p) for p in range(self.n_pegs)]

        moves = []
        for from_peg, disc in enumerate(tops):
            if disc == 0:  # Nothing to move from an empty peg
                continue
            for to_peg, target in enumerate(tops):
                if to_peg == from_peg:
                    continue
                # Legality depends on the target's *top* disc only; discs
                # buried deeper in the stack must not be considered.
                if target == 0 or target > disc:
                    moves.append([disc, from_peg, to_peg])
        return moves

    def get_state(self):
        """Return the live board array (not a copy)."""
        return self.peg

    def print_problem(self):
        """Print the board, one row of slots per line, discs drawn as dashes."""
        for d in range(self.n_discs):      # Every slot row, top to bottom
            print("\r")  # newline
            for p in range(self.n_pegs):   # Every peg in that row
                disc = "-" * (int(self.peg[d, p]))
                print(f"{disc:>{self.n_discs}}|{disc:{self.n_discs}}", end="")
        print()

    def reset_problem(self):
        """Restore the start position: all discs on peg 0, counters zeroed."""
        self.peg = np.zeros((self.n_discs, self.n_pegs))
        for i in range(self.n_discs):
            self.peg[i, 0] = i + 1
        self.reward = 0
        self.iterator = 0