-
Notifications
You must be signed in to change notification settings - Fork 0
/
MDP.py
170 lines (135 loc) · 6.35 KB
/
MDP.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#importing the required libraries
import random
import tkinter as tk
from tkinter import ttk, messagebox
import time
from tabulate import tabulate
#defining the MDP_Game class with the required functions
class MDP_Game:
    """Tkinter front end for a small one-dimensional MDP game.

    The player starts in state 0 and repeatedly chooses action -1 or +1.
    Every action moves the state one step down or up at random (the action
    only changes the odds), and the game ends as soon as state -2 or 2 is
    reached.

    Attributes:
        root: The main Tk window handed to the constructor.
        current_state: The player's current state.
        current_reward: The reward attached to ``current_state``.
    """

    # Reward paid by each game-ending state.
    TERMINAL_REWARDS = {-2: 20, 2: 100}
    # Reward attached to every other state.
    STEP_REWARD = -5
    # Probability that the state DEcreases by one, keyed by the chosen action.
    STEP_DOWN_PROBABILITY = {-1: 0.8, 1: 0.7}

    def __init__(self, root):
        """Store the root window, reset the game state and build the menu.

        Args:
            root: The ``tk.Tk`` instance that hosts the start screen.
        """
        self.root = root
        self.current_state = 0
        self.current_reward = self.STEP_REWARD
        self.setup_root_window()

    def setup_root_window(self):
        """Populate the root window with the title label and start button."""
        self.root.title("MDP Game")
        style = ttk.Style()
        style.theme_use('clam')
        style.configure('TLabel', font=("Montserrat", 18, "bold"), background="#ffffff", foreground="black")
        title_label = ttk.Label(self.root, text="MDP Game", background="white", style="TLabel")
        title_label.pack(pady=20, padx=10)
        style.configure('TButton', font=("Montserrat", 12, "bold"), background="#D3D3D3", foreground="black", relief="raised", padding=5, width=20, bordercolor="black")
        start_button = ttk.Button(self.root, text="Start Game", command=self.start_game, style="TButton")
        start_button.pack(pady=20, padx=10)

    def transition_function(self, action):
        """Apply one stochastic step for ``action`` and refresh the reward.

        Action -1 moves the state down with probability 0.8 and up otherwise;
        action +1 moves it down with probability 0.7 and up otherwise. Any
        other action prints "Invalid action" and leaves the state untouched.
        A state that is already terminal (-2 or 2) is absorbing and never
        moves, so the state always stays within [-2, 2].

        Args:
            action: The chosen action, expected to be -1 or 1.
        """
        step_down_probability = self.STEP_DOWN_PROBABILITY.get(action)
        if step_down_probability is None:
            print("Invalid action")
        elif self.current_state not in self.TERMINAL_REWARDS:
            if random.random() < step_down_probability:
                self.current_state -= 1
            else:
                self.current_state += 1
        self.reward_function()

    def reward_function(self):
        """Set ``current_reward`` from ``current_state``.

        State -2 pays 20, state 2 pays 100 and every other state pays -5.
        """
        self.current_reward = self.TERMINAL_REWARDS.get(self.current_state, self.STEP_REWARD)

    def reset_game(self):
        """Return to state 0 and redraw the labels in the game window."""
        self.current_state = 0
        self.reward_function()
        self.update_display()

    def update_display(self):
        """Write the current state and reward into the game-window labels.

        Relies on ``start_game`` having created ``state_label`` and
        ``reward_label``.
        """
        self.state_label.config(text=f"Current State: {self.current_state}")
        self.reward_label.config(text=f"Reward: {self.current_reward}")

    def start_game(self):
        """Hide the root window and open a fresh game window at state 0."""
        self.root.withdraw()
        self.current_state = 0
        self.reward_function()
        # Parent the window on self.root rather than a module-level global so
        # the class also works when it is imported and driven from elsewhere.
        self.game_window = tk.Toplevel(self.root)
        self.game_window.title("MDP Game")
        self.game_window.option_add("*Font", ("Montserrat", 14))
        self.state_label = ttk.Label(self.game_window, text=f"Current State: {self.current_state}")
        self.state_label.pack()
        self.reward_label = ttk.Label(self.game_window, text=f"Reward: {self.current_reward}")
        self.reward_label.pack()
        button_frame = ttk.Frame(self.game_window)
        button_frame.pack(pady=20)
        ttk.Button(button_frame, text="-1", command=lambda: self.take_action(-1)).pack(side=tk.LEFT, padx=10)
        ttk.Button(button_frame, text="+1", command=lambda: self.take_action(+1)).pack(side=tk.RIGHT, padx=10)
        ttk.Button(self.game_window, text="Reset Game", command=self.reset_game).pack(side=tk.BOTTOM, pady=10)
        # Closing the game window must bring the hidden root window back.
        self.game_window.protocol("WM_DELETE_WINDOW", self.on_game_window_close)

    def on_game_window_close(self):
        """Show the root window again and destroy the game window."""
        self.root.deiconify()
        self.game_window.destroy()

    def take_action(self, action):
        """Handle a button press: step the MDP, redraw, and end if terminal.

        Args:
            action: The action bound to the pressed button (-1 or 1).
        """
        # transition_function already refreshes current_reward.
        self.transition_function(action)
        self.update_display()
        if self.current_state in self.TERMINAL_REWARDS:
            self._finish_game()

    def _finish_game(self):
        """Close the game window, report the result and restore the menu."""
        # Flush pending redraws first; otherwise the short pause below would
        # show stale labels because the event loop is blocked during sleep.
        self.game_window.update_idletasks()
        time.sleep(0.1)
        self.game_window.destroy()
        messagebox.showinfo("Game Over 👾", f"Game Over, you have reached the final state!\n{'+'*39}\nYour Final State is: {self.current_state},\nYour Final Reward is: {self.current_reward}\n{'+'*39}\nThank you for playing !!!")
        self.root.deiconify()
#main function to run the game
# Script entry point: build the Tk root window, attach the game to it and hand
# control to the Tk event loop. The variable must stay named `root` at module
# level because MDP_Game.start_game reads that global when it creates the game
# window.
if __name__ == "__main__":
    root = tk.Tk()
    app = MDP_Game(root)
    # Blocks until the root window is destroyed.
    root.mainloop()
# End of the code for MDP
#code to print the utility matrix for T=0, T=1, T=2
def reward_calc(current_state):
    """Return the reward attached to ``current_state``.

    State -2 yields 20, state 2 yields 100, and any other state yields -5.
    """
    # (state, reward) pairs for the two special states, checked in order.
    for special_state, payoff in ((-2, 20), (2, 100)):
        if current_state == special_state:
            return payoff
    return -5
def Bellman_MDP_utility(iterations=2, gamma=1):
    """Print the utility of every state after successive Bellman backups.

    Column "Iteration 0" is all zeros. Each later column t holds, for every
    state, the best expected value over the two actions of
    ``reward_calc(next_state) + gamma * utility[t-1][next_state]``. States
    -2 and 2 have no outgoing transitions, so their utility stays 0. The
    result is printed as a grid table with ``tabulate``; nothing is returned.

    Args:
        iterations: Number of backups to perform. The default of 2 prints
            the columns for iterations 0, 1 and 2.
        gamma: Discount factor applied to the previous iteration's utility.

    Raises:
        ValueError: If ``iterations`` is negative.
    """
    if iterations < 0:
        raise ValueError("iterations must be non-negative")

    states = [-2, -1, 0, 1, 2]
    actions = [-1, 1]
    # (state, action) -> [(next_state, probability), ...]. Integer keys avoid
    # building and re-parsing 'S<n>' strings on every backup.
    transition_probabilities = {
        (0, 1): [(1, 0.3), (-1, 0.7)],
        (0, -1): [(1, 0.2), (-1, 0.8)],
        (1, 1): [(2, 0.3), (0, 0.7)],
        (1, -1): [(2, 0.2), (0, 0.8)],
        (-1, 1): [(0, 0.3), (-2, 0.7)],
        (-1, -1): [(0, 0.2), (-2, 0.8)],
    }
    # Column position of each state inside a utility row.
    column_of = {state: column for column, state in enumerate(states)}
    utility_matrix = {t: [0] * len(states) for t in range(iterations + 1)}

    for t in range(1, iterations + 1):
        previous = utility_matrix[t - 1]
        for i, state in enumerate(states):
            action_values = []
            for action in actions:
                outcomes = transition_probabilities.get((state, action))
                if outcomes is None:
                    # No outgoing transitions for this state/action pair.
                    continue
                # Accumulate with += (not sum()) so floating-point rounding
                # follows plain left-to-right addition.
                expected = 0
                for next_state, probability in outcomes:
                    expected += probability * (
                        reward_calc(next_state) + gamma * previous[column_of[next_state]]
                    )
                action_values.append(expected)
            utility_matrix[t][i] = max(action_values) if action_values else 0

    # Prepare data for tabulate: one row per state, one column per iteration.
    headers = ["State"] + [f"Iteration {t}" for t in range(len(utility_matrix))]
    table = [
        [state] + [utility_matrix[t][i] for t in range(len(utility_matrix))]
        for i, state in enumerate(states)
    ]
    print(tabulate(table, headers=headers, tablefmt="grid"))
# Module-level call: prints the utility table. When this file is run as a
# script, this line is reached only after root.mainloop() above has returned.
Bellman_MDP_utility()