Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

value iteration show value #1280

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions deep_learning4e.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
import numpy as np
from keras import Sequential, optimizers
from keras.layers import Embedding, SimpleRNN, Dense
from keras.preprocessing import sequence
#from keras.preprocessing import sequence
from keras.utils.data_utils import pad_sequences

from utils4e import (conv1D, gaussian_kernel, element_wise_product, vector_add, random_weights,
scalar_vector_product, map_vector, mean_squared_error_loss)
Expand Down Expand Up @@ -518,8 +519,10 @@ def keras_dataset_loader(dataset, max_length=500):
# init dataset
(X_train, y_train), (X_val, y_val) = dataset
if max_length > 0:
X_train = sequence.pad_sequences(X_train, maxlen=max_length)
X_val = sequence.pad_sequences(X_val, maxlen=max_length)
#X_train = sequence.pad_sequences(X_train, maxlen=max_length)
#X_val = sequence.pad_sequences(X_val, maxlen=max_length)
X_train = pad_sequences(X_train, maxlen=max_length)
X_val = pad_sequences(X_val, maxlen=max_length)
return (X_train[10:], y_train[10:]), (X_val, y_val), (X_train[:10], y_train[:10])


Expand Down
35 changes: 26 additions & 9 deletions gui/grid_mdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
import tkinter.messagebox
from functools import partial
from tkinter import ttk
import time

sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

import matplotlib
import matplotlib.animation as animation
Expand All @@ -15,12 +18,11 @@

from mdp import *

sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

matplotlib.use('TkAgg')
style.use('ggplot')

fig = Figure(figsize=(20, 15))
fig = Figure(figsize=(20, 25))
sub = fig.add_subplot(111)
plt.rcParams['axes.grid'] = False

Expand All @@ -47,11 +49,12 @@ def extents(f):
return [f[0] - delta / 2, f[-1] + delta / 2]


def display(gridmdp, _height, _width):
def display(gridmdp, _height, _width, _a):
"""displays matrix"""

dialog = tk.Toplevel()
dialog.wm_title('Values')
#dialog.wm_title('Values')
dialog.wm_title(_a)

container = tk.Frame(dialog)
container.pack(side=tk.TOP, fill=tk.BOTH, expand=True)
Expand Down Expand Up @@ -125,7 +128,8 @@ def initialize_dialogbox(_width, _height, gridmdp, terminals, buttons):
btn_ok = ttk.Button(container, text='Ok', command=dialog.destroy)
btn_ok.grid(row=5, column=2, sticky='nsew', pady=5, padx=5)

dialog.geometry('400x200')
#dialog.geometry('400x200')
dialog.geometry('1600x200')
dialog.mainloop()


Expand Down Expand Up @@ -393,7 +397,7 @@ def view_matrix(self):
_height = self.shared_data['height'].get()
_width = self.shared_data['width'].get()
print(build_page.gridmdp)
display(build_page.gridmdp, _height, _width)
display(build_page.gridmdp, _height, _width, "aaaaa")

def view_terminals(self):
"""prints current terminals to console"""
Expand Down Expand Up @@ -570,6 +574,9 @@ def __init__(self, parent, controller):
self.epsilon = 0.001
self.delta = 0

def print_inter_matrix(self, values, h, w):
    """Show an intermediate value grid in a pop-up window.

    Thin wrapper that forwards to the module-level ``display`` helper.

    Args:
        values: grid to show; forwarded as ``display``'s first argument.
        h: grid height; forwarded as ``display``'s ``_height``.
        w: grid width; forwarded as ``display``'s ``_width``.
    """
    # Use a descriptive window title (the final grid is shown as
    # "Final Value") instead of a meaningless debug placeholder.
    display(values, h, w, "Intermediate Values")

def process_data(self, terminals, _height, _width, gridmdp):
"""preprocess variables"""

Expand Down Expand Up @@ -606,7 +613,7 @@ def create_graph(self, gridmdp, terminals, _height, _width):

self.canvas = FigureCanvasTkAgg(fig, self.frame)
self.canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True)
self.anim = animation.FuncAnimation(fig, self.animate_graph, interval=50)
self.anim = animation.FuncAnimation(fig, self.animate_graph, interval=300)
self.canvas.show()

def animate_graph(self, i):
Expand All @@ -620,8 +627,14 @@ def animate_graph(self, i):
y = np.linspace(0, len(self.gridmdp) - 1, y_interval)

sub.clear()
sub.imshow(self.grid_to_show, cmap='BrBG', aspect='auto', interpolation='none', extent=extents(x) + extents(y),
#sub.imshow(self.grid_to_show, cmap='BrBG', aspect='auto', interpolation='none', extent=extents(x) + extents(y),
# origin='lower')

for (j,i),label in np.ndenumerate(self.grid_to_show):
sub.text(i,j,label,ha='center',va='center')
sub.imshow(self.grid_to_show, aspect='auto', interpolation='none', extent=extents(x) + extents(y),
origin='lower')

fig.tight_layout()

U = self.U1.copy()
Expand All @@ -634,16 +647,20 @@ def animate_graph(self, i):
self.grid_to_show = grid_to_show = [[0.0] * max(1, self._width) for _ in range(max(1, self._height))]
for k, v in U.items():
self.grid_to_show[k[1]][k[0]] = v

#time.sleep(1)

if (self.delta < self.epsilon * (1 - self.gamma) / self.gamma) or (
self.iterations > 60) and self.terminated is False:
self.terminated = True
display(self.grid_to_show, self._height, self._width)
display(self.grid_to_show, self._height, self._width, "Final Value")

pi = best_policy(self.sequential_decision_environment,
value_iteration(self.sequential_decision_environment, .01))
display_best_policy(self.sequential_decision_environment.to_arrows(pi), self._height, self._width)



ax = fig.gca()
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
ax.yaxis.set_major_locator(MaxNLocator(integer=True))
Expand Down
Loading