# make_gif.py (forked from nikhilbarhate99/PPO-PyTorch)
import os
import glob
import time
from datetime import datetime
import torch
import numpy as np
from PIL import Image
import gym
import roboschool
from PPO import PPO
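
# Note: importing roboschool registers the Roboschool* environments with gym as a
# side effect, which is why it is imported even though it is never referenced below.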
"""
One frame corresponding to each timestep is saved in a folder :
PPO_gif_images/env_name/000001.jpg
PPO_gif_images/env_name/000002.jpg
PPO_gif_images/env_name/000003.jpg
...
...
...
if this section is run multiple times or for multiple episodes for the same env_name;
then the saved images will be overwritten.
"""
############################# save images for gif ##############################
def save_gif_images(env_name, has_continuous_action_space, max_ep_len, action_std):
    print("============================================================================================")

    total_test_episodes = 1     # save gif for only one episode

    K_epochs = 80               # update policy for K epochs
    eps_clip = 0.2              # clip parameter for PPO
    gamma = 0.99                # discount factor

    lr_actor = 0.0003           # learning rate for actor
    lr_critic = 0.001           # learning rate for critic

    env = gym.make(env_name)

    # state space dimension
    state_dim = env.observation_space.shape[0]

    # action space dimension
    if has_continuous_action_space:
        action_dim = env.action_space.shape[0]
    else:
        action_dim = env.action_space.n

    # make directory for saving gif images
    gif_images_dir = "PPO_gif_images" + '/'
    if not os.path.exists(gif_images_dir):
        os.makedirs(gif_images_dir)

    # make environment directory for saving gif images
    gif_images_dir = gif_images_dir + '/' + env_name + '/'
    if not os.path.exists(gif_images_dir):
        os.makedirs(gif_images_dir)

    # make directory for gif
    gif_dir = "PPO_gifs" + '/'
    if not os.path.exists(gif_dir):
        os.makedirs(gif_dir)

    # make environment directory for gif
    gif_dir = gif_dir + '/' + env_name + '/'
    if not os.path.exists(gif_dir):
        os.makedirs(gif_dir)

    ppo_agent = PPO(state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, has_continuous_action_space, action_std)

    # preTrained weights directory
    random_seed = 0             #### set this to load a particular checkpoint trained on random seed
    run_num_pretrained = 0      #### set this to load a particular checkpoint num

    directory = "PPO_preTrained" + '/' + env_name + '/'
    checkpoint_path = directory + "PPO_{}_{}_{}.pth".format(env_name, random_seed, run_num_pretrained)
    print("loading network from : " + checkpoint_path)

    ppo_agent.load(checkpoint_path)

    print("--------------------------------------------------------------------------------------------")

    test_running_reward = 0

    for ep in range(1, total_test_episodes + 1):

        ep_reward = 0
        state = env.reset()

        for t in range(1, max_ep_len + 1):
            action = ppo_agent.select_action(state)
            state, reward, done, _ = env.step(action)
            ep_reward += reward

            img = env.render(mode='rgb_array')
            img = Image.fromarray(img)
            img.save(gif_images_dir + '/' + str(t).zfill(6) + '.jpg')

            if done:
                break

        # clear buffer
        ppo_agent.buffer.clear()

        test_running_reward += ep_reward
        print('Episode: {} \t\t Reward: {}'.format(ep, round(ep_reward, 2)))
        ep_reward = 0

    env.close()

    print("============================================================================================")
    print("total number of frames / timesteps / images saved : ", t)
    avg_test_reward = test_running_reward / total_test_episodes
    avg_test_reward = round(avg_test_reward, 2)
    print("average test reward : " + str(avg_test_reward))
    print("============================================================================================")
######################## generate gif from saved images ########################
def save_gif(env_name):
    print("============================================================================================")

    gif_num = 0     #### change this to prevent overwriting gifs in same env_name folder

    # adjust following parameters to get desired duration, size (bytes) and smoothness of gif
    total_timesteps = 300
    step = 10
    frame_duration = 150

    # input images
    gif_images_dir = "PPO_gif_images/" + env_name + '/*.jpg'

    # output gif path
    gif_dir = "PPO_gifs"
    if not os.path.exists(gif_dir):
        os.makedirs(gif_dir)

    gif_dir = gif_dir + '/' + env_name
    if not os.path.exists(gif_dir):
        os.makedirs(gif_dir)

    gif_path = gif_dir + '/PPO_' + env_name + '_gif_' + str(gif_num) + '.gif'

    img_paths = sorted(glob.glob(gif_images_dir))
    img_paths = img_paths[:total_timesteps]
    img_paths = img_paths[::step]

    print("total frames in gif : ", len(img_paths))
    print("total duration of gif : " + str(round(len(img_paths) * frame_duration / 1000, 2)) + " seconds")

    # save gif
    img, *imgs = [Image.open(f) for f in img_paths]
    img.save(fp=gif_path, format='GIF', append_images=imgs, save_all=True, optimize=True, duration=frame_duration, loop=0)

    print("saved gif at : ", gif_path)
    print("============================================================================================")
############################# check gif byte size ##############################
def list_gif_size(env_name):
    print("============================================================================================")
    gif_dir = "PPO_gifs/" + env_name + '/*.gif'
    gif_paths = sorted(glob.glob(gif_dir))

    for gif_path in gif_paths:
        file_size = os.path.getsize(gif_path)
        print(gif_path + '\t\t' + str(round(file_size / (1024 * 1024), 2)) + " MB")
    print("============================================================================================")
if __name__ == '__main__':

    # env_name = "CartPole-v1"
    # has_continuous_action_space = False
    # max_ep_len = 400
    # action_std = None

    # env_name = "LunarLander-v2"
    # has_continuous_action_space = False
    # max_ep_len = 500
    # action_std = None

    # env_name = "BipedalWalker-v2"
    # has_continuous_action_space = True
    # max_ep_len = 1500           # max timesteps in one episode
    # action_std = 0.1            # set same std for action distribution which was used while saving

    # env_name = "RoboschoolWalker2d-v1"
    # has_continuous_action_space = True
    # max_ep_len = 1000           # max timesteps in one episode
    # action_std = 0.1            # set same std for action distribution which was used while saving

    env_name = "RoboschoolHalfCheetah-v1"
    has_continuous_action_space = True
    max_ep_len = 1000             # max timesteps in one episode
    action_std = 0.1              # set same std for action distribution which was used while saving

    # env_name = "RoboschoolHopper-v1"
    # has_continuous_action_space = True
    # max_ep_len = 1000           # max timesteps in one episode
    # action_std = 0.1            # set same std for action distribution which was used while saving

    # save .jpg images in PPO_gif_images folder
    save_gif_images(env_name, has_continuous_action_space, max_ep_len, action_std)

    # save .gif in PPO_gifs folder using .jpg images
    save_gif(env_name)

    # list byte size (in MB) of gifs in one "PPO_gifs/env_name/" folder
    list_gif_size(env_name)
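
# Usage sketch (assuming the directory layout used by this repo's training script):
# train a policy first so that a checkpoint such as
#   PPO_preTrained/RoboschoolHalfCheetah-v1/PPO_RoboschoolHalfCheetah-v1_0_0.pth
# exists, then run `python make_gif.py`; frames are written to
# PPO_gif_images/<env_name>/ and the finished gif to PPO_gifs/<env_name>/.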