forked from Sentdex/SC2RL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
sc2env.py
89 lines (79 loc) · 2.62 KB
/
sc2env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import gym
from gym import spaces
import numpy as np
import subprocess
import pickle
import time
import os
class Sc2Env(gym.Env):
    """Gym environment that exchanges data with a bot process via a pickle file.

    The environment and the bot script started by ``reset`` communicate
    through a single file in the current working directory. The file holds
    a pickled dict with the keys ``"state"``, ``"reward"``, ``"action"`` and
    ``"done"``. The ``"action"`` slot doubles as the hand-off flag: ``step``
    only stores a new action while the slot is ``None``.

    NOTE(review): the file is read with ``pickle.load``, which can execute
    arbitrary code; run this only in a working directory you trust.
    """

    # File shared with the bot process (relative to the working directory).
    _STATE_FILE = 'state_rwd_action.pkl'
    # Shape of the image observation; must agree with observation_space.
    _OBSERVATION_SHAPE = (224, 224, 3)
    # Action written into the file when reading the state fails.
    _FALLBACK_ACTION = 3
    # Seconds to sleep between polls so the wait loops do not spin a CPU core.
    _POLL_INTERVAL = 0.001

    def __init__(self):
        super().__init__()
        # Six discrete actions; observations are 224x224 RGB uint8 images.
        self.action_space = spaces.Discrete(6)
        self.observation_space = spaces.Box(
            low=0, high=255, shape=self._OBSERVATION_SHAPE, dtype=np.uint8)

    def _blank_observation(self):
        """Return an all-zero observation of the declared shape and dtype."""
        return np.zeros(self._OBSERVATION_SHAPE, dtype=np.uint8)

    def _submit_action(self, action):
        """Block until the file's action slot is ``None``, then store `action`.

        Any error while reading or writing (missing file, partially written
        pickle, missing key, ...) is treated as "not ready yet" and retried.
        """
        while True:
            try:
                with open(self._STATE_FILE, 'rb') as f:
                    payload = pickle.load(f)
                # The read handle is closed before the file is reopened for
                # writing, so the file is never truncated while being read.
                if payload['action'] is None:
                    payload['action'] = action
                    with open(self._STATE_FILE, 'wb') as f:
                        pickle.dump(payload, f)
                    # Only report success once the action is really on disk.
                    return
            except Exception:
                # Deliberately best-effort: the other process may be in the
                # middle of rewriting the file, and unpickling a partial file
                # can raise many different exception types.
                pass
            time.sleep(self._POLL_INTERVAL)

    def _await_state(self):
        """Block until the file holds a non-``None`` action; return its data.

        Returns:
            Tuple ``(state, reward, done)`` read from the shared file.

        If reading fails, the file is overwritten with a blank state and the
        fallback action, and polling continues. An error raised by that
        fallback write itself propagates to the caller.

        NOTE(review): ``_submit_action`` has just stored a non-``None``
        action, so the first successful read here can return the state that
        was already in the file. Confirm against the bot script that this is
        the intended hand-off.
        """
        while True:
            try:
                # An empty file means a writer has truncated it but not yet
                # written the payload; just poll again.
                if os.path.getsize(self._STATE_FILE) > 0:
                    with open(self._STATE_FILE, 'rb') as f:
                        payload = pickle.load(f)
                    if payload['action'] is not None:
                        return (payload['state'], payload['reward'],
                                payload['done'])
            except Exception:
                # If still failing, input an ACTION, 3 (scout), together with
                # a blank state so the next read can succeed.
                fallback = {"state": self._blank_observation(), "reward": 0,
                            "action": self._FALLBACK_ACTION, "done": False}
                with open(self._STATE_FILE, 'wb') as f:
                    pickle.dump(fallback, f)
            time.sleep(self._POLL_INTERVAL)

    def step(self, action):
        """Hand `action` to the bot through the shared file and read the result.

        Args:
            action: value stored under the ``"action"`` key of the file.

        Returns:
            Tuple ``(observation, reward, done, info)``; ``info`` is always
            an empty dict.
        """
        self._submit_action(action)
        observation, reward, done = self._await_state()
        info = {}
        return observation, reward, done, info

    def reset(self):
        """Reinitialise the shared file and launch the bot script.

        Writes a blank state with no pending action, then starts
        ``incredibot-sct.py`` without waiting for it to finish.

        Returns:
            The all-zero initial observation.
        """
        print("RESETTING ENVIRONMENT!!!!!!!!!!!!!")
        observation = self._blank_observation()
        # Empty action slot: waiting for the next action to be supplied.
        data = {"state": observation, "reward": 0, "action": None,
                "done": False}
        with open(self._STATE_FILE, 'wb') as f:
            pickle.dump(data, f)

        # Run incredibot-sct.py non-blocking.
        subprocess.Popen(['python3', 'incredibot-sct.py'])
        return observation  # reward, done, info can't be included