From 21048060edf730e2ee3dfe63ecf29f644cfbe526 Mon Sep 17 00:00:00 2001
From: Wil Koch
Date: Sun, 14 Jun 2020 19:02:22 -0400
Subject: [PATCH] Add PPO evaluation

Closes: #75
---
 examples/gymfc_nf/policies/__init__.py        |   3 +-
 examples/gymfc_nf/policies/baselinespolicy.py |  14 ++
 examples/gymfc_nf/utils/monitor.py            |  63 +++++++++
 examples/ppo_baselines_evaluate.py            | 120 ++++++++++++++++++
 ...{ppo_example.py => ppo_baselines_train.py} |  11 +-
 5 files changed, 205 insertions(+), 6 deletions(-)
 create mode 100644 examples/gymfc_nf/policies/baselinespolicy.py
 create mode 100644 examples/gymfc_nf/utils/monitor.py
 create mode 100644 examples/ppo_baselines_evaluate.py
 rename examples/{ppo_example.py => ppo_baselines_train.py} (91%)

diff --git a/examples/gymfc_nf/policies/__init__.py b/examples/gymfc_nf/policies/__init__.py
index 38a545a..342ff2a 100644
--- a/examples/gymfc_nf/policies/__init__.py
+++ b/examples/gymfc_nf/policies/__init__.py
@@ -1,3 +1,4 @@
 from gymfc_nf.policies.pidpolicy import PidPolicy
 
-__all__ = ['PidPolicy']
+from gymfc_nf.policies.baselinespolicy import PpoBaselinesPolicy
+__all__ = ['PidPolicy', 'PpoBaselinesPolicy']
diff --git a/examples/gymfc_nf/policies/baselinespolicy.py b/examples/gymfc_nf/policies/baselinespolicy.py
new file mode 100644
index 0000000..3084f80
--- /dev/null
+++ b/examples/gymfc_nf/policies/baselinespolicy.py
@@ -0,0 +1,14 @@
+import numpy as np
+import tensorflow as tf
+from .policy import Policy
+
+class PpoBaselinesPolicy(Policy):
+    def __init__(self, sess):
+        graph = tf.get_default_graph()
+        self.x = graph.get_tensor_by_name('pi/ob:0')
+        self.y = graph.get_tensor_by_name('pi/pol/final/BiasAdd:0')
+        self.sess = sess
+
+    def action(self, state, sim_time=0, desired=np.zeros(3), actual=np.zeros(3)):
+        y_out = self.sess.run(self.y, feed_dict={self.x: [state]})
+        return y_out[0]
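Note: PpoBaselinesPolicy assumes the checkpoint's graph was built by the PPO1
trainer, so the input and output tensors can be recovered by name. A minimal
usage sketch follows; the checkpoint prefix and the observation size are
placeholders, and the real flow lives in ppo_baselines_evaluate.py below.

    import numpy as np
    import tensorflow as tf
    from gymfc_nf.policies import PpoBaselinesPolicy

    # Placeholder prefix; in practice this comes from
    # tf.train.get_checkpoint_state(ckpt_dir).model_checkpoint_path.
    input_checkpoint = "./checkpoints/model-100000"

    with tf.Session() as sess:
        # import_meta_graph rebuilds the trained graph so 'pi/ob:0' and
        # 'pi/pol/final/BiasAdd:0' can be looked up by name.
        saver = tf.train.import_meta_graph(input_checkpoint + ".meta",
                                           clear_devices=True)
        saver.restore(sess, input_checkpoint)
        pi = PpoBaselinesPolicy(sess)

        ob = np.zeros(7)   # placeholder observation; size depends on the env
        ac = pi.action(ob) # the four NN outputs (ac0-ac3 in the eval logs)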
diff --git a/examples/gymfc_nf/utils/monitor.py b/examples/gymfc_nf/utils/monitor.py
new file mode 100644
index 0000000..ae22089
--- /dev/null
+++ b/examples/gymfc_nf/utils/monitor.py
@@ -0,0 +1,63 @@
+import tensorflow as tf
+import os.path
+import time
+
+
+class CheckpointMonitor:
+    """Helper class to monitor TensorFlow checkpoints and call a callback
+    when a new checkpoint has been created."""
+
+    def __init__(self, checkpoint_dir, callback):
+        """
+        Args:
+            checkpoint_dir: Directory to monitor where new checkpoint
+                files will be created.
+            callback: A callback invoked when a new checkpoint is created.
+        """
+        self.checkpoint_dir = checkpoint_dir
+        self.callback = callback
+        # Track which checkpoints have already been processed.
+        self.processed = []
+
+        self.watching = True
+
+    def _check_new_checkpoint(self):
+        """Check for new checkpoints and invoke the callback for any
+        that have not been processed yet.
+
+        Each time a checkpoint is saved, TensorFlow updates the
+        'checkpoint' file with a list of all the checkpoints. We can
+        monitor this file to determine when new ones have been created.
+        """
+        # TODO (wfk) check if there is a way to get a callback when a file
+        # has changed.
+
+        ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir)
+        for path in ckpt.all_model_checkpoint_paths:
+            checkpoint_filename = os.path.split(path)[-1]
+            if tf.train.checkpoint_exists(path):
+                # Make sure there is a checkpoint meta file before allowing
+                # it to be processed.
+                meta_file = path + ".meta"
+                if os.path.isfile(meta_file):
+                    if checkpoint_filename not in self.processed:
+                        self.callback(checkpoint_filename)
+                        self.processed.append(checkpoint_filename)
+                else:
+                    print("Meta file {} doesn't exist.".format(meta_file))
+
+    def start(self):
+        # Sit and wait until the checkpoint directory is created; otherwise
+        # we can't monitor it. If it never gets created, this could be an
+        # indicator that something is wrong with the trainer.
+        c = 0
+        while not os.path.isdir(self.checkpoint_dir):
+            print("[WARN {}] Directory {} doesn't exist yet, waiting until "
+                  "created...".format(c, self.checkpoint_dir))
+            time.sleep(30)
+            c += 1
+
+        while self.watching:
+            self._check_new_checkpoint()
+            time.sleep(10)
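For reference, a minimal sketch of how CheckpointMonitor is driven; the
directory and the checkpoint filename in the comment are placeholders:

    from gymfc_nf.utils.monitor import CheckpointMonitor

    def on_new_checkpoint(checkpoint_filename):
        # Invoked once per unprocessed checkpoint, e.g. 'model-100000'.
        print("New checkpoint ready:", checkpoint_filename)

    # Must point at the same directory the trainer saves checkpoints to.
    monitor = CheckpointMonitor("./models/example-run/checkpoints",
                                on_new_checkpoint)
    monitor.start()  # blocks, polling every 10 s while self.watching is True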
+ """ + entries = [] + entries.append("t") + for i in range(ob_size): + entries.append("ob{}".format(i)) + for i in range(4): + entries.append("ac{}".format(i)) + for i in range(4): + entries.append("y{}".format(i)) + entries.append("p") # roll rate + entries.append("q") # pitch rate + entries.append("r") # yaw rate + entries.append("p-sp") # roll rate setpoint + entries.append("q-sp") # pitch rate setpoint + entries.append("r-sp") # yaw rate setpoint + for i in range(4): + entries.append("w{}".format(i)) # ESC rpms + entries.append("reward") + + log_header = ",".join(entries) + + def callback(checkpoint): + print ("Callback ", checkpoint) + + ckpt_eval_dir = os.path.join(eval_dir, checkpoint) + Path(ckpt_eval_dir).mkdir(parents=True, exist_ok=True) + + # TODO (wfk) I'm pretty sure this just takes the last checkpoint + # written defined by 'model_checkpoint_path' in the checkpoint file + # should look at how to specify the exact one. + checkpoint = tf.train.get_checkpoint_state(ckpt_dir) + input_checkpoint = checkpoint.model_checkpoint_path + print ("Using checkpoint=", input_checkpoint) + with tf.Session() as sess: + saver = tf.train.import_meta_graph(input_checkpoint + '.meta', + clear_devices=True) + saver.restore(sess, input_checkpoint) + pi = PpoBaselinesPolicy(sess) + + + for i in range(num_trials): + + pi.reset() + ob = env.reset() + if len(log_header) == 0: + make_header(len(ob)) + + log_file = os.path.join(ckpt_eval_dir, "trial-{}.csv".format(i)) + + sim_time = 0 + actual = np.zeros(3) + + logs = [] + while True: + ac = pi.action(ob, env.sim_time, env.angular_rate_sp, + env.imu_angular_velocity_rpy) + ob, reward, done, _ = env.step(ac) + + log = ([env.sim_time] + + ob.tolist() + # The observations are the NN input + ac.tolist() + # The actions are the NN output + env.y.tolist() + # Y is the output sent to the ESC + + env.imu_angular_velocity_rpy.tolist() + # Angular velocites + env.angular_rate_sp.tolist() + # + env.esc_motor_angular_velocity.tolist() + + [reward])# The reward that would have been given for the action, can be helpful for debugging + + logs.append(log) + + if done: + break + np.savetxt(log_file, logs, delimiter=",", header=log_header) + + monitor = CheckpointMonitor(args.ckpt_dir, callback) + monitor.start() diff --git a/examples/ppo_example.py b/examples/ppo_baselines_train.py similarity index 91% rename from examples/ppo_example.py rename to examples/ppo_baselines_train.py index 056b246..2723d46 100644 --- a/examples/ppo_example.py +++ b/examples/ppo_baselines_train.py @@ -27,7 +27,7 @@ def get_commit_hash(): def get_training_name(): now = datetime.datetime.now() - timestamp = now.strftime('%y%m%d-%H%M%S') + timestamp = now.strftime('%Y%m%d-%H%M%S') return "baselines_" + get_commit_hash() + "_" + timestamp @@ -80,7 +80,8 @@ def policy_fn(name, ob_space, ac_space): if __name__ == '__main__': parser = argparse.ArgumentParser("Synthesize a neuro-flight controller.") - parser.add_argument('--checkpoint_dir', default="../../checkpoints") + parser.add_argument('--model_dir', default="../../models", + help="Directory where models are saved to.") parser.add_argument('--twin', default="./gymfc_nf/twins/nf1/model.sdf", help="File path of the aircraft digitial twin/model SDF.") parser.add_argument('--gym-id', default="gymfc_nf-step-v1") @@ -88,11 +89,11 @@ def policy_fn(name, ob_space, ac_space): parser.add_argument('--ckpt-freq', type=int, default=100e3) args = parser.parse_args() - training_dir = os.path.join(args.checkpoint_dir, get_training_name()) - print ("Storing 
diff --git a/examples/ppo_example.py b/examples/ppo_baselines_train.py
similarity index 91%
rename from examples/ppo_example.py
rename to examples/ppo_baselines_train.py
index 056b246..2723d46 100644
--- a/examples/ppo_example.py
+++ b/examples/ppo_baselines_train.py
@@ -27,7 +27,7 @@ def get_commit_hash():
 
 def get_training_name():
     now = datetime.datetime.now()
-    timestamp = now.strftime('%y%m%d-%H%M%S')
+    timestamp = now.strftime('%Y%m%d-%H%M%S')
     return "baselines_" + get_commit_hash() + "_" + timestamp
 
 
@@ -80,7 +80,8 @@ def policy_fn(name, ob_space, ac_space):
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser("Synthesize a neuro-flight controller.")
-    parser.add_argument('--checkpoint_dir', default="../../checkpoints")
+    parser.add_argument('--model_dir', default="../../models",
+                        help="Directory where models are saved to.")
     parser.add_argument('--twin', default="./gymfc_nf/twins/nf1/model.sdf",
                         help="File path of the aircraft digital twin/model SDF.")
     parser.add_argument('--gym-id', default="gymfc_nf-step-v1")
@@ -88,11 +89,11 @@ def policy_fn(name, ob_space, ac_space):
     parser.add_argument('--ckpt-freq', type=int, default=100e3)
 
     args = parser.parse_args()
-    training_dir = os.path.join(args.checkpoint_dir, get_training_name())
-    print ("Storing results to ", training_dir)
+    training_dir = os.path.join(args.model_dir, get_training_name())
 
     seed = np.random.randint(0, 1e6)
-    ckpt_dir = os.path.join(training_dir, "checkpoints")
+    ckpt_dir = os.path.abspath(os.path.join(training_dir, "checkpoints"))
+    print("Saving checkpoints to {}.".format(ckpt_dir))
     render = False
     # How many timesteps until a checkpoint is saved
    ckpt_freq = args.ckpt_freq
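The renamed trainer and the new evaluator are meant to run side by side: the
trainer writes checkpoints under <model_dir>/<training name>/checkpoints while
the evaluator polls that directory and evaluates each checkpoint as it
appears. Example invocations, with an illustrative run directory name:

    python ppo_baselines_train.py --model_dir ../../models
    # In a second terminal, once the run directory has been created:
    python ppo_baselines_evaluate.py \
        ../../models/baselines_<hash>_<timestamp>/checkpoints --num-trials 3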