Update aggregate statistics unit to cycle (#4)
gsavarela committed Feb 27, 2020
1 parent b5753f5 commit 0f0be2e
Showing 5 changed files with 84 additions and 171 deletions.
16 changes: 8 additions & 8 deletions analysis/evaluation.py
@@ -7,7 +7,6 @@
 import os
 import json
 from glob import glob
-import pdb

 # third-party libs
 import matplotlib.pyplot as plt
@@ -27,20 +26,21 @@
 file_paths = sorted(glob(f"{path}*.[0-9].eval.info.json"))

 returns = []
 for file_path in file_paths:
     with open(file_path, 'r') as f:
         db = json.load(f)
-        returns.append(db['returns'])
-    t = len(db['per_step_returns'][0])
+        ret = np.array(db['rewards']).sum(axis=1)
+        returns.append(ret)
+    t = len(db['rewards'][0])

-returns = np.array(returns)
-# convert rewards to returns
+returns = np.concatenate(returns)
 y = np.mean(returns, axis=0)
 err = np.std(returns, axis=0)
 y_error = [err, err]
 num_iterations = len(y)

 x = [int(t / 90) * i for i in range(num_iterations)]

 # Must savefig.
 label = f'{config_dir[:2]}x{config_dir[2:]}'
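In plain terms, the loader now derives returns from raw per-step rewards instead of reading a precomputed 'returns' key. A minimal standalone sketch of the new computation, assuming db['rewards'] is shaped (num_rollouts, num_steps) in each file (the function name and return value here are illustrative, not part of the commit):

import json
from glob import glob

import numpy as np

def load_pooled_returns(path):
    """Sketch: pool per-rollout returns from every *.eval.info.json file."""
    file_paths = sorted(glob(f"{path}*.[0-9].eval.info.json"))
    returns = []
    for file_path in file_paths:
        with open(file_path, 'r') as f:
            db = json.load(f)
        # Summing per-step rewards over axis=1 collapses each rollout
        # to a single return.
        returns.append(np.array(db['rewards']).sum(axis=1))
    # np.concatenate (rather than np.array) chains the rollouts from all
    # files into one flat sample, so mean/std weight every rollout equally.
    returns = np.concatenate(returns)
    return returns.mean(), returns.std()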
44 changes: 0 additions & 44 deletions analysis/ks_2samp.py

This file was deleted.

6 changes: 3 additions & 3 deletions analysis/leaderboard.py
@@ -130,9 +130,9 @@ def plots():
     with open(path, 'r') as f:
         stats = json.load(f)

-    returns = stats['per_step_returns']
-    ni = len(stats['per_step_returns'])
-    total = len(stats['per_step_returns'][0])
+    returns = stats['rewards']
+    ni = len(stats['rewards'])
+    total = len(stats['rewards'][0])
     nc = int(total / CYCLE)

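The script now reads the renamed 'rewards' key and, as before, regroups the per-step series into cycles via nc = int(total / CYCLE). A toy sketch of that regrouping, assuming CYCLE divides the episode length evenly (CYCLE = 90 and the array sizes are made up for the example):

import numpy as np

CYCLE = 90                           # assumed cycle length in steps

# One row per training iteration, one column per simulation step.
rewards = np.zeros((10, 900))        # stand-in for stats['rewards']
ni, total = len(rewards), len(rewards[0])
nc = int(total / CYCLE)              # 10 cycles per iteration

# Group steps into cycles, then sum rewards within each cycle:
per_cycle = np.asarray(rewards).reshape(ni, nc, CYCLE).sum(axis=2)
assert per_cycle.shape == (ni, nc)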
89 changes: 28 additions & 61 deletions analysis/rewards.py
@@ -25,11 +25,6 @@ def get_arguments():
         """
     )

-    # TODO: validate against existing networks
-    parser.add_argument('period', type=str, nargs='?',
-                        choices=('cycles', 'episodes'),
-                        default='episodes', help='Cycle or episode')
-
     parser.add_argument('--type', '-t', dest='db_type', nargs='?',
                         choices=('evaluate', 'train'),
                         default='evaluate',
@@ -50,7 +45,6 @@ def str2bool(v):

 if __name__ == '__main__':
     parser = get_arguments()
-    period = parser.period

     db_type = parser.db_type
     if db_type == 'evaluate':
@@ -76,62 +70,42 @@ def str2bool(v):
     with open(path, 'r') as f:
         data = json.load(f)

-    num_iter = len(data['per_step_returns'])
-    num_steps = len(data['per_step_returns'][0])
-
-    if period == 'cycles':
-        p = cycle_time
-    elif period == 'episodes':
-        p = num_steps
-    else:
-        raise ValueError(f'period<{period}> not recognized!')
+    num_iter = len(data['rewards'])
+    num_steps = len(data['rewards'][0])

     if nf == 0:
-        N = int((num_steps * num_iter) / p)
+        N = int((num_steps * num_iter) / cycle_time)
         rets = np.zeros((N,), dtype=float)
         vels = np.zeros((N,), dtype=float)
         vehs = np.zeros((N,), dtype=float)
         acts = np.zeros((N, num_dbs), dtype=int)

     for i in range(num_iter):

-        if period == 'episodes':
-            _vels = data["velocities"][i]
-            _rets = data["mean_returns"][i]
-            _vehs = data["vehicles"][i]
-
-            vels[i] = (nf * vels[i] + _vels) / (nf + 1)
-            rets[i] = (nf * rets[i] + _rets) / (nf + 1)
-            vehs[i] = (nf * vehs[i] + _vehs) / (nf + 1)
-        else:
-
-            _rets = data['per_step_returns'][i]
-            _vels = data['per_step_velocities'][i]
-            _vehs = data['per_step_vehs'][i]
-            _acts = np.array(data["rl_actions"][i])
-            for t in range(0, num_steps, cycle_time):
-
-                cycle = int(i * (num_steps / cycle_time) + t / cycle_time)
-                ind = slice(t, t + cycle_time)
-                vels[cycle] = \
-                    (nf * vels[cycle] + np.mean(_vels[ind])) / (nf + 1)
-                rets[cycle] = \
-                    (nf * rets[cycle] + np.sum(_rets[ind])) / (nf + 1)
-                vehs[cycle] = \
-                    (nf * vehs[cycle] + np.mean(_vehs[ind])) / (nf + 1)
-                # 0x02 per_step_actions
-                if len(_acts) == num_steps:
-                    acts[cycle, nf] = \
-                        round(np.array(_acts[ind]).mean())
-                else:
-                    # 0x03 actions per decision
-                    acts[cycle, nf] = _acts[cycle]
+        _rets = data['rewards'][i]
+        _vels = data['velocities'][i]
+        _vehs = data['per_step_vehs'][i]
+        _acts = np.array(data["rl_actions"][i])
+        for t in range(0, num_steps, cycle_time):
+
+            cycle = int(i * (num_steps / cycle_time) + t / cycle_time)
+            ind = slice(t, t + cycle_time)
+            vels[cycle] = \
+                (nf * vels[cycle] + np.mean(_vels[ind])) / (nf + 1)
+            rets[cycle] = \
+                (nf * rets[cycle] + np.sum(_rets[ind])) / (nf + 1)
+            vehs[cycle] = \
+                (nf * vehs[cycle] + np.mean(_vehs[ind])) / (nf + 1)
+            # 0x02 per_step_actions
+            if len(_acts) == num_steps:
+                acts[cycle, nf] = \
+                    round(np.array(_acts[ind]).mean())
+            else:
+                # 0x03 actions per decision
+                acts[cycle, nf] = _acts[cycle]

     _, ax1 = plt.subplots()
-    if period == 'cycles':
-        ax1.set_xlabel(f'Cycles ({cycle_time} sec)')
-    else:
-        ax1.set_xlabel(f'Episodes ({num_steps} sec)')
+    ax1.set_xlabel(f'Cycles ({cycle_time} sec)')

     color = 'tab:blue'
     ax1.set_ylabel('Avg. speed', color=color)
@@ -151,28 +125,21 @@

     color = 'tab:cyan'
     _, ax1 = plt.subplots()
-    if period == 'cycles':
-        ax1.set_xlabel(f'Cycles ({cycle_time} sec)')
-    else:
-        ax1.set_xlabel(f'Episodes ({num_steps} sec)')
+    ax1.set_xlabel(f'Cycles ({cycle_time} sec)')

     ax1.set_ylabel('Avg. Reward per Cycle', color=color)
     ax2.tick_params(axis='y', labelcolor=color)
     ax1.plot(rets, color=color)
     plt.title(f'{phase_split}:avg. cycle return\n({db_type}, n={num_dbs})')
-    plt.savefig(f'{files_dir}{phase_split}_{db_type}_rewards_{period}.png')
+    plt.savefig(f'{files_dir}{phase_split}_{db_type}_rewards.png')
     plt.show()

     # optimal action
     # TODO: allow for customize action
     optact = 0.0
     _, ax1 = plt.subplots()
     color = 'tab:orange'
-    if period == 'cycles':
-        ax1.set_xlabel(f'Cycles ({cycle_time} sec)')
-    else:
-        ax1.set_xlabel(f'Episodes ({num_steps} sec)')
-
+    ax1.set_xlabel(f'Cycles ({cycle_time} sec)')
     ax1.set_ylabel('ratio optimal action')

     cumacts = np.cumsum(acts == optact, axis=0)
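Two idioms in the surviving branch deserve a note: ind = slice(t, t + cycle_time) carves one cycle out of a per-step array, and (nf * x + x_new) / (nf + 1) is an incremental mean that folds each new file into the running average without holding all files in memory. A self-contained sketch of both, with illustrative sizes:

import numpy as np

cycle_time = 90                       # illustrative; matches the x-label text
num_steps = 900
_rets = np.random.rand(num_steps)     # stand-in for data['rewards'][i]

acc = np.zeros(num_steps // cycle_time)
for nf in range(3):                   # pretend we fold in three files
    for t in range(0, num_steps, cycle_time):
        cycle = t // cycle_time
        ind = slice(t, t + cycle_time)            # one cycle of steps
        # Incremental mean: after nf files, acc holds their average, so
        # weighting by nf and renormalizing keeps it an exact mean.
        acc[cycle] = (nf * acc[cycle] + np.sum(_rets[ind])) / (nf + 1)

# Folding the same file in repeatedly leaves the mean unchanged:
assert np.allclose(acc, [np.sum(_rets[t:t + cycle_time])
                         for t in range(0, num_steps, cycle_time)])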
100 changes: 45 additions & 55 deletions ilurl/core/experiment.py
@@ -90,6 +90,7 @@ def __init__(self, env, dir_path=EMISSION_PATH, train=True, policies=None):
         self.train = train
         self.dir_path = dir_path
         self.policies = policies
+        self.cycle = getattr(env, 'cycle_time', None)

         logging.info(" Starting experiment {} at {}".format(
             env.network.name, str(datetime.datetime.utcnow())))
@@ -150,50 +151,46 @@ def run(

         def rl_actions(*_):
             return None
-        # duration flags where in the current phase
-        # the syncronous agent is.
-        is_synch = hasattr(self.env, "duration")

-        rets = []
-        mean_rets = []
-        ret_lists = []
         vels = []
         vehs = []
-        mean_vels = []
-        vels_lists = []
-        mean_vehs = []
-        veh_lists = []
-        std_vels = []
         outflows = []
         observation_spaces = []
-        actions_lists = []
+        actions = []
+        rewards = []

         for i in range(num_runs):
-            vel = np.zeros(num_steps)
-            veh = np.zeros(num_steps)
             logging.info("Iter #" + str(i))
-            ret = 0
-            ret_list = []
-            actions_list = []

+            vel_list = []
+            veh_list = []
+            rew_list = []
+            act_list = []
+            obs_list = []
             state = self.env.reset()

+            veh_i = []
+            vel_i = []
             for j in tqdm(range(num_steps)):
                 state, reward, done, _ = self.env.step(rl_actions(state))
-                speeds = self.env.k.vehicle.get_speed(
-                    self.env.k.vehicle.get_ids()
-                )
-                vel[j] = round(np.nanmean(speeds), 2)
-                veh[j] = len(speeds)
-
-                ret += reward if not(np.isnan(reward)) else 0
-                ret_list.append(round(reward, 2))
+                veh_i.append(len(self.env.k.vehicle.get_ids()))
+                vel_i.append(
+                    np.nanmean(self.env.k.vehicle.get_speed(
+                        self.env.k.vehicle.get_ids()
+                    ))
+                )
+                if self.cycle is not None:
+                    if self.env.duration == 0.0:
+                        obs_list.append(
+                            list(self.env.get_observation_space()))
+                        act_list.append(
+                            getattr(self.env, 'rl_action', None))
+                        rew_list.append(reward)

-                if is_synch and self.env.duration == 0.0 and j > 0:
-                    observation_space = list(self.env.get_observation_space())
-                    observation_spaces.append(observation_space)
-                    if hasattr(self.env, 'rl_action'):
-                        actions_list.append(list(self.env.rl_action))
+                        veh_list.append(sum(veh_i) / self.cycle)
+                        vel_list.append(sum(vel_i) / self.cycle)
                 if done:
                     break

@@ -212,38 +209,31 @@ def rl_actions(*_):
             if i < len(self.policies):
                 self.env.Q = self.policies[i]

-            ret = round(ret, 2)
-            rets.append(ret)
-            vels.append(vel.tolist())
-            vehs.append(veh.tolist())
-
-            mean_rets.append(round(np.nanmean(ret_list), 2))
-            ret_lists.append(ret_list)
-            actions_lists.append(actions_list)
-
-            mean_vels.append(round(np.nanmean(vel), 2))
-            mean_vehs.append(np.mean(veh))
             outflows.append(self.env.k.vehicle.get_outflow_rate(int(500)))
-            std_vels.append(round(np.nanstd(vel), 2))

+            vels.append(vel_list)
+            vehs.append(veh_list)
+            observation_spaces.append(obs_list)
+            actions.append(act_list)
+            rewards.append(rew_list)

             print(f"""
-            Round {i}\treturn: {sum(ret_list):0.2f}\tavg speed:{mean_vels[-1]}
+            Round {i}\treturn: {sum(rew_list):0.2f}\tavg speed:{np.mean(vel_list)}
             """)

             if show_plot:
+                _rets = [np.sum(rew_list) for rew_list in rewards]
+                _vels = [np.nanmean(vel_list) for vel_list in vels]
                 self.ax1.plot(_rets, 'c-')
                 self.ax2.plot(_vels, 'b-')
                 plt.draw()
                 plt.pause(0.01)

         info_dict["id"] = self.env.network.name
-        info_dict["returns"] = rets
-        info_dict["velocities"] = mean_vels
-        info_dict["mean_returns"] = mean_rets
-        info_dict["per_step_returns"] = ret_lists
-        info_dict["outflows"] = round(np.mean(outflows).astype(float), 2)
+        info_dict["mean_outflows"] = round(np.mean(outflows).astype(float), 2)

-        info_dict["per_step_velocities"] = vels
-        info_dict["per_step_vehs"] = vehs
+        info_dict["rewards"] = rewards
+        info_dict["velocities"] = vels
+        info_dict["vehicles"] = vehs
         info_dict["observation_spaces"] = observation_spaces
-        info_dict["rl_actions"] = actions_lists
-        info_dict["vehicles"] = mean_vehs
+        info_dict["rl_actions"] = actions

         print("Average, std return: {}, {}".format(np.nanmean(rets),
                                                    np.nanstd(rets)))
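The rewritten run() loop keys all bookkeeping to self.env.duration, which the synchronous environments wrap back to 0.0 at each signal-cycle boundary. A schematic of that per-cycle aggregation, with the buffer reset made explicit (the function name, the bare env argument, and the flushing of veh_i/vel_i are assumptions, not shown in the diff):

import numpy as np

def collect_cycle_stats(env, num_steps):
    """Sketch of per-cycle aggregation as in the updated Experiment.run()."""
    cycle = getattr(env, 'cycle_time', None)
    rew_list, vel_list, veh_list = [], [], []
    veh_i, vel_i = [], []             # per-step buffers for the current cycle
    state = env.reset()
    for j in range(num_steps):
        state, reward, done, _ = env.step(None)
        ids = env.k.vehicle.get_ids()
        veh_i.append(len(ids))
        vel_i.append(np.nanmean(env.k.vehicle.get_speed(ids)))
        if cycle is not None and env.duration == 0.0:
            # Cycle boundary: record the reward and the per-step averages
            # accumulated over the last cycle, then start fresh buffers.
            rew_list.append(reward)
            veh_list.append(sum(veh_i) / cycle)
            vel_list.append(sum(vel_i) / cycle)
            veh_i, vel_i = [], []
        if done:
            break
    return rew_list, vel_list, veh_list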
