diff --git a/analysis/evaluation.py b/analysis/evaluation.py
index 643ba71..6ea93c4 100644
--- a/analysis/evaluation.py
+++ b/analysis/evaluation.py
@@ -7,7 +7,6 @@
 import os
 import json
 from glob import glob
-import pdb

 # third-party libs
 import matplotlib.pyplot as plt
@@ -27,20 +26,21 @@
     file_paths = sorted(glob(f"{path}*.[0-9].eval.info.json"))

     returns = []
     for file_path in file_paths:
         with open(file_path, 'r') as f:
             db = json.load(f)

-        returns.append(db['returns'])
-        t = len(db['per_step_returns'][0])
+        # one return per run: sum the per-cycle rewards
+        ret = np.array(db['rewards']).sum(axis=1)
+        returns.append(ret)
+        t = len(db['rewards'][0])

-    returns = np.array(returns)
+    # stack per-file returns into a (num_files, num_runs) array
+    returns = np.stack(returns)
     y = np.mean(returns, axis=0)
-    err = np.std(returns, axis=0)
+    err = np.std(returns, axis=0)
     y_error = [err, err]

     num_iterations = len(y)
-    x = [int(t / 90) * i for i in range(num_iterations)]
-
+    # NOTE: the figure must be persisted with plt.savefig.
     label = f'{config_dir[:2]}x{config_dir[2:]}'
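The loop above assumes db['rewards'] holds one list of per-cycle rewards per run, so summing over axis 1 yields one return per run. A minimal sketch of that convention, with invented values:

    import numpy as np

    # two runs, three cycles each (toy values)
    rewards = np.array([[0.5, 0.2, 0.3],
                        [0.1, 0.4, 0.2]])
    returns = rewards.sum(axis=1)  # one return per run -> [1.0, 0.7]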
diff --git a/analysis/ks_2samp.py b/analysis/ks_2samp.py
deleted file mode 100644
index ff283f8..0000000
--- a/analysis/ks_2samp.py
+++ /dev/null
@@ -1,44 +0,0 @@
-"""Implements the Komogorov Smirnov
-
-    Are the two return samples comming from the same distribution?
-
-"""
-__author__ = 'Guilherme Varela'
-__date__ = '2020-01-12'
-import json
-import os
-
-import numpy as np
-from scipy.stats import ks_2samp
-
-EMISSION_PATH = f"{os.environ['ILURL_HOME']}/data/emissions/"
-# baseline_path = f"{EMISSION_PATH}intersection_20200112-1758511578851931.3494809-info.json"
-
-baseline_path = f"{EMISSION_PATH}intersection_20200117-2058001579294680.783177.info.json"
-
-# This is the true test
-# test_path = f"{EMISSION_PATH}intersection_20200112-1817011578853021.163328-info.json"
-# This is another baseline test
-test_path = f"{EMISSION_PATH}intersection_20200117-2109501579295390.89992.info.json"
-with open(baseline_path, 'r') as f:
-    baseline = json.load(f)
-
-with open(test_path, 'r') as f:
-    agent = json.load(f)
-
-baseline_returns = np.array(baseline['returns'])
-baseline_speeds = np.array(baseline['velocities'])
-baseline_per_returns = np.array([ret
-                                 for returns in baseline['per_step_returns']
-                                 for ret in returns])
-
-agent_returns = np.array(agent['returns'])
-agent_speeds = np.array(agent['velocities'])
-agent_per_returns = np.array([ret
-                              for returns in agent['per_step_returns']
-                              for ret in returns])
-
-print(ks_2samp(baseline_returns, agent_returns))
-print(ks_2samp(baseline_speeds, agent_speeds))
-print(ks_2samp(baseline_per_returns, agent_per_returns))
-
diff --git a/analysis/leaderboard.py b/analysis/leaderboard.py
index 52da7b8..23ac927 100644
--- a/analysis/leaderboard.py
+++ b/analysis/leaderboard.py
@@ -130,9 +130,9 @@ def plots():
         with open(path, 'r') as f:
             stats = json.load(f)

-        returns = stats['per_step_returns']
-        ni = len(stats['per_step_returns'])
-        total = len(stats['per_step_returns'][0])
+        returns = stats['rewards']
+        ni = len(stats['rewards'])
+        total = len(stats['rewards'][0])

         nc = int(total / CYCLE)
diff --git a/analysis/rewards.py b/analysis/rewards.py
index cbecc09..c55428b 100644
--- a/analysis/rewards.py
+++ b/analysis/rewards.py
@@ -25,11 +25,6 @@ def get_arguments():
         """
     )

-    # TODO: validate against existing networks
-    parser.add_argument('period', type=str, nargs='?',
-                        choices=('cycles', 'episodes'),
-                        default='episodes', help='Cycle or episode')
-
     parser.add_argument('--type', '-t', dest='db_type', nargs='?',
                         choices=('evaluate', 'train'),
                         default='evaluate',
@@ -50,7 +45,6 @@ def str2bool(v):

 if __name__ == '__main__':
     parser = get_arguments()
-    period = parser.period
     db_type = parser.db_type

     if db_type == 'evaluate':
@@ -76,18 +70,11 @@
         with open(path, 'r') as f:
             data = json.load(f)

-        num_iter = len(data['per_step_returns'])
-        num_steps = len(data['per_step_returns'][0])
-
-        if period == 'cycles':
-            p = cycle_time
-        elif period == 'episodes':
-            p = num_steps
-        else:
-            raise ValueError(f'period<{period}> not recognized!')
+        num_iter = len(data['rewards'])
+        num_steps = len(data['rewards'][0])

         if nf == 0:
-            N = int((num_steps * num_iter) / p)
+            N = int((num_steps * num_iter) / cycle_time)
             rets = np.zeros((N,), dtype=float)
             vels = np.zeros((N,), dtype=float)
             vehs = np.zeros((N,), dtype=float)
@@ -95,43 +82,30 @@
         for i in range(num_iter):
-            if period == 'episodes':
-                _vels = data["velocities"][i]
-                _rets = data["mean_returns"][i]
-                _vehs = data["vehicles"][i]
-
-                vels[i] = (nf * vels[i] + _vels) / (nf + 1)
-                rets[i] = (nf * rets[i] + _rets) / (nf + 1)
-                vehs[i] = (nf * vehs[i] + _vehs) / (nf + 1)
-            else:
-
-                _rets = data['per_step_returns'][i]
-                _vels = data['per_step_velocities'][i]
-                _vehs = data['per_step_vehs'][i]
-                _acts = np.array(data["rl_actions"][i])
-                for t in range(0, num_steps, cycle_time):
-
-                    cycle = int(i * (num_steps / cycle_time) + t / cycle_time)
-                    ind = slice(t, t + cycle_time)
-                    vels[cycle] = \
-                        (nf * vels[cycle] + np.mean(_vels[ind])) / (nf + 1)
-                    rets[cycle] = \
-                        (nf * rets[cycle] + np.sum(_rets[ind])) / (nf + 1)
-                    vehs[cycle] = \
-                        (nf * vehs[cycle] + np.mean(_vehs[ind])) / (nf + 1)
-                    # 0x02 per_step_actions
-                    if len(_acts) == num_steps:
-                        acts[cycle, nf] = \
-                            round(np.array(_acts[ind]).mean())
-                    else:
-                        # 0x03 actions per decision
-                        acts[cycle, nf] = _acts[cycle]
+            _rets = data['rewards'][i]
+            _vels = data['velocities'][i]
+            _vehs = data['vehicles'][i]
+            _acts = np.array(data["rl_actions"][i])
+            for t in range(0, num_steps, cycle_time):
+
+                cycle = int(i * (num_steps / cycle_time) + t / cycle_time)
+                ind = slice(t, t + cycle_time)
+                vels[cycle] = \
+                    (nf * vels[cycle] + np.mean(_vels[ind])) / (nf + 1)
+                rets[cycle] = \
+                    (nf * rets[cycle] + np.sum(_rets[ind])) / (nf + 1)
+                vehs[cycle] = \
+                    (nf * vehs[cycle] + np.mean(_vehs[ind])) / (nf + 1)
+                # 0x02: actions logged once per simulation step
+                if len(_acts) == num_steps:
+                    acts[cycle, nf] = \
+                        round(np.array(_acts[ind]).mean())
+                else:
+                    # 0x03: actions logged once per decision (cycle)
+                    acts[cycle, nf] = _acts[cycle]

         _, ax1 = plt.subplots()
-        if period == 'cycles':
-            ax1.set_xlabel(f'Cycles ({cycle_time} sec)')
-        else:
-            ax1.set_xlabel(f'Episodes ({num_steps} sec)')
+        ax1.set_xlabel(f'Cycles ({cycle_time} sec)')

         color = 'tab:blue'
         ax1.set_ylabel('Avg. speed', color=color)
@@ -151,16 +125,13 @@
         color = 'tab:cyan'

         _, ax1 = plt.subplots()
-        if period == 'cycles':
-            ax1.set_xlabel(f'Cycles ({cycle_time} sec)')
-        else:
-            ax1.set_xlabel(f'Episodes ({num_steps} sec)')
+        ax1.set_xlabel(f'Cycles ({cycle_time} sec)')

         ax1.set_ylabel('Avg. Reward per Cycle', color=color)
         ax2.tick_params(axis='y', labelcolor=color)
         ax1.plot(rets, color=color)
         plt.title(f'{phase_split}:avg. cycle return\n({db_type}, n={num_dbs})')
-        plt.savefig(f'{files_dir}{phase_split}_{db_type}_rewards_{period}.png')
+        plt.savefig(f'{files_dir}{phase_split}_{db_type}_rewards.png')
         plt.show()

         # optimal action
@@ -168,11 +139,7 @@
         optact = 0.0
         _, ax1 = plt.subplots()
         color = 'tab:orange'
-        if period == 'cycles':
-            ax1.set_xlabel(f'Cycles ({cycle_time} sec)')
-        else:
-            ax1.set_xlabel(f'Episodes ({num_steps} sec)')
-
+        ax1.set_xlabel(f'Cycles ({cycle_time} sec)')
         ax1.set_ylabel('ratio optimal action')

         cumacts = np.cumsum(acts == optact, axis=0)
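The aggregation in rewards.py folds each database file into a running mean: with nf files already folded in, the update is mean = (nf * mean + x) / (nf + 1). A self-contained check of that recurrence, with invented samples:

    import numpy as np

    samples = [4.0, 8.0, 6.0]
    mean = 0.0
    for nf, x in enumerate(samples):
        # after each update, mean equals np.mean(samples[:nf + 1])
        mean = (nf * mean + x) / (nf + 1)
    assert np.isclose(mean, np.mean(samples))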
diff --git a/ilurl/core/experiment.py b/ilurl/core/experiment.py
index d69f5dc..75dfe4e 100755
--- a/ilurl/core/experiment.py
+++ b/ilurl/core/experiment.py
@@ -90,6 +90,7 @@ def __init__(self, env, dir_path=EMISSION_PATH, train=True, policies=None):
         self.train = train
         self.dir_path = dir_path
         self.policies = policies
+        self.cycle = getattr(env, 'cycle_time', None)

         logging.info(" Starting experiment {} at {}".format(
             env.network.name, str(datetime.datetime.utcnow())))
@@ -150,50 +151,49 @@ def run(
             def rl_actions(*_):
                 return None

-        # duration flags where in the current phase
-        # the syncronous agent is.
-        is_synch = hasattr(self.env, "duration")
-        rets = []
-        mean_rets = []
-        ret_lists = []
         vels = []
         vehs = []
-        mean_vels = []
-        vels_lists = []
-        mean_vehs = []
-        veh_lists = []
-        std_vels = []
-        outflows = []
         observation_spaces = []
-        actions_lists = []
+        actions = []
+        rewards = []
+
         for i in range(num_runs):
-            vel = np.zeros(num_steps)
-            veh = np.zeros(num_steps)
             logging.info("Iter #" + str(i))
-            ret = 0
-            ret_list = []
             actions_list = []
+            vel_list = []
             veh_list = []
+            rew_list = []
+            act_list = []
+            obs_list = []
             state = self.env.reset()
+            veh_i = []
+            vel_i = []

             for j in tqdm(range(num_steps)):
                 state, reward, done, _ = self.env.step(rl_actions(state))
-                speeds = self.env.k.vehicle.get_speed(
-                    self.env.k.vehicle.get_ids()
+                veh_i.append(len(self.env.k.vehicle.get_ids()))
+                vel_i.append(
+                    np.nanmean(self.env.k.vehicle.get_speed(
+                        self.env.k.vehicle.get_ids())
+                    )
                 )
-                vel[j] = round(np.nanmean(speeds), 2)
-                veh[j] = len(speeds)
-                ret += reward if not(np.isnan(reward)) else 0
-                ret_list.append(round(reward, 2))
+                if self.cycle is not None:
+                    if self.env.duration == 0.0:
+                        obs_list.append(
+                            list(self.env.get_observation_space()))
+                        act_list.append(
+                            getattr(self.env, 'rl_action', None))
+                        rew_list.append(reward)

-                if is_synch and self.env.duration == 0.0 and j > 0:
-                    observation_space = list(self.env.get_observation_space())
-                    observation_spaces.append(observation_space)
-                    if hasattr(self.env, 'rl_action'):
-                        actions_list.append(list(self.env.rl_action))
+                        veh_list.append(sum(veh_i) / self.cycle)
+                        vel_list.append(sum(vel_i) / self.cycle)
+                        # reset the per-cycle accumulators
+                        veh_i = []
+                        vel_i = []

                 if done:
                     break
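The logging above fires once per traffic-light cycle, when self.env.duration wraps back to 0.0. A toy sketch of the pattern, where the cycle length, step total, and vehicle count are invented stand-ins for the environment's values:

    cycle_time = 90                      # assumed cycle length in seconds
    veh_i, veh_list = [], []
    for j in range(270):                 # three cycles of one-second steps
        duration = (j + 1) % cycle_time  # stand-in for self.env.duration
        veh_i.append(10)                 # stand-in for len(...get_ids())
        if duration == 0.0:              # cycle boundary reached
            veh_list.append(sum(veh_i) / cycle_time)
            veh_i = []                   # reset the per-cycle accumulator
    # veh_list == [10.0, 10.0, 10.0]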
@@ -212,38 +209,32 @@
             if i < len(self.policies):
                 self.env.Q = self.policies[i]

-            ret = round(ret, 2)
-            rets.append(ret)
-            vels.append(vel.tolist())
-            vehs.append(veh.tolist())
-
-            mean_rets.append(round(np.nanmean(ret_list), 2))
-            ret_lists.append(ret_list)
-            actions_lists.append(actions_list)
-
-            veh_list.append(vehs)
-            vel_list.append(vels)
-            mean_vels.append(round(np.nanmean(vel), 2))
-            mean_vehs.append(np.mean(veh))
-            outflows.append(self.env.k.vehicle.get_outflow_rate(int(500)))
-            std_vels.append(round(np.nanstd(vel), 2))
+
+            vels.append(vel_list)
+            vehs.append(veh_list)
+            observation_spaces.append(obs_list)
+            actions.append(act_list)
+            rewards.append(rew_list)
+
             print(f"""
-            Round {i}\treturn: {sum(ret_list):0.2f}\tavg speed:{mean_vels[-1]}
+            Round {i}\treturn: {sum(rew_list):0.2f}\tavg speed:{np.nanmean(vel_list):0.2f}
            """)

+            if show_plot:
+                _rets = [np.sum(rew_list) for rew_list in rewards]
+                _vels = [np.nanmean(vel_list) for vel_list in vels]
+                self.ax1.plot(_rets, 'c-')
+                self.ax2.plot(_vels, 'b-')
+                plt.draw()
+                plt.pause(0.01)
+
         info_dict["id"] = self.env.network.name
-        info_dict["returns"] = rets
-        info_dict["velocities"] = mean_vels
-        info_dict["mean_returns"] = mean_rets
-        info_dict["per_step_returns"] = ret_lists
-        info_dict["outflows"] = round(np.mean(outflows).astype(float), 2)
-        info_dict["mean_outflows"] = round(np.mean(outflows).astype(float), 2)
-
-        info_dict["per_step_velocities"] = vels
-        info_dict["per_step_vehs"] = vehs
+        info_dict["rewards"] = rewards
+        info_dict["velocities"] = vels
+        info_dict["vehicles"] = vehs
         info_dict["observation_spaces"] = observation_spaces
-        info_dict["rl_actions"] = actions_lists
-        info_dict["vehicles"] = mean_vehs
+        info_dict["rl_actions"] = actions

-        print("Average, std return: {}, {}".format(np.nanmean(rets), np.nanstd(rets)))
+        _rets = [np.sum(rew_list) for rew_list in rewards]
+        print("Average, std return: {}, {}".format(np.nanmean(_rets), np.nanstd(_rets)))
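Downstream scripts read these keys back from the serialized .info.json databases; a sketch of the consumer side, matching the evaluation.py change above (the file name is illustrative):

    import json

    import numpy as np

    with open('intersection.0.eval.info.json') as f:  # illustrative path
        db = json.load(f)

    returns = np.array(db['rewards']).sum(axis=1)  # one return per run
    print(returns.mean(), returns.std())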