Update aggregate statistics unit to cycle (#4)
gsavarela committed Feb 27, 2020
1 parent b5753f5 commit 0f0be2e
Showing 5 changed files with 84 additions and 171 deletions.
16 changes: 8 additions & 8 deletions analysis/evaluation.py
@@ -7,7 +7,6 @@
 import os
 import json
 from glob import glob
-import pdb

 # third-party libs
 import matplotlib.pyplot as plt
@@ -27,20 +26,21 @@
 file_paths = sorted(glob(f"{path}*.[0-9].eval.info.json"))

 returns = []
 for file_path in file_paths:
     with open(file_path, 'r') as f:
         db = json.load(f)
-        returns.append(db['returns'])
-    t = len(db['per_step_returns'][0])
+        ret = np.array(db['rewards']).sum(axis=1)
+        returns.append(ret)
+    t = len(db['rewards'][0])

-returns = np.array(returns)
-# convert rewards to returns
+returns = np.concatenate(returns)
 y = np.mean(returns, axis=0)
 err = np.std(returns, axis=0)
 y_error = [err, err]
 num_iterations = len(y)

 x = [int(t / 90) * i for i in range(num_iterations)]

 # Must savefig.
 label = f'{config_dir[:2]}x{config_dir[2:]}'
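In plain terms, the loader now derives returns from raw per-step rewards instead of reading a precomputed 'returns' key. A minimal standalone sketch of the new computation, assuming db['rewards'] is shaped (num_rollouts, num_steps) in each file (the function name and return value here are illustrative, not part of the commit):

import json
from glob import glob

import numpy as np

def load_pooled_returns(path):
    """Sketch: pool per-rollout returns from every *.eval.info.json file."""
    file_paths = sorted(glob(f"{path}*.[0-9].eval.info.json"))
    returns = []
    for file_path in file_paths:
        with open(file_path, 'r') as f:
            db = json.load(f)
        # Summing per-step rewards over axis=1 collapses each rollout
        # to a single return.
        returns.append(np.array(db['rewards']).sum(axis=1))
    # np.concatenate (rather than np.array) chains the rollouts from all
    # files into one flat sample, so mean/std weight every rollout equally.
    returns = np.concatenate(returns)
    return returns.mean(), returns.std()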
44 changes: 0 additions & 44 deletions analysis/ks_2samp.py

This file was deleted.

6 changes: 3 additions & 3 deletions analysis/leaderboard.py
@@ -130,9 +130,9 @@ def plots():
     with open(path, 'r') as f:
         stats = json.load(f)

-    returns = stats['per_step_returns']
-    ni = len(stats['per_step_returns'])
-    total = len(stats['per_step_returns'][0])
+    returns = stats['rewards']
+    ni = len(stats['rewards'])
+    total = len(stats['rewards'][0])
     nc = int(total / CYCLE)

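The script now reads the renamed 'rewards' key and, as before, regroups the per-step series into cycles via nc = int(total / CYCLE). A toy sketch of that regrouping, assuming CYCLE divides the episode length evenly (CYCLE = 90 and the array sizes are made up for the example):

import numpy as np

CYCLE = 90                           # assumed cycle length in steps

# One row per training iteration, one column per simulation step.
rewards = np.zeros((10, 900))        # stand-in for stats['rewards']
ni, total = len(rewards), len(rewards[0])
nc = int(total / CYCLE)              # 10 cycles per iteration

# Group steps into cycles, then sum rewards within each cycle:
per_cycle = np.asarray(rewards).reshape(ni, nc, CYCLE).sum(axis=2)
assert per_cycle.shape == (ni, nc)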
89 changes: 28 additions & 61 deletions analysis/rewards.py
@@ -25,11 +25,6 @@ def get_arguments():
         """
     )

-    # TODO: validate against existing networks
-    parser.add_argument('period', type=str, nargs='?',
-                        choices=('cycles', 'episodes'),
-                        default='episodes', help='Cycle or episode')
-
     parser.add_argument('--type', '-t', dest='db_type', nargs='?',
                         choices=('evaluate', 'train'),
                         default='evaluate',
@@ -50,7 +45,6 @@ def str2bool(v):

 if __name__ == '__main__':
     parser = get_arguments()
-    period = parser.period

     db_type = parser.db_type
     if db_type == 'evaluate':
@@ -76,62 +70,42 @@ def str2bool(v):
     with open(path, 'r') as f:
         data = json.load(f)

-    num_iter = len(data['per_step_returns'])
-    num_steps = len(data['per_step_returns'][0])
-
-    if period == 'cycles':
-        p = cycle_time
-    elif period == 'episodes':
-        p = num_steps
-    else:
-        raise ValueError(f'period<{period}> not recognized!')
+    num_iter = len(data['rewards'])
+    num_steps = len(data['rewards'][0])

     if nf == 0:
-        N = int((num_steps * num_iter) / p)
+        N = int((num_steps * num_iter) / cycle_time)
         rets = np.zeros((N,), dtype=float)
         vels = np.zeros((N,), dtype=float)
         vehs = np.zeros((N,), dtype=float)
         acts = np.zeros((N, num_dbs), dtype=int)

     for i in range(num_iter):

-        if period == 'episodes':
-            _vels = data["velocities"][i]
-            _rets = data["mean_returns"][i]
-            _vehs = data["vehicles"][i]
-
-            vels[i] = (nf * vels[i] + _vels) / (nf + 1)
-            rets[i] = (nf * rets[i] + _rets) / (nf + 1)
-            vehs[i] = (nf * vehs[i] + _vehs) / (nf + 1)
-        else:
-
-            _rets = data['per_step_returns'][i]
-            _vels = data['per_step_velocities'][i]
-            _vehs = data['per_step_vehs'][i]
-            _acts = np.array(data["rl_actions"][i])
-            for t in range(0, num_steps, cycle_time):
-
-                cycle = int(i * (num_steps / cycle_time) + t / cycle_time)
-                ind = slice(t, t + cycle_time)
-                vels[cycle] = \
-                    (nf * vels[cycle] + np.mean(_vels[ind])) / (nf + 1)
-                rets[cycle] = \
-                    (nf * rets[cycle] + np.sum(_rets[ind])) / (nf + 1)
-                vehs[cycle] = \
-                    (nf * vehs[cycle] + np.mean(_vehs[ind])) / (nf + 1)
-                # 0x02 per_step_actions
-                if len(_acts) == num_steps:
-                    acts[cycle, nf] = \
-                        round(np.array(_acts[ind]).mean())
-                else:
-                    # 0x03 actions per decision
-                    acts[cycle, nf] = _acts[cycle]
+        _rets = data['rewards'][i]
+        _vels = data['velocities'][i]
+        _vehs = data['per_step_vehs'][i]
+        _acts = np.array(data["rl_actions"][i])
+        for t in range(0, num_steps, cycle_time):
+
+            cycle = int(i * (num_steps / cycle_time) + t / cycle_time)
+            ind = slice(t, t + cycle_time)
+            vels[cycle] = \
+                (nf * vels[cycle] + np.mean(_vels[ind])) / (nf + 1)
+            rets[cycle] = \
+                (nf * rets[cycle] + np.sum(_rets[ind])) / (nf + 1)
+            vehs[cycle] = \
+                (nf * vehs[cycle] + np.mean(_vehs[ind])) / (nf + 1)
+            # 0x02 per_step_actions
+            if len(_acts) == num_steps:
+                acts[cycle, nf] = \
+                    round(np.array(_acts[ind]).mean())
+            else:
+                # 0x03 actions per decision
+                acts[cycle, nf] = _acts[cycle]

     _, ax1 = plt.subplots()
-    if period == 'cycles':
-        ax1.set_xlabel(f'Cycles ({cycle_time} sec)')
-    else:
-        ax1.set_xlabel(f'Episodes ({num_steps} sec)')
+    ax1.set_xlabel(f'Cycles ({cycle_time} sec)')

     color = 'tab:blue'
     ax1.set_ylabel('Avg. speed', color=color)
@@ -151,28 +125,21 @@

     color = 'tab:cyan'
     _, ax1 = plt.subplots()
-    if period == 'cycles':
-        ax1.set_xlabel(f'Cycles ({cycle_time} sec)')
-    else:
-        ax1.set_xlabel(f'Episodes ({num_steps} sec)')
+    ax1.set_xlabel(f'Cycles ({cycle_time} sec)')

     ax1.set_ylabel('Avg. Reward per Cycle', color=color)
     ax2.tick_params(axis='y', labelcolor=color)
     ax1.plot(rets, color=color)
     plt.title(f'{phase_split}:avg. cycle return\n({db_type}, n={num_dbs})')
-    plt.savefig(f'{files_dir}{phase_split}_{db_type}_rewards_{period}.png')
+    plt.savefig(f'{files_dir}{phase_split}_{db_type}_rewards.png')
     plt.show()

     # optimal action
     # TODO: allow for customize action
     optact = 0.0
     _, ax1 = plt.subplots()
     color = 'tab:orange'
-    if period == 'cycles':
-        ax1.set_xlabel(f'Cycles ({cycle_time} sec)')
-    else:
-        ax1.set_xlabel(f'Episodes ({num_steps} sec)')
-
+    ax1.set_xlabel(f'Cycles ({cycle_time} sec)')
     ax1.set_ylabel('ratio optimal action')

     cumacts = np.cumsum(acts == optact, axis=0)
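Two idioms in the surviving branch deserve a note: ind = slice(t, t + cycle_time) carves one cycle out of a per-step array, and (nf * x + x_new) / (nf + 1) is an incremental mean that folds each new file into the running average without holding all files in memory. A self-contained sketch of both, with illustrative sizes:

import numpy as np

cycle_time = 90                       # illustrative; matches the x-label text
num_steps = 900
_rets = np.random.rand(num_steps)     # stand-in for data['rewards'][i]

acc = np.zeros(num_steps // cycle_time)
for nf in range(3):                   # pretend we fold in three files
    for t in range(0, num_steps, cycle_time):
        cycle = t // cycle_time
        ind = slice(t, t + cycle_time)            # one cycle of steps
        # Incremental mean: after nf files, acc holds their average, so
        # weighting by nf and renormalizing keeps it an exact mean.
        acc[cycle] = (nf * acc[cycle] + np.sum(_rets[ind])) / (nf + 1)

# Folding the same file in repeatedly leaves the mean unchanged:
assert np.allclose(acc, [np.sum(_rets[t:t + cycle_time])
                         for t in range(0, num_steps, cycle_time)])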
100 changes: 45 additions & 55 deletions ilurl/core/experiment.py
@@ -90,6 +90,7 @@ def __init__(self, env, dir_path=EMISSION_PATH, train=True, policies=None):
         self.train = train
         self.dir_path = dir_path
         self.policies = policies
+        self.cycle = getattr(env, 'cycle_time', None)

         logging.info(" Starting experiment {} at {}".format(
             env.network.name, str(datetime.datetime.utcnow())))
@@ -150,50 +151,46 @@ def run(

         def rl_actions(*_):
             return None
-        # duration flags where in the current phase
-        # the syncronous agent is.
-        is_synch = hasattr(self.env, "duration")

-        rets = []
-        mean_rets = []
-        ret_lists = []
         vels = []
         vehs = []
-        mean_vels = []
-        vels_lists = []
-        mean_vehs = []
-        veh_lists = []
-        std_vels = []
         outflows = []
         observation_spaces = []
-        actions_lists = []
+        actions = []
+        rewards = []

         for i in range(num_runs):
-            vel = np.zeros(num_steps)
-            veh = np.zeros(num_steps)
             logging.info("Iter #" + str(i))
-            ret = 0
-            ret_list = []
-            actions_list = []

+            vel_list = []
+            veh_list = []
+            rew_list = []
+            act_list = []
+            obs_list = []
             state = self.env.reset()

+            veh_i = []
+            vel_i = []
             for j in tqdm(range(num_steps)):
                 state, reward, done, _ = self.env.step(rl_actions(state))
-                speeds = self.env.k.vehicle.get_speed(
-                    self.env.k.vehicle.get_ids()
-                )
-                vel[j] = round(np.nanmean(speeds), 2)
-                veh[j] = len(speeds)
-
-                ret += reward if not(np.isnan(reward)) else 0
-                ret_list.append(round(reward, 2))
+                veh_i.append(len(self.env.k.vehicle.get_ids()))
+                vel_i.append(
+                    np.nanmean(self.env.k.vehicle.get_speed(
+                        self.env.k.vehicle.get_ids()
+                    ))
+                )
+                if self.cycle is not None:
+                    if self.env.duration == 0.0:
+                        obs_list.append(
+                            list(self.env.get_observation_space()))
+                        act_list.append(
+                            getattr(self.env, 'rl_action', None))
+                        rew_list.append(reward)

-                if is_synch and self.env.duration == 0.0 and j > 0:
-                    observation_space = list(self.env.get_observation_space())
-                    observation_spaces.append(observation_space)
-                    if hasattr(self.env, 'rl_action'):
-                        actions_list.append(list(self.env.rl_action))
+                        veh_list.append(sum(veh_i) / self.cycle)
+                        vel_list.append(sum(vel_i) / self.cycle)
                 if done:
                     break

@@ -212,38 +209,31 @@ def rl_actions(*_):
             if i < len(self.policies):
                 self.env.Q = self.policies[i]

-            ret = round(ret, 2)
-            rets.append(ret)
-            vels.append(vel.tolist())
-            vehs.append(veh.tolist())
-
-            mean_rets.append(round(np.nanmean(ret_list), 2))
-            ret_lists.append(ret_list)
-            actions_lists.append(actions_list)
-
-            mean_vels.append(round(np.nanmean(vel), 2))
-            mean_vehs.append(np.mean(veh))
             outflows.append(self.env.k.vehicle.get_outflow_rate(int(500)))
-            std_vels.append(round(np.nanstd(vel), 2))

+            vels.append(vel_list)
+            vehs.append(veh_list)
+            observation_spaces.append(obs_list)
+            actions.append(act_list)
+            rewards.append(rew_list)

             print(f"""
-            Round {i}\treturn: {sum(ret_list):0.2f}\tavg speed:{mean_vels[-1]}
+            Round {i}\treturn: {sum(rew_list):0.2f}\tavg speed:{np.mean(vel_list)}
             """)

             if show_plot:
+                _rets = [np.sum(rew_list) for rew_list in rewards]
+                _vels = [np.nanmean(vel_list) for vel_list in vels]
                 self.ax1.plot(_rets, 'c-')
                 self.ax2.plot(_vels, 'b-')
                 plt.draw()
                 plt.pause(0.01)

         info_dict["id"] = self.env.network.name
-        info_dict["returns"] = rets
-        info_dict["velocities"] = mean_vels
-        info_dict["mean_returns"] = mean_rets
-        info_dict["per_step_returns"] = ret_lists
-        info_dict["outflows"] = round(np.mean(outflows).astype(float), 2)
+        info_dict["mean_outflows"] = round(np.mean(outflows).astype(float), 2)

-        info_dict["per_step_velocities"] = vels
-        info_dict["per_step_vehs"] = vehs
+        info_dict["rewards"] = rewards
+        info_dict["velocities"] = vels
+        info_dict["vehicles"] = vehs
         info_dict["observation_spaces"] = observation_spaces
-        info_dict["rl_actions"] = actions_lists
-        info_dict["vehicles"] = mean_vehs
+        info_dict["rl_actions"] = actions

         print("Average, std return: {}, {}".format(np.nanmean(rets),
                                                    np.nanstd(rets)))
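The rewritten run() loop keys all bookkeeping to self.env.duration, which the synchronous environments wrap back to 0.0 at each signal-cycle boundary. A schematic of that per-cycle aggregation, with the buffer reset made explicit (the function name, the bare env argument, and the flushing of veh_i/vel_i are assumptions, not shown in the diff):

import numpy as np

def collect_cycle_stats(env, num_steps):
    """Sketch of per-cycle aggregation as in the updated Experiment.run()."""
    cycle = getattr(env, 'cycle_time', None)
    rew_list, vel_list, veh_list = [], [], []
    veh_i, vel_i = [], []             # per-step buffers for the current cycle
    state = env.reset()
    for j in range(num_steps):
        state, reward, done, _ = env.step(None)
        ids = env.k.vehicle.get_ids()
        veh_i.append(len(ids))
        vel_i.append(np.nanmean(env.k.vehicle.get_speed(ids)))
        if cycle is not None and env.duration == 0.0:
            # Cycle boundary: record the reward and the per-step averages
            # accumulated over the last cycle, then start fresh buffers.
            rew_list.append(reward)
            veh_list.append(sum(veh_i) / cycle)
            vel_list.append(sum(vel_i) / cycle)
            veh_i, vel_i = [], []
        if done:
            break
    return rew_list, vel_list, veh_list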
