Update to GPflow v2 and include safety extensions (#45)
* No special subsequent calls to scipy optimiser
* Added safety-specific code in safe_pilco folder
* Added master_alg, experiments and post_process for more systematic evaluation (still under development)
* First 4 experiments run successfully
* Linear cars done too
* Syncing utils
* Cleaned main repo
* Re-enabled rendering in examples, deleted a few unnecessary things
* Running safe_cars when run directly, deleted bash script
* Updated requirements/minor fixes
* Updated mgpr to 2.0 (the v1 → v2 idiom changes are sketched below)
* Updated smgpr, test_sparse_predictions passes
* Test cascade passes, updated pilco, rewards and controllers
* Fixed test_rewards
* Fixed controllers and test_controllers
* Fixed rbf controller bug. Mountain car and inverted pendulum run successfully with occasional numerical errors
* Got rid of the autoflow wrapper functions, which are no longer useful
* Updated inverted pendulum and pendulum swing-up examples. Fixed noise variance for RBF controllers. Modified mgpr priors
* Updated swimmer and double pendulum. Fix in rbf controllers.
* Minor fixes, ready to test all plain pilco envs
* Small updates to improve numerical stability and identify operations that could cause failures
* Updated safe pilco - linear cars seem to work fine now
* Cleaned up unnecessary comments and logging
* Updated requirements
* Added matplotlib dependency, apparently required by TensorFlow (see Travis output on this branch before this commit)
* Remove comments
* Various fixes
* Change coverage settings
* Attempt to clean up swimmer
* Remove safe pilco include
* Remove self.t from LinearController
* Move examples, utils to examples folder
* Remove utils import
* Updated imports in safe pilco examples; renamed safe-pilco-extension to safe_pilco_extension
* README update for the new version
* Update READMEs
* Update README

Co-authored-by: kyr-pol <[email protected]>
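The recurring theme in these changes is the GPflow v1 → v2 migration: sessions and autoflow wrappers disappear, parameters are fixed with set_trainable, and the scipy optimiser wraps a training-loss closure directly. A minimal sketch of those idioms (illustrative only, not code from this commit):

```python
# Hypothetical sketch of the GPflow v1 -> v2 idioms this commit adopts;
# the model and data here are toy examples, not from the repository.
import numpy as np
import gpflow
from gpflow import set_trainable

X = np.random.rand(20, 2)
Y = np.sin(X[:, :1]) + 0.01 * np.random.randn(20, 1)

# v2: models take data directly; no sessions, no autoflow wrappers.
model = gpflow.models.GPR((X, Y), kernel=gpflow.kernels.SquaredExponential())

# v2: fix a parameter with assign + set_trainable.
model.likelihood.variance.assign(0.05)
set_trainable(model.likelihood.variance, False)

# v2: the Scipy optimizer minimises a loss closure over trainable variables.
opt = gpflow.optimizers.Scipy()
opt.minimize(model.training_loss, model.trainable_variables)
```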
Showing 29 changed files with 994 additions and 501 deletions.
New file: the LinearCars environment used by the safe PILCO experiments (34 lines added).

```python
import numpy as np
from gym import spaces
from gym.core import Env


class LinearCars(Env):
    def __init__(self):
        self.action_space = spaces.Box(low=-0.4, high=0.4, shape=(1,))
        self.observation_space = spaces.Box(low=-100, high=100, shape=(4,))
        self.M = 1  # car mass [kg]
        self.b = 0.001  # friction coefficient [N s/m]
        self.Dt = 0.50  # timestep [s]

        # Discrete-time linear dynamics for two cars:
        # state = [x1, v1, x2, v2]; only car 1 is actuated.
        self.A = np.array([[0, self.Dt, 0, 0],
                           [0, -self.b * self.Dt / self.M, 0, 0],
                           [0, 0, 0, self.Dt],
                           [0, 0, 0, 0]])

        self.B = np.array([0, self.Dt / self.M, 0, 0]).reshape((4, 1))

        self.initial_state = np.array([-6.0, 1.0, -5.0, 1.0]).reshape((4, 1))

    def step(self, action):
        # Euler update: x <- x + A x + B u (A already contains the Dt factors).
        self.state += self.A @ self.state + self.B * action
        # 0.1 * np.random.normal(scale=[[1e-3], [1e-3], [1e-3], [0.001]], size=(4,1))

        # Negative reward while car 1 is left of the origin, positive after.
        if self.state[0] < 0:
            reward = -1
        else:
            reward = 1
        return np.reshape(self.state[:], (4,)), reward, False, {}

    def reset(self):
        self.state = self.initial_state + 0.03 * np.random.normal(size=(4, 1))
        return np.reshape(self.state[:], (4,))
```
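A quick smoke test of the environment (a sketch; the random actions and horizon here are illustrative, not from the commit):

```python
# Roll the environment forward with random actions to check shapes and rewards.
env = LinearCars()
obs = env.reset()
for _ in range(25):
    action = env.action_space.sample()  # random control in [-0.4, 0.4]
    obs, reward, done, info = env.step(action)
print(obs)  # final [x1, v1, x2, v2]
```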
New file: a PILCO run on MountainCarContinuous-v0 with state normalisation (69 lines added).

```python
import numpy as np
import gym
import tensorflow as tf
from gpflow import set_trainable
from pilco.models import PILCO
from pilco.controllers import RbfController, LinearController
from pilco.rewards import ExponentialReward
from utils import policy, rollout, Normalised_Env

np.random.seed(0)

SUBS = 5
T = 25
env = gym.make('MountainCarContinuous-v0')
# Initial random rollouts to generate a dataset
X1, Y1, _, _ = rollout(env=env, pilco=None, random=True, timesteps=T, SUBS=SUBS, render=True)
for i in range(1, 5):
    X1_, Y1_, _, _ = rollout(env=env, pilco=None, random=True, timesteps=T, SUBS=SUBS, render=True)
    X1 = np.vstack((X1, X1_))
    Y1 = np.vstack((Y1, Y1_))
env.close()

# Normalise the state space with the mean/std of the random rollouts.
env = Normalised_Env('MountainCarContinuous-v0', np.mean(X1[:, :2], 0), np.std(X1[:, :2], 0))
X = np.zeros(X1.shape)
X[:, :2] = np.divide(X1[:, :2] - np.mean(X1[:, :2], 0), np.std(X1[:, :2], 0))
X[:, 2] = X1[:, -1]  # control inputs are not normalised
Y = np.divide(Y1, np.std(X1[:, :2], 0))

state_dim = Y.shape[1]
control_dim = X.shape[1] - state_dim
m_init = np.transpose(X[0, :-1, None])
S_init = 0.5 * np.eye(state_dim)
controller = RbfController(state_dim=state_dim, control_dim=control_dim, num_basis_functions=25)

# Reward target is the goal state (position 0.5, velocity 0.0) in normalised coordinates.
R = ExponentialReward(state_dim=state_dim,
                      t=np.divide([0.5, 0.0] - env.m, env.std),
                      W=np.diag([0.5, 0.1]))
pilco = PILCO((X, Y), controller=controller, horizon=T, reward=R, m_init=m_init, S_init=S_init)

best_r = 0
all_Rs = np.zeros((X.shape[0], 1))
for i in range(len(all_Rs)):
    all_Rs[i, 0] = R.compute_reward(X[i, None, :-1], 0.001 * np.eye(state_dim))[0]

ep_rewards = np.zeros((len(X) // T, 1))
for i in range(len(ep_rewards)):
    ep_rewards[i] = sum(all_Rs[i * T: i * T + T])

# Fix the likelihood variance of every dynamics-model GP.
for model in pilco.mgpr.models:
    model.likelihood.variance.assign(0.05)
    set_trainable(model.likelihood.variance, False)

r_new = np.zeros((T, 1))
for rollouts in range(5):
    pilco.optimize_models()
    pilco.optimize_policy(maxiter=100, restarts=3)
    X_new, Y_new, _, _ = rollout(env=env, pilco=pilco, timesteps=T, SUBS=SUBS, render=True)

    # Compare the realised return with PILCO's predicted return.
    for i in range(len(X_new)):
        r_new[i, 0] = R.compute_reward(X_new[i, None, :-1], 0.001 * np.eye(state_dim))[0]
    total_r = sum(r_new)
    _, _, r = pilco.predict(m_init, S_init, T)

    print("Total ", total_r, " Predicted: ", r)
    X = np.vstack((X, X_new)); Y = np.vstack((Y, Y_new))
    all_Rs = np.vstack((all_Rs, r_new)); ep_rewards = np.vstack((ep_rewards, np.reshape(total_r, (1, 1))))
    pilco.mgpr.set_data((X, Y))
```
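The Normalised_Env wrapper imported from utils is not shown in this diff view. A minimal sketch of what such a wrapper would do, assuming it stores the supplied mean/std as env.m and env.std (both attributes are read by the reward construction above); the real utils.Normalised_Env in the repository may differ:

```python
# Hypothetical sketch of a state-normalising gym wrapper; illustrative only.
import gym
import numpy as np

class Normalised_Env(gym.Env):
    def __init__(self, env_id, m, std):
        self.env = gym.make(env_id)
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.m = m      # per-dimension state mean, read as env.m above
        self.std = std  # per-dimension state std, read as env.std above

    def _normalise(self, obs):
        return (obs - self.m) / self.std

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        return self._normalise(obs), reward, done, info

    def reset(self):
        return self._normalise(self.env.reset())

    def render(self, mode='human'):
        return self.env.render(mode=mode)
```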