From 6d074d99c4395b5a9d04817343d8eb7f7a7695e9 Mon Sep 17 00:00:00 2001
From: ikkamens <ikkamens@amazon.com>
Date: Sun, 14 Feb 2021 13:53:07 +0100
Subject: [PATCH] Speed up the policy optimization and reward computation by
 using @tf.function

---
 pilco/models/pilco.py | 1 +
 pilco/rewards.py      | 1 +
 2 files changed, 2 insertions(+)

diff --git a/pilco/models/pilco.py b/pilco/models/pilco.py
index b09e7cb..c9f66e9 100644
--- a/pilco/models/pilco.py
+++ b/pilco/models/pilco.py
@@ -115,6 +115,7 @@ def optimize_policy(self, maxiter=50, restarts=1):
     def compute_action(self, x_m):
         return self.controller.compute_action(x_m, tf.zeros([self.state_dim, self.state_dim], float_type))[0]
 
+    @tf.function
     def predict(self, m_x, s_x, n):
         loop_vars = [
             tf.constant(0, tf.int32),
diff --git a/pilco/rewards.py b/pilco/rewards.py
index aeb1085..c2f68e7 100644
--- a/pilco/rewards.py
+++ b/pilco/rewards.py
@@ -16,6 +16,7 @@ def __init__(self, state_dim, W=None, t=None):
         else:
             self.t = Parameter(np.zeros((1, state_dim)), trainable=False)
 
+    @tf.function
     def compute_reward(self, m, s):
         '''
         Reward function, calculating mean and variance of rewards, given