dev

ivukotic · Mar 26, 2020 · 9c60a98 · 9c60a98
1 parent 88dc0dc
commit 9c60a98
Show file tree

Hide file tree

Showing 12 changed files with 84 additions and 18 deletions.
diff --git a/gym_cache/__init__.py b/gym_cache/__init__.py
@@ -3,13 +3,13 @@
 
 logger = logging.getLogger(__name__)
 
-# discrete action 20TB cache
+# discrete action cache
 register(
     id='Cache-v0',
     entry_point='gym_cache.envs:CacheEnv',
     kwargs={
         'InputData': 'data/ANALY_MWT2_UCORE_processed',
-        'CacheSize': 20 * 1024 * 1024 * 1024
+        'CacheSize': 100 * 1024 * 1024 * 1024
     },
     # reward_threshold=1.0,
     max_episode_steps=20000000,

diff --git a/gym_cache/envs/cache_env.py b/gym_cache/envs/cache_env.py
@@ -16,9 +16,12 @@
 class CacheEnv(gym.Env):
 
     metadata = {'render.modes': ['human']}
-    actions_num = 1  # estimated probability that a file is in cache.
+    actions_num = 1  # best guess if the file is in the cache/should be kept in cache
 
     def __init__(self, InputData, CacheSize):
+
+        self.name = '100TB_LRU'
+
         self.accesses_filename = InputData + '.h5'
 
         self.load_access_data()
@@ -49,8 +52,7 @@ def __init__(self, InputData, CacheSize):
             high=np.array([maxes[0], maxes[1], maxes[2], maxes[3], maxes[4], maxes[5], maxes[6], 100]),
             dtype=np.int32
         )
-        print('environment loaded!')
-        print('cache size [kB]:', self.cache_size)
+        print('environment loaded!  cache size [kB]:', self.cache_size)
 
     def load_access_data(self):
         # last variable is the fileID.
@@ -61,8 +63,7 @@ def load_access_data(self):
 
     def save_monitoring_data(self):
         mdata = pd.DataFrame(self.monitoring, columns=['kB', 'cache size', 'cache hit', 'reward'])
-        # print(mdata)
-        mdata.to_hdf('monitoring.h5', key='monitoring', mode='w', complevel=1)
+        mdata.to_hdf('results/' + self.name + '.h5', key=self.name, mode='w', complevel=1)
 
     def seed(self, seed=None):
         self.np_random, seed = seeding.np_random(seed)

diff --git a/results/100TB_LRU.h5 b/results/100TB_LRU.h5
diff --git a/results/20TB_LRU.h5 b/results/20TB_LRU.h5
diff --git a/results/InfiniteCache_LRU.h5 b/results/InfiniteCache_LRU.h5
diff --git a/plots.py → results/plots.py b/plots.py → results/plots.py
@@ -2,12 +2,16 @@
 import matplotlib.pyplot as plt
 import pandas as pd
 # import numpy as np
+TB = 1024 * 1024 * 1024
 
-fn = 'monitoring.h5'
 df = None
-with pd.HDFStore(fn) as hdf:
-    print("keys in file:", fn, ':', hdf.keys())
-    df = hdf.select('monitoring')
+# name = 'InfiniteCache_LRU'
+# name = '20TB_LRU'
+name = '100TB_LRU'
+
+with pd.HDFStore(name + '.h5') as hdf:
+    print("keys in file:", name, ':', hdf.keys())
+    df = hdf.select(name)
     print("data loaded:", df.shape[0])
 
 print(df)
@@ -19,18 +23,22 @@
 df['data delivered'] = df['kB'].cumsum()
 del df['tmp']
 df['CHR data'] = df['ch_data'] / df['data delivered']
+df["cache size"] = df["cache size"] / TB
 print(df)
 f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, gridspec_kw={'hspace': 0.15})
 
-# f.suptitle('Cache hit rates')
+ax22 = ax2.twinx()
+f.suptitle(name)
 ax1.plot(df["CHR files"])
 ax1.plot(df["CHR data"])
 ax2.plot(df["reward"].cumsum())
-# ax2.plot(df["inst chr files"])
-# ax2.plot(df["inst chr data"])
+# ax22.plot(df["cache size"])
+# ax22.set_ylabel('cache fill [TB]', color='b')
+ax22.plot(df["reward"].rolling(5000).mean())
+ax22.set_ylabel('rolling reward', color='b')
 ax1.legend()
 ax2.legend()
 ax1.grid(True)
 ax2.grid(True)
 # plt.tight_layout()
-plt.savefig("CHR_env_.png")
+plt.savefig('plots/' + name + '.png')
diff --git a/results/plots/100TB_LRU.png b/results/plots/100TB_LRU.png
diff --git a/results/plots/20TB_LRU.png b/results/plots/20TB_LRU.png
diff --git a/results/plots/InfiniteCache_LRU.png b/results/plots/InfiniteCache_LRU.png
diff --git a/results/plots/combinations/combination.png b/results/plots/combinations/combination.png
diff --git a/results/plots_combination.py b/results/plots_combination.py
@@ -0,0 +1,50 @@
+"""Overlay the cache hit rates of several recorded cache runs in one figure.
+
+For every entry of ``names`` the monitoring table is read from <name>.h5
+(key <name>); the figure is saved to plots/combinations/combination.png.
+All paths are relative to the current working directory.
+"""
+
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Divisor that turns the 'cache size' column into TB (presumably stored in kB).
+TB = 1024 * 1024 * 1024
+names = ['20TB_LRU', '100TB_LRU', 'InfiniteCache_LRU']
+
+# Top panel: hit rate by file count; bottom panel: hit rate by data volume.
+f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, gridspec_kw={'hspace': 0.15})
+f.suptitle(' '.join(names))
+
+for name in names:
+    with pd.HDFStore(name + '.h5') as hdf:
+        print("keys:", hdf.keys())
+        df = hdf.select(name)
+        print("data loaded:", df.shape[0])
+    print(df)
+
+    # Running hit rate by file count. Row i has seen i + 1 accesses, so divide
+    # by index + 1; dividing by the bare index is off by one and divides by
+    # zero on the first row. Assumes a default 0-based index - TODO confirm.
+    df['ch_files'] = df['cache hit'].cumsum()
+    df['CHR files'] = df['ch_files'] / (df.index + 1)
+
+    # Running hit rate by volume: kB of cache hits / kB delivered so far.
+    df['ch_data'] = (df['cache hit'] * df['kB']).cumsum()
+    df['data delivered'] = df['kB'].cumsum()
+    df['CHR data'] = df['ch_data'] / df['data delivered']
+    # Rescaled for the printout below only; this column is not plotted.
+    df["cache size"] = df["cache size"] / TB
+    print(df)
+
+    ax1.plot(df["CHR files"], label=name)
+    ax2.plot(df["CHR data"], label=name)
+
+ax1.legend()
+ax2.legend()
+ax1.set_ylabel('cache hit rate [files]')
+ax2.set_ylabel('cache hit rate [data]')
+ax2.set_xlabel('files accessed')
+ax1.grid(True)
+ax2.grid(True)
+plt.savefig('plots/combinations/combination.png')
diff --git a/unit.py b/unit.py
@@ -1,17 +1,24 @@
+# This actor always takes the same action, or optionally a random one.
+
 import gym
+
 env = gym.make('gym_cache:Cache-v0')
 env.reset()
+
 total_reward = 0
 for i in range(1000000):
     if not i % 1000:
         print(i, 'total reward', total_reward)
         # env.render()
-    act = env.action_space.sample()
+
+    # --- random prediction
+    # act = env.action_space.sample()
+    # --- always predict cache miss
     act = 0
-    # print('action:', act)
+
     acc, rew, done, smt = env.step(act)
     # print('access:', acc, 'rew:', rew)
     total_reward += rew
 
 env.close()
-print('total_reward:', total_reward)
+print('Finished. Total reward:', total_reward)