Commit d0a7ec5: naming. now regular env is 10TB.
Ilija Vukotic committed Aug 6, 2020
1 parent 631ad3c
Showing 4 changed files with 13 additions and 10 deletions.
4 changes: 0 additions & 4 deletions TODO.md
@@ -1,5 +1 @@
-
-add actor cleaning step:
-* ask actor for decision to remove or not. List files in order of LRU. A signal not to learn is given in the comment dict.
-
 
2 changes: 1 addition & 1 deletion gym_cache/__init__.py
@@ -9,7 +9,7 @@
     entry_point='gym_cache.envs:CacheEnv',
     kwargs={
         'InputData': 'data/MWT2_processed',
-        'CacheSize': 100 * 1024 * 1024 * 1024
+        'CacheSize': 10 * 1024 * 1024 * 1024
     },
     # reward_threshold=1.0,
     max_episode_steps=20000000,
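Since the environment tracks file sizes in kB (the env itself prints "cache size [kB]"), the new value corresponds to a 10 TB cache, down from 100 TB. A quick sanity check of the arithmetic, as a sketch assuming kB units:

cache_size_kb = 10 * 1024 * 1024 * 1024  # the value registered as CacheSize
print(cache_size_kb / 1024**3, 'TB')     # 1 TB = 1024**3 kB, so this prints 10.0 TB
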
4 changes: 3 additions & 1 deletion gym_cache/envs/cache_env.py
@@ -12,6 +12,8 @@
 import logging
 logger = logging.getLogger(__name__)
 
+TB = 1024 * 1024 * 1024  # file sizes are in kB, so 1 TB = 1024**3 kB
+
 
 class CacheEnv(gym.Env):
 
@@ -20,7 +22,7 @@ class CacheEnv(gym.Env):
 
     def __init__(self, InputData, CacheSize):
 
-        self.name = '100TB'
+        self.name = '{}TB'.format(CacheSize/TB)
         self.actor_name = 'default'
 
         self.accesses_filename = InputData + '.pa'
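One side effect worth noting: CacheSize/TB is true division in Python 3, so with CacheSize = 10 * 1024**3 the environment name comes out as '10.0TB' rather than '10TB'. A minimal sketch of an integer-division variant that avoids the trailing .0 (an alternative, not what this commit does):

TB = 1024 * 1024 * 1024
CacheSize = 10 * TB
name = '{}TB'.format(CacheSize // TB)  # floor division -> '10TB'
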
13 changes: 9 additions & 4 deletions
@@ -8,6 +8,8 @@
 import logging
 logger = logging.getLogger(__name__)
 
+# This environment should not be used: it cleans the cache based on new queries to the actor.
+
 
 class CacheEnv(gym.Env):
 
@@ -51,18 +53,20 @@ def __init__(self, InputData, CacheSize):
         self.observation_space = spaces.Box(
             # first 6 are tokens, the 7th is file size, the 8th is how full the cache is at the moment
             low=np.array([0, 0, 0, 0, 0, 0, 0, 0]),
-            high=np.array([maxes[0], maxes[1], maxes[2], maxes[3], maxes[4], maxes[5], maxes[6], 100]),
+            high=np.array([maxes[0], maxes[1], maxes[2], maxes[3],
+                           maxes[4], maxes[5], maxes[6], 100]),
             dtype=np.int32
         )
         print('environment loaded! cache size [kB]:', self.cache_size)
 
     def load_access_data(self):
         # last variable is the fileID.
-        self.accesses= pd.read_parquet(self.accesses_filename)
+        self.accesses = pd.read_parquet(self.accesses_filename)
         print("accesses loaded:", self.accesses.shape[0])
 
     def save_monitoring_data(self):
-        mdata = pd.DataFrame(self.monitoring, columns=['kB', 'cache size', 'cache hit', 'reward'])
+        mdata = pd.DataFrame(self.monitoring, columns=[
+            'kB', 'cache size', 'cache hit', 'reward'])
         mdata.to_parquet('results/' + self.name + '.pa', engine='pyarrow')
 
     def seed(self, seed=None):
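To make the observation layout above concrete: each observation is an 8-component int32 vector, six request tokens followed by the file size in kB and the cache fullness as a percentage. A hypothetical example, with made-up token values:

import numpy as np

# six tokens, then file size in kB, then cache 73% full
obs = np.array([3, 17, 2, 240, 9, 1, 52480, 73], dtype=np.int32)
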
@@ -134,7 +138,8 @@ def step(self, action):
 
         self.cache_content[fID] = (self.files_processed, fs)
 
-        self.monitoring.append([fs, self.cache_kbytes, self.found_in_cache, int(reward)])
+        self.monitoring.append(
+            [fs, self.cache_kbytes, self.found_in_cache, int(reward)])
 
         self.files_processed += 1
         self.data_processed += fs
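Each monitoring row appended in step() records the request size in kB, the current cache occupancy, the cache-hit field (self.found_in_cache), and the reward; save_monitoring_data() then writes them to parquet. Reading the results back might look like this sketch (the 'results/10.0TB.pa' file name is an assumption following the naming scheme above):

import pandas as pd

mdata = pd.read_parquet('results/10.0TB.pa')  # hypothetical output file
print(mdata.columns.tolist())  # ['kB', 'cache size', 'cache hit', 'reward']
print(mdata.tail())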
