cache cleanup from memory. registration. docker

ivukotic · Aug 4, 2020 · 8644bfb · 8644bfb
1 parent 05806f5
commit 8644bfb
Show file tree

Hide file tree

Showing 11 changed files with 70 additions and 24 deletions.
diff --git a/README.md b/README.md
@@ -2,16 +2,16 @@
 OpenAI based Gym environments for training RL caching agent
 
 install it:
-
 <code>
-python -m pip install --user -e .
-</code>
+   pip install gym-cache
+</code> 
 
 import it like this:
 
 <code>
 import gym
-gym.make('gym_cache:Cache-v0')
+
+gym.make('gym-cache:Cache-v0')
 </code>
 
 
@@ -20,7 +20,7 @@ observation space has following variables:
 * file size \[kB\]
 * how full is the cache at that moment
 
-There are two discrete action environments (*Cache-v0* and *Cache-large-v0*) and one discrete action environment (*Cache-continuous-v0*).
+There are two discrete action environments (*Cache-v0* and *Cache-large-v0*) and one continuous action environment (*Cache-continuous-v0*).
 
 
 ## Data extractions and preprocessing
@@ -39,11 +39,10 @@ It is a parque file (.pa) with one dataframe:
 ## Rewards
 * always negative and correspond to cost to get the file - if it was cached it will be smaller
 * files are cached irrespective of the action the actor performed for the file
-* cleanup
-   * discrete environment - removes ones with 
+* cleanup: the environment memorizes actions. On cleanup it first deletes files judged not to be needed again (action 0 in discrete environments, or smaller values in the continuous environment). If multiple files have the same action value, the least recently used (LRU) one is removed first.
 
 
-# Technical implementation in XCache server
+## Possible technical implementation in XCache server
 * There are additional containers in the pod. 
     * environment container
        * receives gstream pfc, and disk info
@@ -52,4 +51,23 @@ It is a parque file (.pa) with one dataframe:
        * triggers cleanup at a lower HWM than xcache itself. Loops through memorized paths and removes the ones least likely to be needed. 
     * redis db - used by environment container to store actor responses
     * actor container 
-
+
+
+## Miscellaneous
+
+To change environments:
+* clone github repository
+* make changes
+* install locally:    
+   <code>  python -m pip install --user -e .  </code>
+   or
+   <code>
+   python setup.py bdist_wheel
+   python -m pip install dist\gym_cache-1.0.0-py3-none-any.whl
+   </code> 
+* to upload to pypi repository
+   <code>
+   # create %USER%\.pypirc file first. 
+   python setup.py bdist_wheel
+   python -m twine upload dist\*
+   </code> 
diff --git a/TODO.md b/TODO.md
@@ -1,3 +1,6 @@
+
 add actor cleaning step: 
 * ask actor for decision to remove or not. List files in order of LRU. Signal to not learn is given in comment dict.
 
+register so it is pip installable.
+
diff --git a/gym_cache/__init__.py → gym-cache/__init__.py b/gym_cache/__init__.py → gym-cache/__init__.py
@@ -6,7 +6,7 @@
 # discrete action cache
 register(
     id='Cache-v0',
-    entry_point='gym_cache.envs:CacheEnv',
+    entry_point='gym-cache.envs:CacheEnv',
     kwargs={
         'InputData': 'data/MWT2_processed',
         'CacheSize': 100 * 1024 * 1024 * 1024
@@ -18,7 +18,7 @@
 
 register(
     id='Cache-large-v0',
-    entry_point='gym_cache.envs:CacheEnv',
+    entry_point='gym-cache.envs:CacheEnv',
     kwargs={
         'InputData': 'data/MWT2_processed',
         'CacheSize': 100 * 1024 * 1024 * 1024
@@ -31,7 +31,7 @@
 # continuous action cache
 register(
     id='Cache-continuous-v0',
-    entry_point='gym_cache.envs:CacheContinousEnv',
+    entry_point='gym-cache.envs:CacheContinousEnv',
     kwargs={
         'InputData': 'data/MWT2_processed',
         'CacheSize': 100 * 1024 * 1024 * 1024

diff --git a/gym-cache/envs/__init__.py b/gym-cache/envs/__init__.py
@@ -0,0 +1,2 @@
+from gym-cache.envs.cache_env import CacheEnv
+from gym-cache.envs.cache_continous import CacheContinousEnv
diff --git a/gym_cache/envs/cache_continous.py → gym-cache/envs/cache_continous.py b/gym_cache/envs/cache_continous.py → gym-cache/envs/cache_continous.py
diff --git a/gym_cache/envs/cache_env.py → gym-cache/envs/cache_env.py b/gym_cache/envs/cache_env.py → gym-cache/envs/cache_env.py
@@ -20,9 +20,8 @@ class CacheEnv(gym.Env):
 
     def __init__(self, InputData, CacheSize):
 
-        self.name = '100TB_LRU'
-        # self.name = '100TB_DDQN'
-        # self.name = 'InfiniteCache_DDQN'
+        self.name = '100TB'
+        self.actor_name = 'default'
 
         self.accesses_filename = InputData + '.pa'
 
@@ -61,6 +60,9 @@ def __init__(self, InputData, CacheSize):
         )
         print('environment loaded!  cache size [kB]:', self.cache_size)
 
+    def set_actor_name(self, actor):
+        self.actor_name = actor
+
     def load_access_data(self):
         # last variable is the fileID.
         self.accesses = pd.read_parquet(self.accesses_filename)
@@ -71,7 +73,8 @@ def load_access_data(self):
     def save_monitoring_data(self):
         mdata = pd.DataFrame(self.monitoring, columns=[
                              'kB', 'cache size', 'cache hit', 'reward'])
-        mdata.to_parquet('results/' + self.name + '.pa', engine='pyarrow')
+        mdata.to_parquet('results/' + self.name + '_' +
+                         self.actor_name + '.pa', engine='pyarrow')
 
     def seed(self, seed=None):
         self.np_random, seed = seeding.np_random(seed)

diff --git a/gym_cache/envs/cache_env_full.py → gym-cache/envs/cache_env_full.py b/gym_cache/envs/cache_env_full.py → gym-cache/envs/cache_env_full.py
diff --git a/gym_cache/envs/__init__.py b/gym_cache/envs/__init__.py
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,6 @@
+setuptools 
+wheel
+tqdm
+twine
+pandas
+gym
diff --git a/setup.py b/setup.py
@@ -1,11 +1,27 @@
-from setuptools import setup
+import setuptools
 
-setup(
-    name='gym_cache',
-    version='0.0.1',
+with open("README.md", "r") as fh:
+    long_description = fh.read()
+
+setuptools.setup(
+    name='gym-cache',
+    version='1.0.0',
     install_requires=[
         'gym>=0.2.3',
         'pandas>=0.24.2',
         'scikit-learn>=0.22.1'
-    ]
+    ],
+    scripts=['unit.py'],
+    author="Ilija Vukotic",
+    author_email="[email protected]",
+    description="gym environment simulating file cache.",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/ivukotic/gym-cache",
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
 )
diff --git a/unit.py b/unit.py
@@ -2,7 +2,7 @@
 
 import gym
 
-env = gym.make('gym_cache:Cache-v0')
+env = gym.make('gym-cache:Cache-v0')
 env.reset()
 
 total_reward = 0
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from gym-cache.envs.cache_env import CacheEnv
		from gym-cache.envs.cache_continous import CacheContinousEnv