From e9af6d7641d83a50e144802b66fa59fb5f50f957 Mon Sep 17 00:00:00 2001 From: Ilia Moiseev Date: Thu, 29 Sep 2022 22:36:46 +0300 Subject: [PATCH 01/12] Fix indentation size --- cascade/docs/source/cascade.data.rst | 26 +++++++++++++------------- cascade/docs/source/cascade.meta.rst | 14 +++++++------- cascade/docs/source/cascade.models.rst | 16 ++++++++-------- cascade/docs/source/cascade.utils.rst | 26 +++++++++++++------------- 4 files changed, 41 insertions(+), 41 deletions(-) diff --git a/cascade/docs/source/cascade.data.rst b/cascade/docs/source/cascade.data.rst index afcd9a89..85dd5491 100644 --- a/cascade/docs/source/cascade.data.rst +++ b/cascade/docs/source/cascade.data.rst @@ -1,7 +1,7 @@ cascade.data ============ .. autoclass:: cascade.data.ApplyModifier - :members: + :members: | @@ -10,7 +10,7 @@ cascade.data | .. autoclass:: cascade.data.BruteforceCacher - :members: + :members: | @@ -19,7 +19,7 @@ cascade.data | .. autoclass:: cascade.data.Concatenator - :members: + :members: | @@ -28,7 +28,7 @@ cascade.data | .. autoclass:: cascade.data.CyclicSampler - :members: + :members: | @@ -37,7 +37,7 @@ cascade.data | .. autoclass:: cascade.data.Dataset - :members: + :members: | @@ -46,7 +46,7 @@ cascade.data | .. autoclass:: cascade.data.Iterator - :members: + :members: | @@ -55,7 +55,7 @@ cascade.data | .. autoclass:: cascade.data.Wrapper - :members: + :members: | @@ -65,7 +65,7 @@ cascade.data .. autoclass:: cascade.data.Modifier - :members: + :members: | @@ -74,7 +74,7 @@ cascade.data | .. autoclass:: cascade.data.Sampler - :members: + :members: | @@ -83,7 +83,7 @@ cascade.data | .. autoclass:: cascade.data.FolderDataset - :members: + :members: | @@ -92,7 +92,7 @@ cascade.data | .. autoclass:: cascade.data.Pickler - :members: + :members: | @@ -101,7 +101,7 @@ cascade.data | .. autoclass:: cascade.data.RandomSampler - :members: + :members: | @@ -110,7 +110,7 @@ cascade.data | .. autoclass:: cascade.data.SequentialCacher - :members: + :members: | diff --git a/cascade/docs/source/cascade.meta.rst b/cascade/docs/source/cascade.meta.rst index c12a0144..016685dd 100644 --- a/cascade/docs/source/cascade.meta.rst +++ b/cascade/docs/source/cascade.meta.rst @@ -1,7 +1,7 @@ cascade.meta ============ .. autoclass:: cascade.meta.HistoryViewer - :members: + :members: | @@ -10,7 +10,7 @@ cascade.meta | .. autoclass:: cascade.meta.MetaValidator - :members: + :members: | @@ -20,7 +20,7 @@ cascade.meta .. autoclass:: cascade.meta.MetaViewer - :members: + :members: | @@ -29,7 +29,7 @@ cascade.meta | .. autoclass:: cascade.meta.MetricViewer - :members: + :members: | @@ -38,7 +38,7 @@ cascade.meta | .. autoclass:: cascade.meta.Validator - :members: + :members: | @@ -47,7 +47,7 @@ cascade.meta | .. autoclass:: cascade.meta.AggregateValidator - :members: + :members: | @@ -56,7 +56,7 @@ cascade.meta | .. autoclass:: cascade.meta.PredicateValidator - :members: + :members: | diff --git a/cascade/docs/source/cascade.models.rst b/cascade/docs/source/cascade.models.rst index 7a9263d3..a4129fdb 100644 --- a/cascade/docs/source/cascade.models.rst +++ b/cascade/docs/source/cascade.models.rst @@ -2,7 +2,7 @@ cascade.models ============== .. autoclass:: cascade.models.Model - :members: + :members: | @@ -11,7 +11,7 @@ cascade.models | .. autoclass:: cascade.models.ModelModifier - :members: + :members: | @@ -20,7 +20,7 @@ cascade.models | .. autoclass:: cascade.models.BasicModel - :members: + :members: | @@ -29,7 +29,7 @@ cascade.models | .. 
autoclass:: cascade.models.BasicModelModifier - :members: + :members: | @@ -38,7 +38,7 @@ cascade.models | .. autoclass:: cascade.models.ModelRepo - :members: + :members: | @@ -47,7 +47,7 @@ cascade.models | .. autoclass:: cascade.models.ModelLine - :members: + :members: | @@ -56,7 +56,7 @@ cascade.models | .. autoclass:: cascade.models.Trainer - :members: + :members: | @@ -65,7 +65,7 @@ cascade.models | .. autoclass:: cascade.models.BasicTrainer - :members: + :members: | diff --git a/cascade/docs/source/cascade.utils.rst b/cascade/docs/source/cascade.utils.rst index d79eca13..c2e0e586 100644 --- a/cascade/docs/source/cascade.utils.rst +++ b/cascade/docs/source/cascade.utils.rst @@ -2,7 +2,7 @@ cascade.utils ============== .. autoclass:: cascade.utils.ConstantBaseline - :members: + :members: | @@ -11,7 +11,7 @@ cascade.utils | .. autoclass:: cascade.utils.FolderImageDataset - :members: + :members: | @@ -20,7 +20,7 @@ cascade.utils | .. autoclass:: cascade.utils.NumpyWrapper - :members: + :members: | @@ -29,7 +29,7 @@ cascade.utils | .. autoclass:: cascade.utils.OverSampler - :members: + :members: | @@ -38,7 +38,7 @@ cascade.utils | .. autoclass:: cascade.utils.PaSchemaValidator - :members: + :members: | @@ -47,7 +47,7 @@ cascade.utils | .. autoclass:: cascade.utils.SkModel - :members: + :members: | @@ -56,7 +56,7 @@ cascade.utils | .. autoclass:: cascade.utils.TableDataset - :members: + :members: | @@ -65,7 +65,7 @@ cascade.utils | .. autoclass:: cascade.utils.TableFilter - :members: + :members: | @@ -74,7 +74,7 @@ cascade.utils | .. autoclass:: cascade.utils.CSVDataset - :members: + :members: | @@ -84,7 +84,7 @@ cascade.utils .. autoclass:: cascade.utils.PartedTableLoader - :members: + :members: | @@ -93,7 +93,7 @@ cascade.utils | .. autoclass:: cascade.utils.TableIterator - :members: + :members: | @@ -102,7 +102,7 @@ cascade.utils | .. autoclass:: cascade.utils.LargeCSVDataset - :members: + :members: | @@ -111,7 +111,7 @@ cascade.utils | .. 
autoclass:: cascade.utils.NullValidator - :members: + :members: | From 384e30eb71d1c4d1145d71d3933308c790e7f7f3 Mon Sep 17 00:00:00 2001 From: Ilia Moiseev Date: Thu, 29 Sep 2022 22:37:02 +0300 Subject: [PATCH 02/12] Fix typo --- cascade/data/bruteforce_cacher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cascade/data/bruteforce_cacher.py b/cascade/data/bruteforce_cacher.py index 47c6c260..02b448ae 100644 --- a/cascade/data/bruteforce_cacher.py +++ b/cascade/data/bruteforce_cacher.py @@ -25,7 +25,7 @@ class BruteforceCacher(Modifier): See also -------- - Cascade.data.SequentialCacher + cascade.data.SequentialCacher """ def __init__(self, dataset: Dataset, *args, **kwargs) -> None: super().__init__(dataset, *args, **kwargs) From 0b8b60e553e4b0757df4de6c48aa662dd384ce31 Mon Sep 17 00:00:00 2001 From: Ilia Moiseev Date: Thu, 29 Sep 2022 23:21:32 +0300 Subject: [PATCH 03/12] Write and refine base documentation --- cascade/base/meta_handler.py | 112 ++++++++++++++++++++--------------- cascade/base/traceable.py | 26 ++++++-- 2 files changed, 85 insertions(+), 53 deletions(-) diff --git a/cascade/base/meta_handler.py b/cascade/base/meta_handler.py index 886da83b..28139baa 100644 --- a/cascade/base/meta_handler.py +++ b/cascade/base/meta_handler.py @@ -16,9 +16,8 @@ import os import json -from typing import Union import datetime -from typing import List, Dict +from typing import Union, List, Dict from json import JSONEncoder import yaml @@ -61,15 +60,15 @@ def default(self, obj): return super(CustomEncoder, self).default(obj) - def obj_to_dict(self, obj): + def obj_to_dict(self, obj) -> Dict: return json.loads(self.encode(obj)) class BaseHandler: - def read(self, path) -> List[Dict]: + def read(self, path: str) -> Union[Dict, List[Dict]]: raise NotImplementedError() - def write(self, path, obj, overwrite=True) -> None: + def write(self, path: str, obj, overwrite=True) -> None: raise NotImplementedError() def _raise_io_error(self, path, exc): @@ -80,24 +79,7 @@ def _raise_io_error(self, path, exc): class JSONHandler(BaseHandler): - """ - Handles the logic of dumping and loading json files - """ - def read(self, path) -> Union[Dict, List[Dict]]: - """ - Reads json from path - - Parameters - ---------- - path: - Path to the file. If no extension provided, - then .json will be added - - Raises - ------ - IOError - when decoding errors occur - """ + def read(self, path: str) -> Union[Dict, List[Dict]]: _, ext = os.path.splitext(path) if ext == '': path += '.json' @@ -111,32 +93,16 @@ def read(self, path) -> Union[Dict, List[Dict]]: self._raise_io_error(path, e) return meta - def write(self, name, obj: List[Dict], overwrite=True) -> None: - """ - Writes json to path using custom encoder - """ - if not overwrite and os.path.exists(name): + def write(self, path: str, obj: List[Dict], overwrite=True) -> None: + if not overwrite and os.path.exists(path): return - with open(name, 'w') as f: + with open(path, 'w') as f: json.dump(obj, f, cls=CustomEncoder, indent=4) class YAMLHandler(BaseHandler): - def read(self, path) -> Union[Dict, List[Dict]]: - """ - Reads yaml from path - - Parameters - ---------- - path: - Path to the file. 
If no extension provided, then .yml will be added - - Raises - ------ - IOError - when decoding errors occur - """ + def read(self, path: str) -> Union[Dict, List[Dict]]: _, ext = os.path.splitext(path) if ext == '': path += '.yml' @@ -148,7 +114,7 @@ def read(self, path) -> Union[Dict, List[Dict]]: self._raise_io_error(path, e) return meta - def write(self, path, obj, overwrite=True) -> None: + def write(self, path: str, obj, overwrite=True) -> None: if not overwrite and os.path.exists(path): return @@ -158,14 +124,14 @@ def write(self, path, obj, overwrite=True) -> None: class TextHandler(BaseHandler): - def read(self, path) -> Dict: + def read(self, path: str) -> Dict: """ Reads text file from path and returns dict in the form {path: 'text from file'} Parameters ---------- - path: + path: str Path to the file """ @@ -179,11 +145,61 @@ def write(self, path, obj, overwrite=True) -> None: class MetaHandler: - def read(self, path) -> List[Dict]: + """ + Encapsulates the logic of reading and writing metadata to disk. + + Supported read-write formats are `json` and `yml`. Other formats + are supported as read-only. For example, one can read meta from a txt or md file. + + Examples + -------- + >>> from cascade.base import MetaHandler + >>> mh = MetaHandler() + >>> mh.write('meta.json', {'hello': 'world'}) + >>> obj = mh.read('meta.json') + >>> mh.write('meta.yml', {'hello': 'world'}) + >>> obj = mh.read('meta.yml') + """ + def read(self, path: str) -> Union[Dict, List[Dict]]: + """ + Reads object from path. + + Parameters + ---------- + path: str + Path to the object. + + Returns + ------- + obj: Union[Dict, List[Dict]] + + Raises + ------ + IOError + when decoding errors occur + """ handler = self._get_handler(path) return handler.read(path) - def write(self, path, obj, overwrite=True) -> None: + def write(self, path: str, obj, overwrite: bool = True) -> None: + """ + Writes object to path. + + Parameters + ---------- + path: str + Path where to write the object, with name and extension + obj + An object to be serialized and saved + overwrite: bool, optional + Whether to overwrite the file if it already exists. If False + and the file already exists, will silently return without saving. + + Raises + ------ + IOError + when encoding errors occur + """ handler = self._get_handler(path) return handler.write(path, obj, overwrite=overwrite) diff --git a/cascade/base/traceable.py b/cascade/base/traceable.py index 1de4d4a3..15ea19b9 100644 --- a/cascade/base/traceable.py +++ b/cascade/base/traceable.py @@ -3,7 +3,23 @@ class Traceable: - def __init__(self, *args, meta_prefix=None, **kwargs) -> None: + """ + Base class for everything that has metadata in cascade. + Handles the logic of getting and updating internal meta prefix. + """ + def __init__(self, *args, meta_prefix: Union[Dict, str] = None, **kwargs) -> None: + """ + Parameters + ---------- + meta_prefix: Union[Dict, str], optional + The dictionary that is used to update the object's meta in the `get_meta` call. + Since `update` is used, it can overwrite default values. + If str, the prefix is assumed to be a path and is loaded using MetaHandler. + + See also + -------- + cascade.base.MetaHandler + """ if meta_prefix is None: meta_prefix = {} elif isinstance(meta_prefix, str): @@ -22,8 +38,8 @@ def get_meta(self) -> List[Dict]: meta: List[Dict] A list where last element is this object's metadata. Meta can be anything that is worth to document about - the object and its properties. This is done in form - of list to enable cascade-like calls in Modifiers and Samplers. 
+ the object and its properties. + Meta is a list to allow the formation of pipelines. """ meta = { 'name': repr(self) } @@ -36,8 +52,8 @@ def get_meta(self) -> List[Dict]: def update_meta(self, obj: Union[Dict, str]) -> None: """ - Updates _meta_prefix, which is then updates - dataset's meta when get_meta() is called + Updates `_meta_prefix`, which then updates + dataset's meta when `get_meta()` is called """ if isinstance(obj, str): obj = self._read_meta_from_file(obj) From 6456a8a127b457db449106082075865a520c06b7 Mon Sep 17 00:00:00 2001 From: Ilia Moiseev Date: Thu, 29 Sep 2022 23:46:32 +0300 Subject: [PATCH 04/12] Failed test of Concatenator's meta --- cascade/tests/test_concatenator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cascade/tests/test_concatenator.py b/cascade/tests/test_concatenator.py index 3359d9a8..dc4f56ef 100644 --- a/cascade/tests/test_concatenator.py +++ b/cascade/tests/test_concatenator.py @@ -31,6 +31,7 @@ def test_meta(): c = Concatenator([n1, n2], meta_prefix={'num': 1}) assert c.get_meta()[0]['num'] == 1 + assert len(c.get_meta()[0]['data']) == 2 @pytest.mark.parametrize( From a20ba8205c43988ef9261d867a6eb67ae19f23fd Mon Sep 17 00:00:00 2001 From: Ilia Moiseev Date: Thu, 29 Sep 2022 23:49:15 +0300 Subject: [PATCH 05/12] Return to the data of Concatenator as list --- cascade/data/concatenator.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cascade/data/concatenator.py b/cascade/data/concatenator.py index 61154651..e7c286c4 100644 --- a/cascade/data/concatenator.py +++ b/cascade/data/concatenator.py @@ -67,7 +67,5 @@ def get_meta(self) -> List[Dict]: Concatenator calls `get_meta()` of all its datasets """ meta = super().get_meta() - meta[0]['data'] = {} - for ds in self._datasets: - meta[0]['data'][repr(ds)] = ds.get_meta() + meta[0]['data'] = [ds.get_meta() for ds in self._datasets] return meta From fc88f06d1a56e9eac244d50e80eae6bd23f81f61 Mon Sep 17 00:00:00 2001 From: Ilia Moiseev Date: Fri, 30 Sep 2022 23:35:54 +0300 Subject: [PATCH 06/12] Write and refine data documentation --- cascade/data/apply_modifier.py | 16 ++++++++++--- cascade/data/bruteforce_cacher.py | 31 ++++++++++++++++++++++-- cascade/data/concatenator.py | 12 ++++++++-- cascade/data/cyclic_sampler.py | 11 ++------- cascade/data/dataset.py | 39 +++++++++++++++++++++---------- cascade/data/folder_dataset.py | 12 +++++++--- cascade/data/pickler.py | 12 +++++----- cascade/data/random_sampler.py | 6 ++--- cascade/data/range_sampler.py | 19 ++++++++++++++- cascade/data/sequential_cacher.py | 12 ++++++---- cascade/data/utils.py | 12 +++++----- 11 files changed, 131 insertions(+), 51 deletions(-) diff --git a/cascade/data/apply_modifier.py b/cascade/data/apply_modifier.py index a464d381..97636927 100644 --- a/cascade/data/apply_modifier.py +++ b/cascade/data/apply_modifier.py @@ -20,17 +20,27 @@ class ApplyModifier(Modifier): """ - Modifier that maps a function to previous dataset's elements in a lazy way. + Modifier that maps a function to the given dataset's items in a lazy way. 
""" def __init__(self, dataset: Dataset, func: Callable, *args, **kwargs) -> None: """ Parameters ---------- dataset: Dataset - a dataset to modify + A dataset to modify func: Callable - a function to be applied to every item of a dataset - + A function to be applied to every item of a dataset - each `__getitem__` would call `func` on an item obtained from a previous dataset + + Examples + -------- + >>> from cascade import data as cdd + >>> ds = cdd.Wrapper([0, 1, 2, 3, 4]) + >>> ds = cdd.ApplyModifier(ds, lambda x: x ** 2) + + Now function will only be applied when items are retrieved + + >>> assert [item for item in ds] == [0, 1, 4, 9, 16] """ super().__init__(dataset, *args, **kwargs) self._func = func diff --git a/cascade/data/bruteforce_cacher.py b/cascade/data/bruteforce_cacher.py index 02b448ae..b07422d2 100644 --- a/cascade/data/bruteforce_cacher.py +++ b/cascade/data/bruteforce_cacher.py @@ -20,14 +20,41 @@ class BruteforceCacher(Modifier): """ - Unusual modifier which loads everything in memory in initialization phase - and then returns values from cache + Identity modifier that calls all previous pipeline in __init__ loading everything + in memory. This is useful in combination with `Pickler` when pipeline + has heavy operations upstream. You can load everything and pickle it to turn off + heavy part of the pipeline. + + Examples + -------- + >>> from cascade import data as cdd + >>> ds = cdd.Wrapper([0 for _ in range(1000000)]) + >>> ds = cdd.ApplyModifier(ds, lambda x: x + 1) + >>> ds = cdd.ApplyModifier(ds, lambda x: x + 1) + >>> ds = cdd.ApplyModifier(ds, lambda x: x + 1) + + Cache heavy upstream part once + + >>> ds = cdd.BruteforceCacher(ds) + + Then pickle it + + >>> ds = cdd.Pickler('ds', ds) + + Unpickle and use further + + >>> ds = cdd.Pickler('ds') + >>> ds = cdd.RandomSampler(ds, 1000) See also -------- cascade.data.SequentialCacher + cascade.data.Pickler """ def __init__(self, dataset: Dataset, *args, **kwargs) -> None: + """ + Loads every item in dataset in internal list. + """ super().__init__(dataset, *args, **kwargs) # forcibly calling all previous datasets in the init if hasattr(self._dataset, '__len__') and hasattr(self._dataset, '__getitem__'): diff --git a/cascade/data/concatenator.py b/cascade/data/concatenator.py index 61154651..9c4c420e 100644 --- a/cascade/data/concatenator.py +++ b/cascade/data/concatenator.py @@ -23,6 +23,14 @@ class Concatenator(Dataset): """ Unifies several Datasets under one, calling them sequentially in the provided order. 
+ + Examples + -------- + >>> from cascade.data import Wrapper, Concatenator + >>> ds_1 = Wrapper([0, 1, 2]) + >>> ds_2 = Wrapper([2, 1, 0]) + >>> ds = Concatenator((ds_1, ds_2)) + >>> assert [item for item in ds] == [0, 1, 2, 2, 1, 0] """ def __init__(self, datasets: Iterable[Dataset], *args, **kwargs) -> None: """ Parameters ---------- - datasets: Iterable[Dataset] - a list or tuple of datasets to concatenate + datasets: Iterable[Dataset] + A list or tuple of datasets to concatenate """ self._datasets = datasets lengths = [len(ds) for ds in self._datasets] diff --git a/cascade/data/cyclic_sampler.py b/cascade/data/cyclic_sampler.py index ed3eeddc..9b4023ed 100644 --- a/cascade/data/cyclic_sampler.py +++ b/cascade/data/cyclic_sampler.py @@ -25,15 +25,8 @@ class CyclicSampler(Sampler): ------- >>> from cascade.data import CyclicSampler, Wrapper >>> ds = Wrapper([1,2,3]) - >>> ds = CyclicSampler(ds, 5) - >>> for item in ds: ... print(item) ... - 1 - 2 - 3 - 1 - 2 + >>> ds = CyclicSampler(ds, 7) + >>> assert [item for item in ds] == [1, 2, 3, 1, 2, 3, 1] """ def __getitem__(self, index) -> T: internal_index = index % len(self._dataset) diff --git a/cascade/data/dataset.py b/cascade/data/dataset.py index 4223d5c9..37c4e3ba 100644 --- a/cascade/data/dataset.py +++ b/cascade/data/dataset.py @@ -11,7 +11,7 @@ limitations under the License. """ -from typing import Dict, Generic, Iterable, List, TypeVar +from typing import Dict, Generic, Iterable, List, Mapping, TypeVar from ..base import Traceable T = TypeVar('T') @@ -35,7 +35,7 @@ def __getitem__(self, index) -> T: """ Abstract method - should be defined in every successor """ - raise NotImplementedError + raise NotImplementedError() def get_meta(self) -> List[Dict]: """ @@ -50,22 +50,25 @@ meta[0]['type'] = 'dataset' return meta - def __repr__(self): + def __repr__(self) -> str: """ Returns ------- - string representation of a Dataset. This repr used as a name for get_meta() method - by default gives the name of class from basic repr + repr: str + Representation of a Dataset. This repr is used as a name by the get_meta() method; + by default it gives the name of the class from the basic repr See also -------- cascade.data.Dataset.get_meta() """ - rp = super().__repr__() - return rp[1:].split()[0] + return super().__repr__().split()[0] class Iterator(Dataset): + """ + Wraps Dataset around any Iterable. Does not have map-like interface. + """ def __init__(self, data: Iterable, *args, **kwargs): super().__init__(*args, **kwargs) self._data = data @@ -87,7 +90,7 @@ class Wrapper(Dataset): """ Wraps Dataset around any list-like object. """ - def __init__(self, obj, *args, **kwargs) -> None: + def __init__(self, obj: Mapping, *args, **kwargs) -> None: self._data = obj super().__init__(*args, **kwargs) @@ -120,7 +123,7 @@ def __init__(self, dataset: Dataset, *args, **kwargs) -> None: Parameters ---------- dataset: Dataset - a dataset to modify + A dataset to modify """ self._dataset = dataset super().__init__(*args, **kwargs) @@ -128,7 +131,7 @@ def __init__(self, dataset: Dataset, *args, **kwargs) -> None: def __getitem__(self, index) -> T: return self._dataset[index] - def __iter__(self): + def __iter__(self) -> T: for i in range(len(self)): yield self.__getitem__(i) @@ -150,14 +153,26 @@ def get_meta(self) -> List[Dict]: class Sampler(Modifier): """ Defines certain sampling over a Dataset. 
Its distinctive feature is that it changes the number of - items in dataset. It can constitute a batch sampler or random sampler or sample in cycling manner. + items in the dataset. It can be used to build a batch sampler, a random sampler, etc. See also -------- cascade.data.CyclicSampler + cascade.data.RandomSampler + cascade.data.RangeSampler """ def __init__(self, dataset: Dataset, num_samples: int, *args, **kwargs) -> None: - assert num_samples > 0 + """ + Constructs a Sampler. + + Parameters + ---------- + dataset: Dataset + A dataset to sample from + num_samples: int + The number of samples + """ + assert num_samples > 0, 'The number of samples should be positive' super().__init__(dataset, *args, **kwargs) self._num_samples = num_samples diff --git a/cascade/data/folder_dataset.py b/cascade/data/folder_dataset.py index e9fb13b2..92b3174f 100644 --- a/cascade/data/folder_dataset.py +++ b/cascade/data/folder_dataset.py @@ -8,13 +8,19 @@ class FolderDataset(Dataset): """ Basic "folder of files" dataset. Accepts root folder in which considers all files. - Is abstract - getitem is not defined, since it is specific for each file type + Is abstract - getitem is not defined, since it is specific for each file type. See also -------- cascade.utils.FolderImageDataset """ - def __init__(self, root, *args, **kwargs) -> None: + def __init__(self, root: str, *args, **kwargs) -> None: + """ + Parameters + ---------- + root: str + A path to the folder of files + """ super().__init__(*args, **kwargs) self._root = os.path.abspath(root) if not os.path.exists(self._root): @@ -39,5 +45,5 @@ def get_meta(self) -> List[Dict]: meta[0]['md5sums'].append(md5(f.read()).hexdigest()) return meta - def __len__(self): + def __len__(self) -> int: return len(self._names) diff --git a/cascade/data/pickler.py b/cascade/data/pickler.py index 76ec32ec..8ad30731 100644 --- a/cascade/data/pickler.py +++ b/cascade/data/pickler.py @@ -16,14 +16,14 @@ import os import pickle -from . import Modifier +from . import Dataset, Modifier class Pickler(Modifier): """ - Pickles an input dataset or unpickles one + Pickles an input dataset or unpickles one """ - def __init__(self, path, dataset=None, *args, **kwargs) -> None: + def __init__(self, path: str, dataset: Dataset = None, *args, **kwargs) -> None: """ Loads pickled dataset or dumps one depending on parameters passed: Parameters ---------- - path: - path to the pickled dataset + path: str + Path to the pickled dataset dataset: Dataset, optional - a dataset to be pickled + A dataset to be pickled Raises ------ diff --git a/cascade/data/random_sampler.py b/cascade/data/random_sampler.py index f7b2ba4d..ed20eb1d 100644 --- a/cascade/data/random_sampler.py +++ b/cascade/data/random_sampler.py @@ -15,7 +15,7 @@ """ from numpy.random import random_integers, shuffle -from . import Dataset, Sampler +from . import Dataset, Sampler, T class RandomSampler(Sampler): """ Shuffles dataset. Can randomly sample from dataset if num_samples is not None and less than length of dataset. 
""" - def __init__(self, dataset: Dataset, num_samples=None, **kwargs) -> None: + def __init__(self, dataset: Dataset, num_samples: int = None, **kwargs) -> None: """ Parameters ---------- @@ -44,5 +44,5 @@ def __init__(self, dataset: Dataset, num_samples=None, **kwargs) -> None: self._indices = random_integers(0, len(dataset) - 1, num_samples) super().__init__(dataset, num_samples, **kwargs) - def __getitem__(self, index): + def __getitem__(self, index) -> T: return super().__getitem__(self._indices[index]) diff --git a/cascade/data/range_sampler.py b/cascade/data/range_sampler.py index af4b72f8..349bd97b 100644 --- a/cascade/data/range_sampler.py +++ b/cascade/data/range_sampler.py @@ -42,7 +42,24 @@ class RangeSampler(Sampler): 2 3 """ - def __init__(self, dataset: Dataset, start=None, stop=None, step=1, *args, **kwargs) -> None: + def __init__(self, + dataset: Dataset, + start:int = None, + stop:int = None, + step:int = 1, + *args, **kwargs) -> None: + """ + Parameters + ---------- + dataset: Dataset + A dataset to sampler from + start: int + Start index in range - included + stop: int + Stop index in range - excluded + step: int, optional + Step of range + """ if start is not None and stop is None: stop = start start = 0 diff --git a/cascade/data/sequential_cacher.py b/cascade/data/sequential_cacher.py index abdd6369..292f0d71 100644 --- a/cascade/data/sequential_cacher.py +++ b/cascade/data/sequential_cacher.py @@ -29,14 +29,18 @@ class SequentialCacher(Modifier): -------- BruteforceCacher """ - def __init__(self, dataset: Dataset, batch_size=2, *args, **kwargs) -> None: + def __init__( + self, + dataset: Dataset, + batch_size: int = 2, + *args, **kwargs) -> None: """ Parameters ---------- dataset: Dataset - dataset to cache sequentially - batch_size: int, default: 2 - a number of items to load and keep in each moment + Dataset to cache sequentially + batch_size: int, optional + A number of items to load and keep in each moment """ # TODO: make something to release this assert assert hasattr(dataset, '__len__'), 'Dataset should have __len__' diff --git a/cascade/data/utils.py b/cascade/data/utils.py index c8c62b0d..6c877b56 100644 --- a/cascade/data/utils.py +++ b/cascade/data/utils.py @@ -22,24 +22,24 @@ def split(ds: Dataset, frac=0.5, num=None) -> Tuple[Dataset]: >>> ds1, ds2 = cdd.split(ds) >>> print([item for item in ds1]) - ... [0, 1] + [0, 1] >>> print([item for item in ds2]) - ... [2, 3, 4] + [2, 3, 4] >>> ds1, ds2 = cdd.split(ds, 0.6) >>> print([item for item in ds1]) - ... [0, 1, 2] + [0, 1, 2] >>> print([item for item in ds2]) - ... [3, 4] + [3, 4] >>> ds1, ds2 = cdd.split(ds, num=4) >>> print([item for item in ds1]) - ... [0, 1, 2, 3] + [0, 1, 2, 3] >>> print([item for item in ds2]) - ... [4] + [4] ''' if num is None: num = floor(len(ds) * frac) From af0532507d58c9b31cb05b20fe182e563f221eac Mon Sep 17 00:00:00 2001 From: Ilia Moiseev Date: Thu, 6 Oct 2022 00:23:46 +0300 Subject: [PATCH 07/12] Write and fix meta documentation --- cascade/meta/history_viewer.py | 24 ++++++++++++++++++------ cascade/meta/meta_validator.py | 16 +++++++++------- cascade/meta/meta_viewer.py | 17 +++++++++-------- cascade/meta/metric_viewer.py | 25 +++++++++++++++++++------ cascade/meta/validator.py | 11 +++++++---- 5 files changed, 62 insertions(+), 31 deletions(-) diff --git a/cascade/meta/history_viewer.py b/cascade/meta/history_viewer.py index 42a4551a..2003726b 100644 --- a/cascade/meta/history_viewer.py +++ b/cascade/meta/history_viewer.py @@ -27,15 +27,20 @@ from . 
import MetaViewer from .. import __version__ +from ..data import Dataset class HistoryViewer: """ The tool which allows the user to visualize training history of model versions. - Uses plotly to show how metrics of models changed in time and how - models with different hyperparameters depend on each other + Shows how metrics of models changed over time and how + models with different hyperparameters depend on each other. """ - def __init__(self, repo, last_lines=None, last_models=None) -> None: + def __init__( + self, + repo, + last_lines: int = None, + last_models: int = None) -> None: """ Parameters ---------- @@ -112,7 +117,7 @@ def _specific_argmin(arr, self_index) -> int: arg_min = i return arg_min - def plot(self, metric: str, show=False) -> plotly.graph_objects.Figure: + def plot(self, metric: str, show: bool = False) -> plotly.graph_objects.Figure: """ Plots training history of model versions using plotly. @@ -120,7 +125,7 @@ def plot(self, metric: str, show=False) -> plotly.graph_objects.Figure: ---------- metric: str Metric should be present in meta of at least one model in repo - show: bool + show: bool, optional Whether to return and show or just return figure """ @@ -199,7 +204,7 @@ def plot(self, metric: str, show=False) -> plotly.graph_objects.Figure: return fig - def serve(self, metric, **kwargs): + def serve(self, metric: str, **kwargs): + """ + Runs a dash-based server with HistoryViewer, updating plots in real time. + + Note + ---- + This feature needs `dash` to be installed. + """ # Conditional import try: import dash diff --git a/cascade/meta/meta_validator.py b/cascade/meta/meta_validator.py index aa4b94fa..25a1da2b 100644 --- a/cascade/meta/meta_validator.py +++ b/cascade/meta/meta_validator.py @@ -45,10 +45,6 @@ class MetaValidator(Validator): If the structure of pipeline is different it saves new meta file. - Raises - ------ - cascade.meta.DataValidationException - See also -------- cascade.data.Modifier @@ -58,10 +54,16 @@ def __init__(self, dataset: Dataset, root=None, meta_fmt='.json') -> None: Parameters ---------- dataset: Dataset - dataset to validate - root: str - path to the folder in which to store meta + Dataset to validate + root: str, optional + Path to the folder in which to store meta; + default is './.cascade' + meta_fmt: str, optional + Format of metadata files + + Raises + ------ + cascade.meta.DataValidationException """ super().__init__(dataset, lambda x: True) self._mh = MetaHandler() diff --git a/cascade/meta/meta_viewer.py b/cascade/meta/meta_viewer.py index b8cb31a1..8afe4d05 100644 --- a/cascade/meta/meta_viewer.py +++ b/cascade/meta/meta_viewer.py @@ -21,22 +21,20 @@ class MetaViewer: """ - The class to read and write meta data. + The class to view all metadata in folders and subfolders. 
""" - def __init__(self, root, filt=None) -> None: + def __init__(self, root: str, filt: Dict=None) -> None: """ Parameters ---------- - root: + root: str path to the folder containing metadata files - to dump and load metadata files MetaHandler is used filt Dict, optional: - dictionary that specifies which values should be present in meta + dictionary that specifies which values that should be present in meta for example to find all models use `filt={'type': 'model'}` See also -------- - cascade.meta.ModelRepo cascade.meta.MetaHandler """ if not os.path.exists(root): @@ -55,12 +53,12 @@ def __init__(self, root, filt=None) -> None: if filt is not None: self.names = list(filter(self._filter, self.names)) - def __getitem__(self, index) -> List[Dict]: + def __getitem__(self, index: int) -> List[Dict]: """ Returns ------- meta: List[Dict] - object containing meta + Meta object """ return self.read(self.names[index]) @@ -92,4 +90,7 @@ def _filter(self, name): @staticmethod def obj_to_dict(obj): + """ + Serializes the object using extended JSONEncoder + """ return JSONEncoder().obj_to_dict(obj) diff --git a/cascade/meta/metric_viewer.py b/cascade/meta/metric_viewer.py index d3db721e..3c58a2ae 100644 --- a/cascade/meta/metric_viewer.py +++ b/cascade/meta/metric_viewer.py @@ -15,6 +15,7 @@ """ import os +from typing import List import warnings import pendulum from flatten_json import flatten @@ -28,8 +29,9 @@ class MetricViewer: """ Interface for viewing metrics in model meta files - uses ModelRepo to extract metrics of all models if any - constructs a `pd.DataFrame` of metrics internally, which is showed in `__repr__` + uses ModelRepo to extract metrics of all models if any. + As metrics it uses data from `metrics` field in models' + meta and as parameters it uses `params` field. """ def __init__(self, repo) -> None: """ @@ -42,7 +44,10 @@ def __init__(self, repo) -> None: self._metrics = [] self.reload_table() - def reload_table(self): + def reload_table(self) -> None: + """ + Updates internal state + """ self._metrics = [] for line in self._repo: viewer_root = line.root @@ -91,7 +96,10 @@ def reload_table(self): def __repr__(self) -> str: return repr(self.table) - def plot_table(self, show=False): + def plot_table(self, show: bool = False): + """ + Uses plotly to graphically show table with metrics and parameters. + """ data = pd.DataFrame(map(flatten, self.table.to_dict('records'))) fig = go.Figure(data=[ go.Table( @@ -107,13 +115,18 @@ def plot_table(self, show=False): fig.show() return fig - def serve(self, page_size=50, include=None, exclude=None, **kwargs) -> None: + def serve( + self, + page_size: int = 50, + include: List[str] = None, + exclude: List[str] = None, + **kwargs) -> None: """ Runs dash-based server with interactive table of metrics and parameters. Parameters ---------- - page_size: + page_size: int, optional Size of the table in rows on one page include: List[str], optional: List of parameters or metrics to be added. Only them will be present along with some default. 
diff --git a/cascade/meta/validator.py b/cascade/meta/validator.py index 25425827..a3ccda24 100644 --- a/cascade/meta/validator.py +++ b/cascade/meta/validator.py @@ -18,7 +18,9 @@ class DataValidationException(Exception): - pass + """ + Raised when data validation fails + """ class Validator(Modifier): @@ -37,7 +39,8 @@ def __init__(self, dataset: Dataset, class AggregateValidator(Validator): """ - This validator accepts an aggregate function that accepts a `Dataset` and return `True` of `False` + This validator accepts an aggregate function + that accepts a `Dataset` and returns `True` or `False` Example ------- @@ -61,8 +64,8 @@ def __init__(self, dataset: Dataset, func: Callable[[Dataset], bool], **kwargs) class PredicateValidator(Validator): """ - This validator accepts function that is applied to each item in dataset and return `True` or `False` - Calls all previous lazy datasets in __init__ + This validator accepts a function that is applied to each item in a dataset + and returns `True` or `False`. Calls `__getitem__` of all previous datasets in `__init__`. Example ------- From 6d4a9f44506126305849c967efd277cc54cefd8f Mon Sep 17 00:00:00 2001 From: Ilia Moiseev Date: Thu, 6 Oct 2022 00:24:06 +0300 Subject: [PATCH 08/12] Order records in data docs --- cascade/docs/source/cascade.data.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cascade/docs/source/cascade.data.rst b/cascade/docs/source/cascade.data.rst index 85dd5491..a14c8284 100644 --- a/cascade/docs/source/cascade.data.rst +++ b/cascade/docs/source/cascade.data.rst @@ -109,8 +109,7 @@ cascade.data | -.. autoclass:: cascade.data.SequentialCacher - :members: +.. autoclass:: cascade.data.RangeSampler | @@ -118,8 +117,8 @@ cascade.data | - -.. autoclass:: cascade.data.RangeSampler +.. autoclass:: cascade.data.SequentialCacher + :members: | From 68dd95a2c0c6ef5c998a52af01c0ca84d262ce1c Mon Sep 17 00:00:00 2001 From: Ilia Moiseev Date: Thu, 6 Oct 2022 00:52:24 +0300 Subject: [PATCH 09/12] Write and fix models documentation --- cascade/docs/source/cascade.models.rst | 17 +++---- cascade/models/basic_model.py | 21 +++++++-- cascade/models/model.py | 12 ++++- cascade/models/model_line.py | 24 +++++----- cascade/models/model_repo.py | 64 ++++++++++++++------------ cascade/models/trainer.py | 20 ++++---- 6 files changed, 93 insertions(+), 65 deletions(-) diff --git a/cascade/docs/source/cascade.models.rst b/cascade/docs/source/cascade.models.rst index a4129fdb..324208b0 100644 --- a/cascade/docs/source/cascade.models.rst +++ b/cascade/docs/source/cascade.models.rst @@ -1,7 +1,7 @@ cascade.models ============== -.. autoclass:: cascade.models.Model +.. autoclass:: cascade.models.BasicModel :members: | @@ -10,7 +10,7 @@ cascade.models | -.. autoclass:: cascade.models.ModelModifier +.. autoclass:: cascade.models.BasicModelModifier :members: | @@ -19,7 +19,7 @@ cascade.models | -.. autoclass:: cascade.models.BasicModel +.. autoclass:: cascade.models.ModelLine :members: | @@ -28,7 +28,7 @@ cascade.models | -.. autoclass:: cascade.models.BasicModelModifier +.. autoclass:: cascade.models.ModelRepo :members: | @@ -37,7 +37,7 @@ cascade.models | -.. autoclass:: cascade.models.ModelRepo +.. autoclass:: cascade.models.Model :members: | @@ -46,7 +46,7 @@ cascade.models | -.. autoclass:: cascade.models.ModelLine +.. autoclass:: cascade.models.ModelModifier :members: | @@ -55,7 +55,7 @@ cascade.models | -.. autoclass:: cascade.models.Trainer +.. 
autoclass:: cascade.models.BasicTrainer :members: | @@ -64,7 +64,8 @@ cascade.models | -.. autoclass:: cascade.models.BasicTrainer + +.. autoclass:: cascade.models.Trainer :members: | diff --git a/cascade/models/basic_model.py b/cascade/models/basic_model.py index 7455581f..64a9c7ad 100644 --- a/cascade/models/basic_model.py +++ b/cascade/models/basic_model.py @@ -15,7 +15,7 @@ """ -from typing import Dict, Callable, AnyStr +from typing import Dict, Callable from .model import Model, ModelModifier @@ -39,12 +39,24 @@ def save(self, filepath) -> None: def predict(self, x, *args, **kwargs): raise NotImplementedError() - def evaluate(self, x, y, metrics_dict: Dict[AnyStr, Callable], *args, **kwargs) -> None: + def evaluate(self, x, y, metrics_dict: Dict[str, Callable], *args, **kwargs) -> None: """ - Receives x and y batches. Passes x to the model's predict method along with any args or kwargs needed. + Receives x and y validation sequences. Passes x to the model's predict + method along with any args or kwargs needed. Then updates self.metrics with what functions in `metrics_dict` return. `metrics_dict` should contain names of the metrics and the functions with the interface: - f(true, predicted) -> metric_value + f(true, predicted) -> metric_value, where metric_value is not always a scalar - it can be + an array or a dict, for example a confusion matrix. + + Parameters + ---------- + x: + Input of the model. + y: + Desired output to compare with the values predicted. + metrics_dict: Dict[str, Callable] + Dictionary with functions that, given ground-truth and + predicted values, return metrics. """ preds = self.predict(x, *args, **kwargs) self.metrics.update({key: metrics_dict[key](y, preds) for key in metrics_dict}) @@ -57,4 +69,3 @@ class BasicModelModifier(ModelModifier, BasicModel): """ Interface to unify BasicModel and ModelModifier. """ - pass diff --git a/cascade/models/model.py b/cascade/models/model.py index ce311d32..8880f9e2 100644 --- a/cascade/models/model.py +++ b/cascade/models/model.py @@ -57,7 +57,7 @@ def predict(self, *args, **kwargs): def evaluate(self, *args, **kwargs) -> None: """ - Evaluates model against any metrics. Should not return any values, just populating self.metrics dict. + Evaluates model against any metrics. Should not return any value, just populate self.metrics dict. """ raise NotImplementedError() @@ -104,7 +104,17 @@ def get_meta(self) -> List[Dict]: class ModelModifier(Model): + """ + Analog of dataset's Modifier. Can be used to chain + two models in one. + """ def __init__(self, model: Model, **kwargs): + """ + Parameters + ---------- + model: Model + A model to modify. + """ self._model = model super().__init__(**kwargs) diff --git a/cascade/models/model_line.py b/cascade/models/model_line.py index 29c6d3f6..033c3f3e 100644 --- a/cascade/models/model_line.py +++ b/cascade/models/model_line.py @@ -31,19 +31,19 @@ class ModelLine(Traceable): A line of models is typically models with the same hyperparameters and architecture, but different epochs or using different data. - def __init__(self, folder, model_cls=Model, meta_fmt='.json', **kwargs) -> None: + def __init__(self, folder: str, model_cls=Model, meta_fmt='.json', **kwargs) -> None: """ All models in line should be instances of the same class. Parameters ---------- - folder: - Path to a folder where ModelLine will be created or already was created - if folder does not exist, creates it - model_cls: - A class of models in repo. 
ModelLine uses this class to reconstruct a model - meta_fmt: - Format in which to store meta data. '.json', '.yml' are supported. .json is default. + folder: str + Path to a folder where ModelLine will be created or already was created. + If folder does not exist, creates it + model_cls: type, optional + A class of models in line. ModelLine uses this class to reconstruct a model + meta_fmt: str, optional + Format in which to store metadata. See also -------- cascade.models.ModelRepo """ super().__init__(**kwargs) @@ -96,20 +96,20 @@ def __len__(self) -> int: def save(self, model: Model, only_meta=False) -> None: """ - Saves a model and its metadata to a line folder. + Saves a model and its metadata to a line's folder. Model is automatically assigned a number and a model is saved using Model's method `save` in its own folder. Folder's name is assigned using f'{idx:0>5d}'. For example: 00001 or 00042. The name passed to model's save is just "model" without extension. It is Model's responsibility to correctly assign extension and save its own state. - Additionally, saves ModelLine's meta to the Line's root + Additionally, saves ModelLine's meta to the Line's root. Parameters ---------- - model: cascade.models.Model + model: Model Model to be saved - only_meta: bool + only_meta: bool, optional Flag that indicates whether to save model's binaries. If True, saves only metadata. """ idx = len(self.model_names) diff --git a/cascade/models/model_repo.py b/cascade/models/model_repo.py index 6e53023a..a997ab37 100644 --- a/cascade/models/model_repo.py +++ b/cascade/models/model_repo.py @@ -51,40 +51,41 @@ class ModelRepo(Repo): An interface to manage experiments with several lines of models. When created, initializes an empty folder constituting a repository of model lines. - Stores meta-data in file meta.json in the root folder. With every run if the repo was already + Stores its meta-data in its root folder. 
With every run, if the repo was already created earlier, it updates its meta and logs changes in human-readable format in the file history.log Example ------- >>> from cascade.models import ModelRepo + >>> from cascade.utils import ConstantBaseline >>> repo = ModelRepo('repo', _meta_prefix={'description': 'This is a repo with one line for the example.'}) - >>> vgg16_line = repo.add_line('vgg16', VGG16Model) - >>> vgg16 = VGG16Model() - >>> vgg16.fit() - >>> vgg16_line.save(vgg16) + >>> line = repo.add_line('model', ConstantBaseline) + >>> model = ConstantBaseline(1) + >>> model.fit() + >>> line.save(model) >>> from cascade.models import ModelRepo - >>> repo = ModelRepo('repo', lines=[dict(name='vgg16', model_cls=VGGModel)]) - >>> vgg16 = VGG16Model() - >>> vgg16.fit() - >>> repo['vgg16'].save(vgg16) + >>> from cascade.utils import ConstantBaseline + >>> repo = ModelRepo('repo', lines=[dict(name='constant', model_cls=ConstantBaseline)]) + >>> model = ConstantBaseline() + >>> model.fit() + >>> repo['constant'].save(model) """ - def __init__(self, folder, lines=None, overwrite=False, meta_fmt='.json', **kwargs): + def __init__(self, folder, lines: List[Dict] = None, + overwrite: bool = False, meta_fmt: str = '.json', **kwargs): """ Parameters ---------- folder: Path to a folder where ModelRepo needs to be created or already was created. If folder does not exist, creates it automatically. lines: List[Dict], optional A list with parameters of model lines to add at creation or to initialize (alias for `add_model`). overwrite: bool, optional If True will remove folder that is passed in first argument and start a new repo in that place. meta_fmt: str, optional Extension of repo's metadata files, which will also be assigned to the lines by default. See also -------- cascade.models.ModelLine """ @@ -119,18 +120,17 @@ def _load_lines(self): def add_line(self, name, *args, meta_fmt=None, **kwargs): """ - Adds new line to repo if it doesn't exist and returns it - If line exists, defines it in repo + Adds new line to repo if it doesn't exist and returns it. + If line exists, defines it in repo with parameters provided. Supports all the parameters of ModelLine using args and kwargs. Parameters: name: str Name of the line. It is used to name the line's folder. Repo prepends it with `self._root` before creating. meta_fmt: str Format of meta files. If omitted, inherits format from repo. See also -------- cascade.models.ModelLine """ @@ -214,6 +214,9 @@ def get_meta(self) -> List[Dict]: return meta def reload(self) -> None: + """ + Updates internal state. + """ self._load_lines() self._update_meta() @@ -228,6 +231,9 @@ def __add__(self, repo): return ModelRepoConcatenator([self, repo]) def get_line_names(self) -> List[str]: + """ + Returns list of line names. 
+ """ # TODO: write test covering this return list(self.lines.keys()) @@ -236,7 +242,7 @@ class ModelRepoConcatenator(Repo): """ The class to concatenate different Repos. For the ease of use please, don't use it directly. - Just do repo = repo_1 + repo_2 to unify repos. + Just do `repo = repo_1 + repo_2` to unify two or more repos. """ def __init__(self, repos: Iterable[Repo], *args, **kwargs) -> None: super().__init__(*args, **kwargs) diff --git a/cascade/models/trainer.py b/cascade/models/trainer.py index f2f754e1..c24ebe37 100644 --- a/cascade/models/trainer.py +++ b/cascade/models/trainer.py @@ -19,8 +19,8 @@ def __init__(self, repo: Union[ModelRepo, str], *args, **kwargs) -> None: """ Parameters ---------- - repo: Union[ModelRepo, str] - Either repo or path to it + repo: Union[ModelRepo, str] + Either repo or path to it """ if isinstance(repo, str): self._repo = ModelRepo(repo) @@ -56,10 +56,10 @@ def train(self, train_data: Iterable, test_data: Iterable, *args, - train_kwargs=None, - test_kwargs=None, - epochs=1, - start_from=None, + train_kwargs: Dict = None, + test_kwargs: Dict = None, + epochs: int = 1, + start_from: str = None, **kwargs) -> None: """ Trains, evaluates and saves given model. If specified, loads model from checkpoint. @@ -71,13 +71,13 @@ def train(self, train data to be passed to model's fit() test_data: Iterable test data to be passed to model's evaluate() - train_kwargs: + train_kwargs: Dict, optional arguments for fit() - test_kwargs: + test_kwargs: Dict, optional arguments for evaluate() - the most common is the dict of metrics - epochs: + epochs: int, optional how many times to repeat training on data - start_from: str + start_from: str, optional name of line from which to start, start from the latest model in line """ From 61e6904a025ce836f770d0dc32117f033b7d5849 Mon Sep 17 00:00:00 2001 From: Ilia Moiseev Date: Thu, 6 Oct 2022 11:47:25 +0300 Subject: [PATCH 10/12] Write and fix utils documentation --- cascade/utils/baselines.py | 7 ++- cascade/utils/numpy_wrapper.py | 7 +-- cascade/utils/oversampler.py | 10 ++-- cascade/utils/pa_schema_validator.py | 4 ++ cascade/utils/sk_model.py | 6 +-- cascade/utils/table_dataset.py | 48 +++++++++++--------- cascade/utils/text_classification_dataset.py | 9 +++- cascade/utils/time_series_dataset.py | 38 +++++++++++++--- cascade/utils/torch_model.py | 31 +++++++++++-- cascade/utils/undersampler.py | 8 ++-- 10 files changed, 118 insertions(+), 50 deletions(-) diff --git a/cascade/utils/baselines.py b/cascade/utils/baselines.py index ccbd9262..36339b17 100644 --- a/cascade/utils/baselines.py +++ b/cascade/utils/baselines.py @@ -31,7 +31,12 @@ def __init__(self, constant=None, **kwargs) -> None: def fit(self, x, y, *args, **kwargs) -> None: pass - def predict(self, x, *args, **kwargs): + def predict(self, x, *args, **kwargs) -> np.ndarray: + """ + Returns the array of the same shape as input full of + given constant. + """ + # TODO: make more universal when work with input shape return np.full_like(x, self._constant) def save(self, path) -> None: diff --git a/cascade/utils/numpy_wrapper.py b/cascade/utils/numpy_wrapper.py index f25be255..5004a111 100644 --- a/cascade/utils/numpy_wrapper.py +++ b/cascade/utils/numpy_wrapper.py @@ -14,18 +14,19 @@ limitations under the License. """ +from typing import Dict, List import numpy as np from ..data import Wrapper class NumpyWrapper(Wrapper): """ - A wrapper around .npy files. Loads file on `__init__`. + A wrapper around .npy files. Loads file in `__init__`. 
""" - def __init__(self, path, *args, **kwargs): + def __init__(self, path: str, *args, **kwargs) -> None: self._path = path super().__init__(np.load(path), *args, **kwargs) - def get_meta(self): + def get_meta(self) -> List[Dict]: meta = super().get_meta() meta[0]['root'] = self._path diff --git a/cascade/utils/oversampler.py b/cascade/utils/oversampler.py index 5001bfbf..528f2182 100644 --- a/cascade/utils/oversampler.py +++ b/cascade/utils/oversampler.py @@ -14,14 +14,14 @@ limitations under the License. """ -from ..data import Sampler +from ..data import T, Dataset, Sampler import numpy as np from tqdm import trange class OverSampler(Sampler): """ - Accepts datasets which return tuples of objects and labels. + Accepts datasets which return tuples of objects and labels in the respected order. Isn't lazy - runs through all the items ones to determine key order. Doesn't store values afterwards. @@ -29,7 +29,7 @@ class OverSampler(Sampler): of times needed to make equal distribution. Works for any number of classes. """ - def __init__(self, dataset, *args, **kwargs): + def __init__(self, dataset: Dataset, *args, **kwargs) -> None: labels = [int(dataset[i][1]) for i in trange(len(dataset))] ulabels = np.unique(labels) label_nums, _ = np.histogram(labels, bins=len(ulabels)) @@ -47,12 +47,12 @@ def __init__(self, dataset, *args, **kwargs): super().__init__(dataset, num_samples=ln, *args, **kwargs) - def __getitem__(self, index): + def __getitem__(self, index: int) -> T: if index < len(self._dataset): return self._dataset[index] else: idx = self._add_indices[index - len(self._dataset)] return self._dataset[idx] - def __len__(self): + def __len__(self) -> int: return len(self._dataset) + len(self._add_indices) diff --git a/cascade/utils/pa_schema_validator.py b/cascade/utils/pa_schema_validator.py index 04172b2c..e29081d9 100644 --- a/cascade/utils/pa_schema_validator.py +++ b/cascade/utils/pa_schema_validator.py @@ -19,6 +19,10 @@ def __init__(self, dataset, schema, *args, **kwargs) -> None: Schema of the table in the format that is acceptable by pandera or path to the YAML file with schema. For more details on schemas see pandera's documentation. + + Raises + ------ + DataValidationException """ super().__init__(dataset, *args, func=lambda x: self._validate(x, schema), **kwargs) diff --git a/cascade/utils/sk_model.py b/cascade/utils/sk_model.py index cb94f3c8..4d4d3a2f 100644 --- a/cascade/utils/sk_model.py +++ b/cascade/utils/sk_model.py @@ -38,7 +38,7 @@ def __init__(self, name=None, blocks=None, **kwargs) -> None: ---------- name: str, optional Name of the model - blocks: list + blocks: list, optional List of sklearn transformers to make a pipeline from """ if name is not None: @@ -86,7 +86,7 @@ def predict_proba(self, x, *args, **kwargs): # hash from meta: {meta["md5sum"]}\n \ # hash from .pkl: {file_hash}') - def load(self, path) -> None: + def load(self, path: str) -> None: """ Loads the model from path provided. If no extension, .pkl is added. """ @@ -102,7 +102,7 @@ def load(self, path) -> None: with open(path, 'rb') as f: self._pipeline = pickle.load(f) - def save(self, path) -> None: + def save(self, path: str) -> None: """ Saves model to the path provided. If no extension, then .pkl is added. diff --git a/cascade/utils/table_dataset.py b/cascade/utils/table_dataset.py index ad5b05ed..fa5706bc 100644 --- a/cascade/utils/table_dataset.py +++ b/cascade/utils/table_dataset.py @@ -14,7 +14,7 @@ limitations under the License. 
""" -from typing import List, Dict +from typing import List, Dict, Iterable import pandas as pd from dask import dataframe as dd @@ -24,13 +24,14 @@ class TableDataset(Dataset): """ - Wrapper for `pd.DataFrame`s + Wrapper for `pd.DataFrame`s which allows to manage metadata and perform + validation. """ def __init__(self, *args, t=None, **kwargs): """ Parameters ---------- - t: + t: optional pd.DataFrame or TableDataset to be set as table """ super().__init__(*args, **kwargs) @@ -45,7 +46,7 @@ def __init__(self, *args, t=None, **kwargs): def __getitem__(self, index): """ - Returns row from table by index + Returns a row from table by index """ return self._table.iloc[index] @@ -54,7 +55,7 @@ def __repr__(self): def __len__(self): """ - Return len of the table + Returns length of the table """ return len(self._table) @@ -70,7 +71,8 @@ def get_meta(self) -> List[Dict]: def to_csv(self, path, **kwargs): """ - Saves the table to .csv + Saves the table to .csv file. Any kwargs are sent to + `pd.DataFrame.to_csv`. """ self._table.to_csv(path, **kwargs) @@ -79,14 +81,15 @@ class TableFilter(TableDataset, Modifier): """ Filter for table values """ - def __init__(self, dataset, mask, *args, **kwargs): + def __init__(self, dataset: TableDataset, + mask: Iterable[bool], *args, **kwargs): """ Parameters ---------- dataset: TableDataset - Dataset to be filtered + Dataset to be filtered. mask: Iterable[bool] - Binary mask to select values from table + Binary mask to select values from table. """ super().__init__(dataset, t=dataset._table, *args, **kwargs) init_len = len(dataset) @@ -101,7 +104,7 @@ class CSVDataset(TableDataset): """ def __init__(self, csv_file_path, *args, **kwargs): """ - Passes all args and kwargs to the read_csv + Passes all args and kwargs to `pd.read_csv` Parameters ---------- @@ -115,7 +118,11 @@ def __init__(self, csv_file_path, *args, **kwargs): class PartedTableLoader(Dataset): """ Works like CSVDataset, but uses dask to load tables - and returns partitions on __getitem__ + and returns partitions on `__getitem__`. + + See also + -------- + cascade.utils.CSVDataset """ def __init__(self, csv_file_path, *args, **kwargs): super().__init__(**kwargs) @@ -123,13 +130,13 @@ def __init__(self, csv_file_path, *args, **kwargs): def __getitem__(self, index): """ - Returns partition under the index + Returns partition under the index. """ return self._table.get_partition(index).compute() def __len__(self): """ - The number of partitions + Returns the number of partitions. """ return self._table.npartitions @@ -138,14 +145,13 @@ class TableIterator(Iterator): """ Iterates over the table from path by the chunks. """ - def __init__(self, csv_file_path, *args, chunk_size=1000, **kwargs): + def __init__(self, csv_file_path: str, *args, chunk_size:int = 1000, **kwargs): """ Parameters ---------- - csv_file_path: - path to the .csv file - - chunk_size: int + csv_file_path: str + Path to the .csv file + chunk_size: int, optional number of rows to return in one __next__ """ self.chunk_size = chunk_size @@ -178,13 +184,13 @@ def __len__(self): class NullValidator(TableDataset, AggregateValidator): """ - Checks there are no null values in the table. + Checks that there are no null values in the table. 
""" def __init__(self, dataset: TableDataset, *args, **kwargs) -> None: - super().__init__(dataset, self.check_nulls, + super().__init__(dataset, self._check_nulls, *args, t=dataset._table, **kwargs) - def check_nulls(self, x): + def _check_nulls(self, x): mask = x._table.isnull().values if ~(mask.any()): return True diff --git a/cascade/utils/text_classification_dataset.py b/cascade/utils/text_classification_dataset.py index 41021b87..11c5fa61 100644 --- a/cascade/utils/text_classification_dataset.py +++ b/cascade/utils/text_classification_dataset.py @@ -26,13 +26,15 @@ class TextClassificationDataset(Dataset): Dataset to simplify loading of data for text classification. Texts of different classes should be placed in different folders. """ - def __init__(self, path, encoding='utf-8', *args, **kwargs): + def __init__(self, path: str, encoding: str = 'utf-8', *args, **kwargs): """ Parameters ---------- - path: + path: str Path to the folder with folders of text files. In each folder should be only one class of texts. + encoding: str, optional + Encoding that is used to open files. """ super().__init__(*args, *kwargs) self._encoding = encoding @@ -57,6 +59,9 @@ def __getitem__(self, index): return text, label def __len__(self): + """ + Total number of files. + """ return len(self._paths) def get_meta(self) -> List[Dict]: diff --git a/cascade/utils/time_series_dataset.py b/cascade/utils/time_series_dataset.py index 89c78851..171e429e 100644 --- a/cascade/utils/time_series_dataset.py +++ b/cascade/utils/time_series_dataset.py @@ -14,7 +14,7 @@ limitations under the License. """ -from typing import Iterable +from typing import Iterable, Literal import pendulum from datetime import datetime @@ -35,9 +35,9 @@ def __init__(self, *args, time=None, data=None, **kwargs): """ Parameters ---------- - time: Iterable[datetime] + time: Iterable[datetime], optional The time dimension. Should be represented subclasses of datetime - data: Iterable + data: Iterable, optional The data dimension. Should be 1D array or list. """ if time is not None and data is not None: @@ -72,6 +72,8 @@ def __init__(self, *args, time=None, data=None, **kwargs): def to_numpy(self): """ + Returns only data without time in numpy array format. + Returns ------- data: np.ndarray @@ -92,8 +94,8 @@ def get_data(self): """ Returns ------- - data: tuple(time, data) - (time as it is and data as np.array) + data: tuple + Time and data as np.array """ return self._time, self.to_numpy() @@ -153,8 +155,23 @@ def __len__(self): class Average(TimeSeriesDataset, Modifier): + """ + Averages values over some time step. + """ def __init__(self, dataset: TimeSeriesDataset, - unit='years', amount=1, *args, **kwargs): + unit: str = 'years', + amount=1, *args, **kwargs): + """ + Parameters + ---------- + dataset: TimeSeriesDataset, + A dataset to average + unit: str, optional + Time unit over which to average - years, month, etc. + amount: + The amount of units over which to average. For example for six month periods use + `unit='months'` and `amount=6`. + """ time, data = dataset.get_data() reg_time = [d for d in pendulum .period(time[0], time[-1]) @@ -180,6 +197,9 @@ def _avg(arr, arr_dates, dates): class Interpolate(TimeSeriesDataset, Modifier): + """ + The wrapper around pd.Series.interpolate. 
+ """ def __init__(self, dataset, method='linear', limit_direction='both', **kwargs): t = dataset.to_pandas() @@ -190,6 +210,12 @@ def __init__(self, dataset, method='linear', class Align(TimeSeriesDataset, Modifier): + """ + Given dataset and some time scale selects + data from dataset using time scale. Works + only if dataset has data in given points + in time. + """ def __init__(self, dataset, time, *args, **kwargs): super().__init__(dataset, time=time, data=dataset[time], *args, **kwargs) diff --git a/cascade/utils/torch_model.py b/cascade/utils/torch_model.py index 2ddcca43..bc5142fc 100644 --- a/cascade/utils/torch_model.py +++ b/cascade/utils/torch_model.py @@ -14,28 +14,49 @@ limitations under the License. """ +from typing import Dict, List import torch -from typing import ClassVar from ..models import Model class TorchModel(Model): - def __init__(self, model_class: ClassVar, *args, **kwargs) -> None: + """ + The wrapper around `nn.Module`s. + """ + def __init__(self, model_class: type, *args, **kwargs) -> None: + """ + Parameters + ---------- + model_class: type + The class created when new nn.Module was defined. Will be used + to construct model. If any arguments needed, please pass them + into `args` and `kwargs`. + """ self._model = model_class(*args, **kwargs) super().__init__(*args, **kwargs) def predict(self, *args, **kwargs): + """ + Calls internal module with whatever arguments. + """ return self._model(*args, **kwargs) - def save(self, path, *args, **kwargs) -> None: + def save(self, path: str, *args, **kwargs) -> None: + """ + Saves the model using `torch.save`. + """ with open(path, 'wb') as f: + # TODO: pass args and kwargs torch.save(self._model, f) - def load(self, path, *args, **kwargs) -> None: + def load(self, path: str, *args, **kwargs) -> None: + """ + Loads the model using `torch.load`. + """ with open(path, 'rb') as f: self._model = torch.load(f) - def get_meta(self): + def get_meta(self) -> List[Dict]: meta = super().get_meta() meta[0]['module'] = repr(self._model) return meta diff --git a/cascade/utils/undersampler.py b/cascade/utils/undersampler.py index 7092ea32..b61e0551 100644 --- a/cascade/utils/undersampler.py +++ b/cascade/utils/undersampler.py @@ -14,7 +14,7 @@ limitations under the License. """ -from ..data import Sampler +from ..data import T, Dataset, Sampler from numpy import unique, min, histogram from tqdm import trange @@ -23,13 +23,13 @@ class UnderSampler(Sampler): """ Accepts datasets which return tuples of objects and labels. Isn't lazy - runs through all the items ones to determine key order. - Doesn't store values afterwards. + Doesn't store values in memory afterwards. To undersample it removes items of majority class for the amount of times needed to make equal distribution. Works for any number of classes. 
""" - def __init__(self, dataset): + def __init__(self, dataset: Dataset) -> None: labels = [int(dataset[i][1]) for i in trange(len(dataset))] ulabels = unique(labels) label_nums, _ = histogram(labels, bins=len(ulabels)) @@ -46,7 +46,7 @@ def __init__(self, dataset): print(f'Original length was {len(dataset)} and new is {ln}') super().__init__(dataset, ln) - def __getitem__(self, index): + def __getitem__(self, index: int) -> T: idx = self._rem_indices[index] return self._dataset[idx] From fbc793b988b6935e5e9ec078e41f8125429c58c7 Mon Sep 17 00:00:00 2001 From: Ilia Moiseev Date: Thu, 6 Oct 2022 12:05:39 +0300 Subject: [PATCH 11/12] Update versions of requirements, make them more compatible --- requirements.txt | 6 +++--- setup.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2bafcdbf..83178784 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ tqdm>=4.64.1 -numpy>=1.23.3 -pandas>=1.4.2 +numpy>=1.18.5 +pandas>=1.4.0 deepdiff>=5.8.0 pendulum>=2.1.2 plotly>=5.7.0 flatten_json>=0.1.13 -pyyaml>=6.0 +pyyaml>=5.4.1 diff --git a/setup.py b/setup.py index 73ef0890..10b5d010 100644 --- a/setup.py +++ b/setup.py @@ -27,12 +27,12 @@ python_requires=">=3.8", install_requires=[ 'tqdm>=4.64.1', - 'numpy>=1.23.3', - 'pandas>=1.4.2', + 'numpy>=1.18.5', + 'pandas>=1.4.0', 'deepdiff>=5.8.0', 'pendulum>=2.1.2', 'plotly>=5.7.0', 'flatten_json>=0.1.13', - 'pyyaml>=6.0' + 'pyyaml>=5.4.1' ] ) From 187fd03e16fa2f793928d9ecd93e1b22c5dfdb50 Mon Sep 17 00:00:00 2001 From: Ilia Moiseev Date: Thu, 6 Oct 2022 12:28:26 +0300 Subject: [PATCH 12/12] Bump version --- cascade/__init__.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cascade/__init__.py b/cascade/__init__.py index a1bcd6ab..3c3e69c1 100644 --- a/cascade/__init__.py +++ b/cascade/__init__.py @@ -15,7 +15,7 @@ """ -__version__ = '0.7.2' +__version__ = '0.7.3' __author__ = 'Ilia Moiseev' __author_email__ = 'ilia.moiseev.5@yandex.ru' diff --git a/setup.py b/setup.py index 10b5d010..ddd971fe 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setuptools.setup( name="cascade-ml", - version='0.7.2', + version='0.7.3', author='Ilia Moiseev', author_email='ilia.moiseev.5@yandex.ru', license='Apache License 2.0',