Improve documentation

lverweijen · Jan 13, 2025 · 37baf23 · 37baf23
1 parent ba4a6f9
commit 37baf23
Show file tree

Hide file tree

Showing 8 changed files with 143 additions and 44 deletions.
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -19,12 +19,20 @@
     'sphinx.ext.githubpages',
     'sphinx.ext.viewcode',
     'myst_parser',
+    'sphinxcontrib.mermaid',
 ]
 
 templates_path = ['_templates']
 exclude_patterns = []
 
-
+# Make sure __init__ is always documented.
+autodoc_default_options = {
+    # 'members': True,
+    'member-order': 'bysource',
+    'special-members': '__init__',
+    # 'undoc-members': True,
+    # 'exclude-members': '__weakref__'
+}
 
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

diff --git a/docs/source/hierarchies.md b/docs/source/hierarchies.md
@@ -0,0 +1,84 @@
+# Hierarchies #
+
+For explanatory variables, it is recommended to supply a hierarchy.
+There are 3 kinds of hierarchy supported by PiArgus.
+
+## FlatHierarchy ##
+
+This is the default if no hierarchy is supplied.
+All labels are of the same level with a single total.
+
+```python
+import piargus as pa
+
+datacol = ["A", "B", "C", "B", "A"]
+hierarchy = pa.FlatHierarchy(total_code="Total")
+```
+
+```{mermaid}
+graph LR;
+Total --> A;
+Total --> B;
+Total --> C;
+```
+
+## LevelHierarchy ##
+
+A level hierarchy is useful when the hierarchy is encoded within the code itself.
+
+```python
+import piargus as pa
+
+datacol = ["11123", "11234", "23456"]
+hierarchy = pa.LevelHierarchy(levels=[2, 3], total_code="Total")
+```
+
+```{mermaid}
+graph LR;
+Total --> 11;
+Total --> 23;
+11 --> 123;
+11 --> 234;
+23 --> 456;
+```
+
+## TreeHierarchy ##
+
+For complex hierarchies, a TreeHierarchy can be used.
+These are typically stored in a hrc-file.
+
+```python
+import piargus as pa
+
+datacol = ["PV20", "PV21", "PV22"]
+hierarchy = pa.TreeHierarchy.from_hrc("provinces.hrc", total_code="NL01")
+```
+
+```{mermaid}
+graph LR;
+NL01 --> LD01;
+NL01 --> LD02;
+LD01 --> PV20;
+LD01 --> PV21;
+LD02 --> PV22;
+```
+
+The file provinces.hrc may look like this:
+```text
+LD01
+@PV20
+@PV21
+LD02
+@PV22
+```
+
+It can also be created programmatically:
+```python
+import piargus as pa
+
+hierarchy = pa.TreeHierarchy(total_code="NL01")
+hierarchy.create_node(["NL01", "LD01", "PV20"])
+hierarchy.create_node(["NL01", "LD01", "PV21"])
+hierarchy.create_node(["NL01", "LD02", "PV22"])
+hierarchy.to_hrc('provinces.hrc')
+```
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -16,8 +16,7 @@ It's also recommended to read the `TauArgus manual <https://research.cbs.nl/casc
    :maxdepth: 2
    :caption: Contents:
 
-   installation
-   tutorial
+   userguide
    api
    changes
 

diff --git a/docs/source/userguide.rst b/docs/source/userguide.rst
@@ -0,0 +1,10 @@
+User guide
+----------
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   installation
+   tutorial
+   hierarchies
diff --git a/src/piargus/inputspec/microdata.py b/src/piargus/inputspec/microdata.py
@@ -30,10 +30,11 @@ def __init__(
         :param weight: Column that contains the sampling weight of this record.
         :param request: Column that indicates if a respondent asks for protection.
         :param request_values: Parameters that indicate if request is asked.
-        Two different request values can be specified for two different levels in the request_rule.
+            Two different request values can be specified for two different levels in the request_rule.
         :param holding: Column containing the group identifier.
         :param args: See InputData.
         :param kwargs: See InputData.
+
         See the Tau-Argus documentation for more details on these parameters.
         """
         super().__init__(dataset, **kwargs)

diff --git a/src/piargus/inputspec/tabledata.py b/src/piargus/inputspec/tabledata.py
@@ -58,14 +58,14 @@ def __init__(
         :param total_codes: Codes within explanatory that are used for the totals.
         :param frequency: Column containing number of contributors to this cell.
         :param top_contributors: The columns containing top contributions for dominance rule.
-        The columns should be in the same order as they appear in the dataset.
-        The first of the these columns should describe the highest contribution,
-        the second column the second-highest contribution.
+            The columns should be in the same order as they appear in the dataset.
+            The first of these columns should describe the highest contribution,
+            the second column the second-highest contribution.
         :param lower_protection_level: Column that denotes the level below which values are unsafe.
         :param upper_protection_level: Column that denotes the level above which values are unsafe.
         :param status_indicator: Column indicating the status of cells.
         :param status_markers: The meaning of each status.
-        Should be dictionary mapping "SAFE", "UNSAFE" and "STATUS" to a code indicating status.
+            Should be dictionary mapping "SAFE", "UNSAFE" and "STATUS" to a code indicating status.
         :param kwargs: See InputData
         """
 

diff --git a/src/piargus/job.py b/src/piargus/job.py
@@ -39,15 +39,14 @@ def __init__(
         the existing file is used. If modifications are made to the metadata, then the user
         should call metadata.to_rda() first.
 
-        :param input_data: The source from which to generate tables. Needs to be either
-        MicroData or TableData.
+        :param input_data: The source from which to generate tables.
+            Needs to be either MicroData or TableData.
         :param tables: The tables to be generated. Can be omitted if input_data is TableData.
         :param metadata: The metadata of input_data. If omitted, it will be derived from input_data.
         :param linked_suppress_method: Method to use for linked suppression.
-        Options are:
-        - `GHMITER` ("GH"): Hypercube
-        - `MODULAR` ("MOD"): Modular
-        Warning: The Tau-Argus manual doesn't document this. Therefore, usage is not recommended.
+            Options are:
+                * `GHMITER` ("GH"): Hypercube
+                * `MODULAR` ("MOD"): Modular
         :param linked_suppress_method_args: Parameters to pass to suppress_method.
         :param directory: Where to write tau-argus files.
         :param name: Name from which to derive the name of some temporary files.

diff --git a/src/piargus/outputspec/table.py b/src/piargus/outputspec/table.py
@@ -31,42 +31,40 @@ def __init__(
         """
         Create a new Table
 
-        Parameters:
         :param explanatory: List of background variables that explain the response.
-        Will be set as a Dataframe-index.
+            Will be set as a Dataframe-index.
         :param response: The column that needs to be explained.
         :param shadow: The column that is used for the safety rules. Default: response.
         :param cost: The column that contains the cost of suppressing a cell.
-        Set to 1 to minimise the number of cells suppressed (although this might suppress totals).
-        Default: response.
-        :param labda: If set to a value > 0, a box-cox transformation is applied on the cost
-        variable.
-        If set to 0, a log transformation is applied on the cost.
-        Default: 1.
+            Set to 1 to minimise the number of cells suppressed (although this might suppress totals).
+            Default: response.
+        :param labda: If set to a value > 0, a box-cox transformation is applied on the cost variable.
+            If set to 0, a log transformation is applied on the cost.
+            Default: 1.
         :param safety_rule: A set of safety rules on individual level.
-        Can be supplied as:
-        - str where parts are separated by |
-        - A sequence of parts
-        - A dict with keys {"individual": x "holding": y} with separate rules on individual and
-        holding level .
-        Each part can be:
-        - "P(p, n=1)": p% rule
-        - "NK(n, k)": (n, k)-dominance rule
-        - "ZERO(safety_range)": Zero rule
-        - "FREQ(minfreq, safety_range)": Frequency rule
-        - "REQ(percentage_1, percentage_2, safety_margin)": Request rule
-        See the Tau-Argus manual for details on those rules.
-        :param apriori: Apriori file to change parameters
+            Can be supplied as:
+                * str where parts are separated by |
+                * A sequence of parts
+                * A dict with keys {"individual": x, "holding": y} with separate rules on individual and
+                  holding level.
+            Each part can be:
+                * "P(p, n=1)": p% rule
+                * "NK(n, k)": (n, k)-dominance rule
+                * "ZERO(safety_range)": Zero rule
+                * "FREQ(minfreq, safety_range)": Frequency rule
+                * "REQ(percentage_1, percentage_2, safety_margin)": Request rule
+            See the Tau-Argus manual for details on those rules.
+        :param apriori: Apriori file to change parameters.
         :param suppress_method: Method to use for secondary suppression.
-        Options are:
-        - `GHMITER` ("GH"): Hypercube
-        - `MODULAR` ("MOD"): Modular
-        - `OPTIMAL` ("OPT"): Optimal [default]
-        - `NETWORK` ("NET"): Network
-        - `ROUNDING` ("RND"): Controlled rounding
-        - `TABULAR_ADJUSTMENT` ("CTA"): Controlled Tabular Adjustment
-        - None: No secondary suppression is applied
-        See the Tau-Argus manual for details on those rules.
+            Options are:
+                * `GHMITER` ("GH"): Hypercube
+                * `MODULAR` ("MOD"): Modular
+                * `OPTIMAL` ("OPT"): Optimal [default]
+                * `NETWORK` ("NET"): Network
+                * `ROUNDING` ("RND"): Controlled rounding
+                * `TABULAR_ADJUSTMENT` ("CTA"): Controlled Tabular Adjustment
+                * None: No secondary suppression is applied
+            See the Tau-Argus manual for details on those rules.
         :param suppress_method_args: Parameters to pass to suppress_method.
         """