docs: correct docstring style checks

cytomining · Apr 3, 2024 · 059267f · 059267f
1 parent eb8a886
commit 059267f
Show file tree

Hide file tree

Showing 22 changed files with 76 additions and 48 deletions.
diff --git a/pycytominer/cyto_utils/DeepProfiler_processing.py b/pycytominer/cyto_utils/DeepProfiler_processing.py
@@ -211,7 +211,7 @@ def setup_aggregate(self):
 
     def aggregate_deep(self):
         """
-        Main function of this class. Aggregates the profiles into a pandas dataframe.
+        Aggregate the DeepProfiler profiles into a pandas dataframe.
 
         For each key in file_aggregate, the profiles are loaded, concatenated and then aggregated.
         If files are missing, we throw a warning but continue the code.
@@ -303,7 +303,9 @@ def aggregate_deep(self):
 
 
 class SingleCellDeepProfiler:
-    """This class holds functions needed to analyze single cells from the DeepProfiler (DP) run. Only pycytominer.normalization() is implemented.
+    """Class that holds functions needed to analyze single cells from the DeepProfiler (DP) run.
+
+    Only pycytominer.normalization() is implemented.
 
     Attributes
     ----------
@@ -352,9 +354,9 @@ def __init__(
     def get_single_cells(
         self, output=False, location_x_col_index=0, location_y_col_index=1
     ):
-        """
-        Sets up the single_cells attribute or output as a variable. This is a helper function to normalize_deep_single_cells().
-        single_cells is a pandas dataframe in the format expected by pycytominer.normalize().
+        """Set up a single_cells dataframe in the format expected by pycytominer.normalize().
+
+        Helper function to normalize_deep_single_cells() that sets up the single_cells attribute or outputs it as a dataframe.
 
         Arguments
         -----------
@@ -409,7 +411,7 @@ def normalize_deep_single_cells(
         spherize_epsilon=1e-6,
     ):
         """
-        Normalizes all cells into a pandas dataframe.
+        Normalize all cells into a pandas dataframe.
 
         For each file in the DP project features folder, the features from each cell are loaded.
         These features are put into a profiles dataframe for use in pycytominer.normalize.

diff --git a/pycytominer/cyto_utils/__init__.py b/pycytominer/cyto_utils/__init__.py
@@ -1,3 +1,5 @@
+"""A variety of utility functions for working with cytominer data."""
+
 from .output import output
 from .util import (
     check_compartments,

diff --git a/pycytominer/cyto_utils/annotate_custom.py b/pycytominer/cyto_utils/annotate_custom.py
@@ -1,3 +1,5 @@
+"""Functions to annotate data frames with custom options according to CMAP specifications."""
+
 import numpy as np
 
 

diff --git a/pycytominer/cyto_utils/cell_locations.py b/pycytominer/cyto_utils/cell_locations.py
@@ -12,8 +12,7 @@
 
 
 class CellLocation:
-    """This class holds all the functions augment a metadata file with X,Y
-    locations of cells in each image.
+    """Class holding all the functions to augment a metadata file with X,Y locations of cells in each image.
 
     In the metadata file, which is either a CSV or a Parquet file,
     - Each row is a single multi-channel image
@@ -376,6 +375,7 @@ def _load_single_cell(self):
 
     def add_cell_location(self):
         """Add the X,Y locations of all cells to the metadata file in the corresponding row, packed into a single column.
+
         Optionally, save the augmented metadata file as a Parquet file.
 
         Returns

diff --git a/pycytominer/cyto_utils/cell_locations_cmd.py b/pycytominer/cyto_utils/cell_locations_cmd.py
@@ -1,3 +1,5 @@
+"""CLI for cell location calculations."""
+
 from pycytominer.cyto_utils.cell_locations import CellLocation
 import fire
 

diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py
@@ -1,3 +1,5 @@
+"""Module containing the SingleCells class, which is used to interact with single cell morphological profiles."""
+
 from typing import Dict, Union, Optional
 
 import numpy as np
@@ -25,8 +27,7 @@
 
 
 class SingleCells:
-    """This is a class to interact with single cell morphological profiles. Interaction
-    includes aggregation, normalization, and output.
+    """Class to interact with single cell morphological profiles including aggregation, normalization, and output.
 
     Attributes
     ----------
@@ -115,7 +116,7 @@ def __init__(
         object_feature="Metadata_ObjectNumber",
         default_datatype_float=np.float64,
     ):
-        """Constructor method."""
+        """Construct a SingleCells object."""
         # Check compartments specified
         check_compartments(compartments)
 
@@ -179,7 +180,7 @@ def __init__(
             self.load_image(image_table_name=self.image_table_name)
 
     def _check_subsampling(self):
-        """Internal method checking if subsampling options were specified correctly.
+        """Check if subsampling options were specified correctly.
 
         Returns
         -------
@@ -192,7 +193,7 @@ def _check_subsampling(self):
         ), "Do not set both subsample_frac and subsample_n"
 
     def set_output_file(self, output_file):
-        """Setting operation to conveniently rename output file.
+        """Set or modify output file.
 
         Parameters
         ----------
@@ -207,7 +208,7 @@ def set_output_file(self, output_file):
         self.output_file = output_file
 
     def set_subsample_frac(self, subsample_frac):
-        """Setting operation to conveniently update the subsample fraction.
+        """Set or update the subsample fraction.
 
         Parameters
         ----------
@@ -223,7 +224,7 @@ def set_subsample_frac(self, subsample_frac):
         self._check_subsampling()
 
     def set_subsample_n(self, subsample_n):
-        """Setting operation to conveniently update the subsample n.
+        """Set or update the subsample n.
 
         Parameters
         ----------
@@ -242,7 +243,7 @@ def set_subsample_n(self, subsample_n):
         self._check_subsampling()
 
     def set_subsample_random_state(self, random_state):
-        """Setting operation to conveniently update the subsample random state.
+        """Set or update the subsample random state.
 
         Parameters
         ----------
@@ -435,7 +436,7 @@ def split_column_categories(self, col_names):
         return meta_cols, feat_cols
 
     def load_compartment(self, compartment):
-        """Creates the compartment dataframe.
+        """Create the compartment dataframe.
 
         Note: makes use of default_datatype_float attribute
         for setting a default floating point datatype.
@@ -590,8 +591,7 @@ def _compartment_df_generator(
         compartment,
         n_aggregation_memory_strata=1,
     ):
-        """A generator function that returns chunks of the entire compartment
-        table from disk.
+        """Yield chunks of the entire compartment table from disk.
 
         We want to return dataframes with all compartment entries within unique
         combinations of self.merge_cols when aggregated by self.strata
@@ -881,9 +881,7 @@ def aggregate_profiles(
 
 
 def _sqlite_strata_conditions(df, dtypes, n=1):
-    """Given a dataframe where columns are merge_cols and rows are unique
-    value combinations that appear as aggregation strata, return a list
-    of strings which constitute valid SQLite conditional statements.
+    """Construct a list of strings which constitute valid SQLite conditional statements.
 
     Parameters
     ----------

diff --git a/pycytominer/cyto_utils/collate.py b/pycytominer/cyto_utils/collate.py
@@ -1,3 +1,5 @@
+"""Module that provides functions for collating CellProfiler-created CSVs into a single SQLite file."""
+
 import os
 import pathlib
 import subprocess

diff --git a/pycytominer/cyto_utils/collate_cmd.py b/pycytominer/cyto_utils/collate_cmd.py
@@ -1,3 +1,5 @@
+"""Command line interface for collate function in pycytominer.cyto_utils.collate."""
+
 import argparse
 from pycytominer.cyto_utils.collate import collate
 

diff --git a/pycytominer/cyto_utils/features.py b/pycytominer/cyto_utils/features.py
@@ -179,7 +179,7 @@ def drop_outlier_features(
 
 
 def convert_compartment_format_to_list(compartments):
-    """Converts compartment to a list.
+    """Convert cell painting compartments to a list.
 
     Parameters
     ----------

diff --git a/pycytominer/cyto_utils/load.py b/pycytominer/cyto_utils/load.py
@@ -1,3 +1,5 @@
+"""Module for loading data from various file formats."""
+
 import csv
 import gzip
 import pathlib
@@ -7,7 +9,7 @@
 
 
 def is_path_a_parquet_file(file: Union[str, pathlib.PurePath]) -> bool:
-    """Checks if the provided file path is a parquet file.
+    """Check if the provided file path is a parquet file.
 
     Identify parquet files by inspecting the file extensions.
     If the file does not end with `parquet`, this will return False, else True.

diff --git a/pycytominer/cyto_utils/modz.py b/pycytominer/cyto_utils/modz.py
@@ -1,3 +1,5 @@
+"""Module for performing a modified z score transformation."""
+
 import numpy as np
 from pycytominer.cyto_utils.util import (
     get_pairwise_correlation,

diff --git a/pycytominer/cyto_utils/single_cell_ingest_utils.py b/pycytominer/cyto_utils/single_cell_ingest_utils.py
@@ -1,3 +1,5 @@
+"""Utility functions for single cell ingest."""
+
 from collections import Counter
 from pycytominer.cyto_utils import get_default_compartments
 
@@ -75,8 +77,7 @@ def assert_linking_cols_complete(linking_cols="default", compartments="default")
 
 
 def provide_linking_cols_feature_name_update(linking_cols="default"):
-    """Output a dictionary to use to update pandas dataframe column names. The linking
-    cols must be Metadata.
+    """Output a dictionary to use to update pandas dataframe column names from linking cols in the Metadata.
 
     Parameters
     ----------

diff --git a/pycytominer/cyto_utils/util.py b/pycytominer/cyto_utils/util.py
@@ -14,7 +14,7 @@
 
 
 def get_default_compartments():
-    """Returns default compartments.
+    """Return default compartments.
 
     Returns
     -------
@@ -26,7 +26,7 @@ def get_default_compartments():
 
 
 def check_compartments(compartments):
-    """Checks if the input compartments are noncanonical compartments.
+    """Check if the input compartments are noncanonical compartments.
 
     Parameters
     ----------
@@ -56,13 +56,13 @@ def check_compartments(compartments):
 
 
 def load_known_metadata_dictionary(metadata_file=default_metadata_file):
-    """From a tab separated text file (two columns: ["compartment", "feature"]), load
-    previously known metadata columns per compartment.
+    """Load previously known metadata columns per compartment from metadata text file.
 
     Parameters
     ----------
     metadata_file : str, optional
-        File location of the metadata text file. Uses a default dictionary if you do not specify.
+        File location of the metadata text file which should be a tab-separated file with two columns: ["compartment", "feature"].
+        If not provided, the default metadata file will be used.
 
     Returns
     -------

diff --git a/pycytominer/cyto_utils/write_gct.py b/pycytominer/cyto_utils/write_gct.py
@@ -1,4 +1,5 @@
-"""
+"""Module to write a gct file from a pandas DataFrame.
+
 Transform profiles into a gct (Gene Cluster Text) file
 A gct is a tab-delimited text file that traditionally stores gene expression data
 File Format Description: https://clue.io/connectopedia/gct_format.

diff --git a/pycytominer/feature_select.py b/pycytominer/feature_select.py
@@ -35,7 +35,7 @@ def feature_select(
     noise_removal_perturb_groups=None,
     noise_removal_stdev_cutoff=None,
 ):
-    """Performs feature selection based on the given operation.
+    """Perform feature selection based on the given operation.
 
     Parameters
     ----------

diff --git a/pycytominer/operations/__init__.py b/pycytominer/operations/__init__.py
@@ -1,3 +1,5 @@
+"""Module containing statistical operations for data processing."""
+
 from .correlation_threshold import correlation_threshold
 from .get_na_columns import get_na_columns
 from .noise_removal import noise_removal

diff --git a/pycytominer/operations/correlation_threshold.py b/pycytominer/operations/correlation_threshold.py
@@ -1,5 +1,6 @@
-"""
-Returns list of features such that no two features have a correlation greater than a
+"""Module for correlation threshold operation.
+
+The correlation threshold operation returns a list of features such that no two features have a correlation greater than a
 specified threshold.
 """
 
@@ -79,8 +80,9 @@ def correlation_threshold(
 
 
 def determine_high_cor_pair(correlation_row, sorted_correlation_pairs):
-    """Select highest correlated variable given a correlation row with columns:
-    ["pair_a", "pair_b", "correlation"]. For use in a pandas.apply().
+    """Select highest correlated variable given a correlation row.
+
+    From a row with columns: ["pair_a", "pair_b", "correlation"]. For use in a pandas.apply().
 
     Parameters
     ----------

diff --git a/pycytominer/operations/get_na_columns.py b/pycytominer/operations/get_na_columns.py
@@ -1,4 +1,5 @@
-"""
+"""Function to get columns with NA values above a certain threshold.
+
 Remove variables with specified threshold of NA values
 Note: This was called `drop_na_columns` in cytominer for R.
 """

diff --git a/pycytominer/operations/noise_removal.py b/pycytominer/operations/noise_removal.py
@@ -10,7 +10,7 @@ def noise_removal(
     samples="all",
     noise_removal_stdev_cutoff=0.8,
 ):
-    """
+    """Remove features with excessive standard deviation within the same perturbation group.
 
     Parameters
     ----------

diff --git a/pycytominer/operations/transform.py b/pycytominer/operations/transform.py
@@ -13,9 +13,9 @@
 
 
 class Spherize(BaseEstimator, TransformerMixin):
-    """Class to apply a sphering transform (aka whitening) data in the base sklearn
-    transform API. Note, this implementation is modified/inspired from the following
-    sources:
+    """Class to apply a sphering transform (aka whitening) to data in the base sklearn transform API.
+
+    This implementation is modified/inspired from the following sources:
     1) A custom function written by Juan C. Caicedo
     2) A custom ZCA function at https://github.com/mwv/zca
     3) Notes from Niranj Chandrasekaran (https://github.com/cytomining/pycytominer/issues/90)
@@ -33,7 +33,8 @@ class Spherize(BaseEstimator, TransformerMixin):
     """
 
     def __init__(self, epsilon=1e-6, center=True, method="ZCA", return_numpy=False):
-        """
+        """Construct a Spherize object.
+
         Parameters
         ----------
         epsilon : float, default 1e-6

diff --git a/pycytominer/operations/variance_threshold.py b/pycytominer/operations/variance_threshold.py
@@ -1,5 +1,5 @@
-"""
-Remove variables with near-zero variance.
+"""Remove variables with near-zero variance.
+
 Modified from caret::nearZeroVar().
 """
 
@@ -79,7 +79,6 @@ def variance_threshold(
 
 def calculate_frequency(feature_column, freq_cut):
     """Calculate frequency of second most common to most common feature.
-    Used in pandas.apply().
 
     Parameters
     ----------

diff --git a/pyproject.toml b/pyproject.toml
@@ -118,6 +118,9 @@ target-version = "py38"
 line-length = 88
 fix = true
 extend-include = ["*.ipynb"]
+exclude = [
+    "walkthroughs/nbconverted/*",
+]
 
 [tool.ruff.lint]
 select = [
@@ -170,8 +173,12 @@ convention = "numpy"
 [tool.ruff.lint.per-file-ignores]
 # Ignore `E402` and `F401` (unused imports) in all `__init__.py` files
 "__init__.py" = ["E402", "F401"]
-# Ignore assert statements in tests
-"tests/*" = ["S101"]
+"tests/*" = [
+    # Allow assert statements in tests
+    "S101",
+    # Disable docstring checks in tests
+    "D",
+]
 "pycytominer/cyto_utils/*" = [
     # I (isort) is ignored due to circular dependencies in the cyto_utils module
     "I",
Original file line number	Diff line number	Diff line change
		@@ -1,3 +1,5 @@
		"""Functions to annotate data frames with custom options according to CMAP specifications."""

		import numpy as np


Expand Down