From fa3b9ee7340aa43bd7964595b2c3ee6dd74eba5f Mon Sep 17 00:00:00 2001 From: Gleb Levitski <36483986+glevv@users.noreply.github.com> Date: Sun, 24 Nov 2024 14:59:24 +0200 Subject: [PATCH 1/2] updated readme and changed location --- README.md | 2 + src/obscure_stats/kurtosis/__init__.py | 4 -- src/obscure_stats/kurtosis/kurtosis.py | 72 -------------------------- src/obscure_stats/skewness/__init__.py | 4 ++ src/obscure_stats/skewness/skewness.py | 72 ++++++++++++++++++++++++++ tests/test_dispersion.py | 4 +- tests/test_kurtosis.py | 20 +------ tests/test_skewness.py | 21 +++++++- tests/test_variation.py | 4 +- 9 files changed, 103 insertions(+), 100 deletions(-) diff --git a/README.md b/README.md index e9c6fbb..92f253f 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ * Dispersion Ratio; * Fisher's Index of Dispersion; * Gini Mean Difference; + * Grenander's Mode; * Linear Coefficient of Variation; * Inter-expectile Range; * Morisita Index of Dispersion; @@ -58,6 +59,7 @@ * Moors Kurtosis; * Moors Octile Kurtosis; * Reza-Ma Kurtosis; + * Schmid-Trede measure of Peakedness; * Staudte Kurtosis. 
- Collection of measures of association - `obscure_stats/association`: * Blomqvist's Beta; diff --git a/src/obscure_stats/kurtosis/__init__.py b/src/obscure_stats/kurtosis/__init__.py index 73c823e..78d293f 100644 --- a/src/obscure_stats/kurtosis/__init__.py +++ b/src/obscure_stats/kurtosis/__init__.py @@ -4,11 +4,9 @@ crow_siddiqui_kurt, hogg_kurt, l_kurt, - left_quantile_weight, moors_kurt, moors_octile_kurt, reza_ma_kurt, - right_quantile_weight, schmid_trede_peakedness, staudte_kurt, ) @@ -17,11 +15,9 @@ "crow_siddiqui_kurt", "hogg_kurt", "l_kurt", - "left_quantile_weight", "moors_kurt", "moors_octile_kurt", "reza_ma_kurt", - "right_quantile_weight", "schmid_trede_peakedness", "staudte_kurt", ] diff --git a/src/obscure_stats/kurtosis/kurtosis.py b/src/obscure_stats/kurtosis/kurtosis.py index 55d9b44..e1b00e7 100644 --- a/src/obscure_stats/kurtosis/kurtosis.py +++ b/src/obscure_stats/kurtosis/kurtosis.py @@ -232,75 +232,3 @@ def schmid_trede_peakedness(x: np.ndarray) -> float: """ p125, p25, p75, p875 = np.nanquantile(x, [0.125, 0.25, 0.75, 0.875]) return (p875 - p125) / (p75 - p25) - - -def left_quantile_weight(x: np.ndarray, q: float = 0.25) -> float: - """Calculate left quantile weight (LQW). - - It is based on inter-percentile ranges (uncentered, unscaled) of the - left tail of the distribution. - - Parameters - ---------- - x : array_like - Input array. - q : float - Quantile to use for the anchor. - - Returns - ------- - lqw : float - The value of left quantile weight. - - References - ---------- - Brys, G.; Hubert, M.; Struyf, A. (2006). - Robust measures of tail weight. - Computational Statistics and Data Analysis 50(3), 733-759. - """ - min_q, max_q = 0.0, 0.5 - if q <= min_q or q >= max_q: - msg = "Parameter q should be in range (0, 0.5)." 
- raise ValueError(msg) - lower_quantile, q025, upper_quantile = np.nanquantile( - x, [q * 0.5, 0.25, (1 - q) * 0.5] - ) - return -(upper_quantile + lower_quantile - 2 * q025) / ( - upper_quantile - lower_quantile - ) - - -def right_quantile_weight(x: np.ndarray, q: float = 0.75) -> float: - """Calculate right quantile weight (RQW). - - It is based on inter-percentile ranges (uncentered, unscaled) of the - right tail of the distribution. - - Parameters - ---------- - x : array_like - Input array. - q : float - Quantile to use for the anchor. - - Returns - ------- - rqw : float - The value of right quantile weight. - - References - ---------- - Brys, G.; Hubert, M.; Struyf, A. (2006). - Robust measures of tail weight. - Computational Statistics and Data Analysis 50(3), 733-759. - """ - min_q, max_q = 0.5, 1.0 - if q <= min_q or q >= max_q: - msg = "Parameter q should be in range (0.5, 1.0)." - raise ValueError(msg) - lower_quantile, q075, upper_quantile = np.nanquantile( - x, [1 - q * 0.5, 0.75, (1 + q) * 0.5] - ) - return (lower_quantile + upper_quantile - 2 * q075) / ( - lower_quantile - upper_quantile - ) diff --git a/src/obscure_stats/skewness/__init__.py b/src/obscure_stats/skewness/__init__.py index 5bc7b4b..64fd7e4 100644 --- a/src/obscure_stats/skewness/__init__.py +++ b/src/obscure_stats/skewness/__init__.py @@ -10,9 +10,11 @@ hossain_adnan_skew, kelly_skew, l_skew, + left_quantile_weight, medeen_skew, pearson_median_skew, pearson_mode_skew, + right_quantile_weight, wauc_skew_gamma, ) @@ -26,8 +28,10 @@ "hossain_adnan_skew", "kelly_skew", "l_skew", + "left_quantile_weight", "medeen_skew", "pearson_median_skew", "pearson_mode_skew", + "right_quantile_weight", "wauc_skew_gamma", ] diff --git a/src/obscure_stats/skewness/skewness.py b/src/obscure_stats/skewness/skewness.py index 2098544..8244fb5 100644 --- a/src/obscure_stats/skewness/skewness.py +++ b/src/obscure_stats/skewness/skewness.py @@ -400,3 +400,75 @@ def cumulative_skew(x: np.ndarray) -> float: d = 
q - p w = (2 * r - n) * 3 / n return np.sum(d * w) / np.sum(d) + + +def left_quantile_weight(x: np.ndarray, q: float = 0.25) -> float: + """Calculate left quantile weight (LQW). + + It is based on inter-percentile ranges (uncentered, unscaled) of the + left tail of the distribution. + + Parameters + ---------- + x : array_like + Input array. + q : float + Quantile to use for the anchor. + + Returns + ------- + lqw : float + The value of left quantile weight. + + References + ---------- + Brys, G.; Hubert, M.; Struyf, A. (2006). + Robust measures of tail weight. + Computational Statistics and Data Analysis 50(3), 733-759. + """ + min_q, max_q = 0.0, 0.5 + if q <= min_q or q >= max_q: + msg = "Parameter q should be in range (0, 0.5)." + raise ValueError(msg) + lower_quantile, q025, upper_quantile = np.nanquantile( + x, [q * 0.5, 0.25, (1 - q) * 0.5] + ) + return -(upper_quantile + lower_quantile - 2 * q025) / ( + upper_quantile - lower_quantile + ) + + +def right_quantile_weight(x: np.ndarray, q: float = 0.75) -> float: + """Calculate right quantile weight (RQW). + + It is based on inter-percentile ranges (uncentered, unscaled) of the + right tail of the distribution. + + Parameters + ---------- + x : array_like + Input array. + q : float + Quantile to use for the anchor. + + Returns + ------- + rqw : float + The value of right quantile weight. + + References + ---------- + Brys, G.; Hubert, M.; Struyf, A. (2006). + Robust measures of tail weight. + Computational Statistics and Data Analysis 50(3), 733-759. + """ + min_q, max_q = 0.5, 1.0 + if q <= min_q or q >= max_q: + msg = "Parameter q should be in range (0.5, 1.0)." 
+ raise ValueError(msg) + lower_quantile, q075, upper_quantile = np.nanquantile( + x, [1 - q * 0.5, 0.75, (1 + q) * 0.5] + ) + return (lower_quantile + upper_quantile - 2 * q075) / ( + lower_quantile - upper_quantile + ) diff --git a/tests/test_dispersion.py b/tests/test_dispersion.py index ebce60d..57c623c 100644 --- a/tests/test_dispersion.py +++ b/tests/test_dispersion.py @@ -78,8 +78,8 @@ def test_mock_aggregation_functions( def test_dispersion_sensibility(func: typing.Callable, seed: int) -> None: """Testing for result correctness.""" rng = np.random.default_rng(seed) - low_disp = np.round(rng.exponential(scale=1, size=99) + 1, 2) - high_disp = np.round(rng.exponential(scale=10, size=99) + 1, 2) + low_disp = np.round(rng.exponential(scale=1, size=100) + 1, 2) + high_disp = np.round(rng.exponential(scale=10, size=100) + 1, 2) low_disp_res = func(low_disp) high_disp_res = func(high_disp) if low_disp_res > high_disp_res: diff --git a/tests/test_kurtosis.py b/tests/test_kurtosis.py index 331209d..a2a2276 100644 --- a/tests/test_kurtosis.py +++ b/tests/test_kurtosis.py @@ -13,11 +13,9 @@ crow_siddiqui_kurt, hogg_kurt, l_kurt, - left_quantile_weight, moors_kurt, moors_octile_kurt, reza_ma_kurt, - right_quantile_weight, schmid_trede_peakedness, staudte_kurt, ) @@ -26,11 +24,9 @@ crow_siddiqui_kurt, hogg_kurt, l_kurt, - left_quantile_weight, moors_kurt, moors_octile_kurt, reza_ma_kurt, - right_quantile_weight, schmid_trede_peakedness, staudte_kurt, ] @@ -53,14 +49,10 @@ def test_mock_aggregation_functions( def test_kurt_sensibility(func: typing.Callable, seed: int) -> None: """Testing for result correctness.""" rng = np.random.default_rng(seed) - platy = rng.uniform(size=99) - lepto = rng.laplace(size=99) + platy = rng.uniform(size=100) + lepto = rng.laplace(size=100) platy_res = func(platy) lepto_res = func(lepto) - if func.__name__ == "right_quantile_weight": - # ugly but more harmonized this way - platy_res = -platy_res - lepto_res = -lepto_res if platy_res > 
lepto_res: msg = ( f"Kurtosis in the first case should be lower, got {platy_res} > {lepto_res}" @@ -76,14 +68,6 @@ def test_statistic_with_nans(func: typing.Callable, x_array_nan: np.ndarray) -> raise ValueError(msg) -@pytest.mark.parametrize("func", [right_quantile_weight, left_quantile_weight]) -@pytest.mark.parametrize("q", [0.0, 1.0]) -def test_q_in_qw(x_array_float: np.ndarray, func: typing.Callable, q: float) -> None: - """Simple tets case for correctnes of q.""" - with pytest.raises(ValueError, match="Parameter q should be in range"): - func(x_array_float, q=q) - - @given( arrays( dtype=np.float64, diff --git a/tests/test_skewness.py b/tests/test_skewness.py index de64a13..d5b3bae 100644 --- a/tests/test_skewness.py +++ b/tests/test_skewness.py @@ -19,9 +19,11 @@ hossain_adnan_skew, kelly_skew, l_skew, + left_quantile_weight, medeen_skew, pearson_median_skew, pearson_mode_skew, + right_quantile_weight, wauc_skew_gamma, ) @@ -35,9 +37,11 @@ hossain_adnan_skew, kelly_skew, l_skew, + left_quantile_weight, medeen_skew, pearson_median_skew, pearson_mode_skew, + right_quantile_weight, wauc_skew_gamma, ] @@ -59,10 +63,15 @@ def test_mock_aggregation_functions( def test_skew_sensibility(func: typing.Callable, seed: int) -> None: """Testing for result correctness.""" rng = np.random.default_rng(seed) - no_skew = np.round(rng.normal(size=99), 2) - left_skew = np.round(rng.exponential(size=99) + 1, 2) + # round for the mode estimators to work properly + no_skew = np.round(rng.uniform(size=100), 2) + left_skew = np.round(rng.exponential(size=100) + 1, 2) no_skew_res = func(no_skew) left_skew_res = func(left_skew) + if func.__name__ == "right_quantile_weight": + # ugly but more harmonized this way + no_skew_res = -no_skew_res + left_skew_res = -left_skew_res if no_skew_res > left_skew_res: msg = ( f"Skewness in the first case should be lower, " @@ -88,6 +97,14 @@ def test_statistic_with_nans(func: typing.Callable, x_array_nan: np.ndarray) -> raise ValueError(msg) 
+@pytest.mark.parametrize("func", [right_quantile_weight, left_quantile_weight]) +@pytest.mark.parametrize("q", [0.0, 1.0]) +def test_q_in_qw(x_array_float: np.ndarray, func: typing.Callable, q: float) -> None: + """Simple tets case for correctnes of q.""" + with pytest.raises(ValueError, match="Parameter q should be in range"): + func(x_array_float, q=q) + + @given( arrays( dtype=np.float64, diff --git a/tests/test_variation.py b/tests/test_variation.py index b6eacbb..2668d9e 100644 --- a/tests/test_variation.py +++ b/tests/test_variation.py @@ -49,8 +49,8 @@ def test_mock_variation_functions( def test_var_sensibility_higher_better(func: typing.Callable, seed: int) -> None: """Testing for result correctness.""" rng = np.random.default_rng(seed) - low_var = rng.choice(["a", "b", "c", "d"], p=[0.25, 0.25, 0.25, 0.25], size=99) - high_var = rng.choice(["a", "b", "c", "d"], p=[0.75, 0.15, 0.05, 0.05], size=99) + low_var = rng.choice(["a", "b", "c", "d"], p=[0.25, 0.25, 0.25, 0.25], size=100) + high_var = rng.choice(["a", "b", "c", "d"], p=[0.75, 0.15, 0.05, 0.05], size=100) low_var_res = func(low_var) high_var_res = func(high_var) if low_var_res < high_var_res: From 30c58b8b9086a7173726630ea5b3b40295300237 Mon Sep 17 00:00:00 2001 From: Gleb Levitski <36483986+glevv@users.noreply.github.com> Date: Sun, 24 Nov 2024 15:06:57 +0200 Subject: [PATCH 2/2] more readme fixes --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 92f253f..0f6c548 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ - Collection of measures of central tendency - `obscure_stats/central_tendency`: * Contraharmonic Mean; + * Grenander's Mode; * Half-Sample Mode; * Hodges-Lehmann-Sen Location; * Midhinge; @@ -30,7 +31,6 @@ * Dispersion Ratio; * Fisher's Index of Dispersion; * Gini Mean Difference; - * Grenander's Mode; * Linear Coefficient of Variation; * Inter-expectile Range; * Morisita Index of Dispersion; @@ -49,9 +49,11 @@ * 
Hossain-Adnan Skewness Coefficient; * Kelly Skewness Coefficient; * L-Skewness Coefficient; + * Left Quantile Weight; * Medeen Skewness Coefficient; * Pearson Median Skewness Coefficient; - * Pearson Mode Skewness Coefficient. + * Pearson Mode Skewness Coefficient; + * Right Quantile Weight. - Collection of measures of kurtosis - `obscure_stats/kurtosis`: * Crow-Siddiqui Kurtosis; * L-Kurtosis;