From fe661eb8e2e9172bac6859b600795504d5c29885 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Mon, 22 May 2023 09:27:48 +0200 Subject: [PATCH] Fix an issue when concatenating only pd.DataFrame objects (#742) --- RELEASE_NOTES.md | 3 ++- pyam/core.py | 16 +++++++++------- tests/test_feature_append_concat.py | 29 +++++++++++++++++++++++++++-- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 75f077e62..577f46618 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,6 +1,7 @@ # Next Release -- [#738](https://github.com/IAMconsortium/pyam/pull/738) Ensure compatibility with **pandas v2.0** +- [#742](https://github.com/IAMconsortium/pyam/pull/742) Fix an issue when concatenating only pd.DataFrame objects +- [#739](https://github.com/IAMconsortium/pyam/pull/739) Ensure compatibility with **pandas v2.0** # Release v1.8.0 diff --git a/pyam/core.py b/pyam/core.py index 96b72e02a..90158f06b 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -2652,7 +2652,7 @@ def _check_rows(rows, check, in_range=True, return_test="any"): lo_op = rows.values.__ge__ if in_range else rows.values.__lt__ check_idx = [] - for (bd, op) in [("up", up_op), ("lo", lo_op)]: + for bd, op in [("up", up_op), ("lo", lo_op)]: if bd in check: check_idx.append(set(rows.index[op(check[bd])])) @@ -2917,7 +2917,7 @@ def as_iamdataframe(df): # cast first item to IamDataFrame (if necessary) df, _merge_meta = as_iamdataframe(objs[0]) - extra_cols, time_col = df.extra_cols, df.time_col + index_names, extra_cols, time_col = df.index.names, df.extra_cols, df.time_col consistent_time_domain = True iam_dfs = [(df, _merge_meta)] @@ -2925,8 +2925,10 @@ def as_iamdataframe(df): # cast all items to IamDataFrame (if necessary) and check consistency of items for df in objs[1:]: df, _merge_meta = as_iamdataframe(df) + if df.index.names != index_names: + raise ValueError("Items have incompatible index dimensions.") if df.extra_cols != extra_cols: - raise ValueError("Items have incompatible timeseries data dimensions") + raise ValueError("Items have incompatible timeseries data dimensions.") if df.time_col != time_col: consistent_time_domain = False iam_dfs.append((df, _merge_meta)) @@ -2934,7 +2936,7 @@ def as_iamdataframe(df): # cast all instances to "time" if not consistent_time_domain: _iam_dfs = [] - for (df, _merge_meta) in iam_dfs: + for df, _merge_meta in iam_dfs: if df.time_col == "year": df = df.swap_year_for_time() _iam_dfs.append((df, _merge_meta)) @@ -2942,7 +2944,7 @@ def as_iamdataframe(df): # extract timeseries data and meta attributes ret_data, ret_meta = [], None - for (df, _merge_meta) in iam_dfs: + for df, _merge_meta in iam_dfs: ret_data.append(df._data) if _merge_meta: ret_meta = ( @@ -2951,11 +2953,11 @@ def as_iamdataframe(df): else merge_meta(ret_meta, df.meta, ignore_meta_conflict) ) - # return as new IamDataFrame, this will verify integrity as part of `__init__()` + # return as new IamDataFrame, integrity of `data` is verified at initialization return IamDataFrame( pd.concat(ret_data, verify_integrity=False), meta=ret_meta, - index=ret_meta.index.names, + index=index_names, ) diff --git a/tests/test_feature_append_concat.py b/tests/test_feature_append_concat.py index 0ea0f90cb..7dc6aad02 100644 --- a/tests/test_feature_append_concat.py +++ b/tests/test_feature_append_concat.py @@ -136,6 +136,20 @@ def test_concat_non_default_index(): assert_iamframe_equal(exp, concat([df1, df2])) +def test_concat_inconsistent_index_raises(test_df): + # Test that merging two IamDataFrames with inconsistent index raises + + df_version = IamDataFrame( + pd.DataFrame( + [["model_a", "scenario_a", "region_a", "variable_a", "unit", 1, 1, 2]], + columns=IAMC_IDX + ["version", 2005, 2010], + ), + index=META_IDX + ["version"], + ) + with pytest.raises(ValueError, match="Items have incompatible index dimensions"): + concat([test_df, df_version]) + + @pytest.mark.parametrize("reverse", (False, True)) def test_concat_with_pd_dataframe(test_df, reverse): other = test_df.filter(scenario="scen_b").rename({"scenario": {"scen_b": "scen_c"}}) @@ -160,6 +174,19 @@ def test_concat_with_pd_dataframe(test_df, reverse): npt.assert_array_equal(ts.iloc[2].values, ts.iloc[3].values) +def test_concat_all_pd_dataframe(test_df): + # Try concatenating only pd.DataFrame objects and casting to an IamDataFrame + + other = test_df.filter(scenario="scen_b").rename({"scenario": {"scen_b": "scen_c"}}) + + # merge only the timeseries `data` DataFrame of both items + result = concat([test_df.data, other.data]) + + # assert that appending data works as expected + ts = result.timeseries() + npt.assert_array_equal(ts.iloc[2].values, ts.iloc[3].values) + + def test_append(test_df): other = test_df.filter(scenario="scen_b").rename({"scenario": {"scen_b": "scen_c"}}) @@ -185,7 +212,6 @@ def test_append(test_df): @pytest.mark.parametrize("time", (datetime(2010, 7, 21), "2010-07-21 00:00:00")) @pytest.mark.parametrize("reverse", (False, True)) def test_concat_time_domain(test_pd_df, test_df_mixed, time, reverse): - df_year = IamDataFrame(test_pd_df[IAMC_IDX + [2005]], meta=test_df_mixed.meta) df_time = IamDataFrame( test_pd_df[IAMC_IDX + [2010]].rename({2010: time}, axis="columns") @@ -208,7 +234,6 @@ def test_concat_time_domain(test_pd_df, test_df_mixed, time, reverse): @pytest.mark.parametrize("time", (datetime(2010, 7, 21), "2010-07-21 00:00:00")) @pytest.mark.parametrize("inplace", (True, False)) def test_append_time_domain(test_pd_df, test_df_mixed, other, time, inplace): - df_year = IamDataFrame(test_pd_df[IAMC_IDX + [2005]], meta=test_df_mixed.meta) df_time = IamDataFrame( test_pd_df[IAMC_IDX + [2010]].rename({2010: time}, axis="columns")