Fix an issue when concatenating only pd.DataFrame objects (#742)

IAMconsortium · May 22, 2023 · fe661eb
1 parent d6d06db
commit fe661eb
Show file tree

Hide file tree

Showing 3 changed files with 38 additions and 10 deletions.
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -1,6 +1,7 @@
 # Next Release
 
-- [#738](https://github.com/IAMconsortium/pyam/pull/738) Ensure compatibility with **pandas v2.0**
+- [#742](https://github.com/IAMconsortium/pyam/pull/742) Fix an issue when concatenating only pd.DataFrame objects 
+- [#739](https://github.com/IAMconsortium/pyam/pull/739) Ensure compatibility with **pandas v2.0**
 
 # Release v1.8.0
 

diff --git a/pyam/core.py b/pyam/core.py
@@ -2652,7 +2652,7 @@ def _check_rows(rows, check, in_range=True, return_test="any"):
     lo_op = rows.values.__ge__ if in_range else rows.values.__lt__
 
     check_idx = []
-    for (bd, op) in [("up", up_op), ("lo", lo_op)]:
+    for bd, op in [("up", up_op), ("lo", lo_op)]:
         if bd in check:
             check_idx.append(set(rows.index[op(check[bd])]))
 
@@ -2917,32 +2917,34 @@ def as_iamdataframe(df):
 
     # cast first item to IamDataFrame (if necessary)
     df, _merge_meta = as_iamdataframe(objs[0])
-    extra_cols, time_col = df.extra_cols, df.time_col
+    index_names, extra_cols, time_col = df.index.names, df.extra_cols, df.time_col
 
     consistent_time_domain = True
     iam_dfs = [(df, _merge_meta)]
 
     # cast all items to IamDataFrame (if necessary) and check consistency of items
     for df in objs[1:]:
         df, _merge_meta = as_iamdataframe(df)
+        if df.index.names != index_names:
+            raise ValueError("Items have incompatible index dimensions.")
         if df.extra_cols != extra_cols:
-            raise ValueError("Items have incompatible timeseries data dimensions")
+            raise ValueError("Items have incompatible timeseries data dimensions.")
         if df.time_col != time_col:
             consistent_time_domain = False
         iam_dfs.append((df, _merge_meta))
 
     # cast all instances to "time"
     if not consistent_time_domain:
         _iam_dfs = []
-        for (df, _merge_meta) in iam_dfs:
+        for df, _merge_meta in iam_dfs:
             if df.time_col == "year":
                 df = df.swap_year_for_time()
             _iam_dfs.append((df, _merge_meta))
         iam_dfs = _iam_dfs  # replace list of IamDataFrames with consistent list
 
     # extract timeseries data and meta attributes
     ret_data, ret_meta = [], None
-    for (df, _merge_meta) in iam_dfs:
+    for df, _merge_meta in iam_dfs:
         ret_data.append(df._data)
         if _merge_meta:
             ret_meta = (
@@ -2951,11 +2953,11 @@ def as_iamdataframe(df):
                 else merge_meta(ret_meta, df.meta, ignore_meta_conflict)
             )
 
-    # return as new IamDataFrame, this will verify integrity as part of `__init__()`
+    # return as new IamDataFrame, integrity of `data` is verified at initialization
     return IamDataFrame(
         pd.concat(ret_data, verify_integrity=False),
         meta=ret_meta,
-        index=ret_meta.index.names,
+        index=index_names,
     )
 
 

diff --git a/tests/test_feature_append_concat.py b/tests/test_feature_append_concat.py
@@ -136,6 +136,20 @@ def test_concat_non_default_index():
     assert_iamframe_equal(exp, concat([df1, df2]))
 
 
+def test_concat_inconsistent_index_raises(test_df):
+    # Test that merging two IamDataFrames with inconsistent index raises
+
+    df_version = IamDataFrame(
+        pd.DataFrame(
+            [["model_a", "scenario_a", "region_a", "variable_a", "unit", 1, 1, 2]],
+            columns=IAMC_IDX + ["version", 2005, 2010],
+        ),
+        index=META_IDX + ["version"],
+    )
+    with pytest.raises(ValueError, match="Items have incompatible index dimensions"):
+        concat([test_df, df_version])
+
+
 @pytest.mark.parametrize("reverse", (False, True))
 def test_concat_with_pd_dataframe(test_df, reverse):
     other = test_df.filter(scenario="scen_b").rename({"scenario": {"scen_b": "scen_c"}})
@@ -160,6 +174,19 @@ def test_concat_with_pd_dataframe(test_df, reverse):
     npt.assert_array_equal(ts.iloc[2].values, ts.iloc[3].values)
 
 
+def test_concat_all_pd_dataframe(test_df):
+    # Try concatenating only pd.DataFrame objects and casting to an IamDataFrame
+
+    other = test_df.filter(scenario="scen_b").rename({"scenario": {"scen_b": "scen_c"}})
+
+    # merge only the timeseries `data` DataFrame of both items
+    result = concat([test_df.data, other.data])
+
+    # assert that appending data works as expected
+    ts = result.timeseries()
+    npt.assert_array_equal(ts.iloc[2].values, ts.iloc[3].values)
+
+
 def test_append(test_df):
     other = test_df.filter(scenario="scen_b").rename({"scenario": {"scen_b": "scen_c"}})
 
@@ -185,7 +212,6 @@ def test_append(test_df):
 @pytest.mark.parametrize("time", (datetime(2010, 7, 21), "2010-07-21 00:00:00"))
 @pytest.mark.parametrize("reverse", (False, True))
 def test_concat_time_domain(test_pd_df, test_df_mixed, time, reverse):
-
     df_year = IamDataFrame(test_pd_df[IAMC_IDX + [2005]], meta=test_df_mixed.meta)
     df_time = IamDataFrame(
         test_pd_df[IAMC_IDX + [2010]].rename({2010: time}, axis="columns")
@@ -208,7 +234,6 @@ def test_concat_time_domain(test_pd_df, test_df_mixed, time, reverse):
 @pytest.mark.parametrize("time", (datetime(2010, 7, 21), "2010-07-21 00:00:00"))
 @pytest.mark.parametrize("inplace", (True, False))
 def test_append_time_domain(test_pd_df, test_df_mixed, other, time, inplace):
-
     df_year = IamDataFrame(test_pd_df[IAMC_IDX + [2005]], meta=test_df_mixed.meta)
     df_time = IamDataFrame(
         test_pd_df[IAMC_IDX + [2010]].rename({2010: time}, axis="columns")