Skip to content

Commit

Permalink
Fix an issue when concatenating only pd.DataFrame objects (#742)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann authored May 22, 2023
1 parent d6d06db commit fe661eb
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 10 deletions.
3 changes: 2 additions & 1 deletion RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Next Release

- [#738](https://github.com/IAMconsortium/pyam/pull/738) Ensure compatibility with **pandas v2.0**
- [#742](https://github.com/IAMconsortium/pyam/pull/742) Fix an issue when concatenating only pd.DataFrame objects
- [#739](https://github.com/IAMconsortium/pyam/pull/739) Ensure compatibility with **pandas v2.0**

# Release v1.8.0

Expand Down
16 changes: 9 additions & 7 deletions pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2652,7 +2652,7 @@ def _check_rows(rows, check, in_range=True, return_test="any"):
lo_op = rows.values.__ge__ if in_range else rows.values.__lt__

check_idx = []
for (bd, op) in [("up", up_op), ("lo", lo_op)]:
for bd, op in [("up", up_op), ("lo", lo_op)]:
if bd in check:
check_idx.append(set(rows.index[op(check[bd])]))

Expand Down Expand Up @@ -2917,32 +2917,34 @@ def as_iamdataframe(df):

# cast first item to IamDataFrame (if necessary)
df, _merge_meta = as_iamdataframe(objs[0])
extra_cols, time_col = df.extra_cols, df.time_col
index_names, extra_cols, time_col = df.index.names, df.extra_cols, df.time_col

consistent_time_domain = True
iam_dfs = [(df, _merge_meta)]

# cast all items to IamDataFrame (if necessary) and check consistency of items
for df in objs[1:]:
df, _merge_meta = as_iamdataframe(df)
if df.index.names != index_names:
raise ValueError("Items have incompatible index dimensions.")
if df.extra_cols != extra_cols:
raise ValueError("Items have incompatible timeseries data dimensions")
raise ValueError("Items have incompatible timeseries data dimensions.")
if df.time_col != time_col:
consistent_time_domain = False
iam_dfs.append((df, _merge_meta))

# cast all instances to "time"
if not consistent_time_domain:
_iam_dfs = []
for (df, _merge_meta) in iam_dfs:
for df, _merge_meta in iam_dfs:
if df.time_col == "year":
df = df.swap_year_for_time()
_iam_dfs.append((df, _merge_meta))
iam_dfs = _iam_dfs # replace list of IamDataFrames with consistent list

# extract timeseries data and meta attributes
ret_data, ret_meta = [], None
for (df, _merge_meta) in iam_dfs:
for df, _merge_meta in iam_dfs:
ret_data.append(df._data)
if _merge_meta:
ret_meta = (
Expand All @@ -2951,11 +2953,11 @@ def as_iamdataframe(df):
else merge_meta(ret_meta, df.meta, ignore_meta_conflict)
)

# return as new IamDataFrame, this will verify integrity as part of `__init__()`
# return as new IamDataFrame, integrity of `data` is verified at initialization
return IamDataFrame(
pd.concat(ret_data, verify_integrity=False),
meta=ret_meta,
index=ret_meta.index.names,
index=index_names,
)


Expand Down
29 changes: 27 additions & 2 deletions tests/test_feature_append_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,20 @@ def test_concat_non_default_index():
assert_iamframe_equal(exp, concat([df1, df2]))


def test_concat_inconsistent_index_raises(test_df):
    """Check that `concat` raises when items have different index dimensions."""
    # single-row item that carries an additional "version" index dimension
    # (presumably absent from the `test_df` fixture - confirm against conftest)
    row = ["model_a", "scenario_a", "region_a", "variable_a", "unit", 1, 1, 2]
    columns = IAMC_IDX + ["version", 2005, 2010]
    raw = pd.DataFrame([row], columns=columns)
    df_with_version = IamDataFrame(raw, index=META_IDX + ["version"])

    # the message is used as a regex by `pytest.raises`
    expected_message = "Items have incompatible index dimensions"
    with pytest.raises(ValueError, match=expected_message):
        concat([test_df, df_with_version])


@pytest.mark.parametrize("reverse", (False, True))
def test_concat_with_pd_dataframe(test_df, reverse):
other = test_df.filter(scenario="scen_b").rename({"scenario": {"scen_b": "scen_c"}})
Expand All @@ -160,6 +174,19 @@ def test_concat_with_pd_dataframe(test_df, reverse):
npt.assert_array_equal(ts.iloc[2].values, ts.iloc[3].values)


def test_concat_all_pd_dataframe(test_df):
    """Check that `concat` works when every item is a plain pd.DataFrame."""
    # derive a second item by relabelling scenario "scen_b" as "scen_c"
    relabelled = test_df.filter(scenario="scen_b").rename(
        {"scenario": {"scen_b": "scen_c"}}
    )

    # hand over only the timeseries `data` DataFrame of each item
    frames = [test_df.data, relabelled.data]
    timeseries = concat(frames).timeseries()

    # the relabelled row must carry the same values as the row it was copied from
    npt.assert_array_equal(timeseries.iloc[2].values, timeseries.iloc[3].values)


def test_append(test_df):
other = test_df.filter(scenario="scen_b").rename({"scenario": {"scen_b": "scen_c"}})

Expand All @@ -185,7 +212,6 @@ def test_append(test_df):
@pytest.mark.parametrize("time", (datetime(2010, 7, 21), "2010-07-21 00:00:00"))
@pytest.mark.parametrize("reverse", (False, True))
def test_concat_time_domain(test_pd_df, test_df_mixed, time, reverse):

df_year = IamDataFrame(test_pd_df[IAMC_IDX + [2005]], meta=test_df_mixed.meta)
df_time = IamDataFrame(
test_pd_df[IAMC_IDX + [2010]].rename({2010: time}, axis="columns")
Expand All @@ -208,7 +234,6 @@ def test_concat_time_domain(test_pd_df, test_df_mixed, time, reverse):
@pytest.mark.parametrize("time", (datetime(2010, 7, 21), "2010-07-21 00:00:00"))
@pytest.mark.parametrize("inplace", (True, False))
def test_append_time_domain(test_pd_df, test_df_mixed, other, time, inplace):

df_year = IamDataFrame(test_pd_df[IAMC_IDX + [2005]], meta=test_df_mixed.meta)
df_time = IamDataFrame(
test_pd_df[IAMC_IDX + [2010]].rename({2010: time}, axis="columns")
Expand Down

0 comments on commit fe661eb

Please sign in to comment.