diff --git a/bioframe/extras.py b/bioframe/extras.py index c33705a8..9a48763b 100644 --- a/bioframe/extras.py +++ b/bioframe/extras.py @@ -28,15 +28,15 @@ def make_chromarms( Parameters ---------- - chromsizes : pandas.Dataframe or pandas.Series - If pandas.Series, a map from chromosomes to lengths in bp. + chromsizes : pandas.Dataframe or dict-like + If dict or pandas.Series, a map from chromosomes to lengths in bp. If pandas.Dataframe, a dataframe with columns defined by cols_chroms. If cols_chroms is a triplet (e.g. 'chrom','start','end'), then values in chromsizes[cols_chroms[1]].values must all be zero. midpoints : pandas.Dataframe or dict-like Mapping of chromosomes to midpoint (aka centromere) locations. - If pandas.Series, a map from chromosomes to midpoints in bp. + If dict or pandas.Series, a map from chromosomes to midpoints in bp. If pandas.Dataframe, a dataframe with columns defined by cols_mids. cols_chroms : (str, str) or (str, str, str) @@ -59,9 +59,13 @@ def make_chromarms( elif len(cols_chroms) == 3: ck1, sk1, ek1 = cols_chroms - if isinstance(chromsizes, pd.Series): + if isinstance(chromsizes, (pd.Series, dict)): + chromsizes = dict(chromsizes) df_chroms = ( - pd.DataFrame(chromsizes).reset_index().rename(columns={"index": ck1}) + pd.DataFrame({ + ck1: list(chromsizes.keys()), + "length": list(chromsizes.values()), + }) ) elif isinstance(chromsizes, pd.DataFrame): df_chroms = chromsizes.copy() @@ -83,7 +87,8 @@ def make_chromarms( raise ValueError("invalid number of cols_chroms") ck2, sk2 = cols_mids - if isinstance(midpoints, dict): + if isinstance(midpoints, (pd.Series, dict)): + midpoints = dict(midpoints) df_mids = pd.DataFrame.from_dict(midpoints, orient="index", columns=[sk2]) df_mids.reset_index(inplace=True) df_mids.rename(columns={"index": ck2}, inplace=True) diff --git a/tests/test_extras.py b/tests/test_extras.py index c7e47a19..98fd69a1 100644 --- a/tests/test_extras.py +++ b/tests/test_extras.py @@ -12,46 +12,59 @@ def 
test_make_chromarms():
     ### test the case where columns have different names
-    df1 = pd.DataFrame(
+    df = pd.DataFrame(
         [["chrX", 0, 8]],
         columns=["chromosome", "lo", "hi"],
     )
-
-    df2 = pd.DataFrame([["chrX", 4]], columns=["chromosome", "loc"])
-
-    df_result = pd.DataFrame(
+    mids = pd.DataFrame([["chrX", 4]], columns=["chromosome", "loc"])
+    arms = pd.DataFrame(
         [
             ["chrX", 0, 4, "chrX_p"],
             ["chrX", 4, 8, "chrX_q"],
         ],
-        columns=["chromosome", "lo", "hi", "name"],
+        columns=["chrom", "start", "end", "name"],
     )
+    arms = arms.astype({"start": pd.Int64Dtype(), "end": pd.Int64Dtype()})
 
     # test passing 3 columns
+    result = bioframe.make_chromarms(
+        df,
+        mids,
+        cols_chroms=["chromosome", "lo", "hi"],
+        cols_mids=["chromosome", "loc"],
+    )
     pd.testing.assert_frame_equal(
-        df_result.astype({"lo": pd.Int64Dtype(), "hi": pd.Int64Dtype()}),
-        bioframe.make_chromarms(
-            df1,
-            df2,
-            cols_chroms=["chromosome", "lo", "hi"],
-            cols_mids=["chromosome", "loc"],
-        ),
+        result,
+        arms.rename(columns={"chrom": "chromosome", "start": "lo", "end": "hi"})
     )
 
     # test passing 2 columns
+    result = bioframe.make_chromarms(
+        df,
+        mids,
+        cols_chroms=["chromosome", "hi"],
+        cols_mids=["chromosome", "loc"],
+    )
     pd.testing.assert_frame_equal(
-        df_result.astype({"lo": pd.Int64Dtype(), "hi": pd.Int64Dtype()}).rename(
-            columns={"lo": "start", "hi": "end"}
-        ),
-        bioframe.make_chromarms(
-            df1,
-            df2,
-            cols_chroms=["chromosome", "hi"],
-            cols_mids=["chromosome", "loc"],
-        ),
+        result,
+        arms.rename(columns={"chrom": "chromosome"}),
     )
 
-    # todo: test for passing pd.series !
+    # test passing Series or dict; rebind `result` for every combination
+    result = bioframe.make_chromarms(pd.Series({"chrX": 8}), mids, cols_mids=["chromosome", "loc"])
+    pd.testing.assert_frame_equal(arms, result)
+
+    result = bioframe.make_chromarms(pd.Series({"chrX": 8}), pd.Series({"chrX": 4}))
+    pd.testing.assert_frame_equal(arms, result)
+
+    result = bioframe.make_chromarms({"chrX": 8}, mids, cols_mids=["chromosome", "loc"])
+    pd.testing.assert_frame_equal(arms, result)
+
+    result = bioframe.make_chromarms({"chrX": 8}, pd.Series({"chrX": 4}))
+    pd.testing.assert_frame_equal(arms, result)
+
+    result = bioframe.make_chromarms({"chrX": 8}, {"chrX": 4})
+    pd.testing.assert_frame_equal(arms, result)
 
 
 def test_binnify():