Skip to content

Commit

Permalink
ENH: corrwith excludes object data by default, address GH pandas-dev#144
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Sep 22, 2011
1 parent dca3c5c commit a4d1ea3
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 5 deletions.
22 changes: 17 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2214,20 +2214,23 @@ def corrwith(self, other, axis=0, drop=False):
-------
correls : Series
"""
com_index = self._intersect_index(other)
com_cols = self._intersect_columns(other)
this = self._get_numeric_data()
other = other._get_numeric_data()

com_index = this._intersect_index(other)
com_cols = this._intersect_columns(other)

# feels hackish
if axis == 0:
result_index = com_index
if not drop:
result_index = self.columns.union(other.columns)
result_index = this.columns.union(other.columns)
else:
result_index = com_cols
if not drop:
result_index = self.index.union(other.index)
result_index = this.index.union(other.index)

left = self.reindex(index=com_index, columns=com_cols)
left = this.reindex(index=com_index, columns=com_cols)
right = other.reindex(index=com_index, columns=com_cols)

# mask missing values
Expand Down Expand Up @@ -2692,6 +2695,15 @@ def _get_numeric_columns(self):

return cols

def _get_numeric_data(self):
    """
    Return the numeric portion of this frame.

    Mixed-type frames are narrowed to the columns reported by
    ``_get_numeric_columns``. Homogeneous frames are returned as-is (the
    same object, not a copy) unless the underlying values array has object
    dtype, in which case a selection with an empty column list is returned.
    """
    if self._is_mixed_type:
        return self.ix[:, self._get_numeric_columns()]

    # Homogeneous frame: a single dtype check decides all columns at once.
    if self.values.dtype == np.object_:
        return self.ix[:, []]

    return self

def clip(self, upper=None, lower=None):
"""
Trim values at input threshold(s)
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1704,6 +1704,22 @@ def test_corrwith(self):
for row in index[:4]:
assert_almost_equal(correls[row], df1.ix[row].corr(df2.ix[row]))

def test_corrwith_with_objects(self):
    """
    corrwith on frames carrying an extra object column should give the
    same result as corrwith on the numeric columns alone, along both axes.
    """
    df1 = tm.makeTimeDataFrame()
    df2 = tm.makeTimeDataFrame()
    # presumably the columns produced by makeTimeDataFrame -- verify
    # against the test utilities if that helper changes
    cols = ['A', 'B', 'C', 'D']

    # Same column name on both sides so it survives the column
    # intersection unless corrwith filters object data out itself.
    df1['obj'] = 'foo'
    df2['obj'] = 'bar'

    # axis=0 is corrwith's default, so this covers the no-argument call too.
    for axis in (0, 1):
        result = df1.corrwith(df2, axis=axis)
        expected = df1.ix[:, cols].corrwith(df2.ix[:, cols], axis=axis)
        assert_series_equal(result, expected)

def test_dropEmptyRows(self):
N = len(self.frame.index)
mat = randn(N)
Expand Down

0 comments on commit a4d1ea3

Please sign in to comment.