Skip to content

Commit

Permalink
ENH: first hack at sliding window apply_along_axis in Cython
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Nov 13, 2011
1 parent 9ea758c commit 3241692
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 7 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
build
dist
MANIFEST
*.c
*.so
*.pyd
pandas/src/tseries.c
pandas/src/sparse.c
pandas/version.py
doc/source/generated
doc/source/_static
Expand Down
22 changes: 20 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2142,7 +2142,7 @@ def _shift_indexer(self, periods):
#----------------------------------------------------------------------
# Function application

def apply(self, func, axis=0, broadcast=False):
def apply(self, func, axis=0, broadcast=False, raw=False):
"""
Applies function along input axis of DataFrame. Objects passed to
functions are Series objects having index either the DataFrame's index
Expand All @@ -2158,6 +2158,11 @@ def apply(self, func, axis=0, broadcast=False):
broadcast : bool, default False
For aggregation functions, return object of same size with values
propagated
raw : boolean, default False
If False, convert each row or column into a Series. If raw=True the
passed function will receive ndarray objects instead. If you are
just applying a NumPy reduction function this will achieve much
better performance
Examples
--------
Expand All @@ -2182,10 +2187,23 @@ def apply(self, func, axis=0, broadcast=False):
columns=self.columns, copy=False)
else:
if not broadcast:
return self._apply_standard(func, axis)
if raw:
return self._apply_raw(func, axis)
else:
return self._apply_standard(func, axis)
else:
return self._apply_broadcast(func, axis)

def _apply_raw(self, func, axis):
    """
    Apply ``func`` along ``axis`` of the underlying ndarray.

    The function is handed plain ndarray slices of ``self.values`` (via
    ``np.apply_along_axis``) rather than Series objects.

    Parameters
    ----------
    func : callable
        Called once per 1-D slice of ``self.values``
    axis : int
        Axis passed straight through to ``np.apply_along_axis``

    Returns
    -------
    DataFrame if the applied result is 2-dimensional (re-using this
    frame's index and columns), otherwise a Series indexed by
    ``self._get_agg_axis(axis)``
    """
    applied = np.apply_along_axis(func, axis, self.values)

    # TODO: mixed type case
    if applied.ndim != 2:
        # func reduced each slice, so label the result with the
        # aggregation axis
        return Series(applied, index=self._get_agg_axis(axis))

    return DataFrame(applied, index=self.index, columns=self.columns)

def _apply_standard(self, func, axis, ignore_failures=False):
if axis == 0:
series_gen = ((c, self[c]) for c in self.columns)
Expand Down
48 changes: 48 additions & 0 deletions pandas/src/sandbox.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from numpy cimport *
import numpy as np

import_array()

cdef class ArrayCruncher:
    """
    Apply a Python callable to each row of a 2-D ndarray, re-using a single
    "window" ndarray whose data pointer is slid across the input buffer
    instead of allocating a new slice object per row.

    Parameters
    ----------
    arr : ndarray
        Must unpack into exactly two dimensions (``N, K = arr.shape``)
    f : callable
        Called with a length-K ndarray for each of the N rows
    axis : int, default 0
        Accepted but not used by the current implementation
    raw : bool, default True
        Stored on the instance; not consulted by ``reduce``
    """

    cdef:
        ndarray arr
        object f
        bint raw
        Py_ssize_t N, K

    def __init__(self, arr, f, axis=0, raw=True):
        self.arr = arr
        self.f = f
        self.raw = raw
        self.N, self.K = arr.shape

    def reduce(self):
        """
        Call ``f`` on every row and collect the results.

        Returns
        -------
        ndarray of length N with the same dtype as the input array; each
        value returned by ``f`` is stored through ``PyArray_SETITEM`` and is
        therefore converted to that dtype.

        Notes
        -----
        The ndarray handed to ``f`` is a scratch window that is re-pointed
        on every iteration; callers of ``f`` should not keep a reference to
        it.
        """
        cdef:
            char* dummy_buf
            ndarray arr, result, chunk
            Py_ssize_t i, increment
            flatiter it

        # pointer arithmetic below assumes rows are laid out back to back
        if not self.arr.flags.c_contiguous:
            arr = self.arr.copy('C')
        else:
            arr = self.arr

        # byte distance between the start of consecutive rows
        increment = self.K * arr.dtype.itemsize
        chunk = np.empty(self.K, dtype=arr.dtype)
        result = np.empty(self.N, dtype=arr.dtype)
        it = <flatiter> PyArray_IterNew(result)

        # remember the buffer chunk actually owns, then alias the input
        dummy_buf = chunk.data
        chunk.data = arr.data

        try:
            for i in range(self.N):
                PyArray_SETITEM(result, PyArray_ITER_DATA(it),
                                self.f(chunk))
                chunk.data = chunk.data + increment
                PyArray_ITER_NEXT(it)
        finally:
            # so we don't free the wrong memory -- this must also happen
            # when f raises, otherwise chunk would be deallocated while
            # still pointing into arr's buffer
            chunk.data = dummy_buf

        return result
10 changes: 10 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2587,6 +2587,16 @@ def test_apply_broadcast(self):
for idx in broadcasted.index:
self.assert_((broadcasted.xs(idx) == agged[idx]).all())

def test_apply_raw(self):
    """raw=True must give the same answer as reducing each Series' values."""
    def values_mean(x):
        return x.values.mean()

    # axis=0 is the default used by the column-wise case
    for axis in (0, 1):
        result = self.frame.apply(np.mean, axis=axis, raw=True)
        expected = self.frame.apply(values_mean, axis=axis)
        assert_series_equal(result, expected)

def test_apply_axis1(self):
d = self.frame.index[0]
tapplied = self.frame.apply(np.mean, axis=1)
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,10 @@ def test_apply(self):
import math
assert_series_equal(self.ts.apply(math.exp), np.exp(self.ts))

# does not return Series
result = self.ts.apply(lambda x: x.values * 2)
assert_series_equal(result, self.ts * 2)

def test_align(self):
def _check_align(a, b, how='left'):
aa, ab = a.align(b, join=how)
Expand Down
8 changes: 5 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,9 +298,11 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
sources=[srcpath('engines', suffix=suffix)],
include_dirs=[np.get_include()])

extensions = [tseries_ext,
engines_ext,
sparse_ext]
# Experimental Cython module built from the "sandbox" source file
sandbox_ext = Extension(
    'pandas._sandbox',
    sources=[srcpath('sandbox', suffix=suffix)],
    include_dirs=[np.get_include()],
)

# Every extension module handed to setup()
extensions = [
    tseries_ext,
    engines_ext,
    sparse_ext,
    sandbox_ext,
]

# if _have_setuptools:
# setuptools_args["test_suite"] = "nose.collector"
Expand Down

0 comments on commit 3241692

Please sign in to comment.