-
Notifications
You must be signed in to change notification settings - Fork 61
Series combine #821
base: master
Are you sure you want to change the base?
Series combine #821
Changes from 5 commits
7ec05f3
b576e03
95d233e
2b95e9d
2104aae
aeef026
f09b792
fafcaed
b1ed1b3
4fdb369
6d930d8
c85feff
18771f2
8628e31
6f37f53
f66ace3
9fc55a7
e7dc1f5
6c78b8e
d623b89
ecd3dba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# ***************************************************************************** | ||
# Copyright (c) 2020, Intel Corporation All rights reserved. | ||
# | ||
# Redistribution and use in source and binary forms, with or without | ||
# modification, are permitted provided that the following conditions are met: | ||
# | ||
# Redistributions of source code must retain the above copyright notice, | ||
# this list of conditions and the following disclaimer. | ||
# | ||
# Redistributions in binary form must reproduce the above copyright notice, | ||
# this list of conditions and the following disclaimer in the documentation | ||
# and/or other materials provided with the distribution. | ||
# | ||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, | ||
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR | ||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | ||
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | ||
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | ||
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, | ||
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
# ***************************************************************************** | ||
|
||
import pandas as pd | ||
from numba import njit | ||
|
||
|
||
@njit
def series_combine():
    """Combine two Series of different lengths element-wise using ``max``."""
    left = pd.Series([1, 5, 2])
    right = pd.Series([0, 3, 7, 8, 0])

    # fill_value=0 stands in for the positions the shorter Series does not have.
    combined = left.combine(right, max, fill_value=0)

    return combined  # Expect series of 1, 5, 7, 8, 0
|
||
|
||
print(series_combine()) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4882,3 +4882,65 @@ def sdc_pandas_series_skew_impl(self, axis=None, skipna=None, level=None, numeri | |
return numpy_like.skew(self._data) | ||
|
||
return sdc_pandas_series_skew_impl | ||
|
||
|
||
@sdc_overload_method(SeriesType, 'combine')
def sdc_pandas_series_combine(self, other, func, fill_value=None):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.Series.combine

    Limitations
    -----------
    - Only the case where both series hold data of the same type is supported.
    - Elements are paired by position: the indexes of the series are not aligned
      and the result is returned with a default index.

    Examples
    --------
    .. literalinclude:: ../../../examples/series/series_combine.py
       :language: python
       :lines: 27-
       :caption: Combine the Series with a Series according to func.
       :name: ex_series_combine

    .. command-output:: python ./series/series_combine.py
       :cwd: ../../../examples

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas Series method :meth:`pandas.Series.combine` implementation.

    .. only:: developer

        Tests: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_combine*
    """
    _func_name = 'Method Series.combine().'

    ty_checker = TypeChecker(_func_name)
    ty_checker.check(self, SeriesType)
    ty_checker.check(other, SeriesType)

    if not isinstance(fill_value, (types.Omitted, types.NoneType, types.Number)) and fill_value is not None:
        ty_checker.raise_exc(fill_value, 'number', 'fill_value')

    # Resolved at typing time so that the compiled implementation never rebinds the
    # ``fill_value`` argument to a value of a different type.
    fill_value_is_none = isinstance(fill_value, (types.Omitted, types.NoneType)) or fill_value is None

    def sdc_pandas_series_combine_impl(self, other, func, fill_value=None):
        """Apply func to positionally paired elements, padding the shorter series with the fill value."""

        # NaN is the placeholder when no fill_value was given.
        _fill_value = numpy.nan if fill_value_is_none == True else fill_value  # noqa

        len_self = len(self)
        len_other = len(other)

        # NOTE(review): this length is only right for positionally aligned series; a reviewer
        # suggested deriving it from the joined indexes (sdc_join_series_indexes) instead.
        len_val = max(len_self, len_other)

        # NOTE(review): a reviewer pointed out that the result dtype should be the common dtype
        # of func(a, b) and of the fill value, e.g. integer series with a float fill_value are
        # not handled correctly by reusing the dtype of self.
        result = numpy.empty(len_val, self._data.dtype)

        # NOTE(review): reviewers asked for this loop to be parallelized (chunks / prange).
        for ind in range(len_val):
            # Read an element only when the position exists in that series; the shorter series
            # must not be indexed past its end.
            val_self = self._data[ind] if ind < len_self else _fill_value
            val_other = other._data[ind] if ind < len_other else _fill_value
            result[ind] = func(val_self, val_other)

        return pandas.Series(result)

    return sdc_pandas_series_combine_impl
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2760,7 +2760,6 @@ def test_impl(S1, S2): | |
S2 = pd.Series([6., 7.]) | ||
np.testing.assert_array_equal(hpat_func(S1, S2), test_impl(S1, S2)) | ||
|
||
@skip_numba_jit | ||
def test_series_combine(self): | ||
def test_impl(S1, S2): | ||
return S1.combine(S2, lambda a, b: 2 * a + b) | ||
|
@@ -2770,7 +2769,6 @@ def test_impl(S1, S2): | |
S2 = pd.Series([6.0, 21., 3.6, 5.]) | ||
pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2)) | ||
|
||
@skip_numba_jit | ||
def test_series_combine_float3264(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This test contains incorrect code, which should probably be corrected:
S2.dtype will be float64 on Windows, not float32. Moreover, the series dtype should be specified this way:
|
||
def test_impl(S1, S2): | ||
return S1.combine(S2, lambda a, b: 2 * a + b) | ||
|
@@ -2804,7 +2802,6 @@ def test_impl(S1, S2): | |
with self.assertRaises(AssertionError): | ||
hpat_func(S1, S2) | ||
|
||
@skip_numba_jit | ||
def test_series_combine_integer(self): | ||
def test_impl(S1, S2): | ||
return S1.combine(S2, lambda a, b: 2 * a + b, 16) | ||
|
@@ -2814,7 +2811,6 @@ def test_impl(S1, S2): | |
S2 = pd.Series([6, 21, 3, 5]) | ||
pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2)) | ||
|
||
@skip_numba_jit | ||
def test_series_combine_different_types(self): | ||
def test_impl(S1, S2): | ||
return S1.combine(S2, lambda a, b: 2 * a + b) | ||
|
@@ -2824,7 +2820,6 @@ def test_impl(S1, S2): | |
S2 = pd.Series([1, 2, 3, 4, 5]) | ||
pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2)) | ||
|
||
@skip_numba_jit | ||
def test_series_combine_integer_samelen(self): | ||
def test_impl(S1, S2): | ||
return S1.combine(S2, lambda a, b: 2 * a + b) | ||
|
@@ -2834,7 +2829,6 @@ def test_impl(S1, S2): | |
S2 = pd.Series([6, 21, 17, -5, 4]) | ||
pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2)) | ||
|
||
@skip_numba_jit | ||
def test_series_combine_samelen(self): | ||
def test_impl(S1, S2): | ||
return S1.combine(S2, lambda a, b: 2 * a + b) | ||
|
@@ -2844,7 +2838,6 @@ def test_impl(S1, S2): | |
S2 = pd.Series([6.0, 21., 3.6, 5., 0.0]) | ||
pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2)) | ||
|
||
@skip_numba_jit | ||
def test_series_combine_value(self): | ||
def test_impl(S1, S2): | ||
return S1.combine(S2, lambda a, b: 2 * a + b, 1237.56) | ||
|
@@ -2854,7 +2847,6 @@ def test_impl(S1, S2): | |
S2 = pd.Series([6.0, 21., 3.6, 5.]) | ||
pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2)) | ||
|
||
@skip_numba_jit | ||
def test_series_combine_value_samelen(self): | ||
def test_impl(S1, S2): | ||
return S1.combine(S2, lambda a, b: 2 * a + b, 1237.56) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will make fill_value type undefined at compile time. You can probably use the same approach as in operators:
sdc/sdc/sdc_function_templates.py
Line 144 in e87095a