IntelPython · Rubtsowa · Apr 28, 2020 · Apr 28, 2020 · Apr 28, 2020 · May 13, 2020
diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
@@ -4913,7 +4913,7 @@ def sdc_pandas_series_skew_impl(self, axis=None, skipna=None, level=None, numeri
     return sdc_pandas_series_skew_impl
 
 
-@sdc_overload_method(SeriesType, 'combine')
+@sdc_overload_method(SeriesType, 'combine', jit_options={'error_model': 'numpy'})
 def sdc_pandas_series_combine(self, other, func, fill_value=None):
     """
     Intel Scalable Dataframe Compiler User Guide
@@ -4923,8 +4923,12 @@ def sdc_pandas_series_combine(self, other, func, fill_value=None):
 
     Limitations
     -----------
-    - Only supports the case when data in series of the same type.
-    - With the default fill_value parameter value, the type of the resulting series will be float.
+    - Resulting series dtype may be wider than in pandas due to
+      type-stability requirements and depends on fill_value dtype
+      and result of series indexes alignment.
+    - Indexes should be strictly ascending: the function sorts them
+      in ascending order internally, so for any other index order
+      the result differs from the result of pandas.
 
     Examples
     --------
@@ -4964,33 +4968,21 @@ def sdc_pandas_series_combine(self, other, func, fill_value=None):
 
     def sdc_pandas_series_combine_impl(self, other, func, fill_value=None):
 
-        if fill_value is not None:
-            _fill_value = fill_value
-        else:
-            _fill_value = numpy.nan
+        _fill_value = numpy.nan if fill_value is None else fill_value
 
         indexes, self_indexes, other_indexes = sdc_join_series_indexes(self.index, other.index)
         len_val = len(indexes)
 
         result = numpy.empty(len_val, res_dtype)
 
-        chunks = parallel_chunks(len_val)
-        for i in prange(len(chunks)):
-            chunk = chunks[i]
-            for j in range(chunk.start, chunk.stop):
-                self_idx = self_indexes[j]
-                if self_idx == -1:
-                    val_self = _fill_value
-                else:
-                    val_self = self[self_idx]._data[0]
+        for i in prange(len_val):
+            self_idx, other_idx = self_indexes[i], other_indexes[i]
+            val_self = _fill_value if self_idx == -1 else self._data[self_idx]
 
-                other_idx = other_indexes[j]
-                if other_idx == -1:
-                    val_other = _fill_value
-                else:
-                    val_other = other[other_idx]._data[0]
+            val_other = _fill_value if other_idx == -1 else other._data[other_idx]
+
+            result[i] = func(val_self, val_other)
 
-                result[j] = func(val_self, val_other)
         return pandas.Series(result, index=indexes)
 
     return sdc_pandas_series_combine_impl
diff --git a/sdc/tests/test_series.py b/sdc/tests/test_series.py
@@ -2770,91 +2770,131 @@ def test_impl(S1, S2):
         S2 = pd.Series([6.0, 21., 3.6, 5.])
         pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2))
 
-    def test_series_combine_float3264(self):
-        def test_impl(S1, S2):
-            return S1.combine(S2, lambda a, b: 2 * a + b)
-        hpat_func = self.jit(test_impl)
-
-        S1 = pd.Series([np.float64(1), np.float64(2),
-                        np.float64(3), np.float64(4), np.float64(5)])
-        S2 = pd.Series([np.float32(1), np.float32(2),
-                        np.float32(3), np.float32(4), np.float32(5)])
-        pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2))
-
-    def test_series_combine_assert1(self):
+    @unittest.expectedFailure
+    # https://github.com/numba/numba/issues/5792
+    def test_series_combine_div(self):
         def test_impl(S1, S2):
-            return S1.combine(S2, lambda a, b: 2 * a + b)
-        hpat_func = self.jit(test_impl)
+            return S1.combine(S2, lambda a, b: a/b, 0)
 
-        S1 = pd.Series([1, 2, 3])
-        S2 = pd.Series([6., 21., 3., 5.])
-        pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2))
-
-    def test_series_combine_assert2(self):
-        def test_impl(S1, S2):
-            return S1.combine(S2, lambda a, b: 2 * a + b)
         hpat_func = self.jit(test_impl)
 
-        S1 = pd.Series([6., 21., 3., 5.])
-        S2 = pd.Series([1, 2, 3])
-        pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2))
-
-    def test_series_combine_integer(self):
-        def test_impl(S1, S2):
-            return S1.combine(S2, lambda a, b: 2 * a + b, 16)
-        hpat_func = self.jit(test_impl)
+        sizes1 = [2, 4, 5, 6, 8]
+        sizes2 = [1, 3, 5, 7, 9]
+        series_dtypes = [None, np.int64, np.float64]
 
-        S1 = pd.Series([1, 2, 3, 4, 5])
-        S2 = pd.Series([6, 21, 3, 5])
-        pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2))
+        for n in sizes1:
+            for k in sizes2:
+                for dtype1, dtype2 in product(series_dtypes, series_dtypes):
+                    A = np.random.randint(-100, 100, n)
+                    B = np.arange(k) * 2 + 1
+                    S1 = pd.Series(A, dtype=dtype1)
+                    S2 = pd.Series(B, dtype=dtype2)
+                    with self.subTest(S1=S1, S2=S2):
+                        result = hpat_func(S1, S2)
+                        result_ref = test_impl(S1, S2)
+                        # check_dtype=False: the resulting dtype may be wider than in pandas
+                        pd.testing.assert_series_equal(result, result_ref, check_dtype=False)
 
-    def test_series_combine_different_types(self):
+    def test_series_combine_value(self):
         def test_impl(S1, S2):
             return S1.combine(S2, lambda a, b: 2 * a + b)
         hpat_func = self.jit(test_impl)
 
-        S1 = pd.Series([6.1, 21.2, 3.3, 5.4, 6.7])
-        S2 = pd.Series([1, 2, 3, 4, 5])
-        pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2))
+        series_indexes = [[1, 2, 3, 4, 5],
+                          [4, 5, 7, 8, 9],
+                          [0, 1, 7, 13, 25]]
+        # Only ascending indexes are used, since results differ from pandas otherwise
 
-    @unittest.expectedFailure
-    def test_series_combine_integer_samelen(self):
-        """Result series type `int` is expected,
-        `float` is returned since this is the default fill_value type"""
-        def test_impl(S1, S2):
-            return S1.combine(S2, lambda a, b: 2 * a + b)
-        hpat_func = self.jit(test_impl)
-
-        S1 = pd.Series([1, 2, 3, 4, 5])
-        S2 = pd.Series([6, 21, 17, -5, 4])
-        pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2))
+        n = 5
+        np.random.seed(0)
+        A = np.random.randint(-100, 100, n)
+        B = np.arange(n) * 2 + 1
+
+        series_dtypes = [None, np.int64, np.float64]
+        fill_values = [None, np.nan, 4, 4.2]
+        for dtype1, dtype2 in product(series_dtypes, series_dtypes):
+            for series_index1 in series_indexes:
+                for series_index2 in series_indexes:
+                    S1 = pd.Series(A, index=series_index1, dtype=dtype1)
+                    S2 = pd.Series(B, index=series_index2, dtype=dtype2)
+                    with self.subTest(S1=S1, S2=S2):
+                        result = hpat_func(S1, S2)
+                        result_ref = test_impl(S1, S2)
+                        # check_dtype=False: the resulting dtype may be wider than in pandas
+                        pd.testing.assert_series_equal(result, result_ref, check_dtype=False)
+
+    def test_series_combine_value_with_fill_value(self):
+        def test_impl(S1, S2, fill_value):
+            return S1.combine(S2, lambda a, b: 2 * a + b, fill_value)
+        hpat_func = self.jit(test_impl)
+
+        series_indexes = [[1, 2, 3, 4, 5],
+                          [4, 5, 7, 8, 9],
+                          [0, 1, 7, 13, 25]]
+        # Only ascending indexes are used, since results differ from pandas otherwise
+
+        n = 5
+        np.random.seed(0)
+        A = np.random.randint(-100, 100, n)
+        B = np.arange(n) * 2 + 1
+
+        series_dtypes = [None, np.int64, np.float64]
+        fill_values = [None, np.nan, 4, 4.2]
+        for dtype1, dtype2, fill_value in product(series_dtypes, series_dtypes, fill_values):
+            for series_index1 in series_indexes:
+                for series_index2 in series_indexes:
+                    S1 = pd.Series(A, index=series_index1, dtype=dtype1)
+                    S2 = pd.Series(B, index=series_index2, dtype=dtype2)
+                    with self.subTest(S1=S1, S2=S2, fill_value=fill_value):
+                        result = hpat_func(S1, S2, fill_value)
+                        result_ref = test_impl(S1, S2, fill_value)
+                        # check_dtype=False: the resulting dtype may be wider than in pandas
+                        pd.testing.assert_series_equal(result, result_ref, check_dtype=False)
 
-    def test_series_combine_samelen(self):
-        def test_impl(S1, S2):
-            return S1.combine(S2, lambda a, b: 2 * a + b)
+    def test_series_combine_value_samelen(self):
+        def test_impl(S1, S2, fill_value):
+            return S1.combine(S2, lambda a, b: 2 * a + b, fill_value=fill_value)
         hpat_func = self.jit(test_impl)
 
-        S1 = pd.Series([1.0, 2., 3., 4., 5.])
-        S2 = pd.Series([6.0, 21., 3.6, 5., 0.0])
-        pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2))
+        n = 11
+        np.random.seed(0)
+        A = np.random.randint(-100, 100, n)
+        B = np.arange(n) * 2 + 1
+        series_index = 1 + np.arange(n)
+
+        series_dtypes = [None, np.int64, np.float64]
+        fill_values = [None, np.nan, 4, 4.2]
+        for dtype1, dtype2, fill_value in product(series_dtypes, series_dtypes, fill_values):
+            S1 = pd.Series(A, index=series_index, dtype=dtype1)
+            S2 = pd.Series(B, index=series_index, dtype=dtype2)
+            with self.subTest(S1=S1, S2=S2, fill_value=fill_value):
+                result = hpat_func(S1, S2, fill_value)
+                result_ref = test_impl(S1, S2, fill_value)
+                # check_dtype=False: the resulting dtype may be wider than in pandas
+                pd.testing.assert_series_equal(result, result_ref, check_dtype=False)
 
-    def test_series_combine_value(self):
+    def test_series_combine_different_types(self):
         def test_impl(S1, S2):
-            return S1.combine(S2, lambda a, b: 2 * a + b, 1237.56)
+            return S1.combine(S2, lambda a, b: 2 * a + b)
         hpat_func = self.jit(test_impl)
 
-        S1 = pd.Series([1.0, 2., 3., 4., 5.])
-        S2 = pd.Series([6.0, 21., 3.6, 5.])
-        pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2))
+        sizes1 = [2, 4, 5, 6, 8]
+        sizes2 = [1, 3, 5, 7, 9]
+        series_dtypes = [None, np.int64, np.float64]
 
-    def test_series_combine_value_samelen(self):
-        def test_impl(S1, S2):
-            return S1.combine(S2, lambda a, b: 2 * a + b, 1237.56)
-        hpat_func = self.jit(test_impl)
+        for n in sizes1:
+            for k in sizes2:
+                for dtype1, dtype2 in product(series_dtypes, series_dtypes):
+                    A = np.random.randint(-100, 100, n)
+                    B = np.arange(k) * 2 + 1
+                    S1 = pd.Series(A, dtype=dtype1)
+                    S2 = pd.Series(B, dtype=dtype2)
+                    with self.subTest(S1=S1, S2=S2):
+                        result = hpat_func(S1, S2)
+                        result_ref = test_impl(S1, S2)
+                        # check_dtype=False: the resulting dtype may be wider than in pandas
+                        pd.testing.assert_series_equal(result, result_ref, check_dtype=False)
 
-        S1 = pd.Series([1.0, 2., 3., 4., 5.])
-        S2 = pd.Series([6.0, 21., 3.6, 5., 0.0])
-        pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2))
 
     def test_series_abs1(self):
         def test_impl(S):