From d49ff6e736f2ffe2b1cedf9ab94978f5f591be5e Mon Sep 17 00:00:00 2001
From: mloubout
Date: Sat, 18 Jan 2025 18:16:03 -0500
Subject: [PATCH] compiler: fix missing algorithm include for min/max

Min/Max applications that are not lowered to the MIN/MAX macros (i.e.,
non-integer arguments or more than two arguments) now contribute the
language-specific 'header-algorithm' include. To make this possible,
the target language is forwarded from the Operator to generate_macros.

The includes are accumulated in a set, since _generate_macros later
calls `includes.add(...)` on the returned object, and `None` entries
(languages without a 'header-algorithm' mapping) are dropped.
---
 devito/operator/operator.py        |  5 +++--
 devito/passes/iet/languages/CXX.py |  1 +
 devito/passes/iet/misc.py          | 31 ++++++++++++++++--------------
 tests/test_gpu_openacc.py          | 13 ++++++++++++-
 4 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/devito/operator/operator.py b/devito/operator/operator.py
index 38566e6a52..27ed6b5b70 100644
--- a/devito/operator/operator.py
+++ b/devito/operator/operator.py
@@ -470,6 +470,7 @@ def _lower_iet(cls, uiet, profiler=None, **kwargs):
             * Finalize (e.g., symbol definitions, array casts)
         """
         name = kwargs.get("name", "Kernel")
+        lang = cls._Target.lang()
 
         # Wrap the IET with an EntryFunction (a special Callable representing
         # the entry point of the generated library)
@@ -488,10 +489,10 @@ def _lower_iet(cls, uiet, profiler=None, **kwargs):
         cls._Target.instrument(graph, profiler=profiler, **kwargs)
 
         # Extract the necessary macros from the symbolic objects
-        generate_macros(graph, **kwargs)
+        generate_macros(graph, lang=lang, **kwargs)
 
         # Add type specific metadata
-        lower_dtypes(graph, lang=cls._Target.lang(), **kwargs)
+        lower_dtypes(graph, lang=lang, **kwargs)
 
         # Target-independent optimizations
         minimize_symbols(graph)
diff --git a/devito/passes/iet/languages/CXX.py b/devito/passes/iet/languages/CXX.py
index 17003c0d8f..bfa6bebe35 100644
--- a/devito/passes/iet/languages/CXX.py
+++ b/devito/passes/iet/languages/CXX.py
@@ -45,6 +45,7 @@ class CXXBB(LangBB):
 
     mapper = {
         'header-memcpy': 'string.h',
+        'header-algorithm': 'algorithm',
         'host-alloc': lambda i, j, k:
             Call('posix_memalign', (i, j, k)),
         'host-alloc-pin': lambda i, j, k:
diff --git a/devito/passes/iet/misc.py b/devito/passes/iet/misc.py
index e9d8308336..63a9b9a72f 100644
--- a/devito/passes/iet/misc.py
+++ b/devito/passes/iet/misc.py
@@ -143,7 +143,7 @@ def generate_macros(graph, **kwargs):
 
 
 @iet_pass
-def _generate_macros(iet, tracker=None, **kwargs):
+def _generate_macros(iet, tracker=None, lang=None, **kwargs):
     # Derive the Macros necessary for the FIndexeds
     iet = _generate_macros_findexeds(iet, tracker=tracker, **kwargs)
 
@@ -151,7 +151,8 @@ def _generate_macros(iet, tracker=None, **kwargs):
     headers = sorted((ccode(define), ccode(expr)) for define, expr in headers)
 
     # Generate Macros from higher-level SymPy objects
-    headers.extend(_generate_macros_math(iet))
+    mheaders, includes = _generate_macros_math(iet, lang=lang)
+    headers.extend(mheaders)
 
     # Remove redundancies while preserving the order
     headers = filter_ordered(headers)
@@ -159,7 +160,6 @@ def _generate_macros(iet, tracker=None, **kwargs):
     # Some special Symbols may represent Macros defined in standard libraries,
     # so we need to include the respective includes
     limits = FindApplications(ValueLimit).visit(iet)
-    includes = set()
     if limits & (set(limits_mapper[np.int32]) | set(limits_mapper[np.int64])):
         includes.add('limits.h')
     elif limits & (set(limits_mapper[np.float32]) | set(limits_mapper[np.float64])):
@@ -194,35 +194,38 @@ def _generate_macros_findexeds(iet, sregistry=None, tracker=None, **kwargs):
     return iet
 
 
-def _generate_macros_math(iet):
+def _generate_macros_math(iet, lang=None):
     headers = []
+    includes = set()
     for i in FindApplications().visit(iet):
-        headers.extend(_lower_macro_math(i))
+        header, include = _lower_macro_math(i, lang)
+        headers.extend(header)
+        includes.update(include)
 
-    return headers
+    return headers, includes - {None}
 
 
 @singledispatch
-def _lower_macro_math(expr):
-    return ()
+def _lower_macro_math(expr, lang):
+    return (), ()
 
 
 @_lower_macro_math.register(Min)
 @_lower_macro_math.register(sympy.Min)
-def _(expr):
+def _(expr, lang):
     if has_integer_args(*expr.args) and len(expr.args) == 2:
-        return (('MIN(a,b)', ('(((a) < (b)) ? (a) : (b))')),)
+        return (('MIN(a,b)', ('(((a) < (b)) ? (a) : (b))')),), ()
     else:
-        return ()
+        return (), (lang.get('header-algorithm'),)
 
 
 @_lower_macro_math.register(Max)
 @_lower_macro_math.register(sympy.Max)
-def _(expr):
+def _(expr, lang):
     if has_integer_args(*expr.args) and len(expr.args) == 2:
-        return (('MAX(a,b)', ('(((a) > (b)) ? (a) : (b))')),)
+        return (('MAX(a,b)', ('(((a) > (b)) ? (a) : (b))')),), ()
     else:
-        return ()
+        return (), (lang.get('header-algorithm'),)
 
 
 @iet_pass
diff --git a/tests/test_gpu_openacc.py b/tests/test_gpu_openacc.py
index bdf732a12d..28255cd991 100644
--- a/tests/test_gpu_openacc.py
+++ b/tests/test_gpu_openacc.py
@@ -2,14 +2,14 @@
 import numpy as np
 
 from devito import (Grid, Function, TimeFunction, SparseTimeFunction, Eq, Operator,
-                    norm, solve)
+                    norm, solve, Max)
 from conftest import skipif, assert_blocking, opts_device_tiling
 from devito.data import LEFT
 from devito.exceptions import InvalidOperator
 from devito.ir.iet import retrieve_iteration_tree, FindNodes, Iteration
 from examples.seismic import TimeAxis, RickerSource, Receiver
 
 pytestmark = skipif(['nodevice'], whole_module=True)
 
 
 class TestCodeGeneration:
@@ -171,6 +171,17 @@ def test_multi_tile_blocking_structure(self):
         assert len(iters) == len(v)
         assert all(i.step == j for i, j in zip(iters, v))
 
+    def test_std_max(self):
+        grid = Grid(shape=(3, 3, 3))
+        x, y, z = grid.dimensions
+
+        u = Function(name='u', grid=grid)
+
+        op = Operator(Eq(u, Max(1.2 * x / y, 2.3 * y / x)),
+                      platform='nvidiaX', language='openacc')
+
+        assert '<algorithm>' in str(op)
+
 
 
 class TestOperator:
 