diff --git a/loki/transformations/build_system/tests/__init__.py b/loki/transformations/build_system/tests/__init__.py new file mode 100644 index 000000000..538bb4e73 --- /dev/null +++ b/loki/transformations/build_system/tests/__init__.py @@ -0,0 +1,6 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. diff --git a/loki/transformations/data_offload/__init__.py b/loki/transformations/data_offload/__init__.py new file mode 100644 index 000000000..b6046c746 --- /dev/null +++ b/loki/transformations/data_offload/__init__.py @@ -0,0 +1,13 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +""" +Sub-package providing data offload transformations. +""" + +from loki.transformations.data_offload.field_api import * # noqa +from loki.transformations.data_offload.global_var import * # noqa +from loki.transformations.data_offload.offload import * # noqa diff --git a/loki/transformations/data_offload/field_api.py b/loki/transformations/data_offload/field_api.py new file mode 100644 index 000000000..00d9de4f4 --- /dev/null +++ b/loki/transformations/data_offload/field_api.py @@ -0,0 +1,253 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

from itertools import chain

from loki.batch import Transformation
from loki.expression import Array, symbols as sym
from loki.ir import (
    FindNodes, PragmaRegion, CallStatement,
    Transformer, pragma_regions_attached,
    SubstituteExpressions
)
from loki.logging import warning, error
from loki.tools import as_tuple
from loki.types import BasicType

from loki.transformations.data_offload.offload import DataOffloadTransformation
from loki.transformations.parallel import (
    FieldAPITransferType, field_get_device_data, field_sync_host,
    remove_field_api_view_updates
)


__all__ = ['FieldOffloadTransformation']


def find_target_calls(region, targets):
    """
    Return all calls inside ``region`` whose name matches one of ``targets``.

    Parameters
    ----------
    region : :any:`PragmaRegion`
        The IR node that is searched for :any:`CallStatement` nodes.
    targets : collection of str
        Names of the target routines. The lower-cased call name is tested
        for membership in this collection, so entries must be lower-case.

    Returns
    -------
    list of :any:`CallStatement`
        The matching calls, in the order in which they are found.
    """
    return [
        call for call in FindNodes(CallStatement).visit(region)
        if str(call.name).lower() in targets
    ]


class FieldOffloadTransformation(Transformation):
    """
    Transformation to offload arrays owned by Field API fields to the device.
    **This transformation is IFS specific.**

    The transformation assumes that fields are wrapped in derived types specified in
    ``field_group_types`` and will only offload arrays that are members of such derived types.
    In the process this transformation removes calls to Field API ``update_view`` and adds
    declarations for the device pointers to the driver subroutine.

    The transformation acts on ``!$loki data`` regions and offloads all :any:`Array`
    symbols that satisfy the following conditions:

    1. The array is a member of an object that is of type specified in ``field_group_types``.

    2. The array is passed as a parameter to at least one of the kernel targets passed to
       ``transform_subroutine``.

    Parameters
    ----------
    devptr_prefix : str, optional
        The prefix of device pointers added by this transformation (defaults to ``'loki_devptr_'``).
    field_group_types : str or list or tuple of str, optional
        Names of the field group types with members that may be offloaded (defaults to ``['']``).
    offload_index : str, optional
        Name of the index variable to inject in the outermost dimension of offloaded arrays
        in the kernel calls (defaults to ``'IBL'``).
    """

    class FieldPointerMap:
        """
        Helper class to :any:`FieldOffloadTransformation` that is used to store arrays passed to
        target kernel calls and the corresponding device pointers added by the transformation.
        The pointer/array variable pairs are exposed through the class properties, based on
        the intent of the kernel argument.

        ``devptrs`` is expected to hold the pointers for ``inargs``, then ``inoutargs``,
        then ``outargs``, in exactly that order.
        """

        def __init__(self, devptrs, inargs, inoutargs, outargs):
            self.inargs = inargs
            self.inoutargs = inoutargs
            self.outargs = outargs
            self.devptrs = devptrs

        def _pairs(self, args, offset):
            """
            Yield ``(argument, device pointer)`` pairs for ``args``, where the
            pointer of the first argument is stored at position ``offset``.
            """
            for idx, arg in enumerate(args, start=offset):
                yield arg, self.devptrs[idx]

        @property
        def in_pairs(self):
            """
            Iterator that yields array/pointer pairs for kernel arguments of intent(in).

            Yields
            ------
            :any:`Array`
                Original kernel call argument
            :any:`Array`
                Corresponding device pointer added by the transformation.
            """
            return self._pairs(self.inargs, 0)

        @property
        def inout_pairs(self):
            """
            Iterator that yields array/pointer pairs for arguments with intent(inout).

            Yields
            ------
            :any:`Array`
                Original kernel call argument
            :any:`Array`
                Corresponding device pointer added by the transformation.
            """
            return self._pairs(self.inoutargs, len(self.inargs))

        @property
        def out_pairs(self):
            """
            Iterator that yields array/pointer pairs for arguments with intent(out).

            Yields
            ------
            :any:`Array`
                Original kernel call argument
            :any:`Array`
                Corresponding device pointer added by the transformation.
            """
            return self._pairs(self.outargs, len(self.inargs) + len(self.inoutargs))

    def __init__(self, devptr_prefix=None, field_group_types=None, offload_index=None):
        self.deviceptr_prefix = 'loki_devptr_' if devptr_prefix is None else devptr_prefix
        # ``as_tuple`` keeps a single type name intact instead of iterating over its characters
        field_group_types = [''] if field_group_types is None else as_tuple(field_group_types)
        self.field_group_types = tuple(typename.lower() for typename in field_group_types)
        self.offload_index = 'IBL' if offload_index is None else offload_index

    def transform_subroutine(self, routine, **kwargs):
        """
        Apply the transformation to a routine; only routines with role ``'driver'`` are changed.

        Parameters
        ----------
        routine : `Subroutine`
            Subroutine to apply this transformation to.
        role : str
            Role of the ``routine``; anything but ``'driver'`` is ignored.
        targets : list or str
            Names of the kernel routines whose calls are considered for offload.
        """
        role = kwargs.get('role')
        # Call names are lower-cased before they are matched, so normalise the targets as well
        targets = tuple(str(target).lower() for target in as_tuple(kwargs.get('targets')))
        if role == 'driver':
            self.process_driver(routine, targets)

    def process_driver(self, driver, targets):
        """
        Offload the Field API arrays used in every active ``!$loki data`` region of ``driver``.

        Parameters
        ----------
        driver : `Subroutine`
            The driver routine, which is modified in place.
        targets : tuple of str
            Lower-case names of the kernel routines that are offload targets.
        """
        remove_field_api_view_updates(driver, self.field_group_types)
        with pragma_regions_attached(driver):
            for region in FindNodes(PragmaRegion).visit(driver.body):
                # Only work on active `!$loki data` regions
                if not DataOffloadTransformation._is_active_loki_data_region(region, targets):
                    continue
                kernel_calls = find_target_calls(region, targets)
                offload_variables = self.find_offload_variables(driver, kernel_calls)
                device_ptrs = self._declare_device_ptrs(driver, offload_variables)
                offload_map = self.FieldPointerMap(device_ptrs, *offload_variables)
                self._add_field_offload_calls(driver, region, offload_map)
                self._replace_kernel_args(driver, kernel_calls, offload_map)

    def find_offload_variables(self, driver, calls):
        """
        Collect the field group member arrays passed to ``calls``, sorted by intent.

        Parameters
        ----------
        driver : `Subroutine`
            The driver routine; only used for diagnostics.
        calls : list of :any:`CallStatement`
            The kernel calls to inspect.

        Returns
        -------
        tuple
            Three tuples ``(inargs, inoutargs, outargs)`` without duplicates, in order
            of first appearance.

        Raises
        ------
        RuntimeError
            If the routine of one of the calls is still :any:`BasicType.DEFERRED`.
        """
        inargs = ()
        inoutargs = ()
        outargs = ()

        for call in calls:
            if call.routine is BasicType.DEFERRED:
                msg = (f'[Loki] Data offload: Routine {driver.name} has not been enriched '
                       f'in {str(call.name).lower()}')
                error(msg)
                raise RuntimeError(msg)
            for param, arg in call.arg_iter():
                if not isinstance(param, Array):
                    continue
                try:
                    parent = arg.parent
                    if parent.type.dtype.name.lower() not in self.field_group_types:
                        warning(f'[Loki] Data offload: The parent object {parent.name} of type ' +
                                f'{parent.type.dtype} is not in the list of field wrapper types')
                        continue
                except AttributeError:
                    # The argument may be an expression without a name, so do not
                    # dereference ``arg.name`` unconditionally inside the handler
                    warning(f'[Loki] Data offload: Raw array object {getattr(arg, "name", arg)} encountered in'
                            + f' {driver.name} that is not wrapped by a Field API object')
                    continue

                intent = param.type.intent.lower()
                if intent == 'in':
                    inargs += (arg, )
                elif intent == 'inout':
                    inoutargs += (arg, )
                elif intent == 'out':
                    outargs += (arg, )

        # An array that is read by one call and written by another needs both transfers
        inoutargs += tuple(v for v in inargs if v in outargs)
        inargs = tuple(v for v in inargs if v not in inoutargs)
        outargs = tuple(v for v in outargs if v not in inoutargs)

        # Drop duplicates but keep the order of first appearance, so that the
        # generated declarations and calls are the same in every run
        inargs = tuple(dict.fromkeys(inargs))
        inoutargs = tuple(dict.fromkeys(inoutargs))
        outargs = tuple(dict.fromkeys(outargs))
        return inargs, inoutargs, outargs

    def _declare_device_ptrs(self, driver, offload_variables):
        """
        Create one device pointer per offload variable and add them to the variables of ``driver``.
        """
        device_ptrs = tuple(self._devptr_from_array(driver, a) for a in chain(*offload_variables))
        driver.variables += device_ptrs
        return device_ptrs

    def _devptr_from_array(self, driver, a: sym.Array):
        """
        Returns a contiguous pointer :any:`Variable` with types matching the array a

        The pointer has one dimension more than ``a`` and is named after ``a`` with the
        configured prefix, with every ``%`` replaced by ``_``. A warning is issued if
        ``driver`` already has a variable of that name.
        """
        shape = (sym.RangeIndex((None, None)),) * (len(a.shape)+1)
        devptr_type = a.type.clone(pointer=True, contiguous=True, shape=shape, intent=None)
        devptr_name = self.deviceptr_prefix + a.name.replace('%', '_')
        if devptr_name in driver.variable_map:
            warning(f'[Loki] Data offload: The routine {driver.name} already has a ' +
                    f'variable named {devptr_name}')
        return sym.Variable(name=devptr_name, type=devptr_type, dimensions=shape)

    def _add_field_offload_calls(self, driver, region, offload_map):
        """
        Surround ``region`` with the Field API calls for all entries of ``offload_map``.

        The device data calls are placed in front of the region, the host
        synchronisation calls for inout and out arguments behind it.
        """
        transfers = (
            (offload_map.in_pairs, FieldAPITransferType.READ_ONLY),
            (offload_map.inout_pairs, FieldAPITransferType.READ_WRITE),
            (offload_map.out_pairs, FieldAPITransferType.READ_WRITE),
        )
        host_to_device = tuple(
            field_get_device_data(self._get_field_ptr_from_view(arg), devptr, transfer_type, driver)
            for pairs, transfer_type in transfers
            for arg, devptr in pairs
        )
        device_to_host = tuple(
            field_sync_host(self._get_field_ptr_from_view(arg), driver)
            for arg, _ in chain(offload_map.inout_pairs, offload_map.out_pairs)
        )
        update_map = {region: host_to_device + (region,) + device_to_host}
        Transformer(update_map, inplace=True).visit(driver.body)

    def _get_field_ptr_from_view(self, field_view):
        """
        Return the member ``F_<name>`` of the parent of ``field_view``, where ``<name>``
        is the last component of the name of ``field_view``.
        """
        member_name = field_view.name.rsplit('%', 1)[-1]
        return field_view.parent.get_derived_type_member('F_' + member_name)

    def _replace_kernel_args(self, driver, kernel_calls, offload_map):
        """
        Replace the offloaded arrays in ``kernel_calls`` by their device pointers,
        indexed with the offload index in the last dimension.

        Raises
        ------
        KeyError
            If ``driver`` has no variable with the name of the offload index.
        """
        try:
            offload_idx_expr = driver.variable_map[self.offload_index]
        except KeyError:
            error(f'[Loki] Data offload: Offload index {self.offload_index} is not a variable '
                  f'of routine {driver.name}')
            raise

        change_map = {}
        for arg, devptr in chain(offload_map.in_pairs, offload_map.inout_pairs, offload_map.out_pairs):
            if arg.dimensions:
                dims = arg.dimensions + (offload_idx_expr,)
            else:
                # No explicit subscript: pass full ranges for all but the last dimension
                dims = (sym.RangeIndex((None, None)),) * (len(devptr.shape)-1) + (offload_idx_expr,)
            change_map[arg] = devptr.clone(dimensions=dims)

        arg_transformer = SubstituteExpressions(change_map, inplace=True)
        for call in kernel_calls:
            arg_transformer.visit(call)
646ca9d66..2bd4b6de8 100644 --- a/loki/transformations/data_offload.py +++ b/loki/transformations/data_offload/global_var.py @@ -10,218 +10,23 @@ from loki.analyse import dataflow_analysis_attached from loki.batch import Transformation, ProcedureItem, ModuleItem -from loki.expression import Scalar, Array, symbols as sym +from loki.expression import Scalar, Array from loki.ir import ( - FindNodes, PragmaRegion, CallStatement, Pragma, Import, Comment, - Transformer, pragma_regions_attached, get_pragma_parameters, + FindNodes, CallStatement, Pragma, Import, Comment, + Transformer, get_pragma_parameters, FindInlineCalls, SubstituteExpressions ) -from loki.logging import warning, error +from loki.logging import warning from loki.tools import as_tuple, flatten, CaseInsensitiveDict, CaseInsensitiveDefaultDict -from loki.types import BasicType, DerivedType -from loki.transformations.parallel import ( - FieldAPITransferType, field_get_device_data, field_sync_host, remove_field_api_view_updates -) +from loki.types import DerivedType + __all__ = [ - 'DataOffloadTransformation', 'GlobalVariableAnalysis', - 'GlobalVarOffloadTransformation', 'GlobalVarHoistTransformation', - 'FieldOffloadTransformation' + 'GlobalVariableAnalysis', 'GlobalVarOffloadTransformation', + 'GlobalVarHoistTransformation', ] -class DataOffloadTransformation(Transformation): - """ - Utility transformation to insert data offload regions for GPU devices - based on marked ``!$loki data`` regions. In the first instance this - will insert OpenACC data offload regions, but can be extended to other - offload region semantics (eg. OpenMP-5) in the future. - - Parameters - ---------- - remove_openmp : bool - Remove any existing OpenMP pragmas inside the marked region. - present_on_device : bool - Assume arrays are already offloaded and present on device" - assume_deviceptr : bool - Mark all offloaded arrays as true device-pointers if data offload - is being managed outside of structured OpenACC data regions. 
- """ - - def __init__(self, **kwargs): - # We need to record if we actually added any, so - # that down-stream processing can use that info - self.has_data_regions = False - self.remove_openmp = kwargs.get('remove_openmp', False) - self.assume_deviceptr = kwargs.get('assume_deviceptr', False) - self.present_on_device = kwargs.get('present_on_device', False) - - if self.assume_deviceptr and not self.present_on_device: - error("[Loki] Data offload: Can't assume device pointer arrays without arrays being marked" + - "present on device.") - raise RuntimeError - - def transform_subroutine(self, routine, **kwargs): - """ - Apply the transformation to a `Subroutine` object. - - Parameters - ---------- - routine : `Subroutine` - Subroutine to apply this transformation to. - role : string - Role of the `routine` in the scheduler call tree. - This transformation will only apply at the ``'driver'`` level. - targets : list or string - List of subroutines that are to be considered as part of - the transformation call tree. - """ - role = kwargs.get('role') - targets = as_tuple(kwargs.get('targets', None)) - - if targets: - targets = tuple(t.lower() for t in targets) - - if role == 'driver': - self.remove_openmp_pragmas(routine, targets) - self.insert_data_offload_pragmas(routine, targets) - - @staticmethod - def _is_active_loki_data_region(region, targets): - """ - Utility to decide if a ``PragmaRegion`` is of type ``!$loki data`` - and has active target routines. - """ - if region.pragma.keyword.lower() != 'loki': - return False - if 'data' not in region.pragma.content.lower(): - return False - - # Find all targeted kernel calls - calls = FindNodes(CallStatement).visit(region) - calls = [c for c in calls if str(c.name).lower() in targets] - if len(calls) == 0: - return False - - return True - - def insert_data_offload_pragmas(self, routine, targets): - """ - Find ``!$loki data`` pragma regions and create according - ``!$acc udpdate`` regions. 
- - Parameters - ---------- - routine : `Subroutine` - Subroutine to apply this transformation to. - targets : list or string - List of subroutines that are to be considered as part of - the transformation call tree. - """ - pragma_map = {} - with pragma_regions_attached(routine): - for region in FindNodes(PragmaRegion).visit(routine.body): - # Only work on active `!$loki data` regions - if not self._is_active_loki_data_region(region, targets): - continue - - # Find all targeted kernel calls - calls = FindNodes(CallStatement).visit(region) - calls = [c for c in calls if str(c.name).lower() in targets] - - # Collect the three types of device data accesses from calls - inargs = () - inoutargs = () - outargs = () - - for call in calls: - if call.routine is BasicType.DEFERRED: - warning(f'[Loki] Data offload: Routine {routine.name} has not been enriched ' + - f'in {str(call.name).lower()}') - - continue - - for param, arg in call.arg_iter(): - if isinstance(param, Array) and param.type.intent.lower() == 'in': - inargs += (str(arg.name).lower(),) - if isinstance(param, Array) and param.type.intent.lower() == 'inout': - inoutargs += (str(arg.name).lower(),) - if isinstance(param, Array) and param.type.intent.lower() == 'out': - outargs += (str(arg.name).lower(),) - - # Sanitize data access categories to avoid double-counting variables - inoutargs += tuple(v for v in inargs if v in outargs) - inargs = tuple(v for v in inargs if v not in inoutargs) - outargs = tuple(v for v in outargs if v not in inoutargs) - - # Filter for duplicates - inargs = tuple(dict.fromkeys(inargs)) - outargs = tuple(dict.fromkeys(outargs)) - inoutargs = tuple(dict.fromkeys(inoutargs)) - - # Now generate the pre- and post pragmas (OpenACC) - if self.present_on_device: - if self.assume_deviceptr: - offload_args = inargs + outargs + inoutargs - if offload_args: - deviceptr = f' deviceptr({", ".join(offload_args)})' - else: - deviceptr = '' - pragma = Pragma(keyword='acc', content=f'data{deviceptr}') 
- else: - offload_args = inargs + outargs + inoutargs - if offload_args: - present = f' present({", ".join(offload_args)})' - else: - present = '' - pragma = Pragma(keyword='acc', content=f'data{present}') - else: - copyin = f'copyin({", ".join(inargs)})' if inargs else '' - copy = f'copy({", ".join(inoutargs)})' if inoutargs else '' - copyout = f'copyout({", ".join(outargs)})' if outargs else '' - pragma = Pragma(keyword='acc', content=f'data {copyin} {copy} {copyout}') - pragma_post = Pragma(keyword='acc', content='end data') - pragma_map[region.pragma] = pragma - pragma_map[region.pragma_post] = pragma_post - - # Record that we actually created a new region - if not self.has_data_regions: - self.has_data_regions = True - - routine.body = Transformer(pragma_map).visit(routine.body) - - def remove_openmp_pragmas(self, routine, targets): - """ - Remove any existing OpenMP pragmas in the offload regions that - will have been intended for OpenMP threading rather than - offload. - - Parameters - ---------- - routine : `Subroutine` - Subroutine to apply this transformation to. - targets : list or string - List of subroutines that are to be considered as part of - the transformation call tree. - """ - pragma_map = {} - with pragma_regions_attached(routine): - for region in FindNodes(PragmaRegion).visit(routine.body): - # Only work on active `!$loki data` regions - if not self._is_active_loki_data_region(region, targets): - continue - - for p in FindNodes(Pragma).visit(routine.body): - if p.keyword.lower() == 'omp': - pragma_map[p] = None - for r in FindNodes(PragmaRegion).visit(region): - if r.pragma.keyword.lower() == 'omp': - pragma_map[r.pragma] = None - pragma_map[r.pragma_post] = None - - routine.body = Transformer(pragma_map).visit(routine.body) - - class GlobalVariableAnalysis(Transformation): """ Transformation pass to analyse the declaration and use of (global) module variables. 
@@ -927,228 +732,3 @@ def _append_routine_arguments(self, routine, item): )) for arg in new_arguments ] routine.arguments += tuple(sorted(new_arguments, key=lambda symbol: symbol.name)) - - -def find_target_calls(region, targets): - """ - Returns a list of all calls to targets inside the region. - - Parameters - ---------- - :region: :any:`PragmaRegion` - :targets: collection of :any:`Subroutine` - Iterable object of subroutines or functions called - :returns: list of :any:`CallStatement` - """ - calls = FindNodes(CallStatement).visit(region) - calls = [c for c in calls if str(c.name).lower() in targets] - return calls - - -class FieldOffloadTransformation(Transformation): - """ - - Transformation to offload arrays owned by Field API fields to the device. **This transformation is IFS specific.** - - The transformation assumes that fields are wrapped in derived types specified in - ``field_group_types`` and will only offload arrays that are members of such derived types. - In the process this transformation removes calls to Field API ``update_view`` and adds - declarations for the device pointers to the driver subroutine. - - The transformation acts on ``!$loki data`` regions and offloads all :any:`Array` - symbols that satisfy the following conditions: - - 1. The array is a member of an object that is of type specified in ``field_group_types``. - - 2. The array is passed as a parameter to at least one of the kernel targets passed to ``transform_subroutine``. - - Parameters - ---------- - devptr_prefix: str, optional - The prefix of device pointers added by this transformation (defaults to ``'loki_devptr_'``). - field_froup_types: list or tuple of str, optional - Names of the field group types with members that may be offloaded (defaults to ``['']``). - offload_index: str, optional - Names of index variable to inject in the outmost dimension of offloaded arrays in the kernel - calls (defaults to ``'IBL'``). 
- """ - - class FieldPointerMap: - """ - Helper class to :any:`FieldOffloadTransformation` that is used to store arrays passed to - target kernel calls and the corresponding device pointers added by the transformation. - The pointer/array variable pairs are exposed through the class properties, based on - the intent of the kernel argument. - """ - def __init__(self, devptrs, inargs, inoutargs, outargs): - self.inargs = inargs - self.inoutargs = inoutargs - self.outargs = outargs - self.devptrs = devptrs - - - @property - def in_pairs(self): - """ - Iterator that yields array/pointer pairs for kernel arguments of intent(in). - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. - """ - for i, inarg in enumerate(self.inargs): - yield inarg, self.devptrs[i] - - @property - def inout_pairs(self): - """ - Iterator that yields array/pointer pairs for arguments with intent(inout). - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. - """ - start = len(self.inargs) - for i, inoutarg in enumerate(self.inoutargs): - yield inoutarg, self.devptrs[i+start] - - @property - def out_pairs(self): - """ - Iterator that yields array/pointer pairs for arguments with intent(out) - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. 
- """ - - start = len(self.inargs)+len(self.inoutargs) - for i, outarg in enumerate(self.outargs): - yield outarg, self.devptrs[i+start] - - - def __init__(self, devptr_prefix=None, field_group_types=None, offload_index=None): - self.deviceptr_prefix = 'loki_devptr_' if devptr_prefix is None else devptr_prefix - field_group_types = [''] if field_group_types is None else field_group_types - self.field_group_types = tuple(typename.lower() for typename in field_group_types) - self.offload_index = 'IBL' if offload_index is None else offload_index - - def transform_subroutine(self, routine, **kwargs): - role = kwargs['role'] - targets = as_tuple(kwargs.get('targets'), (None)) - if role == 'driver': - self.process_driver(routine, targets) - - def process_driver(self, driver, targets): - remove_field_api_view_updates(driver, self.field_group_types) - with pragma_regions_attached(driver): - for region in FindNodes(PragmaRegion).visit(driver.body): - # Only work on active `!$loki data` regions - if not DataOffloadTransformation._is_active_loki_data_region(region, targets): - continue - kernel_calls = find_target_calls(region, targets) - offload_variables = self.find_offload_variables(driver, kernel_calls) - device_ptrs = self._declare_device_ptrs(driver, offload_variables) - offload_map = self.FieldPointerMap(device_ptrs, *offload_variables) - self._add_field_offload_calls(driver, region, offload_map) - self._replace_kernel_args(driver, kernel_calls, offload_map) - - def find_offload_variables(self, driver, calls): - inargs = () - inoutargs = () - outargs = () - - for call in calls: - if call.routine is BasicType.DEFERRED: - error(f'[Loki] Data offload: Routine {driver.name} has not been enriched ' + - f'in {str(call.name).lower()}') - raise RuntimeError - for param, arg in call.arg_iter(): - if not isinstance(param, Array): - continue - try: - parent = arg.parent - if parent.type.dtype.name.lower() not in self.field_group_types: - warning(f'[Loki] Data offload: The parent 
object {parent.name} of type ' + - f'{parent.type.dtype} is not in the list of field wrapper types') - continue - except AttributeError: - warning(f'[Loki] Data offload: Raw array object {arg.name} encountered in' - + f' {driver.name} that is not wrapped by a Field API object') - continue - - if param.type.intent.lower() == 'in': - inargs += (arg, ) - if param.type.intent.lower() == 'inout': - inoutargs += (arg, ) - if param.type.intent.lower() == 'out': - outargs += (arg, ) - - inoutargs += tuple(v for v in inargs if v in outargs) - inargs = tuple(v for v in inargs if v not in inoutargs) - outargs = tuple(v for v in outargs if v not in inoutargs) - - inargs = tuple(set(inargs)) - inoutargs = tuple(set(inoutargs)) - outargs = tuple(set(outargs)) - return inargs, inoutargs, outargs - - - def _declare_device_ptrs(self, driver, offload_variables): - device_ptrs = tuple(self._devptr_from_array(driver, a) for a in chain(*offload_variables)) - driver.variables += device_ptrs - return device_ptrs - - def _devptr_from_array(self, driver, a: sym.Array): - """ - Returns a contiguous pointer :any:`Variable` with types matching the array a - """ - shape = (sym.RangeIndex((None, None)),) * (len(a.shape)+1) - devptr_type = a.type.clone(pointer=True, contiguous=True, shape=shape, intent=None) - base_name = a.name if a.parent is None else '_'.join(a.name.split('%')) - devptr_name = self.deviceptr_prefix + base_name - if devptr_name in driver.variable_map: - warning(f'[Loki] Data offload: The routine {driver.name} already has a ' + - f'variable named {devptr_name}') - devptr = sym.Variable(name=devptr_name, type=devptr_type, dimensions=shape) - return devptr - - def _add_field_offload_calls(self, driver, region, offload_map): - host_to_device = tuple(field_get_device_data(self._get_field_ptr_from_view(inarg), devptr, - FieldAPITransferType.READ_ONLY, driver) for inarg, devptr in offload_map.in_pairs) - host_to_device += 
tuple(field_get_device_data(self._get_field_ptr_from_view(inarg), devptr, - FieldAPITransferType.READ_WRITE, driver) for inarg, devptr in offload_map.inout_pairs) - host_to_device += tuple(field_get_device_data(self._get_field_ptr_from_view(inarg), devptr, - FieldAPITransferType.READ_WRITE, driver) for inarg, devptr in offload_map.out_pairs) - device_to_host = tuple(field_sync_host(self._get_field_ptr_from_view(inarg), driver) - for inarg, _ in chain(offload_map.inout_pairs, offload_map.out_pairs)) - update_map = {region: host_to_device + (region,) + device_to_host} - Transformer(update_map, inplace=True).visit(driver.body) - - def _get_field_ptr_from_view(self, field_view): - type_chain = field_view.name.split('%') - field_type_name = 'F_' + type_chain[-1] - return field_view.parent.get_derived_type_member(field_type_name) - - def _replace_kernel_args(self, driver, kernel_calls, offload_map): - change_map = {} - offload_idx_expr = driver.variable_map[self.offload_index] - for arg, devptr in chain(offload_map.in_pairs, offload_map.inout_pairs, offload_map.out_pairs): - if len(arg.dimensions) != 0: - dims = arg.dimensions + (offload_idx_expr,) - else: - dims = (sym.RangeIndex((None, None)),) * (len(devptr.shape)-1) + (offload_idx_expr,) - change_map[arg] = devptr.clone(dimensions=dims) - - arg_transformer = SubstituteExpressions(change_map, inplace=True) - for call in kernel_calls: - arg_transformer.visit(call) diff --git a/loki/transformations/data_offload/offload.py b/loki/transformations/data_offload/offload.py new file mode 100644 index 000000000..13df21248 --- /dev/null +++ b/loki/transformations/data_offload/offload.py @@ -0,0 +1,211 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

from loki.batch import Transformation
from loki.expression import Array
from loki.ir import (
    FindNodes, PragmaRegion, CallStatement, Pragma, Transformer,
    pragma_regions_attached,
)
from loki.logging import warning, error
from loki.tools import as_tuple
from loki.types import BasicType


__all__ = ['DataOffloadTransformation']


class DataOffloadTransformation(Transformation):
    """
    Utility transformation to insert data offload regions for GPU devices
    based on marked ``!$loki data`` regions. In the first instance this
    will insert OpenACC data offload regions, but can be extended to other
    offload region semantics (eg. OpenMP-5) in the future.

    Parameters
    ----------
    remove_openmp : bool
        Remove any existing OpenMP pragmas inside the marked region.
    present_on_device : bool
        Assume arrays are already offloaded and present on device.
    assume_deviceptr : bool
        Mark all offloaded arrays as true device-pointers if data offload
        is being managed outside of structured OpenACC data regions.

    Raises
    ------
    RuntimeError
        If ``assume_deviceptr`` is set without ``present_on_device``.
    """

    def __init__(self, **kwargs):
        # We need to record if we actually added any, so
        # that down-stream processing can use that info
        self.has_data_regions = False
        self.remove_openmp = kwargs.get('remove_openmp', False)
        self.assume_deviceptr = kwargs.get('assume_deviceptr', False)
        self.present_on_device = kwargs.get('present_on_device', False)

        if self.assume_deviceptr and not self.present_on_device:
            msg = ("[Loki] Data offload: Can't assume device pointer arrays without arrays being marked "
                   "present on device.")
            error(msg)
            raise RuntimeError(msg)

    def transform_subroutine(self, routine, **kwargs):
        """
        Apply the transformation to a `Subroutine` object.

        Parameters
        ----------
        routine : `Subroutine`
            Subroutine to apply this transformation to.
        role : string
            Role of the `routine` in the scheduler call tree.
            This transformation will only apply at the ``'driver'`` level.
        targets : list or string
            List of subroutines that are to be considered as part of
            the transformation call tree.
        """
        role = kwargs.get('role')
        targets = as_tuple(kwargs.get('targets', None))

        if targets:
            targets = tuple(t.lower() for t in targets)

        if role == 'driver':
            # NOTE(review): ``self.remove_openmp`` is stored but not consulted here, so
            # OpenMP pragmas are removed regardless of the option - confirm the intent
            self.remove_openmp_pragmas(routine, targets)
            self.insert_data_offload_pragmas(routine, targets)

    @staticmethod
    def _find_target_calls(region, targets):
        """
        Return the calls inside ``region`` whose lower-cased name is in ``targets``.
        """
        return [
            call for call in FindNodes(CallStatement).visit(region)
            if str(call.name).lower() in targets
        ]

    @staticmethod
    def _is_active_loki_data_region(region, targets):
        """
        Utility to decide if a ``PragmaRegion`` is of type ``!$loki data``
        and has active target routines.
        """
        if region.pragma.keyword.lower() != 'loki':
            return False
        if 'data' not in region.pragma.content.lower():
            return False

        # The region is only active if it contains a targeted kernel call
        return bool(DataOffloadTransformation._find_target_calls(region, targets))

    @staticmethod
    def _collect_offload_args(routine, calls):
        """
        Sort the names of the array arguments of ``calls`` by the intent of the
        corresponding dummy argument.

        Calls whose routine is still :any:`BasicType.DEFERRED` are skipped with a
        warning; ``routine`` is only used in that message.

        Returns
        -------
        tuple
            Three tuples ``(inargs, inoutargs, outargs)`` of lower-case names,
            without duplicates and in order of first appearance.
        """
        args_by_intent = {'in': (), 'inout': (), 'out': ()}

        for call in calls:
            if call.routine is BasicType.DEFERRED:
                warning(f'[Loki] Data offload: Routine {routine.name} has not been enriched ' +
                        f'in {str(call.name).lower()}')
                continue

            for param, arg in call.arg_iter():
                if not isinstance(param, Array):
                    continue
                intent = param.type.intent.lower()
                if intent in args_by_intent:
                    args_by_intent[intent] += (str(arg.name).lower(),)

        inargs, inoutargs, outargs = (args_by_intent[intent] for intent in ('in', 'inout', 'out'))

        # Sanitize data access categories to avoid double-counting variables
        inoutargs += tuple(v for v in inargs if v in outargs)
        inargs = tuple(v for v in inargs if v not in inoutargs)
        outargs = tuple(v for v in outargs if v not in inoutargs)

        # Filter for duplicates
        inargs = tuple(dict.fromkeys(inargs))
        outargs = tuple(dict.fromkeys(outargs))
        inoutargs = tuple(dict.fromkeys(inoutargs))
        return inargs, inoutargs, outargs

    def _create_data_pragma(self, inargs, inoutargs, outargs):
        """
        Build the opening ``!$acc data`` pragma for the given argument names.

        With ``present_on_device`` all names go into a single ``present`` clause, or
        a ``deviceptr`` clause if ``assume_deviceptr`` is set; otherwise
        ``copyin``, ``copy`` and ``copyout`` clauses are generated.
        """
        if self.present_on_device:
            clause = 'deviceptr' if self.assume_deviceptr else 'present'
            offload_args = inargs + outargs + inoutargs
            content = f'data {clause}({", ".join(offload_args)})' if offload_args else 'data'
            return Pragma(keyword='acc', content=content)

        copyin = f'copyin({", ".join(inargs)})' if inargs else ''
        copy = f'copy({", ".join(inoutargs)})' if inoutargs else ''
        copyout = f'copyout({", ".join(outargs)})' if outargs else ''
        return Pragma(keyword='acc', content=f'data {copyin} {copy} {copyout}')

    def insert_data_offload_pragmas(self, routine, targets):
        """
        Find ``!$loki data`` pragma regions and replace their pragmas by the
        corresponding ``!$acc data`` / ``!$acc end data`` pragmas.

        Parameters
        ----------
        routine : `Subroutine`
            Subroutine to apply this transformation to.
        targets : list or string
            List of subroutines that are to be considered as part of
            the transformation call tree.
        """
        pragma_map = {}
        with pragma_regions_attached(routine):
            for region in FindNodes(PragmaRegion).visit(routine.body):
                # Only work on active `!$loki data` regions
                if not self._is_active_loki_data_region(region, targets):
                    continue

                # Collect the three types of device data accesses from the targeted calls
                calls = self._find_target_calls(region, targets)
                offload_args = self._collect_offload_args(routine, calls)

                # Now generate the pre- and post pragmas (OpenACC)
                pragma_map[region.pragma] = self._create_data_pragma(*offload_args)
                pragma_map[region.pragma_post] = Pragma(keyword='acc', content='end data')

                # Record that we actually created a new region
                self.has_data_regions = True

        routine.body = Transformer(pragma_map).visit(routine.body)

    def remove_openmp_pragmas(self, routine, targets):
        """
        Remove any existing OpenMP pragmas in the offload regions that
        will have been intended for OpenMP threading rather than
        offload.

        Parameters
        ----------
        routine : `Subroutine`
            Subroutine to apply this transformation to.
        targets : list or string
            List of subroutines that are to be considered as part of
            the transformation call tree.
        """
        pragma_map = {}
        with pragma_regions_attached(routine):
            for region in FindNodes(PragmaRegion).visit(routine.body):
                # Only work on active `!$loki data` regions
                if not self._is_active_loki_data_region(region, targets):
                    continue

                # Stand-alone OpenMP pragmas; only those inside the data region
                # are removed, pragmas elsewhere in the routine are left alone
                for p in FindNodes(Pragma).visit(region):
                    if p.keyword.lower() == 'omp':
                        pragma_map[p] = None

                # OpenMP pragmas that open and close a nested pragma region
                for r in FindNodes(PragmaRegion).visit(region):
                    if r.pragma.keyword.lower() == 'omp':
                        pragma_map[r.pragma] = None
                        pragma_map[r.pragma_post] = None

        routine.body = Transformer(pragma_map).visit(routine.body)
+# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. diff --git a/loki/transformations/tests/sources/projGlobalVarImports/driver.F90 b/loki/transformations/data_offload/tests/sources/projGlobalVarImports/driver.F90 similarity index 100% rename from loki/transformations/tests/sources/projGlobalVarImports/driver.F90 rename to loki/transformations/data_offload/tests/sources/projGlobalVarImports/driver.F90 diff --git a/loki/transformations/tests/sources/projGlobalVarImports/driver_derived_type.F90 b/loki/transformations/data_offload/tests/sources/projGlobalVarImports/driver_derived_type.F90 similarity index 100% rename from loki/transformations/tests/sources/projGlobalVarImports/driver_derived_type.F90 rename to loki/transformations/data_offload/tests/sources/projGlobalVarImports/driver_derived_type.F90 diff --git a/loki/transformations/tests/sources/projGlobalVarImports/functions.F90 b/loki/transformations/data_offload/tests/sources/projGlobalVarImports/functions.F90 similarity index 100% rename from loki/transformations/tests/sources/projGlobalVarImports/functions.F90 rename to loki/transformations/data_offload/tests/sources/projGlobalVarImports/functions.F90 diff --git a/loki/transformations/tests/sources/projGlobalVarImports/kernel_derived_type.F90 b/loki/transformations/data_offload/tests/sources/projGlobalVarImports/kernel_derived_type.F90 similarity index 100% rename from loki/transformations/tests/sources/projGlobalVarImports/kernel_derived_type.F90 rename to loki/transformations/data_offload/tests/sources/projGlobalVarImports/kernel_derived_type.F90 diff --git a/loki/transformations/tests/sources/projGlobalVarImports/kernels.F90 b/loki/transformations/data_offload/tests/sources/projGlobalVarImports/kernels.F90 similarity index 100% rename from loki/transformations/tests/sources/projGlobalVarImports/kernels.F90 
rename to loki/transformations/data_offload/tests/sources/projGlobalVarImports/kernels.F90 diff --git a/loki/transformations/tests/sources/projGlobalVarImports/moduleA.F90 b/loki/transformations/data_offload/tests/sources/projGlobalVarImports/moduleA.F90 similarity index 100% rename from loki/transformations/tests/sources/projGlobalVarImports/moduleA.F90 rename to loki/transformations/data_offload/tests/sources/projGlobalVarImports/moduleA.F90 diff --git a/loki/transformations/tests/sources/projGlobalVarImports/moduleB.F90 b/loki/transformations/data_offload/tests/sources/projGlobalVarImports/moduleB.F90 similarity index 100% rename from loki/transformations/tests/sources/projGlobalVarImports/moduleB.F90 rename to loki/transformations/data_offload/tests/sources/projGlobalVarImports/moduleB.F90 diff --git a/loki/transformations/tests/sources/projGlobalVarImports/moduleC.F90 b/loki/transformations/data_offload/tests/sources/projGlobalVarImports/moduleC.F90 similarity index 100% rename from loki/transformations/tests/sources/projGlobalVarImports/moduleC.F90 rename to loki/transformations/data_offload/tests/sources/projGlobalVarImports/moduleC.F90 diff --git a/loki/transformations/tests/sources/projGlobalVarImports/module_derived_type.F90 b/loki/transformations/data_offload/tests/sources/projGlobalVarImports/module_derived_type.F90 similarity index 100% rename from loki/transformations/tests/sources/projGlobalVarImports/module_derived_type.F90 rename to loki/transformations/data_offload/tests/sources/projGlobalVarImports/module_derived_type.F90 diff --git a/loki/transformations/data_offload/tests/test_field_api.py b/loki/transformations/data_offload/tests/test_field_api.py new file mode 100644 index 000000000..aaddbc543 --- /dev/null +++ b/loki/transformations/data_offload/tests/test_field_api.py @@ -0,0 +1,580 @@ +# (C) Copyright 2018- ECMWF. 
+# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +import pytest + +from loki import Sourcefile +from loki.frontend import available_frontends +from loki.logging import log_levels +from loki.ir import FindNodes, Pragma, CallStatement +import loki.expression.symbols as sym +from loki.module import Module +from loki.transformations import FieldOffloadTransformation + + +@pytest.fixture(name="parkind_mod") +def fixture_parkind_mod(tmp_path, frontend): + fcode = """ + module parkind1 + integer, parameter :: jprb=4 + end module + """ + return Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + +@pytest.fixture(name="field_module") +def fixture_field_module(tmp_path, frontend): + fcode = """ + module field_module + implicit none + + type field_2rb + real, pointer :: f_ptr(:,:,:) + end type field_2rb + + type field_3rb + real, pointer :: f_ptr(:,:,:) + contains + procedure :: update_view + end type field_3rb + + type field_4rb + real, pointer :: f_ptr(:,:,:) + contains + procedure :: update_view + end type field_4rb + + contains + subroutine update_view(self, idx) + class(field_3rb), intent(in) :: self + integer, intent(in) :: idx + end subroutine + end module + """ + return Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload(frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument + fcode = """ + module driver_mod + use parkind1, only: jprb + use field_module, only: field_2rb, field_3rb + implicit none + + type state_type + real(kind=jprb), dimension(10,10), pointer :: a, b, c + class(field_3rb), pointer :: f_a, f_b, f_c + contains + 
procedure :: update_view => state_update_view + end type state_type + + contains + + subroutine state_update_view(self, idx) + class(state_type), intent(in) :: self + integer, intent(in) :: idx + end subroutine + + subroutine kernel_routine(nlon, nlev, a, b, c) + integer, intent(in) :: nlon, nlev + real(kind=jprb), intent(in) :: a(nlon,nlev) + real(kind=jprb), intent(inout) :: b(nlon,nlev) + real(kind=jprb), intent(out) :: c(nlon,nlev) + integer :: i, j + + do j=1, nlon + do i=1, nlev + b(i,j) = a(i,j) + 0.1 + c(i,j) = 0.1 + end do + end do + end subroutine kernel_routine + + subroutine driver_routine(nlon, nlev, state) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + integer :: i + + !$loki data + do i=1,nlev + call state%update_view(i) + call kernel_routine(nlon, nlev, state%a, state%b, state%c) + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver = driver_mod['driver_routine'] + deviceptr_prefix = 'loki_devptr_prefix_' + driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + offload_index='i', + field_group_types=['state_type']), + role='driver', + targets=['kernel_routine']) + + calls = FindNodes(CallStatement).visit(driver.body) + kernel_call = next(c for c in calls if c.name=='kernel_routine') + + # verify that field offloads are generated properly + in_calls = [c for c in calls if 'get_device_data_rdonly' in c.name.name.lower()] + assert len(in_calls) == 1 + inout_calls = [c for c in calls if 'get_device_data_rdwr' in c.name.name.lower()] + assert len(inout_calls) == 2 + # verify that field sync host calls are generated properly + sync_calls = [c for c in calls if 'sync_host_rdwr' in c.name.name.lower()] + assert len(sync_calls) == 2 + + # verify that data offload pragmas remain + pragmas = FindNodes(Pragma).visit(driver.body) + assert len(pragmas) == 2 + assert 
all(p.keyword=='loki' and p.content==c for p, c in zip(pragmas, ['data', 'end data'])) + + # verify that new pointer variables are created and used in driver calls + for var in ['state_a', 'state_b', 'state_c']: + name = deviceptr_prefix + var + assert name in driver.variable_map + devptr = driver.variable_map[name] + assert isinstance(devptr, sym.Array) + assert len(devptr.shape) == 3 + assert devptr.name in (arg.name for arg in kernel_call.arguments) + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload_slices(frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument + fcode = """ + module driver_mod + use parkind1, only: jprb + use field_module, only: field_4rb + implicit none + + type state_type + real(kind=jprb), dimension(10,10,10), pointer :: a, b, c, d + class(field_4rb), pointer :: f_a, f_b, f_c, f_d + contains + procedure :: update_view => state_update_view + end type state_type + + contains + + subroutine state_update_view(self, idx) + class(state_type), intent(in) :: self + integer, intent(in) :: idx + end subroutine + + subroutine kernel_routine(nlon, nlev, a, b, c, d) + integer, intent(in) :: nlon, nlev + real(kind=jprb), intent(in) :: a(nlon,nlev,nlon) + real(kind=jprb), intent(inout) :: b(nlon,nlev) + real(kind=jprb), intent(out) :: c(nlon) + real(kind=jprb), intent(in) :: d(nlon,nlev,nlon) + integer :: i, j + end subroutine kernel_routine + + subroutine driver_routine(nlon, nlev, state) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + integer :: i + !$loki data + do i=1,nlev + call kernel_routine(nlon, nlev, state%a(:,:,1), state%b(:,1,1), state%c(1,1,1), state%d) + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver = driver_mod['driver_routine'] + deviceptr_prefix = 'loki_devptr_prefix_' + 
driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + offload_index='i', + field_group_types=['state_type']), + role='driver', + targets=['kernel_routine']) + + calls = FindNodes(CallStatement).visit(driver.body) + kernel_call = next(c for c in calls if c.name=='kernel_routine') + # verify that new pointer variables are created and used in driver calls + for var, rank in zip(['state_d', 'state_a', 'state_b', 'state_c',], [4, 3, 2, 1]): + name = deviceptr_prefix + var + assert name in driver.variable_map + devptr = driver.variable_map[name] + assert isinstance(devptr, sym.Array) + assert len(devptr.shape) == 4 + assert devptr.name in (arg.name for arg in kernel_call.arguments) + arg = next(arg for arg in kernel_call.arguments if devptr.name in arg.name) + assert arg.dimensions == ((sym.RangeIndex((None,None)),)*(rank-1) + + (sym.IntLiteral(1),)*(4-rank) + + (sym.Scalar(name='i'),)) + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload_multiple_calls(frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument + fcode = """ + module driver_mod + use parkind1, only: jprb + use field_module, only: field_2rb, field_3rb + implicit none + + type state_type + real(kind=jprb), dimension(10,10), pointer :: a, b, c + class(field_3rb), pointer :: f_a, f_b, f_c + contains + procedure :: update_view => state_update_view + end type state_type + + contains + + subroutine state_update_view(self, idx) + class(state_type), intent(in) :: self + integer, intent(in) :: idx + end subroutine + + subroutine kernel_routine(nlon, nlev, a, b, c) + integer, intent(in) :: nlon, nlev + real(kind=jprb), intent(in) :: a(nlon,nlev) + real(kind=jprb), intent(inout) :: b(nlon,nlev) + real(kind=jprb), intent(out) :: c(nlon,nlev) + integer :: i, j + + do j=1, nlon + do i=1, nlev + b(i,j) = a(i,j) + 0.1 + c(i,j) = 0.1 + end do + end do + end subroutine kernel_routine + + subroutine driver_routine(nlon, nlev, state) + integer, 
intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + integer :: i + + !$loki data + do i=1,nlev + call state%update_view(i) + + call kernel_routine(nlon, nlev, state%a, state%b, state%c) + + call kernel_routine(nlon, nlev, state%a, state%b, state%c) + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + + driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver = driver_mod['driver_routine'] + deviceptr_prefix = 'loki_devptr_prefix_' + driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + offload_index='i', + field_group_types=['state_type']), + role='driver', + targets=['kernel_routine']) + calls = FindNodes(CallStatement).visit(driver.body) + kernel_calls = [c for c in calls if c.name=='kernel_routine'] + + # verify that field offloads are generated properly + in_calls = [c for c in calls if 'get_device_data_rdonly' in c.name.name.lower()] + assert len(in_calls) == 1 + inout_calls = [c for c in calls if 'get_device_data_rdwr' in c.name.name.lower()] + assert len(inout_calls) == 2 + # verify that field sync host calls are generated properly + sync_calls = [c for c in calls if 'sync_host_rdwr' in c.name.name.lower()] + assert len(sync_calls) == 2 + + # verify that data offload pragmas remain + pragmas = FindNodes(Pragma).visit(driver.body) + assert len(pragmas) == 2 + assert all(p.keyword=='loki' and p.content==c for p, c in zip(pragmas, ['data', 'end data'])) + + # verify that new pointer variables are created and used in driver calls + for var in ['state_a', 'state_b', 'state_c']: + name = deviceptr_prefix + var + assert name in driver.variable_map + devptr = driver.variable_map[name] + assert isinstance(devptr, sym.Array) + assert len(devptr.shape) == 3 + assert devptr.name in (arg.name for kernel_call in kernel_calls for arg in kernel_call.arguments) + + +@pytest.mark.parametrize('frontend', available_frontends()) +def 
test_field_offload_no_targets(frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument + fother = """ + module another_module + implicit none + contains + subroutine another_kernel(nlon, nlev, a, b, c) + integer, intent(in) :: nlon, nlev + real, intent(in) :: a(nlon,nlev) + real, intent(inout) :: b(nlon,nlev) + real, intent(out) :: c(nlon,nlev) + integer :: i, j + end subroutine + end module + """ + fcode = """ + module driver_mod + use parkind1, only: jprb + use field_module, only: field_2rb, field_3rb + use another_module, only: another_kernel + + implicit none + + type state_type + real(kind=jprb), dimension(10,10), pointer :: a, b, c + class(field_3rb), pointer :: f_a, f_b, f_c + contains + procedure :: update_view => state_update_view + end type state_type + + contains + + subroutine state_update_view(self, idx) + class(state_type), intent(in) :: self + integer, intent(in) :: idx + end subroutine + + subroutine kernel_routine(nlon, nlev, a, b, c) + integer, intent(in) :: nlon, nlev + real(kind=jprb), intent(in) :: a(nlon,nlev) + real(kind=jprb), intent(inout) :: b(nlon,nlev) + real(kind=jprb), intent(out) :: c(nlon,nlev) + integer :: i, j + + do j=1, nlon + do i=1, nlev + b(i,j) = a(i,j) + 0.1 + c(i,j) = 0.1 + end do + end do + end subroutine kernel_routine + + subroutine driver_routine(nlon, nlev, state) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + integer :: i + + !$loki data + do i=1,nlev + call state%update_view(i) + call another_kernel(nlon, state%a, state%b, state%c) + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + + Sourcefile.from_source(fother, frontend=frontend, xmods=[tmp_path]) + driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver = driver_mod['driver_routine'] + deviceptr_prefix = 'loki_devptr_prefix_' + driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + 
offload_index='i', + field_group_types=['state_type']), + role='driver', + targets=['kernel_routine']) + + calls = FindNodes(CallStatement).visit(driver.body) + assert not any(c for c in calls if c.name=='kernel_routine') + + # verify that no field offloads are generated + in_calls = [c for c in calls if 'get_device_data_rdonly' in c.name.name.lower()] + assert len(in_calls) == 0 + inout_calls = [c for c in calls if 'get_device_data_rdwr' in c.name.name.lower()] + assert len(inout_calls) == 0 + # verify that no field sync host calls are generated + sync_calls = [c for c in calls if 'sync_host_rdwr' in c.name.name.lower()] + assert len(sync_calls) == 0 + + # verify that data offload pragmas remain + pragmas = FindNodes(Pragma).visit(driver.body) + assert len(pragmas) == 2 + assert all(p.keyword=='loki' and p.content==c for p, c in zip(pragmas, ['data', 'end data'])) + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload_unknown_kernel(caplog, frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument + fother = """ + module another_module + implicit none + contains + subroutine another_kernel(nlon, nlev, a, b, c) + integer, intent(in) :: nlon, nlev + real, intent(in) :: a(nlon,nlev) + real, intent(inout) :: b(nlon,nlev) + real, intent(out) :: c(nlon,nlev) + integer :: i, j + end subroutine + end module + """ + fcode = """ + module driver_mod + use parkind1, only: jprb + use another_module, only: another_kernel + implicit none + + type state_type + real(kind=jprb), dimension(10,10), pointer :: a, b, c + class(field_3rb), pointer :: f_a, f_b, f_c + contains + procedure :: update_view => state_update_view + end type state_type + + contains + + subroutine state_update_view(self, idx) + class(state_type), intent(in) :: self + integer, intent(in) :: idx + end subroutine + + subroutine driver_routine(nlon, nlev, state) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + integer :: i + + 
!$loki data + do i=1,nlev + call state%update_view(i) + call another_kernel(nlon, nlev, state%a, state%b, state%c) + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + + Sourcefile.from_source(fother, frontend=frontend, xmods=[tmp_path]) + driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver = driver_mod['driver_routine'] + deviceptr_prefix = 'loki_devptr_prefix_' + + field_offload_trafo = FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + offload_index='i', + field_group_types=['state_type']) + caplog.clear() + with caplog.at_level(log_levels['ERROR']): + with pytest.raises(RuntimeError): + driver.apply(field_offload_trafo, role='driver', targets=['another_kernel']) + assert len(caplog.records) == 1 + assert ('[Loki] Data offload: Routine driver_routine has not been enriched '+ + 'in another_kernel') in caplog.records[0].message + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload_warnings(caplog, frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument + fother_state = """ + module state_type_mod + implicit none + type state_type2 + real, dimension(10,10), pointer :: a, b, c + contains + procedure :: update_view => state_update_view + end type state_type2 + + contains + + subroutine state_update_view(self, idx) + class(state_type2), intent(in) :: self + integer, intent(in) :: idx + end subroutine + end module + """ + fother_mod= """ + module another_module + implicit none + contains + subroutine another_kernel(nlon, nlev, a, b, c) + integer, intent(in) :: nlon, nlev + real, intent(in) :: a(nlon,nlev) + real, intent(inout) :: b(nlon,nlev) + real, intent(out) :: c(nlon,nlev) + integer :: i, j + end subroutine + end module + """ + fcode = """ + module driver_mod + use state_type_mod, only: state_type2 + use parkind1, only: jprb + use field_module, only: field_2rb, field_3rb + use another_module, only: 
another_kernel + + implicit none + + type state_type + real(kind=jprb), dimension(10,10), pointer :: a, b, c + class(field_3rb), pointer :: f_a, f_b, f_c + contains + procedure :: update_view => state_update_view + end type state_type + + contains + + subroutine state_update_view(self, idx) + class(state_type), intent(in) :: self + integer, intent(in) :: idx + end subroutine + + subroutine kernel_routine(nlon, nlev, a, b, c) + integer, intent(in) :: nlon, nlev + real(kind=jprb), intent(in) :: a(nlon,nlev) + real(kind=jprb), intent(inout) :: b(nlon,nlev) + real(kind=jprb), intent(out) :: c(nlon,nlev) + integer :: i, j + + do j=1, nlon + do i=1, nlev + b(i,j) = a(i,j) + 0.1 + c(i,j) = 0.1 + end do + end do + end subroutine kernel_routine + + subroutine driver_routine(nlon, nlev, state, state2) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + type(state_type2), intent(inout) :: state2 + + integer :: i + real(kind=jprb) :: a(nlon,nlev) + real, pointer :: loki_devptr_prefix_state_b + + !$loki data + do i=1,nlev + call state%update_view(i) + call kernel_routine(nlon, nlev, a, state%b, state2%c) + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + Sourcefile.from_source(fother_state, frontend=frontend, xmods=[tmp_path]) + Sourcefile.from_source(fother_mod, frontend=frontend, xmods=[tmp_path]) + driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver = driver_mod['driver_routine'] + deviceptr_prefix = 'loki_devptr_prefix_' + + field_offload_trafo = FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + offload_index='i', + field_group_types=['state_type']) + caplog.clear() + with caplog.at_level(log_levels['WARNING']): + driver.apply(field_offload_trafo, role='driver', targets=['kernel_routine']) + assert len(caplog.records) == 3 + assert (('[Loki] Data offload: Raw array object a encountered in' + +' driver_routine that is not wrapped by a Field API 
object') + in caplog.records[0].message) + assert ('[Loki] Data offload: The parent object state2 of type state_type2 is not in the' + + ' list of field wrapper types') in caplog.records[1].message + assert ('[Loki] Data offload: The routine driver_routine already has a' + + ' variable named loki_devptr_prefix_state_b') in caplog.records[2].message diff --git a/loki/transformations/data_offload/tests/test_global_var.py b/loki/transformations/data_offload/tests/test_global_var.py new file mode 100644 index 000000000..0087af342 --- /dev/null +++ b/loki/transformations/data_offload/tests/test_global_var.py @@ -0,0 +1,603 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +from pathlib import Path +import pytest + +from loki import Scheduler, FindInlineCalls +from loki.frontend import available_frontends, OMNI +from loki.ir import FindNodes, Pragma, CallStatement, Import + +from loki.transformations import ( + GlobalVariableAnalysis, GlobalVarOffloadTransformation, + GlobalVarHoistTransformation +) + + +@pytest.fixture(scope='module', name='here') +def fixture_here(): + return Path(__file__).parent + + +@pytest.fixture(name='config') +def fixture_config(): + """ + Default configuration dict with basic options. 
+ """ + return { + 'default': { + 'mode': 'idem', + 'role': 'kernel', + 'expand': True, + 'strict': True, + 'enable_imports': True, + }, + } + + +@pytest.fixture(name='global_variable_analysis_code') +def fixture_global_variable_analysis_code(tmp_path): + fcode = { + #------------------------------ + 'global_var_analysis_header_mod': ( + #------------------------------ +""" +module global_var_analysis_header_mod + implicit none + + integer, parameter :: nval = 5 + integer, parameter :: nfld = 3 + + integer :: n + + integer :: iarr(nfld) + real :: rarr(nval, nfld) +end module global_var_analysis_header_mod +""" + ).strip(), + #---------------------------- + 'global_var_analysis_data_mod': ( + #---------------------------- +""" +module global_var_analysis_data_mod + implicit none + + real, allocatable :: rdata(:,:,:) + + type some_type + real :: val + real, allocatable :: vals(:,:) + end type some_type + + type(some_type) :: tt + +contains + subroutine some_routine(i) + integer, intent(inout) :: i + i = i + 1 + end subroutine some_routine +end module global_var_analysis_data_mod +""" + ).strip(), + #------------------------------ + 'global_var_analysis_kernel_mod': ( + #------------------------------ +""" +module global_var_analysis_kernel_mod + use global_var_analysis_header_mod, only: rarr + use global_var_analysis_data_mod, only: some_routine, some_type + + implicit none + +contains + subroutine kernel_a(arg, tt) + use global_var_analysis_header_mod, only: iarr, nval, nfld, n + + real, intent(inout) :: arg(:,:) + type(some_type), intent(in) :: tt + real :: tmp(n) + integer :: i, j + + do i=1,nfld + if (iarr(i) > 0) then + do j=1,nval + arg(j,i) = rarr(j, i) + tt%val + call some_routine(arg(j,i)) + enddo + endif + enddo + end subroutine kernel_a + + subroutine kernel_b(arg) + use global_var_analysis_header_mod, only: iarr, nfld + use global_var_analysis_data_mod, only: rdata, tt + + real, intent(inout) :: arg(:,:) + integer :: i + + do i=1,nfld + if (iarr(i) .ne. 
0) then + rdata(:,:,i) = arg(:,:) + rdata(:,:,i) + else + arg(:,:) = tt%vals(:,:) + endif + enddo + end subroutine kernel_b +end module global_var_analysis_kernel_mod +""" + ).strip(), + #------- + 'driver': ( + #------- +""" +subroutine driver(arg) + use global_var_analysis_kernel_mod, only: kernel_a, kernel_b + use global_var_analysis_data_mod, only: tt + implicit none + + real, intent(inout) :: arg(:,:) + + !$loki update_device + + call kernel_a(arg, tt) + + call kernel_b(arg) + + !$loki update_host +end subroutine driver +""" + ).strip() + } + + for name, code in fcode.items(): + (tmp_path/f'{name}.F90').write_text(code) + return tmp_path + + +@pytest.mark.parametrize('frontend', available_frontends()) +@pytest.mark.parametrize('key', (None, 'foobar')) +def test_global_variable_analysis(frontend, key, config, global_variable_analysis_code): + config['routines'] = { + 'driver': {'role': 'driver'} + } + + scheduler = Scheduler( + paths=(global_variable_analysis_code,), config=config, seed_routines='driver', + frontend=frontend, xmods=(global_variable_analysis_code,) + ) + scheduler.process(GlobalVariableAnalysis(key=key)) + if key is None: + key = GlobalVariableAnalysis._key + + # Validate the analysis trafo_data + + # OMNI handles array indices and parameters differently + if frontend == OMNI: + nfld_dim = '3' + nval_dim = '5' + nfld_data = set() + nval_data = set() + else: + nfld_dim = 'nfld' + nval_dim = 'nval' + nfld_data = {('nfld', 'global_var_analysis_header_mod')} + nval_data = {('nval', 'global_var_analysis_header_mod')} + + expected_trafo_data = { + 'global_var_analysis_header_mod': { + 'declares': {f'iarr({nfld_dim})', f'rarr({nval_dim}, {nfld_dim})', 'n'}, + 'offload': {} + }, + 'global_var_analysis_data_mod': { + 'declares': {'rdata(:, :, :)', 'tt'}, + 'offload': {} + }, + 'global_var_analysis_data_mod#some_routine': {'defines_symbols': set(), 'uses_symbols': set()}, + 'global_var_analysis_kernel_mod#kernel_a': { + 'defines_symbols': set(), + 
'uses_symbols': nval_data | nfld_data | { + (f'iarr({nfld_dim})', 'global_var_analysis_header_mod'), + ('n', 'global_var_analysis_header_mod'), + (f'rarr({nval_dim}, {nfld_dim})', 'global_var_analysis_header_mod') + } + }, + 'global_var_analysis_kernel_mod#kernel_b': { + 'defines_symbols': {('rdata(:, :, :)', 'global_var_analysis_data_mod')}, + 'uses_symbols': nfld_data | { + ('rdata(:, :, :)', 'global_var_analysis_data_mod'), ('tt', 'global_var_analysis_data_mod'), + ('tt%vals', 'global_var_analysis_data_mod'), (f'iarr({nfld_dim})', 'global_var_analysis_header_mod') + } + }, + '#driver': { + 'defines_symbols': {('rdata(:, :, :)', 'global_var_analysis_data_mod')}, + 'uses_symbols': nval_data | nfld_data | { + ('rdata(:, :, :)', 'global_var_analysis_data_mod'), + ('n', 'global_var_analysis_header_mod'), + ('tt', 'global_var_analysis_data_mod'), ('tt%vals', 'global_var_analysis_data_mod'), + (f'iarr({nfld_dim})', 'global_var_analysis_header_mod'), + (f'rarr({nval_dim}, {nfld_dim})', 'global_var_analysis_header_mod') + } + } + } + + assert set(scheduler.items) == set(expected_trafo_data) | {'global_var_analysis_data_mod#some_type'} + for item in scheduler.items: + if item == 'global_var_analysis_data_mod#some_type': + continue + for trafo_data_key, trafo_data_value in item.trafo_data[key].items(): + assert ( + sorted( + tuple(str(vv) for vv in v) if isinstance(v, tuple) else str(v) + for v in trafo_data_value + ) == sorted(expected_trafo_data[item.name][trafo_data_key]) + ) + + +@pytest.mark.parametrize('frontend', available_frontends()) +@pytest.mark.parametrize('key', (None, 'foobar')) +def test_global_variable_offload(frontend, key, config, global_variable_analysis_code): + + config['routines'] = { + 'driver': {'role': 'driver'} + } + + # OMNI handles array indices and parameters differently + if frontend == OMNI: + nfld_dim = '3' + nval_dim = '5' + else: + nfld_dim = 'nfld' + nval_dim = 'nval' + + scheduler = Scheduler( + paths=(global_variable_analysis_code,), 
config=config, seed_routines='driver', + frontend=frontend, xmods=(global_variable_analysis_code,) + ) + scheduler.process(GlobalVariableAnalysis(key=key)) + scheduler.process(GlobalVarOffloadTransformation(key=key)) + driver = scheduler['#driver'].ir + + if key is None: + key = GlobalVariableAnalysis._key + + expected_trafo_data = { + 'global_var_analysis_header_mod': { + 'declares': {f'iarr({nfld_dim})', f'rarr({nval_dim}, {nfld_dim})', 'n'}, + 'offload': {f'iarr({nfld_dim})', f'rarr({nval_dim}, {nfld_dim})', 'n'} + }, + 'global_var_analysis_data_mod': { + 'declares': {'rdata(:, :, :)', 'tt'}, + 'offload': {'rdata(:, :, :)', 'tt', 'tt%vals'} + }, + } + + # Verify module offload sets + for item in [scheduler['global_var_analysis_header_mod'], scheduler['global_var_analysis_data_mod']]: + for trafo_data_key, trafo_data_value in item.trafo_data[key].items(): + assert ( + sorted( + tuple(str(vv) for vv in v) if isinstance(v, tuple) else str(v) + for v in trafo_data_value + ) == sorted(expected_trafo_data[item.name][trafo_data_key]) + ) + + # Verify imports have been added to the driver + expected_imports = { + 'global_var_analysis_header_mod': {'iarr', 'rarr', 'n'}, + 'global_var_analysis_data_mod': {'rdata'} + } + + # We need to check only the first imports as they have to be prepended + for import_ in driver.imports[:len(expected_imports)]: + assert {var.name.lower() for var in import_.symbols} == expected_imports[import_.module.lower()] + + expected_h2d_pragmas = { + 'update device': {'iarr', 'rdata', 'rarr', 'n'}, + 'enter data copyin': {'tt%vals'} + } + expected_d2h_pragmas = { + 'update self': {'rdata'} + } + + acc_pragmas = [p for p in FindNodes(Pragma).visit(driver.ir) if p.keyword.lower() == 'acc'] + assert len(acc_pragmas) == len(expected_h2d_pragmas) + len(expected_d2h_pragmas) + for pragma in acc_pragmas[:len(expected_h2d_pragmas)]: + command, variables = pragma.content.lower().split('(') + assert command.strip() in expected_h2d_pragmas + assert 
set(variables.strip()[:-1].strip().split(', ')) == expected_h2d_pragmas[command.strip()] + for pragma in acc_pragmas[len(expected_h2d_pragmas):]: + command, variables = pragma.content.lower().split('(') + assert command.strip() in expected_d2h_pragmas + assert set(variables.strip()[:-1].strip().split(', ')) == expected_d2h_pragmas[command.strip()] + + # Verify declarations have been added to the header modules + expected_declarations = { + 'global_var_analysis_header_mod': {'iarr', 'rarr', 'n'}, + 'global_var_analysis_data_mod': {'rdata', 'tt'} + } + + modules = { + name: scheduler[name].ir for name in expected_declarations + } + + for name, module in modules.items(): + acc_pragmas = [p for p in FindNodes(Pragma).visit(module.spec) if p.keyword.lower() == 'acc'] + variables = { + v.strip() + for pragma in acc_pragmas + for v in pragma.content.lower().split('(')[-1].strip()[:-1].split(', ') + } + assert variables == expected_declarations[name] + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_transformation_global_var_import(here, config, frontend, tmp_path): + """ + Test the generation of offload instructions of global variable imports. 
+ """ + config['routines'] = { + 'driver': {'role': 'driver'} + } + + scheduler = Scheduler(paths=here/'sources/projGlobalVarImports', config=config, frontend=frontend, xmods=[tmp_path]) + scheduler.process(transformation=GlobalVariableAnalysis()) + scheduler.process(transformation=GlobalVarOffloadTransformation()) + + driver = scheduler['#driver'].ir + moduleA = scheduler['modulea'].ir + moduleB = scheduler['moduleb'].ir + moduleC = scheduler['modulec'].ir + + # check that global variables have been added to driver symbol table + imports = FindNodes(Import).visit(driver.spec) + assert len(imports) == 2 + assert imports[0].module != imports[1].module + assert imports[0].symbols != imports[1].symbols + for i in imports: + assert len(i.symbols) == 2 + assert i.module.lower() in ('moduleb', 'modulec') + assert set(s.name for s in i.symbols) in ({'var2', 'var3'}, {'var4', 'var5'}) + + # check that existing acc pragmas have not been stripped and update device/update self added correctly + pragmas = FindNodes(Pragma).visit(driver.body) + assert len(pragmas) == 4 + assert all(p.keyword.lower() == 'acc' for p in pragmas) + + assert 'update device' in pragmas[0].content + assert 'var2' in pragmas[0].content + assert 'var3' in pragmas[0].content + + assert pragmas[1].content == 'serial' + assert pragmas[2].content == 'end serial' + + assert 'update self' in pragmas[3].content + assert 'var4' in pragmas[3].content + assert 'var5' in pragmas[3].content + + # check that no declarations have been added for parameters + pragmas = FindNodes(Pragma).visit(moduleA.spec) + assert not pragmas + + # check for device-side declarations where appropriate + pragmas = FindNodes(Pragma).visit(moduleB.spec) + assert len(pragmas) == 1 + assert pragmas[0].keyword == 'acc' + assert 'declare create' in pragmas[0].content + assert 'var2' in pragmas[0].content + assert 'var3' in pragmas[0].content + + pragmas = FindNodes(Pragma).visit(moduleC.spec) + assert len(pragmas) == 1 + assert 
pragmas[0].keyword == 'acc' + assert 'declare create' in pragmas[0].content + assert 'var4' in pragmas[0].content + assert 'var5' in pragmas[0].content + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_transformation_global_var_import_derived_type(here, config, frontend, tmp_path): + """ + Test the generation of offload instructions of derived-type global variable imports. + """ + + config['default']['enable_imports'] = True + config['routines'] = { + 'driver_derived_type': {'role': 'driver'} + } + + scheduler = Scheduler(paths=here/'sources/projGlobalVarImports', config=config, frontend=frontend, xmods=[tmp_path]) + scheduler.process(transformation=GlobalVariableAnalysis()) + scheduler.process(transformation=GlobalVarOffloadTransformation()) + + driver = scheduler['#driver_derived_type'].ir + module = scheduler['module_derived_type'].ir + + # check that global variables have been added to driver symbol table + imports = FindNodes(Import).visit(driver.spec) + assert len(imports) == 1 + assert len(imports[0].symbols) == 2 + assert imports[0].module.lower() == 'module_derived_type' + assert set(s.name for s in imports[0].symbols) == {'p', 'p0'} + + # check that existing acc pragmas have not been stripped and update device/update self added correctly + pragmas = FindNodes(Pragma).visit(driver.body) + assert len(pragmas) == 5 + assert all(p.keyword.lower() == 'acc' for p in pragmas) + + assert 'enter data copyin' in pragmas[0].content + assert 'p0%x' in pragmas[0].content + assert 'p0%y' in pragmas[0].content + assert 'p0%z' in pragmas[0].content + assert 'p%n' in pragmas[0].content + + assert 'enter data create' in pragmas[1].content + assert 'p%x' in pragmas[1].content + assert 'p%y' in pragmas[1].content + assert 'p%z' in pragmas[1].content + + assert pragmas[2].content == 'serial' + assert pragmas[3].content == 'end serial' + + assert 'exit data copyout' in pragmas[4].content + assert 'p%x' in pragmas[4].content + assert 'p%y' in 
pragmas[4].content + assert 'p%z' in pragmas[4].content + + # check for device-side declarations + pragmas = FindNodes(Pragma).visit(module.spec) + assert len(pragmas) == 1 + assert pragmas[0].keyword == 'acc' + assert 'declare create' in pragmas[0].content + assert 'p' in pragmas[0].content + assert 'p0' in pragmas[0].content + assert 'p_array' in pragmas[0].content + # Note: g is not offloaded because it is not used by the kernel (albeit imported) + + +@pytest.mark.parametrize('frontend', available_frontends()) +@pytest.mark.parametrize('hoist_parameters', (False, True)) +@pytest.mark.parametrize('ignore_modules', (None, ('moduleb',))) +def test_transformation_global_var_hoist(here, config, frontend, hoist_parameters, ignore_modules, tmp_path): + """ + Test hoisting of global variable imports. + """ + config['default']['enable_imports'] = True + config['routines'] = { + 'driver': {'role': 'driver'} + } + + scheduler = Scheduler(paths=here/'sources/projGlobalVarImports', config=config, frontend=frontend, xmods=[tmp_path]) + scheduler.process(transformation=GlobalVariableAnalysis()) + scheduler.process(transformation=GlobalVarHoistTransformation(hoist_parameters=hoist_parameters, + ignore_modules=ignore_modules)) + + driver = scheduler['#driver'].ir + kernel0 = scheduler['#kernel0'].ir + kernel_map = {key: scheduler[f'#{key}'].ir for key in ['kernel1', 'kernel2', 'kernel3']} + some_func = scheduler['func_mod#some_func'].ir + + # symbols within each module + expected_symbols = {'modulea': ['var0', 'var1'], 'moduleb': ['var2', 'var3'], + 'modulec': ['var4', 'var5']} + # expected intent of those variables (if hoisted) + var_intent_map = {'var0': 'in', 'var1': 'in', 'var2': 'in', + 'var3': 'in', 'var4': 'inout', 'var5': 'inout', 'tmp': None} + # DRIVER + imports = FindNodes(Import).visit(driver.spec) + import_names = [_import.module.lower() for _import in imports] + # check driver imports + expected_driver_modules = ['modulec'] + expected_driver_modules += ['moduleb'] 
if ignore_modules is None else [] + # OMNI handles parameters differently, ModuleA only contains parameters + if frontend != OMNI: + expected_driver_modules += ['modulea'] if hoist_parameters else [] + assert len(imports) == len(expected_driver_modules) + assert sorted(expected_driver_modules) == sorted(import_names) + for _import in imports: + assert sorted([sym.name for sym in _import.symbols]) == expected_symbols[_import.module.lower()] + # check driver call + driver_calls = FindNodes(CallStatement).visit(driver.body) + expected_args = [] + for module in expected_driver_modules: + expected_args.extend(expected_symbols[module]) + assert [arg.name for arg in driver_calls[0].arguments] == sorted(expected_args) + + originally = {'kernel1': ['modulea'], 'kernel2': ['moduleb'], + 'kernel3': ['moduleb', 'modulec']} + # KERNEL0 + expected_vars = expected_args.copy() + expected_vars.append('a') + assert [arg.name for arg in kernel0.arguments] == sorted(expected_args) + assert [arg.name for arg in kernel0.variables] == sorted(expected_vars) + for var in kernel0.arguments: + assert kernel0.variable_map[var.name.lower()].type.intent == var_intent_map[var.name.lower()] + assert var.scope == kernel0 + kernel0_inline_calls = FindInlineCalls().visit(kernel0.body) + for inline_call in kernel0_inline_calls: + if ignore_modules is None: + assert len(inline_call.arguments) == 1 + assert [arg.name for arg in inline_call.arguments] == ['var2'] + assert [arg.name for arg in some_func.arguments] == ['var2'] + else: + assert len(inline_call.arguments) == 0 + assert len(some_func.arguments) == 0 + kernel0_calls = FindNodes(CallStatement).visit(kernel0.body) + # KERNEL1 & KERNEL2 & KERNEL3 + for call in kernel0_calls: + expected_args = [] + expected_imports = [] + kernel_expected_symbols = [] + for module in originally[call.routine.name]: + # always, since at least 'some_func' is imported + if call.routine.name == 'kernel1' and module == 'modulea': + expected_imports.append(module) + 
+ kernel_expected_symbols.append('some_func') + if module in expected_driver_modules: + expected_args.extend(expected_symbols[module]) + else: + # already added + if module != 'modulea': + expected_imports.append(module) + kernel_expected_symbols.extend(expected_symbols[module]) + assert len(expected_args) == len(call.arguments) + assert [arg.name for arg in call.arguments] == expected_args + assert [arg.name for arg in kernel_map[call.routine.name].arguments] == expected_args + for var in kernel_map[call.routine.name].variables: + var_intent = kernel_map[call.routine.name].variable_map[var.name.lower()].type.intent + assert var.scope == kernel_map[call.routine.name] + assert var_intent == var_intent_map[var.name.lower()] + if call.routine.name in ['kernel1', 'kernel2']: + expected_args = ['tmp'] + expected_args + assert [arg.name for arg in kernel_map[call.routine.name].variables] == expected_args + kernel_imports = FindNodes(Import).visit(call.routine.spec) + assert sorted([_import.module.lower() for _import in kernel_imports]) == sorted(expected_imports) + imported_symbols = [] # lower-cased names of all symbols imported by the kernel + for _import in kernel_imports: + imported_symbols.extend([sym.name.lower() for sym in _import.symbols]) + assert sorted(imported_symbols) == sorted(kernel_expected_symbols) + + +@pytest.mark.parametrize('frontend', available_frontends()) +@pytest.mark.parametrize('hoist_parameters', (False, True)) +def test_transformation_global_var_derived_type_hoist(here, config, frontend, hoist_parameters, tmp_path): + """ + Test hoisting of derived-type global variable imports.
+ """ + + config['default']['enable_imports'] = True + config['routines'] = { + 'driver_derived_type': {'role': 'driver'} + } + + scheduler = Scheduler(paths=here/'sources/projGlobalVarImports', config=config, frontend=frontend, xmods=[tmp_path]) + scheduler.process(transformation=GlobalVariableAnalysis()) + scheduler.process(transformation=GlobalVarHoistTransformation(hoist_parameters)) + + driver = scheduler['#driver_derived_type'].ir + kernel = scheduler['#kernel_derived_type'].ir + + # DRIVER + imports = FindNodes(Import).visit(driver.spec) + assert len(imports) == 1 + assert imports[0].module.lower() == 'module_derived_type' + assert sorted([sym.name.lower() for sym in imports[0].symbols]) == sorted(['p', 'p_array', 'p0']) + calls = FindNodes(CallStatement).visit(driver.body) + assert len(calls) == 1 + # KERNEL + assert [arg.name for arg in calls[0].arguments] == ['p', 'p0', 'p_array'] + assert [arg.name for arg in kernel.arguments] == ['p', 'p0', 'p_array'] + kernel_imports = FindNodes(Import).visit(kernel.spec) + assert len(kernel_imports) == 1 + assert [sym.name.lower() for sym in kernel_imports[0].symbols] == ['g'] + assert sorted([var.name for var in kernel.variables]) == ['i', 'j', 'p', 'p0', 'p_array'] + assert kernel.variable_map['p_array'].type.allocatable + assert kernel.variable_map['p_array'].type.intent == 'inout' + assert kernel.variable_map['p_array'].type.dtype.name == 'point' + assert kernel.variable_map['p'].type.intent == 'inout' + assert kernel.variable_map['p'].type.dtype.name == 'point' + assert kernel.variable_map['p0'].type.intent == 'in' + assert kernel.variable_map['p0'].type.dtype.name == 'point' diff --git a/loki/transformations/data_offload/tests/test_offload.py b/loki/transformations/data_offload/tests/test_offload.py new file mode 100644 index 000000000..d369a8e0f --- /dev/null +++ b/loki/transformations/data_offload/tests/test_offload.py @@ -0,0 +1,235 @@ +# (C) Copyright 2018- ECMWF. 
+# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +import pytest + +from loki import Sourcefile +from loki.frontend import available_frontends +from loki.logging import log_levels +from loki.ir import ( + FindNodes, Pragma, PragmaRegion, Loop, CallStatement, + pragma_regions_attached, get_pragma_parameters +) + +from loki.transformations import DataOffloadTransformation + + +@pytest.mark.parametrize('frontend', available_frontends()) +@pytest.mark.parametrize('assume_deviceptr', [True, False]) +@pytest.mark.parametrize('present_on_device', [True, False]) +def test_data_offload_region_openacc(caplog, frontend, assume_deviceptr, present_on_device): + """ + Test the creation of a simple device data offload region + (`!$acc update`) from a `!$loki data` region with a single + kernel call. 
+ """ + + fcode_driver = """ + SUBROUTINE driver_routine(nlon, nlev, a, b, c) + INTEGER, INTENT(IN) :: nlon, nlev + REAL, INTENT(INOUT) :: a(nlon,nlev) + REAL, INTENT(INOUT) :: b(nlon,nlev) + REAL, INTENT(INOUT) :: c(nlon,nlev) + + !$loki data + call kernel_routine(nlon, nlev, a, b, c) + !$loki end data + + END SUBROUTINE driver_routine +""" + fcode_kernel = """ + SUBROUTINE kernel_routine(nlon, nlev, a, b, c) + INTEGER, INTENT(IN) :: nlon, nlev + REAL, INTENT(IN) :: a(nlon,nlev) + REAL, INTENT(INOUT) :: b(nlon,nlev) + REAL, INTENT(OUT) :: c(nlon,nlev) + INTEGER :: i, j + + do j=1, nlon + do i=1, nlev + b(i,j) = a(i,j) + 0.1 + c(i,j) = 0.1 + end do + end do + END SUBROUTINE kernel_routine +""" + driver = Sourcefile.from_source(fcode_driver, frontend=frontend)['driver_routine'] + kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] + driver.enrich(kernel) + + if assume_deviceptr and not present_on_device: + caplog.clear() + with caplog.at_level(log_levels['ERROR']): + with pytest.raises(RuntimeError): + data_offload_trafo = DataOffloadTransformation(assume_deviceptr=assume_deviceptr, + present_on_device=present_on_device) + assert len(caplog.records) == 1 + assert ("[Loki] Data offload: Can't assume device pointer arrays without arrays being marked" + + "present on device.") in caplog.records[0].message + return + + data_offload_trafo = DataOffloadTransformation(assume_deviceptr=assume_deviceptr, + present_on_device=present_on_device) + driver.apply(data_offload_trafo, role='driver', targets=['kernel_routine']) + + pragmas = FindNodes(Pragma).visit(driver.body) + assert len(pragmas) == 2 + assert all(p.keyword == 'acc' for p in pragmas) + if assume_deviceptr: + assert 'deviceptr' in pragmas[0].content + params = get_pragma_parameters(pragmas[0], only_loki_pragmas=False) + assert all(var in params['deviceptr'] for var in ('a', 'b', 'c')) + elif present_on_device: + assert 'present' in pragmas[0].content + params = 
get_pragma_parameters(pragmas[0], only_loki_pragmas=False) + assert all(var in params['present'] for var in ('a', 'b', 'c')) + else: + transformed = driver.to_fortran() + assert 'copyin( a )' in transformed + assert 'copy( b )' in transformed + assert 'copyout( c )' in transformed + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_data_offload_region_complex_remove_openmp(frontend): + """ + Test the creation of a data offload region (OpenACC) with + driver-side loops and CPU-style OpenMP pragmas to be removed. + """ + + fcode_driver = """ + SUBROUTINE driver_routine(nlon, nlev, a, b, c, flag) + INTEGER, INTENT(IN) :: nlon, nlev + REAL, INTENT(INOUT) :: a(nlon,nlev) + REAL, INTENT(INOUT) :: b(nlon,nlev) + REAL, INTENT(INOUT) :: c(nlon,nlev) + logical, intent(in) :: flag + INTEGER :: j + + !$loki data + call my_custom_timer() + + if(flag)then + !$omp parallel do private(j) + do j=1, nlev + call kernel_routine(nlon, j, a(:,j), b(:,j), c(:,j)) + end do + !$omp end parallel do + else + !$omp parallel do private(j) + do j=1, nlev + a(:,j) = 0. + b(:,j) = 0. + c(:,j) = 0. 
+ end do + !$omp end parallel do + endif + call my_custom_timer() + + !$loki end data + END SUBROUTINE driver_routine +""" + fcode_kernel = """ + SUBROUTINE kernel_routine(nlon, j, a, b, c) + INTEGER, INTENT(IN) :: nlon, j + REAL, INTENT(IN) :: a(nlon) + REAL, INTENT(INOUT) :: b(nlon) + REAL, INTENT(INOUT) :: c(nlon) + INTEGER :: i + + do i=1, nlon + b(i) = a(i) + 0.1 + c(i) = 0.1 + end do + END SUBROUTINE kernel_routine +""" + driver = Sourcefile.from_source(fcode_driver, frontend=frontend)['driver_routine'] + kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] + driver.enrich(kernel) + + offload_transform = DataOffloadTransformation(remove_openmp=True) + driver.apply(offload_transform, role='driver', targets=['kernel_routine']) + + assert len(FindNodes(Pragma).visit(driver.body)) == 2 + assert all(p.keyword == 'acc' for p in FindNodes(Pragma).visit(driver.body)) + + with pragma_regions_attached(driver): + # Ensure that loops in the region are preserved + regions = FindNodes(PragmaRegion).visit(driver.body) + assert len(regions) == 1 + assert len(FindNodes(Loop).visit(regions[0])) == 2 + + # Ensure all active and inactive calls are there + calls = FindNodes(CallStatement).visit(regions[0]) + assert len(calls) == 3 + assert calls[0].name == 'my_custom_timer' + assert calls[1].name == 'kernel_routine' + assert calls[2].name == 'my_custom_timer' + + # Ensure OpenMP loop pragma is taken out + assert len(FindNodes(Pragma).visit(regions[0])) == 0 + + transformed = driver.to_fortran() + assert 'copyin( a )' in transformed + assert 'copy( b, c )' in transformed + assert '!$omp' not in transformed + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_data_offload_region_multiple(frontend): + """ + Test the creation of a device data offload region (`!$acc update`) + from a `!$loki data` region with multiple kernel calls.
+ """ + + fcode_driver = """ + SUBROUTINE driver_routine(nlon, nlev, a, b, c, d) + INTEGER, INTENT(IN) :: nlon, nlev + REAL, INTENT(INOUT) :: a(nlon,nlev) + REAL, INTENT(INOUT) :: b(nlon,nlev) + REAL, INTENT(INOUT) :: c(nlon,nlev) + REAL, INTENT(INOUT) :: d(nlon,nlev) + + !$loki data + call kernel_routine(nlon, nlev, a, b, c) + + call kernel_routine(nlon, nlev, d, b, a) + !$loki end data + + END SUBROUTINE driver_routine +""" + fcode_kernel = """ + SUBROUTINE kernel_routine(nlon, nlev, a, b, c) + INTEGER, INTENT(IN) :: nlon, nlev + REAL, INTENT(IN) :: a(nlon,nlev) + REAL, INTENT(INOUT) :: b(nlon,nlev) + REAL, INTENT(OUT) :: c(nlon,nlev) + INTEGER :: i, j + + do j=1, nlon + do i=1, nlev + b(i,j) = a(i,j) + 0.1 + c(i,j) = 0.1 + end do + end do + END SUBROUTINE kernel_routine +""" + driver = Sourcefile.from_source(fcode_driver, frontend=frontend)['driver_routine'] + kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] + driver.enrich(kernel) + + driver.apply(DataOffloadTransformation(), role='driver', targets=['kernel_routine']) + + assert len(FindNodes(Pragma).visit(driver.body)) == 2 + assert all(p.keyword == 'acc' for p in FindNodes(Pragma).visit(driver.body)) + + # Ensure that the copy direction is the union of the two calls, ie. + # "a" is "copyin" in first call and "copyout" in second, so it should be "copy" + transformed = driver.to_fortran() + assert 'copyin( d )' in transformed + assert 'copy( b, a )' in transformed + assert 'copyout( c )' in transformed diff --git a/loki/transformations/extract/tests/__init__.py b/loki/transformations/extract/tests/__init__.py new file mode 100644 index 000000000..538bb4e73 --- /dev/null +++ b/loki/transformations/extract/tests/__init__.py @@ -0,0 +1,6 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. diff --git a/loki/transformations/inline/__init__.py b/loki/transformations/inline/__init__.py index e1702970d..f3eb8b7fa 100644 --- a/loki/transformations/inline/__init__.py +++ b/loki/transformations/inline/__init__.py @@ -16,115 +16,4 @@ from loki.transformations.inline.functions import * # noqa from loki.transformations.inline.mapper import * # noqa from loki.transformations.inline.procedures import * # noqa - -from loki.batch import Transformation -from loki.transformations.remove_code import do_remove_dead_code - - -__all__ = ['InlineTransformation'] - - -class InlineTransformation(Transformation): - """ - :any:`Transformation` class to apply several types of source inlining - when batch-processing large source trees via the :any:`Scheduler`. - - Parameters - ---------- - inline_constants : bool - Replace instances of variables with known constant values by - :any:`Literal` (see :any:`inline_constant_parameters`); default: False. - inline_elementals : bool - Replaces :any:`InlineCall` expression to elemental functions - with the called function's body (see :any:`inline_elemental_functions`); - default: True. - inline_stmt_funcs: bool - Replaces :any:`InlineCall` expression to statement functions - with the corresponding rhs of the statement function if - the statement function declaration is available; default: False. - inline_internals : bool - Inline internal procedure (see :any:`inline_internal_procedures`); - default: False. - inline_marked : bool - Inline :any:`Subroutine` objects marked by pragma annotations - (see :any:`inline_marked_subroutines`); default: True. 
- remove_dead_code : bool - Perform dead code elimination, where unreachable branches are - trimmed from the code (see :any:`dead_code_elimination`); default: True - allowed_aliases : tuple or list of str or :any:`Expression`, optional - List of variables that will not be renamed in the parent scope during - internal and pragma-driven inlining. - adjust_imports : bool - Adjust imports by removing the symbol of the inlined routine or adding - imports needed by the imported routine (optional, default: True) - external_only : bool, optional - Do not replace variables declared in the local scope when - inlining constants (default: True) - resolve_sequence_association: bool - Resolve sequence association for routines that contain calls to inline (default: False) - """ - - # Ensure correct recursive inlining by traversing from the leaves - reverse_traversal = True - - # This transformation will potentially change the edges in the callgraph - creates_items = False - - def __init__( - self, inline_constants=False, inline_elementals=True, - inline_stmt_funcs=False, inline_internals=False, - inline_marked=True, remove_dead_code=True, - allowed_aliases=None, adjust_imports=True, - external_only=True, resolve_sequence_association=False - ): - self.inline_constants = inline_constants - self.inline_elementals = inline_elementals - self.inline_stmt_funcs = inline_stmt_funcs - self.inline_internals = inline_internals - self.inline_marked = inline_marked - self.remove_dead_code = remove_dead_code - self.allowed_aliases = allowed_aliases - self.adjust_imports = adjust_imports - self.external_only = external_only - self.resolve_sequence_association = resolve_sequence_association - if self.inline_marked: - self.creates_items = True - - def transform_subroutine(self, routine, **kwargs): - - # Resolve sequence association in calls that are about to be inlined. 
- # This step runs only if all of the following hold: - # 1) it is requested by the user - # 2) inlining of "internals" or "marked" routines is activated - # 3) there is an "internal" or "marked" procedure to inline. - if self.resolve_sequence_association: - resolve_sequence_association_for_inlined_calls( - routine, self.inline_internals, self.inline_marked - ) - - # Replace constant parameter variables with explicit values - if self.inline_constants: - inline_constant_parameters(routine, external_only=self.external_only) - - # Inline elemental functions - if self.inline_elementals: - inline_elemental_functions(routine) - - # Inline Statement Functions - if self.inline_stmt_funcs: - inline_statement_functions(routine) - - # Inline internal (contained) procedures - if self.inline_internals: - inline_internal_procedures(routine, allowed_aliases=self.allowed_aliases) - - # Inline explicitly pragma-marked subroutines - if self.inline_marked: - inline_marked_subroutines( - routine, allowed_aliases=self.allowed_aliases, - adjust_imports=self.adjust_imports - ) - - # After inlining, attempt to trim unreachable code paths - if self.remove_dead_code: - do_remove_dead_code(routine) +from loki.transformations.inline.transformation import * # noqa diff --git a/loki/transformations/inline/tests/__init__.py b/loki/transformations/inline/tests/__init__.py new file mode 100644 index 000000000..538bb4e73 --- /dev/null +++ b/loki/transformations/inline/tests/__init__.py @@ -0,0 +1,6 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
diff --git a/loki/transformations/inline/transformation.py b/loki/transformations/inline/transformation.py new file mode 100644 index 000000000..937939bde --- /dev/null +++ b/loki/transformations/inline/transformation.py @@ -0,0 +1,127 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +from loki.batch import Transformation +from loki.transformations.remove_code import do_remove_dead_code + +from loki.transformations.inline.constants import inline_constant_parameters +from loki.transformations.inline.functions import ( + inline_elemental_functions, inline_statement_functions +) +from loki.transformations.inline.procedures import ( + inline_internal_procedures, inline_marked_subroutines, + resolve_sequence_association_for_inlined_calls +) + + +__all__ = ['InlineTransformation'] + + +class InlineTransformation(Transformation): + """ + :any:`Transformation` class to apply several types of source inlining + when batch-processing large source trees via the :any:`Scheduler`. + + Parameters + ---------- + inline_constants : bool + Replace instances of variables with known constant values by + :any:`Literal` (see :any:`inline_constant_parameters`); default: False. + inline_elementals : bool + Replaces :any:`InlineCall` expression to elemental functions + with the called function's body (see :any:`inline_elemental_functions`); + default: True. + inline_stmt_funcs: bool + Replaces :any:`InlineCall` expression to statement functions + with the corresponding rhs of the statement function if + the statement function declaration is available; default: False. 
+ inline_internals : bool + Inline internal procedure (see :any:`inline_internal_procedures`); + default: False. + inline_marked : bool + Inline :any:`Subroutine` objects marked by pragma annotations + (see :any:`inline_marked_subroutines`); default: True. + remove_dead_code : bool + Perform dead code elimination, where unreachable branches are + trimmed from the code (see :any:`do_remove_dead_code`); default: True + allowed_aliases : tuple or list of str or :any:`Expression`, optional + List of variables that will not be renamed in the parent scope during + internal and pragma-driven inlining. + adjust_imports : bool + Adjust imports by removing the symbol of the inlined routine or adding + imports needed by the imported routine (optional, default: True) + external_only : bool, optional + Do not replace variables declared in the local scope when + inlining constants (default: True) + resolve_sequence_association: bool + Resolve sequence association for routines that contain calls to inline (default: False) + """ + + # Ensure correct recursive inlining by traversing from the leaves + reverse_traversal = True + + # This transformation will potentially change the edges in the callgraph + creates_items = False + + def __init__( + self, inline_constants=False, inline_elementals=True, + inline_stmt_funcs=False, inline_internals=False, + inline_marked=True, remove_dead_code=True, + allowed_aliases=None, adjust_imports=True, + external_only=True, resolve_sequence_association=False + ): + self.inline_constants = inline_constants + self.inline_elementals = inline_elementals + self.inline_stmt_funcs = inline_stmt_funcs + self.inline_internals = inline_internals + self.inline_marked = inline_marked + self.remove_dead_code = remove_dead_code + self.allowed_aliases = allowed_aliases + self.adjust_imports = adjust_imports + self.external_only = external_only + self.resolve_sequence_association = resolve_sequence_association + if self.inline_marked: + self.creates_items = True
+ + def transform_subroutine(self, routine, **kwargs): + + # Resolve sequence association in calls that are about to be inlined. + # This step runs only if all of the following hold: + # 1) it is requested by the user + # 2) inlining of "internals" or "marked" routines is activated + # 3) there is an "internal" or "marked" procedure to inline. + if self.resolve_sequence_association: + resolve_sequence_association_for_inlined_calls( + routine, self.inline_internals, self.inline_marked + ) + + # Replace constant parameter variables with explicit values + if self.inline_constants: + inline_constant_parameters(routine, external_only=self.external_only) + + # Inline elemental functions + if self.inline_elementals: + inline_elemental_functions(routine) + + # Inline Statement Functions + if self.inline_stmt_funcs: + inline_statement_functions(routine) + + # Inline internal (contained) procedures + if self.inline_internals: + inline_internal_procedures(routine, allowed_aliases=self.allowed_aliases) + + # Inline explicitly pragma-marked subroutines + if self.inline_marked: + inline_marked_subroutines( + routine, allowed_aliases=self.allowed_aliases, + adjust_imports=self.adjust_imports + ) + + # After inlining, attempt to trim unreachable code paths + if self.remove_dead_code: + do_remove_dead_code(routine) diff --git a/loki/transformations/parallel/tests/__init__.py b/loki/transformations/parallel/tests/__init__.py new file mode 100644 index 000000000..538bb4e73 --- /dev/null +++ b/loki/transformations/parallel/tests/__init__.py @@ -0,0 +1,6 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
diff --git a/loki/transformations/parallel/tests/test_field_api.py b/loki/transformations/parallel/tests/test_field_api.py index 5e08f9c4c..267c43e91 100644 --- a/loki/transformations/parallel/tests/test_field_api.py +++ b/loki/transformations/parallel/tests/test_field_api.py @@ -7,14 +7,14 @@ import pytest -from loki import Subroutine, Module, Dimension +from loki import Subroutine, Dimension from loki.frontend import available_frontends, OMNI from loki.ir import nodes as ir, FindNodes from loki.expression import symbols as sym from loki.scope import Scope from loki.transformations.parallel import ( - remove_field_api_view_updates, add_field_api_view_updates, get_field_type, - field_get_device_data, FieldAPITransferType + remove_field_api_view_updates, add_field_api_view_updates, + get_field_type, field_get_device_data, FieldAPITransferType ) from loki.types import BasicType, SymbolAttributes from loki.logging import WARNING @@ -198,4 +198,3 @@ def test_field_get_device_data(): assert get_dev_data_call.name.parent == fptr with pytest.raises(TypeError): _ = field_get_device_data(fptr, dev_ptr, "none_transfer_type", scope) - diff --git a/loki/transformations/parallel/tests/test_openmp_region.py b/loki/transformations/parallel/tests/test_openmp_region.py index e9a05705b..7e30d4ac0 100644 --- a/loki/transformations/parallel/tests/test_openmp_region.py +++ b/loki/transformations/parallel/tests/test_openmp_region.py @@ -262,11 +262,11 @@ def test_remove_firstprivate_copies(frontend): @pytest.mark.parametrize('frontend', available_frontends( skip=[(OMNI, 'OMNI needs full type definitions for derived types')] )) -def test_add_firstprivate_copies(tmp_path, frontend): +def test_add_firstprivate_copies(frontend): """ A simple test for :any:`add_firstprivate_copies` """ - + fcode = """ subroutine test_add_openmp_loop(ydgeom, state, arr) use geom_mod, only: geom_type @@ -312,7 +312,7 @@ def test_add_firstprivate_copies(tmp_path, frontend): assert 
str(calls[0].name).startswith('state%') assert calls[1].arguments[0].parent == 'state' assert len(FindNodes(ir.Loop).visit(routine.body)) == 2 - + # Put the explicit firstprivate copies back in add_firstprivate_copies( routine=routine, fprivate_map=fprivate_map diff --git a/loki/transformations/sanitise/tests/__init__.py b/loki/transformations/sanitise/tests/__init__.py new file mode 100644 index 000000000..538bb4e73 --- /dev/null +++ b/loki/transformations/sanitise/tests/__init__.py @@ -0,0 +1,6 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. diff --git a/loki/transformations/sanitise/tests/test_associates.py b/loki/transformations/sanitise/tests/test_associates.py index 62b8085f9..d1e3ff7be 100644 --- a/loki/transformations/sanitise/tests/test_associates.py +++ b/loki/transformations/sanitise/tests/test_associates.py @@ -7,7 +7,7 @@ import pytest -from loki import BasicType, Subroutine, Module +from loki import BasicType, Subroutine from loki.expression import symbols as sym from loki.frontend import available_frontends, OMNI from loki.ir import nodes as ir, FindNodes @@ -447,7 +447,7 @@ def test_resolve_associates_stmt_func(frontend): Test scope management for stmt funcs, either as :any:`ProcedureSymbol` or :any:`DeferredTypeSymbol`. 
""" - fcode = f""" + fcode = """ subroutine test_associates_stmt_func(ydcst, a, b) use yomcst, only: tcst implicit none diff --git a/loki/transformations/single_column/tests/__init__.py b/loki/transformations/single_column/tests/__init__.py new file mode 100644 index 000000000..538bb4e73 --- /dev/null +++ b/loki/transformations/single_column/tests/__init__.py @@ -0,0 +1,6 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. diff --git a/loki/transformations/single_column/tests/test_scc_vector.py b/loki/transformations/single_column/tests/test_scc_vector.py index 96378d869..6b95d7142 100644 --- a/loki/transformations/single_column/tests/test_scc_vector.py +++ b/loki/transformations/single_column/tests/test_scc_vector.py @@ -624,7 +624,7 @@ def test_scc_devector_section_special_case(frontend, horizontal, vertical, block # Check that all else-bodies have been wrapped else_bodies = conditional.else_bodies - assert(len(else_bodies) == 3) + assert len(else_bodies) == 3 for body in else_bodies: assert isinstance(body[0], ir.Comment) assert isinstance(body[1], ir.Loop) diff --git a/loki/transformations/tests/__init__.py b/loki/transformations/tests/__init__.py new file mode 100644 index 000000000..538bb4e73 --- /dev/null +++ b/loki/transformations/tests/__init__.py @@ -0,0 +1,6 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. diff --git a/loki/transformations/tests/test_data_offload.py b/loki/transformations/tests/test_data_offload.py deleted file mode 100644 index 50cbe2dd1..000000000 --- a/loki/transformations/tests/test_data_offload.py +++ /dev/null @@ -1,1390 +0,0 @@ -# (C) Copyright 2018- ECMWF. -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from pathlib import Path -import pytest - -from loki import ( - Sourcefile, Scheduler, FindInlineCalls, warning -) -from loki.frontend import available_frontends, OMNI -from loki.logging import log_levels, logger -from loki.ir import ( - FindNodes, Pragma, PragmaRegion, Loop, CallStatement, Import, - pragma_regions_attached, get_pragma_parameters -) -import loki.expression.symbols as sym -from loki.module import Module -from loki.transformations import ( - DataOffloadTransformation, GlobalVariableAnalysis, - GlobalVarOffloadTransformation, GlobalVarHoistTransformation, FieldOffloadTransformation -) - -@pytest.fixture(scope='module', name='here') -def fixture_here(): - return Path(__file__).parent - -@pytest.fixture(name='config') -def fixture_config(): - """ - Default configuration dict with basic options. 
- """ - return { - 'default': { - 'mode': 'idem', - 'role': 'kernel', - 'expand': True, - 'strict': True, - 'enable_imports': True, - }, - } - -@pytest.fixture(name="parkind_mod") -def fixture_parkind_mod(tmp_path, frontend): - fcode = """ - module parkind1 - integer, parameter :: jprb=4 - end module - """ - return Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) - -@pytest.fixture(name="field_module") -def fixture_field_module(tmp_path, frontend): - fcode = """ - module field_module - implicit none - - type field_2rb - real, pointer :: f_ptr(:,:,:) - end type field_2rb - - type field_3rb - real, pointer :: f_ptr(:,:,:) - contains - procedure :: update_view - end type field_3rb - - type field_4rb - real, pointer :: f_ptr(:,:,:) - contains - procedure :: update_view - end type field_4rb - - contains - subroutine update_view(self, idx) - class(field_3rb), intent(in) :: self - integer, intent(in) :: idx - end subroutine - end module - """ - return Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) - - -@pytest.mark.parametrize('frontend', available_frontends()) -@pytest.mark.parametrize('assume_deviceptr', [True, False]) -@pytest.mark.parametrize('present_on_device', [True, False]) -def test_data_offload_region_openacc(caplog, frontend, assume_deviceptr, present_on_device): - """ - Test the creation of a simple device data offload region - (`!$acc update`) from a `!$loki data` region with a single - kernel call. 
- """ - - fcode_driver = """ - SUBROUTINE driver_routine(nlon, nlev, a, b, c) - INTEGER, INTENT(IN) :: nlon, nlev - REAL, INTENT(INOUT) :: a(nlon,nlev) - REAL, INTENT(INOUT) :: b(nlon,nlev) - REAL, INTENT(INOUT) :: c(nlon,nlev) - - !$loki data - call kernel_routine(nlon, nlev, a, b, c) - !$loki end data - - END SUBROUTINE driver_routine -""" - fcode_kernel = """ - SUBROUTINE kernel_routine(nlon, nlev, a, b, c) - INTEGER, INTENT(IN) :: nlon, nlev - REAL, INTENT(IN) :: a(nlon,nlev) - REAL, INTENT(INOUT) :: b(nlon,nlev) - REAL, INTENT(OUT) :: c(nlon,nlev) - INTEGER :: i, j - - do j=1, nlon - do i=1, nlev - b(i,j) = a(i,j) + 0.1 - c(i,j) = 0.1 - end do - end do - END SUBROUTINE kernel_routine -""" - driver = Sourcefile.from_source(fcode_driver, frontend=frontend)['driver_routine'] - kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] - driver.enrich(kernel) - - if assume_deviceptr and not present_on_device: - caplog.clear() - with caplog.at_level(log_levels['ERROR']): - with pytest.raises(RuntimeError): - data_offload_trafo = DataOffloadTransformation(assume_deviceptr=assume_deviceptr, - present_on_device=present_on_device) - assert len(caplog.records) == 1 - assert ("[Loki] Data offload: Can't assume device pointer arrays without arrays being marked" + - "present on device.") in caplog.records[0].message - return - - data_offload_trafo = DataOffloadTransformation(assume_deviceptr=assume_deviceptr, - present_on_device=present_on_device) - driver.apply(data_offload_trafo, role='driver', targets=['kernel_routine']) - - pragmas = FindNodes(Pragma).visit(driver.body) - assert len(pragmas) == 2 - assert all(p.keyword == 'acc' for p in pragmas) - if assume_deviceptr: - assert 'deviceptr' in pragmas[0].content - params = get_pragma_parameters(pragmas[0], only_loki_pragmas=False) - assert all(var in params['deviceptr'] for var in ('a', 'b', 'c')) - elif present_on_device: - assert 'present' in pragmas[0].content - params = 
get_pragma_parameters(pragmas[0], only_loki_pragmas=False) - assert all(var in params['present'] for var in ('a', 'b', 'c')) - else: - transformed = driver.to_fortran() - assert 'copyin( a )' in transformed - assert 'copy( b )' in transformed - assert 'copyout( c )' in transformed - - -@pytest.mark.parametrize('frontend', available_frontends()) -def test_data_offload_region_complex_remove_openmp(frontend): - """ - Test the creation of a data offload region (OpenACC) with - driver-side loops and CPU-style OpenMP pragmas to be removed. - """ - - fcode_driver = """ - SUBROUTINE driver_routine(nlon, nlev, a, b, c, flag) - INTEGER, INTENT(IN) :: nlon, nlev - REAL, INTENT(INOUT) :: a(nlon,nlev) - REAL, INTENT(INOUT) :: b(nlon,nlev) - REAL, INTENT(INOUT) :: c(nlon,nlev) - logical, intent(in) :: flag - INTEGER :: j - - !$loki data - call my_custom_timer() - - if(flag)then - !$omp parallel do private(j) - do j=1, nlev - call kernel_routine(nlon, j, a(:,j), b(:,j), c(:,j)) - end do - !$omp end parallel do - else - !$omp parallel do private(j) - do j=1, nlev - a(:,j) = 0. - b(:,j) = 0. - c(:,j) = 0. 
- end do - !$omp end parallel do - endif - call my_custom_timer() - - !$loki end data - END SUBROUTINE driver_routine -""" - fcode_kernel = """ - SUBROUTINE kernel_routine(nlon, j, a, b, c) - INTEGER, INTENT(IN) :: nlon, j - REAL, INTENT(IN) :: a(nlon) - REAL, INTENT(INOUT) :: b(nlon) - REAL, INTENT(INOUT) :: c(nlon) - INTEGER :: i - - do j=1, nlon - b(i) = a(i) + 0.1 - c(i) = 0.1 - end do - END SUBROUTINE kernel_routine -""" - driver = Sourcefile.from_source(fcode_driver, frontend=frontend)['driver_routine'] - kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] - driver.enrich(kernel) - - offload_transform = DataOffloadTransformation(remove_openmp=True) - driver.apply(offload_transform, role='driver', targets=['kernel_routine']) - - assert len(FindNodes(Pragma).visit(driver.body)) == 2 - assert all(p.keyword == 'acc' for p in FindNodes(Pragma).visit(driver.body)) - - with pragma_regions_attached(driver): - # Ensure that loops in the region are preserved - regions = FindNodes(PragmaRegion).visit(driver.body) - assert len(regions) == 1 - assert len(FindNodes(Loop).visit(regions[0])) == 2 - - # Ensure all activa and inactive calls are there - calls = FindNodes(CallStatement).visit(regions[0]) - assert len(calls) == 3 - assert calls[0].name == 'my_custom_timer' - assert calls[1].name == 'kernel_routine' - assert calls[2].name == 'my_custom_timer' - - # Ensure OpenMP loop pragma is taken out - assert len(FindNodes(Pragma).visit(regions[0])) == 0 - - transformed = driver.to_fortran() - assert 'copyin( a )' in transformed - assert 'copy( b, c )' in transformed - assert '!$omp' not in transformed - - -@pytest.mark.parametrize('frontend', available_frontends()) -def test_data_offload_region_multiple(frontend): - """ - Test the creation of a device data offload region (`!$acc update`) - from a `!$loki data` region with multiple kernel calls. 
- """ - - fcode_driver = """ - SUBROUTINE driver_routine(nlon, nlev, a, b, c, d) - INTEGER, INTENT(IN) :: nlon, nlev - REAL, INTENT(INOUT) :: a(nlon,nlev) - REAL, INTENT(INOUT) :: b(nlon,nlev) - REAL, INTENT(INOUT) :: c(nlon,nlev) - REAL, INTENT(INOUT) :: d(nlon,nlev) - - !$loki data - call kernel_routine(nlon, nlev, a, b, c) - - call kernel_routine(nlon, nlev, d, b, a) - !$loki end data - - END SUBROUTINE driver_routine -""" - fcode_kernel = """ - SUBROUTINE kernel_routine(nlon, nlev, a, b, c) - INTEGER, INTENT(IN) :: nlon, nlev - REAL, INTENT(IN) :: a(nlon,nlev) - REAL, INTENT(INOUT) :: b(nlon,nlev) - REAL, INTENT(OUT) :: c(nlon,nlev) - INTEGER :: i, j - - do j=1, nlon - do i=1, nlev - b(i,j) = a(i,j) + 0.1 - c(i,j) = 0.1 - end do - end do - END SUBROUTINE kernel_routine -""" - driver = Sourcefile.from_source(fcode_driver, frontend=frontend)['driver_routine'] - kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] - driver.enrich(kernel) - - driver.apply(DataOffloadTransformation(), role='driver', targets=['kernel_routine']) - - assert len(FindNodes(Pragma).visit(driver.body)) == 2 - assert all(p.keyword == 'acc' for p in FindNodes(Pragma).visit(driver.body)) - - # Ensure that the copy direction is the union of the two calls, ie. 
- # "a" is "copyin" in first call and "copyout" in second, so it should be "copy" - transformed = driver.to_fortran() - assert 'copyin( d )' in transformed - assert 'copy( b, a )' in transformed - assert 'copyout( c )' in transformed - - -@pytest.fixture(name='global_variable_analysis_code') -def fixture_global_variable_analysis_code(tmp_path): - fcode = { - #------------------------------ - 'global_var_analysis_header_mod': ( - #------------------------------ -""" -module global_var_analysis_header_mod - implicit none - - integer, parameter :: nval = 5 - integer, parameter :: nfld = 3 - - integer :: n - - integer :: iarr(nfld) - real :: rarr(nval, nfld) -end module global_var_analysis_header_mod -""" - ).strip(), - #---------------------------- - 'global_var_analysis_data_mod': ( - #---------------------------- -""" -module global_var_analysis_data_mod - implicit none - - real, allocatable :: rdata(:,:,:) - - type some_type - real :: val - real, allocatable :: vals(:,:) - end type some_type - - type(some_type) :: tt - -contains - subroutine some_routine(i) - integer, intent(inout) :: i - i = i + 1 - end subroutine some_routine -end module global_var_analysis_data_mod -""" - ).strip(), - #------------------------------ - 'global_var_analysis_kernel_mod': ( - #------------------------------ -""" -module global_var_analysis_kernel_mod - use global_var_analysis_header_mod, only: rarr - use global_var_analysis_data_mod, only: some_routine, some_type - - implicit none - -contains - subroutine kernel_a(arg, tt) - use global_var_analysis_header_mod, only: iarr, nval, nfld, n - - real, intent(inout) :: arg(:,:) - type(some_type), intent(in) :: tt - real :: tmp(n) - integer :: i, j - - do i=1,nfld - if (iarr(i) > 0) then - do j=1,nval - arg(j,i) = rarr(j, i) + tt%val - call some_routine(arg(j,i)) - enddo - endif - enddo - end subroutine kernel_a - - subroutine kernel_b(arg) - use global_var_analysis_header_mod, only: iarr, nfld - use global_var_analysis_data_mod, only: 
rdata, tt - - real, intent(inout) :: arg(:,:) - integer :: i - - do i=1,nfld - if (iarr(i) .ne. 0) then - rdata(:,:,i) = arg(:,:) + rdata(:,:,i) - else - arg(:,:) = tt%vals(:,:) - endif - enddo - end subroutine kernel_b -end module global_var_analysis_kernel_mod -""" - ).strip(), - #------- - 'driver': ( - #------- -""" -subroutine driver(arg) - use global_var_analysis_kernel_mod, only: kernel_a, kernel_b - use global_var_analysis_data_mod, only: tt - implicit none - - real, intent(inout) :: arg(:,:) - - !$loki update_device - - call kernel_a(arg, tt) - - call kernel_b(arg) - - !$loki update_host -end subroutine driver -""" - ).strip() - } - - for name, code in fcode.items(): - (tmp_path/f'{name}.F90').write_text(code) - return tmp_path - - -@pytest.mark.parametrize('frontend', available_frontends()) -@pytest.mark.parametrize('key', (None, 'foobar')) -def test_global_variable_analysis(frontend, key, config, global_variable_analysis_code): - config['routines'] = { - 'driver': {'role': 'driver'} - } - - scheduler = Scheduler( - paths=(global_variable_analysis_code,), config=config, seed_routines='driver', - frontend=frontend, xmods=(global_variable_analysis_code,) - ) - scheduler.process(GlobalVariableAnalysis(key=key)) - if key is None: - key = GlobalVariableAnalysis._key - - # Validate the analysis trafo_data - - # OMNI handles array indices and parameters differently - if frontend == OMNI: - nfld_dim = '3' - nval_dim = '5' - nfld_data = set() - nval_data = set() - else: - nfld_dim = 'nfld' - nval_dim = 'nval' - nfld_data = {('nfld', 'global_var_analysis_header_mod')} - nval_data = {('nval', 'global_var_analysis_header_mod')} - - expected_trafo_data = { - 'global_var_analysis_header_mod': { - 'declares': {f'iarr({nfld_dim})', f'rarr({nval_dim}, {nfld_dim})', 'n'}, - 'offload': {} - }, - 'global_var_analysis_data_mod': { - 'declares': {'rdata(:, :, :)', 'tt'}, - 'offload': {} - }, - 'global_var_analysis_data_mod#some_routine': {'defines_symbols': set(), 
'uses_symbols': set()}, - 'global_var_analysis_kernel_mod#kernel_a': { - 'defines_symbols': set(), - 'uses_symbols': nval_data | nfld_data | { - (f'iarr({nfld_dim})', 'global_var_analysis_header_mod'), - ('n', 'global_var_analysis_header_mod'), - (f'rarr({nval_dim}, {nfld_dim})', 'global_var_analysis_header_mod') - } - }, - 'global_var_analysis_kernel_mod#kernel_b': { - 'defines_symbols': {('rdata(:, :, :)', 'global_var_analysis_data_mod')}, - 'uses_symbols': nfld_data | { - ('rdata(:, :, :)', 'global_var_analysis_data_mod'), ('tt', 'global_var_analysis_data_mod'), - ('tt%vals', 'global_var_analysis_data_mod'), (f'iarr({nfld_dim})', 'global_var_analysis_header_mod') - } - }, - '#driver': { - 'defines_symbols': {('rdata(:, :, :)', 'global_var_analysis_data_mod')}, - 'uses_symbols': nval_data | nfld_data | { - ('rdata(:, :, :)', 'global_var_analysis_data_mod'), - ('n', 'global_var_analysis_header_mod'), - ('tt', 'global_var_analysis_data_mod'), ('tt%vals', 'global_var_analysis_data_mod'), - (f'iarr({nfld_dim})', 'global_var_analysis_header_mod'), - (f'rarr({nval_dim}, {nfld_dim})', 'global_var_analysis_header_mod') - } - } - } - - assert set(scheduler.items) == set(expected_trafo_data) | {'global_var_analysis_data_mod#some_type'} - for item in scheduler.items: - if item == 'global_var_analysis_data_mod#some_type': - continue - for trafo_data_key, trafo_data_value in item.trafo_data[key].items(): - assert ( - sorted( - tuple(str(vv) for vv in v) if isinstance(v, tuple) else str(v) - for v in trafo_data_value - ) == sorted(expected_trafo_data[item.name][trafo_data_key]) - ) - - -@pytest.mark.parametrize('frontend', available_frontends()) -@pytest.mark.parametrize('key', (None, 'foobar')) -def test_global_variable_offload(frontend, key, config, global_variable_analysis_code): - - config['routines'] = { - 'driver': {'role': 'driver'} - } - - # OMNI handles array indices and parameters differently - if frontend == OMNI: - nfld_dim = '3' - nval_dim = '5' - else: - nfld_dim 
= 'nfld' - nval_dim = 'nval' - - scheduler = Scheduler( - paths=(global_variable_analysis_code,), config=config, seed_routines='driver', - frontend=frontend, xmods=(global_variable_analysis_code,) - ) - scheduler.process(GlobalVariableAnalysis(key=key)) - scheduler.process(GlobalVarOffloadTransformation(key=key)) - driver = scheduler['#driver'].ir - - if key is None: - key = GlobalVariableAnalysis._key - - expected_trafo_data = { - 'global_var_analysis_header_mod': { - 'declares': {f'iarr({nfld_dim})', f'rarr({nval_dim}, {nfld_dim})', 'n'}, - 'offload': {f'iarr({nfld_dim})', f'rarr({nval_dim}, {nfld_dim})', 'n'} - }, - 'global_var_analysis_data_mod': { - 'declares': {'rdata(:, :, :)', 'tt'}, - 'offload': {'rdata(:, :, :)', 'tt', 'tt%vals'} - }, - } - - # Verify module offload sets - for item in [scheduler['global_var_analysis_header_mod'], scheduler['global_var_analysis_data_mod']]: - for trafo_data_key, trafo_data_value in item.trafo_data[key].items(): - assert ( - sorted( - tuple(str(vv) for vv in v) if isinstance(v, tuple) else str(v) - for v in trafo_data_value - ) == sorted(expected_trafo_data[item.name][trafo_data_key]) - ) - - # Verify imports have been added to the driver - expected_imports = { - 'global_var_analysis_header_mod': {'iarr', 'rarr', 'n'}, - 'global_var_analysis_data_mod': {'rdata'} - } - - # We need to check only the first imports as they have to be prepended - for import_ in driver.imports[:len(expected_imports)]: - assert {var.name.lower() for var in import_.symbols} == expected_imports[import_.module.lower()] - - expected_h2d_pragmas = { - 'update device': {'iarr', 'rdata', 'rarr', 'n'}, - 'enter data copyin': {'tt%vals'} - } - expected_d2h_pragmas = { - 'update self': {'rdata'} - } - - acc_pragmas = [p for p in FindNodes(Pragma).visit(driver.ir) if p.keyword.lower() == 'acc'] - assert len(acc_pragmas) == len(expected_h2d_pragmas) + len(expected_d2h_pragmas) - for pragma in acc_pragmas[:len(expected_h2d_pragmas)]: - command, variables = 
pragma.content.lower().split('(') - assert command.strip() in expected_h2d_pragmas - assert set(variables.strip()[:-1].strip().split(', ')) == expected_h2d_pragmas[command.strip()] - for pragma in acc_pragmas[len(expected_h2d_pragmas):]: - command, variables = pragma.content.lower().split('(') - assert command.strip() in expected_d2h_pragmas - assert set(variables.strip()[:-1].strip().split(', ')) == expected_d2h_pragmas[command.strip()] - - # Verify declarations have been added to the header modules - expected_declarations = { - 'global_var_analysis_header_mod': {'iarr', 'rarr', 'n'}, - 'global_var_analysis_data_mod': {'rdata', 'tt'} - } - - modules = { - name: scheduler[name].ir for name in expected_declarations - } - - for name, module in modules.items(): - acc_pragmas = [p for p in FindNodes(Pragma).visit(module.spec) if p.keyword.lower() == 'acc'] - variables = { - v.strip() - for pragma in acc_pragmas - for v in pragma.content.lower().split('(')[-1].strip()[:-1].split(', ') - } - assert variables == expected_declarations[name] - - -@pytest.mark.parametrize('frontend', available_frontends()) -def test_transformation_global_var_import(here, config, frontend, tmp_path): - """ - Test the generation of offload instructions of global variable imports. 
- """ - config['routines'] = { - 'driver': {'role': 'driver'} - } - - scheduler = Scheduler(paths=here/'sources/projGlobalVarImports', config=config, frontend=frontend, xmods=[tmp_path]) - scheduler.process(transformation=GlobalVariableAnalysis()) - scheduler.process(transformation=GlobalVarOffloadTransformation()) - - driver = scheduler['#driver'].ir - moduleA = scheduler['modulea'].ir - moduleB = scheduler['moduleb'].ir - moduleC = scheduler['modulec'].ir - - # check that global variables have been added to driver symbol table - imports = FindNodes(Import).visit(driver.spec) - assert len(imports) == 2 - assert imports[0].module != imports[1].module - assert imports[0].symbols != imports[1].symbols - for i in imports: - assert len(i.symbols) == 2 - assert i.module.lower() in ('moduleb', 'modulec') - assert set(s.name for s in i.symbols) in ({'var2', 'var3'}, {'var4', 'var5'}) - - # check that existing acc pragmas have not been stripped and update device/update self added correctly - pragmas = FindNodes(Pragma).visit(driver.body) - assert len(pragmas) == 4 - assert all(p.keyword.lower() == 'acc' for p in pragmas) - - assert 'update device' in pragmas[0].content - assert 'var2' in pragmas[0].content - assert 'var3' in pragmas[0].content - - assert pragmas[1].content == 'serial' - assert pragmas[2].content == 'end serial' - - assert 'update self' in pragmas[3].content - assert 'var4' in pragmas[3].content - assert 'var5' in pragmas[3].content - - # check that no declarations have been added for parameters - pragmas = FindNodes(Pragma).visit(moduleA.spec) - assert not pragmas - - # check for device-side declarations where appropriate - pragmas = FindNodes(Pragma).visit(moduleB.spec) - assert len(pragmas) == 1 - assert pragmas[0].keyword == 'acc' - assert 'declare create' in pragmas[0].content - assert 'var2' in pragmas[0].content - assert 'var3' in pragmas[0].content - - pragmas = FindNodes(Pragma).visit(moduleC.spec) - assert len(pragmas) == 1 - assert 
pragmas[0].keyword == 'acc' - assert 'declare create' in pragmas[0].content - assert 'var4' in pragmas[0].content - assert 'var5' in pragmas[0].content - - -@pytest.mark.parametrize('frontend', available_frontends()) -def test_transformation_global_var_import_derived_type(here, config, frontend, tmp_path): - """ - Test the generation of offload instructions of derived-type global variable imports. - """ - - config['default']['enable_imports'] = True - config['routines'] = { - 'driver_derived_type': {'role': 'driver'} - } - - scheduler = Scheduler(paths=here/'sources/projGlobalVarImports', config=config, frontend=frontend, xmods=[tmp_path]) - scheduler.process(transformation=GlobalVariableAnalysis()) - scheduler.process(transformation=GlobalVarOffloadTransformation()) - - driver = scheduler['#driver_derived_type'].ir - module = scheduler['module_derived_type'].ir - - # check that global variables have been added to driver symbol table - imports = FindNodes(Import).visit(driver.spec) - assert len(imports) == 1 - assert len(imports[0].symbols) == 2 - assert imports[0].module.lower() == 'module_derived_type' - assert set(s.name for s in imports[0].symbols) == {'p', 'p0'} - - # check that existing acc pragmas have not been stripped and update device/update self added correctly - pragmas = FindNodes(Pragma).visit(driver.body) - assert len(pragmas) == 5 - assert all(p.keyword.lower() == 'acc' for p in pragmas) - - assert 'enter data copyin' in pragmas[0].content - assert 'p0%x' in pragmas[0].content - assert 'p0%y' in pragmas[0].content - assert 'p0%z' in pragmas[0].content - assert 'p%n' in pragmas[0].content - - assert 'enter data create' in pragmas[1].content - assert 'p%x' in pragmas[1].content - assert 'p%y' in pragmas[1].content - assert 'p%z' in pragmas[1].content - - assert pragmas[2].content == 'serial' - assert pragmas[3].content == 'end serial' - - assert 'exit data copyout' in pragmas[4].content - assert 'p%x' in pragmas[4].content - assert 'p%y' in 
pragmas[4].content - assert 'p%z' in pragmas[4].content - - # check for device-side declarations - pragmas = FindNodes(Pragma).visit(module.spec) - assert len(pragmas) == 1 - assert pragmas[0].keyword == 'acc' - assert 'declare create' in pragmas[0].content - assert 'p' in pragmas[0].content - assert 'p0' in pragmas[0].content - assert 'p_array' in pragmas[0].content - # Note: g is not offloaded because it is not used by the kernel (albeit imported) - - -@pytest.mark.parametrize('frontend', available_frontends()) -@pytest.mark.parametrize('hoist_parameters', (False, True)) -@pytest.mark.parametrize('ignore_modules', (None, ('moduleb',))) -def test_transformation_global_var_hoist(here, config, frontend, hoist_parameters, ignore_modules, tmp_path): - """ - Test hoisting of global variable imports. - """ - config['default']['enable_imports'] = True - config['routines'] = { - 'driver': {'role': 'driver'} - } - - scheduler = Scheduler(paths=here/'sources/projGlobalVarImports', config=config, frontend=frontend, xmods=[tmp_path]) - scheduler.process(transformation=GlobalVariableAnalysis()) - scheduler.process(transformation=GlobalVarHoistTransformation(hoist_parameters=hoist_parameters, - ignore_modules=ignore_modules)) - - driver = scheduler['#driver'].ir - kernel0 = scheduler['#kernel0'].ir - kernel_map = {key: scheduler[f'#{key}'].ir for key in ['kernel1', 'kernel2', 'kernel3']} - some_func = scheduler['func_mod#some_func'].ir - - # symbols within each module - expected_symbols = {'modulea': ['var0', 'var1'], 'moduleb': ['var2', 'var3'], - 'modulec': ['var4', 'var5']} - # expected intent of those variables (if hoisted) - var_intent_map = {'var0': 'in', 'var1': 'in', 'var2': 'in', - 'var3': 'in', 'var4': 'inout', 'var5': 'inout', 'tmp': None} - # DRIVER - imports = FindNodes(Import).visit(driver.spec) - import_names = [_import.module.lower() for _import in imports] - # check driver imports - expected_driver_modules = ['modulec'] - expected_driver_modules += ['moduleb'] 
if ignore_modules is None else [] - # OMNI handles parameters differently, ModuleA only contains parameters - if frontend != OMNI: - expected_driver_modules += ['modulea'] if hoist_parameters else [] - assert len(imports) == len(expected_driver_modules) - assert sorted(expected_driver_modules) == sorted(import_names) - for _import in imports: - assert sorted([sym.name for sym in _import.symbols]) == expected_symbols[_import.module.lower()] - # check driver call - driver_calls = FindNodes(CallStatement).visit(driver.body) - expected_args = [] - for module in expected_driver_modules: - expected_args.extend(expected_symbols[module]) - assert [arg.name for arg in driver_calls[0].arguments] == sorted(expected_args) - - originally = {'kernel1': ['modulea'], 'kernel2': ['moduleb'], - 'kernel3': ['moduleb', 'modulec']} - # KERNEL0 - expected_vars = expected_args.copy() - expected_vars.append('a') - assert [arg.name for arg in kernel0.arguments] == sorted(expected_args) - assert [arg.name for arg in kernel0.variables] == sorted(expected_vars) - for var in kernel0.arguments: - assert kernel0.variable_map[var.name.lower()].type.intent == var_intent_map[var.name.lower()] - assert var.scope == kernel0 - kernel0_inline_calls = FindInlineCalls().visit(kernel0.body) - for inline_call in kernel0_inline_calls: - if ignore_modules is None: - assert len(inline_call.arguments) == 1 - assert [arg.name for arg in inline_call.arguments] == ['var2'] - assert [arg.name for arg in some_func.arguments] == ['var2'] - else: - assert len(inline_call.arguments) == 0 - assert len(some_func.arguments) == 0 - kernel0_calls = FindNodes(CallStatement).visit(kernel0.body) - # KERNEL1 & KERNEL2 & KERNEL3 - for call in kernel0_calls: - expected_args = [] - expected_imports = [] - kernel_expected_symbols = [] - for module in originally[call.routine.name]: - # always, since at least 'some_func' is imported - if call.routine.name == 'kernel1' and module == 'modulea': - expected_imports.append(module) - 
kernel_expected_symbols.append('some_func') - if module in expected_driver_modules: - expected_args.extend(expected_symbols[module]) - else: - # already added - if module != 'modulea': - expected_imports.append(module) - kernel_expected_symbols.extend(expected_symbols[module]) - assert len(expected_args) == len(call.arguments) - assert [arg.name for arg in call.arguments] == expected_args - assert [arg.name for arg in kernel_map[call.routine.name].arguments] == expected_args - for var in kernel_map[call.routine.name].variables: - var_intent = kernel_map[call.routine.name].variable_map[var.name.lower()].type.intent - assert var.scope == kernel_map[call.routine.name] - assert var_intent == var_intent_map[var.name.lower()] - if call.routine.name in ['kernel1', 'kernel2']: - expected_args = ['tmp'] + expected_args - assert [arg.name for arg in kernel_map[call.routine.name].variables] == expected_args - kernel_imports = FindNodes(Import).visit(call.routine.spec) - assert sorted([_import.module.lower() for _import in kernel_imports]) == sorted(expected_imports) - imported_symbols = [] # _import.symbols for _import in kernel_imports] - for _import in kernel_imports: - imported_symbols.extend([sym.name.lower() for sym in _import.symbols]) - assert sorted(imported_symbols) == sorted(kernel_expected_symbols) - - -@pytest.mark.parametrize('frontend', available_frontends()) -@pytest.mark.parametrize('hoist_parameters', (False, True)) -def test_transformation_global_var_derived_type_hoist(here, config, frontend, hoist_parameters, tmp_path): - """ - Test hoisting of derived-type global variable imports. 
- """ - - config['default']['enable_imports'] = True - config['routines'] = { - 'driver_derived_type': {'role': 'driver'} - } - - scheduler = Scheduler(paths=here/'sources/projGlobalVarImports', config=config, frontend=frontend, xmods=[tmp_path]) - scheduler.process(transformation=GlobalVariableAnalysis()) - scheduler.process(transformation=GlobalVarHoistTransformation(hoist_parameters)) - - driver = scheduler['#driver_derived_type'].ir - kernel = scheduler['#kernel_derived_type'].ir - - # DRIVER - imports = FindNodes(Import).visit(driver.spec) - assert len(imports) == 1 - assert imports[0].module.lower() == 'module_derived_type' - assert sorted([sym.name.lower() for sym in imports[0].symbols]) == sorted(['p', 'p_array', 'p0']) - calls = FindNodes(CallStatement).visit(driver.body) - assert len(calls) == 1 - # KERNEL - assert [arg.name for arg in calls[0].arguments] == ['p', 'p0', 'p_array'] - assert [arg.name for arg in kernel.arguments] == ['p', 'p0', 'p_array'] - kernel_imports = FindNodes(Import).visit(kernel.spec) - assert len(kernel_imports) == 1 - assert [sym.name.lower() for sym in kernel_imports[0].symbols] == ['g'] - assert sorted([var.name for var in kernel.variables]) == ['i', 'j', 'p', 'p0', 'p_array'] - assert kernel.variable_map['p_array'].type.allocatable - assert kernel.variable_map['p_array'].type.intent == 'inout' - assert kernel.variable_map['p_array'].type.dtype.name == 'point' - assert kernel.variable_map['p'].type.intent == 'inout' - assert kernel.variable_map['p'].type.dtype.name == 'point' - assert kernel.variable_map['p0'].type.intent == 'in' - assert kernel.variable_map['p0'].type.dtype.name == 'point' - - -@pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload(frontend, parkind_mod, field_module, tmp_path): - fcode = """ - module driver_mod - use parkind1, only: jprb - use field_module, only: field_2rb, field_3rb - implicit none - - type state_type - real(kind=jprb), dimension(10,10), pointer :: a, b, c - 
class(field_3rb), pointer :: f_a, f_b, f_c - contains - procedure :: update_view => state_update_view - end type state_type - - contains - - subroutine state_update_view(self, idx) - class(state_type), intent(in) :: self - integer, intent(in) :: idx - end subroutine - - subroutine kernel_routine(nlon, nlev, a, b, c) - integer, intent(in) :: nlon, nlev - real(kind=jprb), intent(in) :: a(nlon,nlev) - real(kind=jprb), intent(inout) :: b(nlon,nlev) - real(kind=jprb), intent(out) :: c(nlon,nlev) - integer :: i, j - - do j=1, nlon - do i=1, nlev - b(i,j) = a(i,j) + 0.1 - c(i,j) = 0.1 - end do - end do - end subroutine kernel_routine - - subroutine driver_routine(nlon, nlev, state) - integer, intent(in) :: nlon, nlev - type(state_type), intent(inout) :: state - integer :: i - - !$loki data - do i=1,nlev - call state%update_view(i) - call kernel_routine(nlon, nlev, state%a, state%b, state%c) - end do - !$loki end data - - end subroutine driver_routine - end module driver_mod - """ - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] - driver = driver_mod['driver_routine'] - deviceptr_prefix = 'loki_devptr_prefix_' - driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, - offload_index='i', - field_group_types=['state_type']), - role='driver', - targets=['kernel_routine']) - - calls = FindNodes(CallStatement).visit(driver.body) - kernel_call = next(c for c in calls if c.name=='kernel_routine') - - # verify that field offloads are generated properly - in_calls = [c for c in calls if 'get_device_data_rdonly' in c.name.name.lower()] - assert len(in_calls) == 1 - inout_calls = [c for c in calls if 'get_device_data_rdwr' in c.name.name.lower()] - assert len(inout_calls) == 2 - # verify that field sync host calls are generated properly - sync_calls = [c for c in calls if 'sync_host_rdwr' in c.name.name.lower()] - assert len(sync_calls) == 2 - - # verify that data offload pragmas remain - pragmas = 
FindNodes(Pragma).visit(driver.body) - assert len(pragmas) == 2 - assert all(p.keyword=='loki' and p.content==c for p, c in zip(pragmas, ['data', 'end data'])) - - # verify that new pointer variables are created and used in driver calls - for var in ['state_a', 'state_b', 'state_c']: - name = deviceptr_prefix + var - assert name in driver.variable_map - devptr = driver.variable_map[name] - assert isinstance(devptr, sym.Array) - assert len(devptr.shape) == 3 - assert devptr.name in (arg.name for arg in kernel_call.arguments) - - -@pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload_slices(frontend, parkind_mod, field_module, tmp_path): - fcode = """ - module driver_mod - use parkind1, only: jprb - use field_module, only: field_4rb - implicit none - - type state_type - real(kind=jprb), dimension(10,10,10), pointer :: a, b, c, d - class(field_4rb), pointer :: f_a, f_b, f_c, f_d - contains - procedure :: update_view => state_update_view - end type state_type - - contains - - subroutine state_update_view(self, idx) - class(state_type), intent(in) :: self - integer, intent(in) :: idx - end subroutine - - subroutine kernel_routine(nlon, nlev, a, b, c, d) - integer, intent(in) :: nlon, nlev - real(kind=jprb), intent(in) :: a(nlon,nlev,nlon) - real(kind=jprb), intent(inout) :: b(nlon,nlev) - real(kind=jprb), intent(out) :: c(nlon) - real(kind=jprb), intent(in) :: d(nlon,nlev,nlon) - integer :: i, j - end subroutine kernel_routine - - subroutine driver_routine(nlon, nlev, state) - integer, intent(in) :: nlon, nlev - type(state_type), intent(inout) :: state - integer :: i - !$loki data - do i=1,nlev - call kernel_routine(nlon, nlev, state%a(:,:,1), state%b(:,1,1), state%c(1,1,1), state%d) - end do - !$loki end data - - end subroutine driver_routine - end module driver_mod - """ - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] - driver = driver_mod['driver_routine'] - deviceptr_prefix = 
'loki_devptr_prefix_' - driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, - offload_index='i', - field_group_types=['state_type']), - role='driver', - targets=['kernel_routine']) - - calls = FindNodes(CallStatement).visit(driver.body) - kernel_call = next(c for c in calls if c.name=='kernel_routine') - # verify that new pointer variables are created and used in driver calls - for var, rank in zip(['state_d', 'state_a', 'state_b', 'state_c',], [4, 3, 2, 1]): - name = deviceptr_prefix + var - assert name in driver.variable_map - devptr = driver.variable_map[name] - assert isinstance(devptr, sym.Array) - assert len(devptr.shape) == 4 - assert devptr.name in (arg.name for arg in kernel_call.arguments) - arg = next(arg for arg in kernel_call.arguments if devptr.name in arg.name) - assert arg.dimensions == ((sym.RangeIndex((None,None)),)*(rank-1) + - (sym.IntLiteral(1),)*(4-rank) + - (sym.Scalar(name='i'),)) - - - - -@pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload_multiple_calls(frontend, parkind_mod, field_module, tmp_path): - fcode = """ - module driver_mod - use parkind1, only: jprb - use field_module, only: field_2rb, field_3rb - implicit none - - type state_type - real(kind=jprb), dimension(10,10), pointer :: a, b, c - class(field_3rb), pointer :: f_a, f_b, f_c - contains - procedure :: update_view => state_update_view - end type state_type - - contains - - subroutine state_update_view(self, idx) - class(state_type), intent(in) :: self - integer, intent(in) :: idx - end subroutine - - subroutine kernel_routine(nlon, nlev, a, b, c) - integer, intent(in) :: nlon, nlev - real(kind=jprb), intent(in) :: a(nlon,nlev) - real(kind=jprb), intent(inout) :: b(nlon,nlev) - real(kind=jprb), intent(out) :: c(nlon,nlev) - integer :: i, j - - do j=1, nlon - do i=1, nlev - b(i,j) = a(i,j) + 0.1 - c(i,j) = 0.1 - end do - end do - end subroutine kernel_routine - - subroutine driver_routine(nlon, nlev, state) - integer, 
intent(in) :: nlon, nlev - type(state_type), intent(inout) :: state - integer :: i - - !$loki data - do i=1,nlev - call state%update_view(i) - - call kernel_routine(nlon, nlev, state%a, state%b, state%c) - - call kernel_routine(nlon, nlev, state%a, state%b, state%c) - end do - !$loki end data - - end subroutine driver_routine - end module driver_mod - """ - - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] - driver = driver_mod['driver_routine'] - deviceptr_prefix = 'loki_devptr_prefix_' - driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, - offload_index='i', - field_group_types=['state_type']), - role='driver', - targets=['kernel_routine']) - calls = FindNodes(CallStatement).visit(driver.body) - kernel_calls = [c for c in calls if c.name=='kernel_routine'] - - # verify that field offloads are generated properly - in_calls = [c for c in calls if 'get_device_data_rdonly' in c.name.name.lower()] - assert len(in_calls) == 1 - inout_calls = [c for c in calls if 'get_device_data_rdwr' in c.name.name.lower()] - assert len(inout_calls) == 2 - # verify that field sync host calls are generated properly - sync_calls = [c for c in calls if 'sync_host_rdwr' in c.name.name.lower()] - assert len(sync_calls) == 2 - - # verify that data offload pragmas remain - pragmas = FindNodes(Pragma).visit(driver.body) - assert len(pragmas) == 2 - assert all(p.keyword=='loki' and p.content==c for p, c in zip(pragmas, ['data', 'end data'])) - - # verify that new pointer variables are created and used in driver calls - for var in ['state_a', 'state_b', 'state_c']: - name = deviceptr_prefix + var - assert name in driver.variable_map - devptr = driver.variable_map[name] - assert isinstance(devptr, sym.Array) - assert len(devptr.shape) == 3 - assert devptr.name in (arg.name for kernel_call in kernel_calls for arg in kernel_call.arguments) - - -@pytest.mark.parametrize('frontend', available_frontends()) -def 
test_field_offload_no_targets(frontend, parkind_mod, field_module, tmp_path): - fother = """ - module another_module - implicit none - contains - subroutine another_kernel(nlon, nlev, a, b, c) - integer, intent(in) :: nlon, nlev - real, intent(in) :: a(nlon,nlev) - real, intent(inout) :: b(nlon,nlev) - real, intent(out) :: c(nlon,nlev) - integer :: i, j - end subroutine - end module - """ - fcode = """ - module driver_mod - use parkind1, only: jprb - use field_module, only: field_2rb, field_3rb - use another_module, only: another_kernel - - implicit none - - type state_type - real(kind=jprb), dimension(10,10), pointer :: a, b, c - class(field_3rb), pointer :: f_a, f_b, f_c - contains - procedure :: update_view => state_update_view - end type state_type - - contains - - subroutine state_update_view(self, idx) - class(state_type), intent(in) :: self - integer, intent(in) :: idx - end subroutine - - subroutine kernel_routine(nlon, nlev, a, b, c) - integer, intent(in) :: nlon, nlev - real(kind=jprb), intent(in) :: a(nlon,nlev) - real(kind=jprb), intent(inout) :: b(nlon,nlev) - real(kind=jprb), intent(out) :: c(nlon,nlev) - integer :: i, j - - do j=1, nlon - do i=1, nlev - b(i,j) = a(i,j) + 0.1 - c(i,j) = 0.1 - end do - end do - end subroutine kernel_routine - - subroutine driver_routine(nlon, nlev, state) - integer, intent(in) :: nlon, nlev - type(state_type), intent(inout) :: state - integer :: i - - !$loki data - do i=1,nlev - call state%update_view(i) - call another_kernel(nlon, state%a, state%b, state%c) - end do - !$loki end data - - end subroutine driver_routine - end module driver_mod - """ - - Sourcefile.from_source(fother, frontend=frontend, xmods=[tmp_path]) - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] - driver = driver_mod['driver_routine'] - deviceptr_prefix = 'loki_devptr_prefix_' - driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, - offload_index='i', - 
field_group_types=['state_type']), - role='driver', - targets=['kernel_routine']) - - calls = FindNodes(CallStatement).visit(driver.body) - assert not any(c for c in calls if c.name=='kernel_routine') - - # verify that no field offloads are generated - in_calls = [c for c in calls if 'get_device_data_rdonly' in c.name.name.lower()] - assert len(in_calls) == 0 - inout_calls = [c for c in calls if 'get_device_data_rdwr' in c.name.name.lower()] - assert len(inout_calls) == 0 - # verify that no field sync host calls are generated - sync_calls = [c for c in calls if 'sync_host_rdwr' in c.name.name.lower()] - assert len(sync_calls) == 0 - - # verify that data offload pragmas remain - pragmas = FindNodes(Pragma).visit(driver.body) - assert len(pragmas) == 2 - assert all(p.keyword=='loki' and p.content==c for p, c in zip(pragmas, ['data', 'end data'])) - - -@pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload_unknown_kernel(caplog, frontend, parkind_mod, field_module, tmp_path): - fother = """ - module another_module - implicit none - contains - subroutine another_kernel(nlon, nlev, a, b, c) - integer, intent(in) :: nlon, nlev - real, intent(in) :: a(nlon,nlev) - real, intent(inout) :: b(nlon,nlev) - real, intent(out) :: c(nlon,nlev) - integer :: i, j - end subroutine - end module - """ - fcode = """ - module driver_mod - use parkind1, only: jprb - use another_module, only: another_kernel - implicit none - - type state_type - real(kind=jprb), dimension(10,10), pointer :: a, b, c - class(field_3rb), pointer :: f_a, f_b, f_c - contains - procedure :: update_view => state_update_view - end type state_type - - contains - - subroutine state_update_view(self, idx) - class(state_type), intent(in) :: self - integer, intent(in) :: idx - end subroutine - - subroutine driver_routine(nlon, nlev, state) - integer, intent(in) :: nlon, nlev - type(state_type), intent(inout) :: state - integer :: i - - !$loki data - do i=1,nlev - call state%update_view(i) - 
call another_kernel(nlon, nlev, state%a, state%b, state%c) - end do - !$loki end data - - end subroutine driver_routine - end module driver_mod - """ - - Sourcefile.from_source(fother, frontend=frontend, xmods=[tmp_path]) - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] - driver = driver_mod['driver_routine'] - deviceptr_prefix = 'loki_devptr_prefix_' - - field_offload_trafo = FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, - offload_index='i', - field_group_types=['state_type']) - caplog.clear() - with caplog.at_level(log_levels['ERROR']): - with pytest.raises(RuntimeError): - driver.apply(field_offload_trafo, role='driver', targets=['another_kernel']) - assert len(caplog.records) == 1 - assert ('[Loki] Data offload: Routine driver_routine has not been enriched '+ - 'in another_kernel') in caplog.records[0].message - - -@pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload_warnings(caplog, frontend, parkind_mod, field_module, tmp_path): - fother_state = """ - module state_type_mod - implicit none - type state_type2 - real, dimension(10,10), pointer :: a, b, c - contains - procedure :: update_view => state_update_view - end type state_type2 - - contains - - subroutine state_update_view(self, idx) - class(state_type2), intent(in) :: self - integer, intent(in) :: idx - end subroutine - end module - """ - fother_mod= """ - module another_module - implicit none - contains - subroutine another_kernel(nlon, nlev, a, b, c) - integer, intent(in) :: nlon, nlev - real, intent(in) :: a(nlon,nlev) - real, intent(inout) :: b(nlon,nlev) - real, intent(out) :: c(nlon,nlev) - integer :: i, j - end subroutine - end module - """ - fcode = """ - module driver_mod - use state_type_mod, only: state_type2 - use parkind1, only: jprb - use field_module, only: field_2rb, field_3rb - use another_module, only: another_kernel - - implicit none - - type state_type - real(kind=jprb), dimension(10,10), 
pointer :: a, b, c - class(field_3rb), pointer :: f_a, f_b, f_c - contains - procedure :: update_view => state_update_view - end type state_type - - contains - - subroutine state_update_view(self, idx) - class(state_type), intent(in) :: self - integer, intent(in) :: idx - end subroutine - - subroutine kernel_routine(nlon, nlev, a, b, c) - integer, intent(in) :: nlon, nlev - real(kind=jprb), intent(in) :: a(nlon,nlev) - real(kind=jprb), intent(inout) :: b(nlon,nlev) - real(kind=jprb), intent(out) :: c(nlon,nlev) - integer :: i, j - - do j=1, nlon - do i=1, nlev - b(i,j) = a(i,j) + 0.1 - c(i,j) = 0.1 - end do - end do - end subroutine kernel_routine - - subroutine driver_routine(nlon, nlev, state, state2) - integer, intent(in) :: nlon, nlev - type(state_type), intent(inout) :: state - type(state_type2), intent(inout) :: state2 - - integer :: i - real(kind=jprb) :: a(nlon,nlev) - real, pointer :: loki_devptr_prefix_state_b - - !$loki data - do i=1,nlev - call state%update_view(i) - call kernel_routine(nlon, nlev, a, state%b, state2%c) - end do - !$loki end data - - end subroutine driver_routine - end module driver_mod - """ - Sourcefile.from_source(fother_state, frontend=frontend, xmods=[tmp_path]) - Sourcefile.from_source(fother_mod, frontend=frontend, xmods=[tmp_path]) - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] - driver = driver_mod['driver_routine'] - deviceptr_prefix = 'loki_devptr_prefix_' - - field_offload_trafo = FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, - offload_index='i', - field_group_types=['state_type']) - caplog.clear() - with caplog.at_level(log_levels['WARNING']): - driver.apply(field_offload_trafo, role='driver', targets=['kernel_routine']) - assert len(caplog.records) == 3 - assert (('[Loki] Data offload: Raw array object a encountered in' - +' driver_routine that is not wrapped by a Field API object') - in caplog.records[0].message) - assert ('[Loki] Data offload: The parent 
object state2 of type state_type2 is not in the' + - ' list of field wrapper types') in caplog.records[1].message - assert ('[Loki] Data offload: The routine driver_routine already has a' + - ' variable named loki_devptr_prefix_state_b') in caplog.records[2].message diff --git a/loki/transformations/tests/test_loop_blocking.py b/loki/transformations/tests/test_loop_blocking.py index 0ea3133a6..176893129 100644 --- a/loki/transformations/tests/test_loop_blocking.py +++ b/loki/transformations/tests/test_loop_blocking.py @@ -9,8 +9,15 @@ import pytest import numpy as np -from loki import available_frontends, Subroutine, pragmas_attached, find_driver_loops, Loop, fgen, \ - ir, FindNodes, jit_compile, clean_test, FindVariables, Array +from loki import Subroutine +from loki.build import jit_compile, clean_test +from loki.expression import Array +from loki.frontend import available_frontends +from loki.ir import ( + nodes as ir, FindNodes, FindVariables, pragmas_attached +) + +from loki.transformations.utilities import find_driver_loops from loki.transformations.loop_blocking import split_loop, block_loop_arrays @@ -43,16 +50,17 @@ def test_1d_splitting(tmp_path, frontend, block_size, n): loops = FindNodes(ir.Loop).visit(routine.ir) num_loops = len(loops) num_vars = len(routine.variable_map) - with pragmas_attached(routine, Loop): - loops = find_driver_loops(routine.body, - targets=None) - splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size) + with pragmas_attached(routine, ir.Loop): + loops = find_driver_loops(routine.body, targets=None) + split_loop(routine, loops[0], block_size) loops = FindNodes(ir.Loop).visit(routine.ir) - assert len( - loops) == num_loops + 1, f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" - assert len( - routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, f"Total number of variables after loop splitting is: {len(routine.variable_map)} but expected {num_vars + 
LOKI_LOOP_SLIT_VAR_ADDITION}" + assert len(loops) == num_loops + 1, \ + f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" + assert len(routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, ( + f"Total number of variables after loop splitting is: {len(routine.variable_map)} " + f"but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" + ) filepath = tmp_path / (f'{routine.name}_{frontend}.f90') function = jit_compile(routine, filepath=filepath, objname=routine.name) @@ -93,16 +101,17 @@ def test_1d_splitting_multi_var(tmp_path, frontend, block_size, n): loops = FindNodes(ir.Loop).visit(routine.ir) num_loops = len(loops) num_vars = len(routine.variable_map) - with pragmas_attached(routine, Loop): - loops = find_driver_loops(routine.body, - targets=None) - splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size) + with pragmas_attached(routine, ir.Loop): + loops = find_driver_loops(routine.body, targets=None) + split_loop(routine, loops[0], block_size) loops = FindNodes(ir.Loop).visit(routine.ir) - assert len( - loops) == num_loops + 1, f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" - assert len( - routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, f"Total number of variables after loop splitting is: {len(routine.variable_map)} but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" + assert len(loops) == num_loops + 1, \ + f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" + assert len(routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, ( + f"Total number of variables after loop splitting is: {len(routine.variable_map)} " + f"but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" + ) filepath = tmp_path / (f'{routine.name}_{frontend}.f90') function = jit_compile(routine, filepath=filepath, objname=routine.name) @@ -141,16 +150,17 @@ def test_2d_splitting(tmp_path, frontend, block_size, n): loops = 
FindNodes(ir.Loop).visit(routine.ir) num_loops = len(loops) num_vars = len(routine.variable_map) - with pragmas_attached(routine, Loop): - loops = find_driver_loops(routine.body, - targets=None) - splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size) + with pragmas_attached(routine, ir.Loop): + loops = find_driver_loops(routine.body, targets=None) + split_loop(routine, loops[0], block_size) loops = FindNodes(ir.Loop).visit(routine.ir) - assert len( - loops) == num_loops + 1, f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" - assert len( - routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, f"Total number of variables after loop splitting is: {len(routine.variable_map)} but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" + assert len(loops) == num_loops + 1, \ + f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" + assert len(routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, ( + f"Total number of variables after loop splitting is: {len(routine.variable_map)} " + f"but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" + ) filepath = tmp_path / (f'{routine.name}_{frontend}.f90') function = jit_compile(routine, filepath=filepath, objname=routine.name) @@ -191,16 +201,17 @@ def test_3d_splitting(tmp_path, frontend, block_size, n): loops = FindNodes(ir.Loop).visit(routine.ir) num_loops = len(loops) num_vars = len(routine.variable_map) - with pragmas_attached(routine, Loop): - loops = find_driver_loops(routine.body, - targets=None) - splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size) + with pragmas_attached(routine, ir.Loop): + loops = find_driver_loops(routine.body, targets=None) + split_loop(routine, loops[0], block_size) loops = FindNodes(ir.Loop).visit(routine.ir) - assert len( - loops) == num_loops + 1, f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" - assert len( - 
routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, f"Total number of variables after loop splitting is: {len(routine.variable_map)} but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" + assert len(loops) == num_loops + 1, \ + f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" + assert len(routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, ( + f"Total number of variables after loop splitting is: {len(routine.variable_map)} " + f"but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" + ) filepath = tmp_path / (f'{routine.name}_{frontend}.f90') function = jit_compile(routine, filepath=filepath, objname=routine.name) @@ -249,7 +260,7 @@ def test_1d_blocking(tmp_path, frontend, block_size, n): """ routine = Subroutine.from_source(fcode, frontend=frontend) loops = FindNodes(ir.Loop).visit(routine.ir) - with pragmas_attached(routine, Loop): + with pragmas_attached(routine, ir.Loop): loops = find_driver_loops(routine.body, targets=None) @@ -258,10 +269,12 @@ def test_1d_blocking(tmp_path, frontend, block_size, n): splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size) loops = FindNodes(ir.Loop).visit(routine.ir) - assert len( - loops) == num_loops + 1, f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" - assert len( - routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, f"Total number of variables after loop splitting is: {len(routine.variable_map)} but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" + assert len(loops) == num_loops + 1, \ + f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" + assert len(routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, ( + f"Total number of variables after loop splitting is: {len(routine.variable_map)} " + f"but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" + ) num_vars = len(routine.variable_map) blocking_indices = ['i'] @@ -308,7 +321,7 @@ 
def test_1d_blocking_multi_intent(tmp_path, frontend, block_size, n): """ routine = Subroutine.from_source(fcode, frontend=frontend) loops = FindNodes(ir.Loop).visit(routine.ir) - with pragmas_attached(routine, Loop): + with pragmas_attached(routine, ir.Loop): loops = find_driver_loops(routine.body, targets=None) @@ -316,11 +329,12 @@ def test_1d_blocking_multi_intent(tmp_path, frontend, block_size, n): num_vars = len(routine.variable_map) splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size) loops = FindNodes(ir.Loop).visit(routine.ir) - assert len( - loops) == num_loops + 1, f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" - assert len( - routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, f"Total number of variables after loop splitting is: {len(routine.variable_map)} but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" - + assert len(loops) == num_loops + 1, \ + f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" + assert len(routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, ( + f"Total number of variables after loop splitting is: {len(routine.variable_map)} " + f"but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" + ) num_vars = len(routine.variable_map) blocking_indices = ['i'] @@ -371,16 +385,17 @@ def test_2d_blocking(tmp_path, frontend, block_size, n): loops = FindNodes(ir.Loop).visit(routine.ir) num_loops = len(loops) num_vars = len(routine.variable_map) - with pragmas_attached(routine, Loop): - loops = find_driver_loops(routine.body, - targets=None) + with pragmas_attached(routine, ir.Loop): + loops = find_driver_loops(routine.body, targets=None) splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size) loops = FindNodes(ir.Loop).visit(routine.ir) - assert len( - loops) == num_loops + 1, f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" - assert len( - 
routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, f"Total number of variables after loop splitting is: {len(routine.variable_map)} but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" + assert len(loops) == num_loops + 1, \ + f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" + assert len(routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, ( + f"Total number of variables after loop splitting is: {len(routine.variable_map)} " + f"but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" + ) num_vars = len(routine.variable_map) blocking_indices = ['i'] @@ -406,7 +421,6 @@ def test_2d_blocking(tmp_path, frontend, block_size, n): clean_test(filepath) - @pytest.mark.parametrize('frontend', available_frontends()) @pytest.mark.parametrize('block_size', [117]) @pytest.mark.parametrize('n', [500]) @@ -431,16 +445,17 @@ def test_3d_blocking(tmp_path, frontend, block_size, n): loops = FindNodes(ir.Loop).visit(routine.ir) num_loops = len(loops) num_vars = len(routine.variable_map) - with pragmas_attached(routine, Loop): - loops = find_driver_loops(routine.body, - targets=None) + with pragmas_attached(routine, ir.Loop): + loops = find_driver_loops(routine.body, targets=None) splitting_vars, inner_loop, outer_loop = split_loop(routine, loops[0], block_size) loops = FindNodes(ir.Loop).visit(routine.ir) - assert len( - loops) == num_loops + 1, f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" - assert len( - routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, f"Total number of variables after loop splitting is: {len(routine.variable_map)} but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" + assert len(loops) == num_loops + 1, \ + f"Total number of loops transformation is: {len(loops)} but expected {num_loops + 1}" + assert len(routine.variable_map) == num_vars + LOKI_LOOP_SLIT_VAR_ADDITION, ( + f"Total number of variables after loop splitting is: 
{len(routine.variable_map)} " + f"but expected {num_vars + LOKI_LOOP_SLIT_VAR_ADDITION}" + ) num_vars = len(routine.variable_map) blocking_indices = ['i'] diff --git a/loki/transformations/tests/test_utilities.py b/loki/transformations/tests/test_utilities.py index 5ccfcaa99..ff27b4ea8 100644 --- a/loki/transformations/tests/test_utilities.py +++ b/loki/transformations/tests/test_utilities.py @@ -394,7 +394,7 @@ def test_transform_utilites_get_loop_bounds(frontend, tmp_path): y = Dimension(name='y', size='n', index='i', bounds=('a', 'b')) z = Dimension(name='y', size='n', index='i', bounds=('dim%a', 'dim%b')) - start, end = get_loop_bounds(routine, x) + start, end = get_loop_bounds(routine, x) # pylint: disable=unbalanced-tuple-unpacking assert isinstance(start, sym.Scalar) assert start.type.dtype == BasicType.INTEGER assert start.type.intent == 'in' @@ -403,10 +403,10 @@ def test_transform_utilites_get_loop_bounds(frontend, tmp_path): assert end.type.intent == 'in' with pytest.raises(RuntimeError): - _, _ = get_loop_bounds(routine, y) + _, _ = get_loop_bounds(routine, y) # pylint: disable=unbalanced-tuple-unpacking # Test type-bound symbol resolution - start, end = get_loop_bounds(routine, z) + start, end = get_loop_bounds(routine, z) # pylint: disable=unbalanced-tuple-unpacking assert isinstance(start, sym.Scalar) assert start.type.dtype == BasicType.INTEGER assert start.type.kind == '8' diff --git a/loki/transformations/transpile/tests/__init__.py b/loki/transformations/transpile/tests/__init__.py new file mode 100644 index 000000000..538bb4e73 --- /dev/null +++ b/loki/transformations/transpile/tests/__init__.py @@ -0,0 +1,6 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. diff --git a/loki/transformations/transpile/tests/test_sdfg.py b/loki/transformations/transpile/tests/test_sdfg.py index af568f449..d214db008 100644 --- a/loki/transformations/transpile/tests/test_sdfg.py +++ b/loki/transformations/transpile/tests/test_sdfg.py @@ -7,7 +7,6 @@ import importlib import itertools -from shutil import rmtree from pathlib import Path import numpy as np import pytest