diff --git a/loki/transformations/__init__.py b/loki/transformations/__init__.py index 4998f8fce..7d6fe4176 100644 --- a/loki/transformations/__init__.py +++ b/loki/transformations/__init__.py @@ -18,6 +18,7 @@ from loki.transformations.data_offload import * # noqa from loki.transformations.drhook import * # noqa from loki.transformations.extract import * # noqa +from loki.transformations.field_api import * # noqa from loki.transformations.hoist_variables import * # noqa from loki.transformations.idempotence import * # noqa from loki.transformations.inline import * # noqa diff --git a/loki/transformations/data_offload/__init__.py b/loki/transformations/data_offload/__init__.py index b6046c746..602f83226 100644 --- a/loki/transformations/data_offload/__init__.py +++ b/loki/transformations/data_offload/__init__.py @@ -8,6 +8,6 @@ Sub-package providing data offload transformations. """ -from loki.transformations.data_offload.field_api import * # noqa +from loki.transformations.data_offload.field_offload import * # noqa from loki.transformations.data_offload.global_var import * # noqa from loki.transformations.data_offload.offload import * # noqa diff --git a/loki/transformations/data_offload/field_api.py b/loki/transformations/data_offload/field_api.py deleted file mode 100644 index 00d9de4f4..000000000 --- a/loki/transformations/data_offload/field_api.py +++ /dev/null @@ -1,253 +0,0 @@ -# (C) Copyright 2018- ECMWF. -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. 
- -from itertools import chain - -from loki.batch import Transformation -from loki.expression import Array, symbols as sym -from loki.ir import ( - FindNodes, PragmaRegion, CallStatement, - Transformer, pragma_regions_attached, - SubstituteExpressions -) -from loki.logging import warning, error -from loki.tools import as_tuple -from loki.types import BasicType - -from loki.transformations.data_offload.offload import DataOffloadTransformation -from loki.transformations.parallel import ( - FieldAPITransferType, field_get_device_data, field_sync_host, - remove_field_api_view_updates -) - - -__all__ = ['FieldOffloadTransformation'] - - -def find_target_calls(region, targets): - """ - Returns a list of all calls to targets inside the region. - - Parameters - ---------- - :region: :any:`PragmaRegion` - :targets: collection of :any:`Subroutine` - Iterable object of subroutines or functions called - :returns: list of :any:`CallStatement` - """ - calls = FindNodes(CallStatement).visit(region) - calls = [c for c in calls if str(c.name).lower() in targets] - return calls - - -class FieldOffloadTransformation(Transformation): - """ - - Transformation to offload arrays owned by Field API fields to the device. **This transformation is IFS specific.** - - The transformation assumes that fields are wrapped in derived types specified in - ``field_group_types`` and will only offload arrays that are members of such derived types. - In the process this transformation removes calls to Field API ``update_view`` and adds - declarations for the device pointers to the driver subroutine. - - The transformation acts on ``!$loki data`` regions and offloads all :any:`Array` - symbols that satisfy the following conditions: - - 1. The array is a member of an object that is of type specified in ``field_group_types``. - - 2. The array is passed as a parameter to at least one of the kernel targets passed to ``transform_subroutine``. 
- - Parameters - ---------- - devptr_prefix: str, optional - The prefix of device pointers added by this transformation (defaults to ``'loki_devptr_'``). - field_group_types: list or tuple of str, optional - Names of the field group types with members that may be offloaded (defaults to ``['']``). - offload_index: str, optional - Names of index variable to inject in the outmost dimension of offloaded arrays in the kernel - calls (defaults to ``'IBL'``). - """ - - class FieldPointerMap: - """ - Helper class to :any:`FieldOffloadTransformation` that is used to store arrays passed to - target kernel calls and the corresponding device pointers added by the transformation. - The pointer/array variable pairs are exposed through the class properties, based on - the intent of the kernel argument. - """ - def __init__(self, devptrs, inargs, inoutargs, outargs): - self.inargs = inargs - self.inoutargs = inoutargs - self.outargs = outargs - self.devptrs = devptrs - - - @property - def in_pairs(self): - """ - Iterator that yields array/pointer pairs for kernel arguments of intent(in). - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. - """ - for i, inarg in enumerate(self.inargs): - yield inarg, self.devptrs[i] - - @property - def inout_pairs(self): - """ - Iterator that yields array/pointer pairs for arguments with intent(inout). - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. - """ - start = len(self.inargs) - for i, inoutarg in enumerate(self.inoutargs): - yield inoutarg, self.devptrs[i+start] - - @property - def out_pairs(self): - """ - Iterator that yields array/pointer pairs for arguments with intent(out) - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. 
- """ - - start = len(self.inargs)+len(self.inoutargs) - for i, outarg in enumerate(self.outargs): - yield outarg, self.devptrs[i+start] - - - def __init__(self, devptr_prefix=None, field_group_types=None, offload_index=None): - self.deviceptr_prefix = 'loki_devptr_' if devptr_prefix is None else devptr_prefix - field_group_types = [''] if field_group_types is None else field_group_types - self.field_group_types = tuple(typename.lower() for typename in field_group_types) - self.offload_index = 'IBL' if offload_index is None else offload_index - - def transform_subroutine(self, routine, **kwargs): - role = kwargs['role'] - targets = as_tuple(kwargs.get('targets'), (None)) - if role == 'driver': - self.process_driver(routine, targets) - - def process_driver(self, driver, targets): - remove_field_api_view_updates(driver, self.field_group_types) - with pragma_regions_attached(driver): - for region in FindNodes(PragmaRegion).visit(driver.body): - # Only work on active `!$loki data` regions - if not DataOffloadTransformation._is_active_loki_data_region(region, targets): - continue - kernel_calls = find_target_calls(region, targets) - offload_variables = self.find_offload_variables(driver, kernel_calls) - device_ptrs = self._declare_device_ptrs(driver, offload_variables) - offload_map = self.FieldPointerMap(device_ptrs, *offload_variables) - self._add_field_offload_calls(driver, region, offload_map) - self._replace_kernel_args(driver, kernel_calls, offload_map) - - def find_offload_variables(self, driver, calls): - inargs = () - inoutargs = () - outargs = () - - for call in calls: - if call.routine is BasicType.DEFERRED: - error(f'[Loki] Data offload: Routine {driver.name} has not been enriched ' + - f'in {str(call.name).lower()}') - raise RuntimeError - for param, arg in call.arg_iter(): - if not isinstance(param, Array): - continue - try: - parent = arg.parent - if parent.type.dtype.name.lower() not in self.field_group_types: - warning(f'[Loki] Data offload: The parent 
object {parent.name} of type ' + - f'{parent.type.dtype} is not in the list of field wrapper types') - continue - except AttributeError: - warning(f'[Loki] Data offload: Raw array object {arg.name} encountered in' - + f' {driver.name} that is not wrapped by a Field API object') - continue - - if param.type.intent.lower() == 'in': - inargs += (arg, ) - if param.type.intent.lower() == 'inout': - inoutargs += (arg, ) - if param.type.intent.lower() == 'out': - outargs += (arg, ) - - inoutargs += tuple(v for v in inargs if v in outargs) - inargs = tuple(v for v in inargs if v not in inoutargs) - outargs = tuple(v for v in outargs if v not in inoutargs) - - inargs = tuple(set(inargs)) - inoutargs = tuple(set(inoutargs)) - outargs = tuple(set(outargs)) - return inargs, inoutargs, outargs - - - def _declare_device_ptrs(self, driver, offload_variables): - device_ptrs = tuple(self._devptr_from_array(driver, a) for a in chain(*offload_variables)) - driver.variables += device_ptrs - return device_ptrs - - def _devptr_from_array(self, driver, a: sym.Array): - """ - Returns a contiguous pointer :any:`Variable` with types matching the array a - """ - shape = (sym.RangeIndex((None, None)),) * (len(a.shape)+1) - devptr_type = a.type.clone(pointer=True, contiguous=True, shape=shape, intent=None) - base_name = a.name if a.parent is None else '_'.join(a.name.split('%')) - devptr_name = self.deviceptr_prefix + base_name - if devptr_name in driver.variable_map: - warning(f'[Loki] Data offload: The routine {driver.name} already has a ' + - f'variable named {devptr_name}') - devptr = sym.Variable(name=devptr_name, type=devptr_type, dimensions=shape) - return devptr - - def _add_field_offload_calls(self, driver, region, offload_map): - host_to_device = tuple(field_get_device_data(self._get_field_ptr_from_view(inarg), devptr, - FieldAPITransferType.READ_ONLY, driver) for inarg, devptr in offload_map.in_pairs) - host_to_device += 
tuple(field_get_device_data(self._get_field_ptr_from_view(inarg), devptr, - FieldAPITransferType.READ_WRITE, driver) for inarg, devptr in offload_map.inout_pairs) - host_to_device += tuple(field_get_device_data(self._get_field_ptr_from_view(inarg), devptr, - FieldAPITransferType.READ_WRITE, driver) for inarg, devptr in offload_map.out_pairs) - device_to_host = tuple(field_sync_host(self._get_field_ptr_from_view(inarg), driver) - for inarg, _ in chain(offload_map.inout_pairs, offload_map.out_pairs)) - update_map = {region: host_to_device + (region,) + device_to_host} - Transformer(update_map, inplace=True).visit(driver.body) - - def _get_field_ptr_from_view(self, field_view): - type_chain = field_view.name.split('%') - field_type_name = 'F_' + type_chain[-1] - return field_view.parent.get_derived_type_member(field_type_name) - - def _replace_kernel_args(self, driver, kernel_calls, offload_map): - change_map = {} - offload_idx_expr = driver.variable_map[self.offload_index] - for arg, devptr in chain(offload_map.in_pairs, offload_map.inout_pairs, offload_map.out_pairs): - if len(arg.dimensions) != 0: - dims = arg.dimensions + (offload_idx_expr,) - else: - dims = (sym.RangeIndex((None, None)),) * (len(devptr.shape)-1) + (offload_idx_expr,) - change_map[arg] = devptr.clone(dimensions=dims) - - arg_transformer = SubstituteExpressions(change_map, inplace=True) - for call in kernel_calls: - arg_transformer.visit(call) diff --git a/loki/transformations/data_offload/field_offload.py b/loki/transformations/data_offload/field_offload.py new file mode 100644 index 000000000..3d6c3a8f1 --- /dev/null +++ b/loki/transformations/data_offload/field_offload.py @@ -0,0 +1,185 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +from loki.analyse import dataflow_analysis_attached +from loki.batch import Transformation +from loki.expression import Array, symbols as sym +from loki.ir import ( + nodes as ir, FindNodes, FindVariables, Transformer, + SubstituteExpressions, pragma_regions_attached, is_loki_pragma +) +from loki.logging import warning, error +from loki.types import BasicType + +from loki.transformations.field_api import FieldPointerMap +from loki.transformations.parallel import remove_field_api_view_updates + + +__all__ = [ + 'FieldOffloadTransformation', 'find_offload_variables', + 'add_field_offload_calls', 'replace_kernel_args' +] + + +class FieldOffloadTransformation(Transformation): + """ + + Transformation to offload arrays owned by Field API fields to the device. + + **This transformation is IFS specific.** + + The transformation assumes that fields are wrapped in derived types specified in + ``field_group_types`` and will only offload arrays that are members of such derived types. + In the process this transformation removes calls to Field API ``update_view`` and adds + declarations for the device pointers to the driver subroutine. + + The transformation acts on ``!$loki data`` regions and offloads all :any:`Array` + symbols that satisfy the following conditions: + + 1. The array is a member of an object that is of type specified in ``field_group_types``. + + 2. The array is passed as a parameter to at least one of the kernel targets passed to ``transform_subroutine``. + + Parameters + ---------- + devptr_prefix: str, optional + The prefix of device pointers added by this transformation (defaults to ``'loki_devptr_'``). + field_group_types: list or tuple of str, optional + Names of the field group types with members that may be offloaded (defaults to ``['']``). 
+ offload_index: str, optional + Name of the index variable to inject in the outermost dimension of offloaded arrays in the kernel + calls (defaults to ``'IBL'``). + """ + + def __init__(self, devptr_prefix=None, field_group_types=None, offload_index=None): + self.deviceptr_prefix = 'loki_devptr_' if devptr_prefix is None else devptr_prefix + field_group_types = [''] if field_group_types is None else field_group_types + self.field_group_types = tuple(typename.lower() for typename in field_group_types) + self.offload_index = 'IBL' if offload_index is None else offload_index + + def transform_subroutine(self, routine, **kwargs): + role = kwargs['role'] + if role == 'driver': + self.process_driver(routine) + + def process_driver(self, driver): + + # Remove the Field-API view-pointer boilerplate + remove_field_api_view_updates(driver, self.field_group_types) + + with pragma_regions_attached(driver): + with dataflow_analysis_attached(driver): + for region in FindNodes(ir.PragmaRegion).visit(driver.body): + # Only work on `!$loki data` regions + if not region.pragma or not is_loki_pragma(region.pragma, starts_with='data'): + continue + + # Determine the array variables for generating Field API offload + offload_variables = find_offload_variables(driver, region, self.field_group_types) + offload_map = FieldPointerMap( + *offload_variables, scope=driver, ptr_prefix=self.deviceptr_prefix + ) + + # Inject declarations and offload API calls into driver region + declare_device_ptrs(driver, deviceptrs=offload_map.dataptrs) + add_field_offload_calls(driver, region, offload_map) + replace_kernel_args(driver, offload_map, self.offload_index) + + +def find_offload_variables(driver, region, field_group_types): + """ + Finds the sets of array variable symbols for which we can generate + Field API offload code. + + Note + ---- + This method requires Loki's dataflow analysis to be run on the + :data:`region` via :meth:`dataflow_analysis_attached`. 
+ + Parameters + ---------- + region : :any:`PragmaRegion` + Code region object for which to determine offload variables + field_group_types : list or tuple of str + Names of the field group types with members that may be offloaded. + + Returns + ------- + (inargs, inoutargs, outargs) : (tuple, tuple, tuple) + The sets of array symbols split into three tuples according to access type. + """ + + # Use dataflow analysis to find in, out and inout variables of that region + inargs = region.uses_symbols - region.defines_symbols + inoutargs = region.uses_symbols & region.defines_symbols + outargs = region.defines_symbols - region.uses_symbols + + # Filter out relevant array symbols + inargs = tuple(a for a in inargs if isinstance(a, sym.Array) and a.parent) + inoutargs = tuple(a for a in inoutargs if isinstance(a, sym.Array) and a.parent) + outargs = tuple(a for a in outargs if isinstance(a, sym.Array) and a.parent) + + # Do some sanity checking and warning for enclosed calls + for call in FindNodes(ir.CallStatement).visit(region): + if call.routine is BasicType.DEFERRED: + error(f'[Loki] Data offload: Routine {driver.name} has not been enriched ' + + f'in {str(call.name).lower()}') + raise RuntimeError + for param, arg in call.arg_iter(): + if not isinstance(param, Array): + continue + try: + parent = arg.parent + if parent.type.dtype.name.lower() not in field_group_types: + warning(f'[Loki] Data offload: The parent object {parent.name} of type ' + + f'{parent.type.dtype} is not in the list of field wrapper types') + continue + except AttributeError: + warning(f'[Loki] Data offload: Raw array object {arg.name} encountered in' + + f' {driver.name} that is not wrapped by a Field API object') + continue + + return inargs, inoutargs, outargs + + +def declare_device_ptrs(driver, deviceptrs): + """ + Add a set of data pointer declarations to a given :any:`Subroutine` + """ + for devptr in deviceptrs: + if devptr.name in driver.variable_map: + 
warning(f'[Loki] Data offload: The routine {driver.name} already has a ' + + f'variable named {devptr.name}') + + driver.variables += deviceptrs + + +def add_field_offload_calls(driver, region, offload_map): + + update_map = { + region: offload_map.host_to_device_calls + (region,) + offload_map.sync_host_calls + } + Transformer(update_map, inplace=True).visit(driver.body) + + +def replace_kernel_args(driver, offload_map, offload_index): + change_map = {} + offload_idx_expr = driver.variable_map[offload_index] + + args = offload_map.args + for arg in FindVariables().visit(driver.body): + if arg.name not in args: + continue + + dataptr = offload_map.dataptr_from_array(arg) + if len(arg.dimensions) != 0: + dims = arg.dimensions + (offload_idx_expr,) + else: + dims = (sym.RangeIndex((None, None)),) * (len(dataptr.shape)-1) + (offload_idx_expr,) + change_map[arg] = dataptr.clone(dimensions=dims) + + driver.body = SubstituteExpressions(change_map, inplace=True).visit(driver.body) diff --git a/loki/transformations/data_offload/tests/test_field_api.py b/loki/transformations/data_offload/tests/test_field_offload.py similarity index 76% rename from loki/transformations/data_offload/tests/test_field_api.py rename to loki/transformations/data_offload/tests/test_field_offload.py index aaddbc543..f1bdb3608 100644 --- a/loki/transformations/data_offload/tests/test_field_api.py +++ b/loki/transformations/data_offload/tests/test_field_offload.py @@ -7,12 +7,12 @@ import pytest -from loki import Sourcefile -from loki.frontend import available_frontends -from loki.logging import log_levels -from loki.ir import FindNodes, Pragma, CallStatement +from loki import Sourcefile, Module import loki.expression.symbols as sym -from loki.module import Module +from loki.frontend import available_frontends, OMNI +from loki.ir import nodes as ir, FindNodes, Pragma, CallStatement +from loki.logging import log_levels + from loki.transformations import FieldOffloadTransformation @@ -25,6 +25,7 @@ def 
fixture_parkind_mod(tmp_path, frontend): """ return Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + @pytest.fixture(name="field_module") def fixture_field_module(tmp_path, frontend): fcode = """ @@ -54,20 +55,22 @@ def fixture_field_module(tmp_path, frontend): end subroutine end module """ - return Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + return Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) -@pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload(frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument +@pytest.fixture(name="state_module") +def fixture_state_module(tmp_path, parkind_mod, field_module, frontend): # pylint: disable=unused-argument fcode = """ - module driver_mod + module state_mod use parkind1, only: jprb use field_module, only: field_2rb, field_3rb implicit none type state_type real(kind=jprb), dimension(10,10), pointer :: a, b, c + real(kind=jprb), pointer :: d(10,10,10) class(field_3rb), pointer :: f_a, f_b, f_c + class(field_4rb), pointer :: f_d contains procedure :: update_view => state_update_view end type state_type @@ -78,6 +81,21 @@ def test_field_offload(frontend, parkind_mod, field_module, tmp_path): # pylint class(state_type), intent(in) :: self integer, intent(in) :: idx end subroutine + end module state_mod +""" + return Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload(frontend, state_module, tmp_path): + fcode = """ + module driver_mod + use state_mod, only: state_type + use parkind1, only: jprb + use field_module, only: field_2rb, field_3rb + implicit none + + contains subroutine kernel_routine(nlon, nlev, a, b, c) integer, intent(in) :: nlon, nlev @@ -109,7 +127,9 @@ def test_field_offload(frontend, parkind_mod, field_module, tmp_path): # pylint end subroutine driver_routine end module driver_mod """ - driver_mod = 
Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver_mod = Module.from_source( + fcode, frontend=frontend, definitions=state_module, xmods=[tmp_path] + ) driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, @@ -215,27 +235,15 @@ def test_field_offload_slices(frontend, parkind_mod, field_module, tmp_path): # @pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload_multiple_calls(frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument +def test_field_offload_multiple_calls(frontend, state_module, tmp_path): fcode = """ module driver_mod use parkind1, only: jprb - use field_module, only: field_2rb, field_3rb + use state_mod, only: state_type implicit none - type state_type - real(kind=jprb), dimension(10,10), pointer :: a, b, c - class(field_3rb), pointer :: f_a, f_b, f_c - contains - procedure :: update_view => state_update_view - end type state_type - contains - subroutine state_update_view(self, idx) - class(state_type), intent(in) :: self - integer, intent(in) :: idx - end subroutine - subroutine kernel_routine(nlon, nlev, a, b, c) integer, intent(in) :: nlon, nlev real(kind=jprb), intent(in) :: a(nlon,nlev) @@ -270,7 +278,9 @@ def test_field_offload_multiple_calls(frontend, parkind_mod, field_module, tmp_p end module driver_mod """ - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver_mod = Module.from_source( + fcode, frontend=frontend, definitions=state_module, xmods=[tmp_path] + ) driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, @@ -306,7 +316,7 @@ def test_field_offload_multiple_calls(frontend, parkind_mod, field_module, tmp_p @pytest.mark.parametrize('frontend', available_frontends()) -def 
test_field_offload_no_targets(frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument +def test_field_offload_unknown_kernel(caplog, frontend, state_module, tmp_path): fother = """ module another_module implicit none @@ -320,122 +330,16 @@ def test_field_offload_no_targets(frontend, parkind_mod, field_module, tmp_path) end subroutine end module """ - fcode = """ - module driver_mod - use parkind1, only: jprb - use field_module, only: field_2rb, field_3rb - use another_module, only: another_kernel - - implicit none - - type state_type - real(kind=jprb), dimension(10,10), pointer :: a, b, c - class(field_3rb), pointer :: f_a, f_b, f_c - contains - procedure :: update_view => state_update_view - end type state_type - - contains - - subroutine state_update_view(self, idx) - class(state_type), intent(in) :: self - integer, intent(in) :: idx - end subroutine - - subroutine kernel_routine(nlon, nlev, a, b, c) - integer, intent(in) :: nlon, nlev - real(kind=jprb), intent(in) :: a(nlon,nlev) - real(kind=jprb), intent(inout) :: b(nlon,nlev) - real(kind=jprb), intent(out) :: c(nlon,nlev) - integer :: i, j - - do j=1, nlon - do i=1, nlev - b(i,j) = a(i,j) + 0.1 - c(i,j) = 0.1 - end do - end do - end subroutine kernel_routine - - subroutine driver_routine(nlon, nlev, state) - integer, intent(in) :: nlon, nlev - type(state_type), intent(inout) :: state - integer :: i - - !$loki data - do i=1,nlev - call state%update_view(i) - call another_kernel(nlon, state%a, state%b, state%c) - end do - !$loki end data - - end subroutine driver_routine - end module driver_mod - """ - - Sourcefile.from_source(fother, frontend=frontend, xmods=[tmp_path]) - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] - driver = driver_mod['driver_routine'] - deviceptr_prefix = 'loki_devptr_prefix_' - driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, - offload_index='i', - field_group_types=['state_type']), - 
role='driver', - targets=['kernel_routine']) - - calls = FindNodes(CallStatement).visit(driver.body) - assert not any(c for c in calls if c.name=='kernel_routine') - - # verify that no field offloads are generated - in_calls = [c for c in calls if 'get_device_data_rdonly' in c.name.name.lower()] - assert len(in_calls) == 0 - inout_calls = [c for c in calls if 'get_device_data_rdwr' in c.name.name.lower()] - assert len(inout_calls) == 0 - # verify that no field sync host calls are generated - sync_calls = [c for c in calls if 'sync_host_rdwr' in c.name.name.lower()] - assert len(sync_calls) == 0 - - # verify that data offload pragmas remain - pragmas = FindNodes(Pragma).visit(driver.body) - assert len(pragmas) == 2 - assert all(p.keyword=='loki' and p.content==c for p, c in zip(pragmas, ['data', 'end data'])) - -@pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload_unknown_kernel(caplog, frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument - fother = """ - module another_module - implicit none - contains - subroutine another_kernel(nlon, nlev, a, b, c) - integer, intent(in) :: nlon, nlev - real, intent(in) :: a(nlon,nlev) - real, intent(inout) :: b(nlon,nlev) - real, intent(out) :: c(nlon,nlev) - integer :: i, j - end subroutine - end module - """ fcode = """ module driver_mod use parkind1, only: jprb + use state_mod, only: state_type use another_module, only: another_kernel implicit none - type state_type - real(kind=jprb), dimension(10,10), pointer :: a, b, c - class(field_3rb), pointer :: f_a, f_b, f_c - contains - procedure :: update_view => state_update_view - end type state_type - contains - subroutine state_update_view(self, idx) - class(state_type), intent(in) :: self - integer, intent(in) :: idx - end subroutine - subroutine driver_routine(nlon, nlev, state) integer, intent(in) :: nlon, nlev type(state_type), intent(inout) :: state @@ -453,7 +357,9 @@ def test_field_offload_unknown_kernel(caplog, 
frontend, parkind_mod, field_modul """ Sourcefile.from_source(fother, frontend=frontend, xmods=[tmp_path]) - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver_mod = Module.from_source( + fcode, frontend=frontend, definitions=state_module, xmods=[tmp_path] + ) driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' @@ -470,7 +376,7 @@ def test_field_offload_unknown_kernel(caplog, frontend, parkind_mod, field_modul @pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload_warnings(caplog, frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument +def test_field_offload_warnings(caplog, frontend, state_module, tmp_path): fother_state = """ module state_type_mod implicit none @@ -488,6 +394,7 @@ def test_field_offload_warnings(caplog, frontend, parkind_mod, field_module, tmp end subroutine end module """ + fother_mod= """ module another_module implicit none @@ -501,29 +408,18 @@ def test_field_offload_warnings(caplog, frontend, parkind_mod, field_module, tmp end subroutine end module """ + fcode = """ module driver_mod use state_type_mod, only: state_type2 use parkind1, only: jprb - use field_module, only: field_2rb, field_3rb + use state_mod, only: state_type use another_module, only: another_kernel implicit none - type state_type - real(kind=jprb), dimension(10,10), pointer :: a, b, c - class(field_3rb), pointer :: f_a, f_b, f_c - contains - procedure :: update_view => state_update_view - end type state_type - contains - subroutine state_update_view(self, idx) - class(state_type), intent(in) :: self - integer, intent(in) :: idx - end subroutine - subroutine kernel_routine(nlon, nlev, a, b, c) integer, intent(in) :: nlon, nlev real(kind=jprb), intent(in) :: a(nlon,nlev) @@ -560,7 +456,9 @@ def test_field_offload_warnings(caplog, frontend, parkind_mod, field_module, tmp """ Sourcefile.from_source(fother_state, frontend=frontend, 
xmods=[tmp_path]) Sourcefile.from_source(fother_mod, frontend=frontend, xmods=[tmp_path]) - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver_mod = Sourcefile.from_source( + fcode, frontend=frontend, definitions=state_module, xmods=[tmp_path] + )['driver_mod'] driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' @@ -578,3 +476,135 @@ def test_field_offload_warnings(caplog, frontend, parkind_mod, field_module, tmp ' list of field wrapper types') in caplog.records[1].message assert ('[Loki] Data offload: The routine driver_routine already has a' + ' variable named loki_devptr_prefix_state_b') in caplog.records[2].message + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload_aliasing(frontend, state_module, tmp_path): + fcode = """ + module driver_mod + use state_mod, only: state_type + use parkind1, only: jprb + implicit none + + contains + + subroutine kernel_routine(nlon, nlev, a1, a2, a3) + integer, intent(in) :: nlon, nlev + real(kind=jprb), intent(in) :: a1(nlon) + real(kind=jprb), intent(inout) :: a2(nlon) + real(kind=jprb), intent(out) :: a3(nlon) + integer :: i + + do i=1, nlon + a1(i) = a2(i) + 0.1 + a3(i) = 0.1 + end do + end subroutine kernel_routine + + subroutine driver_routine(nlon, nlev, state) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + integer :: i + + !$loki data + do i=1,nlev + call state%update_view(i) + call kernel_routine(nlon, nlev, state%a(:,1), state%a(:,2), state%a(:,3)) + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + driver_mod = Module.from_source( + fcode, frontend=frontend, definitions=state_module, xmods=[tmp_path] + ) + driver = driver_mod['driver_routine'] + + field_offload = FieldOffloadTransformation( + devptr_prefix='', offload_index='i', field_group_types=['state_type'] + ) + driver.apply(field_offload, role='driver', 
targets=['kernel_routine']) + + calls = FindNodes(ir.CallStatement).visit(driver.body) + kernel_call = next(c for c in calls if c.name=='kernel_routine') + + assert 'state_a' in driver.variable_map + assert driver.variable_map['state_a'].type.shape == (':', ':', ':') + + assert kernel_call.arguments[:2] == ('nlon', 'nlev') + assert kernel_call.arguments[2] == 'state_a(:,1,i)' + assert kernel_call.arguments[3] == 'state_a(:,2,i)' + assert kernel_call.arguments[4] == 'state_a(:,3,i)' + + assert len(calls) == 3 + assert calls[0].name == 'state%f_a%get_device_data_rdwr' + assert calls[0].arguments == ('state_a',) + assert calls[1] == kernel_call + assert calls[2].name == 'state%f_a%sync_host_rdwr' + assert calls[2].arguments == () + + decls = FindNodes(ir.VariableDeclaration).visit(driver.spec) + assert len(decls) == (5 if frontend == OMNI else 4) + assert decls[-1].symbols == ('state_a(:,:,:)',) + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload_driver_compute(frontend, state_module, tmp_path): + fcode = """ + module driver_mod + use state_mod, only: state_type + use parkind1, only: jprb + implicit none + + contains + + subroutine driver_routine(nlon, nlev, state) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + integer :: i, ibl + + !$loki data + do ibl=1,nlev + call state%update_view(ibl) + + do i=1, nlon + state%a(i, 1) = state%b(i, 1) + 0.1 + state%a(i, 2) = state%a(i, 1) + end do + + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + driver_mod = Module.from_source( + fcode, frontend=frontend, definitions=state_module, xmods=[tmp_path] + ) + driver = driver_mod['driver_routine'] + + calls = FindNodes(ir.CallStatement).visit(driver.body) + assert len(calls) == 1 + assert calls[0].name == 'state%update_view' + + field_offload = FieldOffloadTransformation( + devptr_prefix='', offload_index='ibl', field_group_types=['state_type'] + ) + 
driver.apply(field_offload, role='driver', targets=['kernel_routine']) + + calls = FindNodes(ir.CallStatement).visit(driver.body) + assert len(calls) == 3 + assert calls[0].name == 'state%f_b%get_device_data_rdonly' + assert calls[0].arguments == ('state_b',) + assert calls[1].name == 'state%f_a%get_device_data_rdwr' + assert calls[1].arguments == ('state_a',) + assert calls[2].name == 'state%f_a%sync_host_rdwr' + assert calls[2].arguments == () + + assigns = FindNodes(ir.Assignment).visit(driver.body) + assert len(assigns) == 2 + assert assigns[0].lhs == 'state_a(i,1,ibl)' + assert assigns[0].rhs == 'state_b(i,1,ibl) + 0.1' + assert assigns[1].lhs == 'state_a(i,2,ibl)' + assert assigns[1].rhs == 'state_a(i,1,ibl)' diff --git a/loki/transformations/field_api.py b/loki/transformations/field_api.py new file mode 100644 index 000000000..58f1d6f1a --- /dev/null +++ b/loki/transformations/field_api.py @@ -0,0 +1,191 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +""" +A set of utility classes for dealing with FIELD API boilerplate in +parallel kernels and offload regions. +""" + +from enum import Enum +from itertools import chain + +from loki.expression import symbols as sym +from loki.ir import nodes as ir +from loki.scope import Scope + + +__all__ = [ + 'FieldAPITransferType', 'FieldPointerMap', 'get_field_type', + 'field_get_device_data', 'field_sync_host' +] + + +class FieldAPITransferType(Enum): + READ_ONLY = 1 + READ_WRITE = 2 + WRITE_ONLY = 3 + + +class FieldPointerMap: + """ + Helper class to map FIELD API pointers to intents and access descriptors. 
+ + This utility is used to store arrays passed to target kernel calls + and easily access corresponding device pointers added by the transformation. + """ + def __init__(self, inargs, inoutargs, outargs, scope, ptr_prefix='loki_ptr_'): + # Ensure no duplication between in/inout/out args + inoutargs += tuple(v for v in inargs if v in outargs) + inargs = tuple(v for v in inargs if v not in inoutargs) + outargs = tuple(v for v in outargs if v not in inoutargs) + + # Filter out duplicates and return as tuple + self.inargs = tuple(dict.fromkeys(a.clone(dimensions=None) for a in inargs)) + self.inoutargs = tuple(dict.fromkeys(a.clone(dimensions=None) for a in inoutargs)) + self.outargs = tuple(dict.fromkeys(a.clone(dimensions=None) for a in outargs)) + + # Filter out duplicates across argument tuples + self.inargs = tuple(a for a in self.inargs if a not in self.inoutargs) + + self.scope = scope + + self.ptr_prefix = ptr_prefix + + def dataptr_from_array(self, a: sym.Array): + """ + Returns a contiguous pointer :any:`Variable` with types matching the array :data:`a`. + """ + shape = (sym.RangeIndex((None, None)),) * (len(a.shape)+1) + dataptr_type = a.type.clone(pointer=True, contiguous=True, shape=shape, intent=None) + base_name = a.name if a.parent is None else '_'.join(a.name.split('%')) + return sym.Variable(name=self.ptr_prefix + base_name, type=dataptr_type, dimensions=shape) + + @staticmethod + def field_ptr_from_view(field_view): + """ + Returns a symbol for the pointer to the corresponding Field object. 
+ """ + type_chain = field_view.name.split('%') + field_type_name = 'F_' + type_chain[-1] + return field_view.parent.get_derived_type_member(field_type_name) + + @property + def args(self): + """ A tuple of all argument symbols, concatanating in/inout/out arguments """ + return tuple(chain(*(self.inargs, self.inoutargs, self.outargs))) + + @property + def dataptrs(self): + """ Create a list of contiguous data pointer symbols """ + return tuple(dict.fromkeys(self.dataptr_from_array(a) for a in self.args)) + + @property + def host_to_device_calls(self): + """ + Returns a tuple of :any:`CallStatement` for host-to-device transfers on fields. + """ + READ_ONLY, READ_WRITE = FieldAPITransferType.READ_ONLY, FieldAPITransferType.READ_WRITE + + host_to_device = tuple(field_get_device_data( + self.field_ptr_from_view(arg), self.dataptr_from_array(arg), READ_ONLY, scope=self.scope + ) for arg in self.inargs) + host_to_device += tuple(field_get_device_data( + self.field_ptr_from_view(arg), self.dataptr_from_array(arg), READ_WRITE, scope=self.scope + ) for arg in self.inoutargs) + host_to_device += tuple(field_get_device_data( + self.field_ptr_from_view(arg), self.dataptr_from_array(arg), READ_WRITE, scope=self.scope + ) for arg in self.outargs) + + return tuple(dict.fromkeys(host_to_device)) + + @property + def sync_host_calls(self): + """ + Returns a tuple of :any:`CallStatement` for host-synchronization transfers on fields. + """ + sync_host = tuple( + field_sync_host(self.field_ptr_from_view(arg), scope=self.scope) for arg in self.inoutargs + ) + sync_host += tuple( + field_sync_host(self.field_ptr_from_view(arg), scope=self.scope) for arg in self.outargs + ) + return tuple(dict.fromkeys(sync_host)) + + +def get_field_type(a: sym.Array) -> sym.DerivedType: + """ + Returns the corresponding FIELD API type for an array. + + This function is IFS specific and assumes that the + type is an array declared with one of the IFS type specifiers, e.g. 
KIND=JPRB + """ + type_map = ["jprb", + "jpit", + "jpis", + "jpim", + "jpib", + "jpia", + "jprt", + "jprs", + "jprm", + "jprd", + "jplm"] + type_name = a.type.kind.name + + assert type_name.lower() in type_map, ('Error array type kind is: ' + f'"{type_name}" which is not a valid IFS type specifier') + rank = len(a.shape) + field_type = sym.DerivedType(name="field_" + str(rank) + type_name[2:4].lower()) + return field_type + + +def field_get_device_data(field_ptr, dev_ptr, transfer_type: FieldAPITransferType, scope: Scope): + """ + Utility function to generate a :any:`CallStatement` corresponding to a Field API + ``GET_DEVICE_DATA`` call. + + Parameters + ---------- + field_ptr: pointer to field object + Pointer to the field to call ``GET_DEVICE_DATA`` from. + dev_ptr: :any:`Array` + Device pointer array + transfer_type: :any:`FieldAPITransferType` + Field API transfer type to determine which ``GET_DEVICE_DATA`` method to call. + scope: :any:`Scope` + Scope of the created :any:`CallStatement` + """ + if not isinstance(transfer_type, FieldAPITransferType): + raise TypeError(f"transfer_type must be of type FieldAPITransferType, but is of type {type(transfer_type)}") + if transfer_type == FieldAPITransferType.READ_ONLY: + suffix = 'RDONLY' + elif transfer_type == FieldAPITransferType.READ_WRITE: + suffix = 'RDWR' + elif transfer_type == FieldAPITransferType.WRITE_ONLY: + suffix = 'WRONLY' + else: + suffix = '' + procedure_name = 'GET_DEVICE_DATA_' + suffix + return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), + arguments=(dev_ptr.clone(dimensions=None),), ) + + +def field_sync_host(field_ptr, scope): + """ + Utility function to generate a :any:`CallStatement` corresponding to a Field API + ``SYNC_HOST`` call. + + Parameters + ---------- + field_ptr: pointer to field object + Pointer to the field to call ``SYNC_HOST`` from. 
+ scope: :any:`Scope` + Scope of the created :any:`CallStatement` + """ + + procedure_name = 'SYNC_HOST_RDWR' + return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), arguments=()) diff --git a/loki/transformations/parallel/__init__.py b/loki/transformations/parallel/__init__.py index 0071a9814..07bcacd7a 100644 --- a/loki/transformations/parallel/__init__.py +++ b/loki/transformations/parallel/__init__.py @@ -11,5 +11,5 @@ """ from loki.transformations.parallel.block_loop import * # noqa -from loki.transformations.parallel.field_api import * # noqa +from loki.transformations.parallel.field_views import * # noqa from loki.transformations.parallel.openmp_region import * # noqa diff --git a/loki/transformations/parallel/field_api.py b/loki/transformations/parallel/field_views.py similarity index 67% rename from loki/transformations/parallel/field_api.py rename to loki/transformations/parallel/field_views.py index 4b5705770..e1e06b491 100644 --- a/loki/transformations/parallel/field_api.py +++ b/loki/transformations/parallel/field_views.py @@ -9,18 +9,16 @@ Transformation utilities to manage and inject FIELD-API boilerplate code. """ -from enum import Enum from loki.expression import symbols as sym from loki.ir import ( nodes as ir, FindNodes, FindVariables, Transformer ) -from loki.scope import Scope from loki.logging import warning from loki.tools import as_tuple + __all__ = [ - 'remove_field_api_view_updates', 'add_field_api_view_updates', 'get_field_type', - 'field_get_device_data', 'field_sync_host', 'FieldAPITransferType' + 'remove_field_api_view_updates', 'add_field_api_view_updates' ] @@ -152,85 +150,3 @@ def visit_Loop(self, loop, **kwargs): # pylint: disable=unused-argument return loop routine.body = InsertFieldAPIViewsTransformer().visit(routine.body, scope=routine) - - -def get_field_type(a: sym.Array) -> sym.DerivedType: - """ - Returns the corresponding FIELD API type for an array. 
- - This function is IFS specific and assumes that the - type is an array declared with one of the IFS type specifiers, e.g. KIND=JPRB - """ - type_map = ["jprb", - "jpit", - "jpis", - "jpim", - "jpib", - "jpia", - "jprt", - "jprs", - "jprm", - "jprd", - "jplm"] - type_name = a.type.kind.name - - assert type_name.lower() in type_map, ('Error array type kind is: ' - f'"{type_name}" which is not a valid IFS type specifier') - rank = len(a.shape) - field_type = sym.DerivedType(name="field_" + str(rank) + type_name[2:4].lower()) - return field_type - - - -class FieldAPITransferType(Enum): - READ_ONLY = 1 - READ_WRITE = 2 - WRITE_ONLY = 3 - - -def field_get_device_data(field_ptr, dev_ptr, transfer_type: FieldAPITransferType, scope: Scope): - """ - Utility function to generate a :any:`CallStatement` corresponding to a Field API - ``GET_DEVICE_DATA`` call. - - Parameters - ---------- - field_ptr: pointer to field object - Pointer to the field to call ``GET_DEVICE_DATA`` from. - dev_ptr: :any:`Array` - Device pointer array - transfer_type: :any:`FieldAPITransferType` - Field API transfer type to determine which ``GET_DEVICE_DATA`` method to call. - scope: :any:`Scope` - Scope of the created :any:`CallStatement` - """ - if not isinstance(transfer_type, FieldAPITransferType): - raise TypeError(f"transfer_type must be of type FieldAPITransferType, but is of type {type(transfer_type)}") - if transfer_type == FieldAPITransferType.READ_ONLY: - suffix = 'RDONLY' - elif transfer_type == FieldAPITransferType.READ_WRITE: - suffix = 'RDWR' - elif transfer_type == FieldAPITransferType.WRITE_ONLY: - suffix = 'WRONLY' - else: - suffix = '' - procedure_name = 'GET_DEVICE_DATA_' + suffix - return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), - arguments=(dev_ptr.clone(dimensions=None),), ) - - -def field_sync_host(field_ptr, scope): - """ - Utility function to generate a :any:`CallStatement` corresponding to a Field API - ``SYNC_HOST`` call. 
- - Parameters - ---------- - field_ptr: pointer to field object - Pointer to the field to call ``SYNC_HOST`` from. - scope: :any:`Scope` - Scope of the created :any:`CallStatement` - """ - - procedure_name = 'SYNC_HOST_RDWR' - return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), arguments=()) diff --git a/loki/transformations/parallel/tests/test_field_api.py b/loki/transformations/parallel/tests/test_field_views.py similarity index 98% rename from loki/transformations/parallel/tests/test_field_api.py rename to loki/transformations/parallel/tests/test_field_views.py index 267c43e91..3e56a19e9 100644 --- a/loki/transformations/parallel/tests/test_field_api.py +++ b/loki/transformations/parallel/tests/test_field_views.py @@ -12,13 +12,16 @@ from loki.ir import nodes as ir, FindNodes from loki.expression import symbols as sym from loki.scope import Scope -from loki.transformations.parallel import ( - remove_field_api_view_updates, add_field_api_view_updates, - get_field_type, field_get_device_data, FieldAPITransferType -) from loki.types import BasicType, SymbolAttributes from loki.logging import WARNING +from loki.transformations.field_api import ( + get_field_type, field_get_device_data, FieldAPITransferType +) +from loki.transformations.parallel import ( + remove_field_api_view_updates, add_field_api_view_updates +) + @pytest.mark.parametrize('frontend', available_frontends( skip=[(OMNI, 'OMNI needs full type definitions for derived types')]