diff --git a/.github/workflows/regression_tests.yml b/.github/workflows/regression_tests.yml index acc6ed2d8..c07a93534 100644 --- a/.github/workflows/regression_tests.yml +++ b/.github/workflows/regression_tests.yml @@ -33,7 +33,7 @@ jobs: with: repository: ecmwf-ifs/dwarf-p-cloudsc path: cloudsc - ref: develop + ref: naml-loki-c-pipeline - name: Clone CLOUDSC2 TL AD uses: actions/checkout@v4 diff --git a/cmake/loki_transform.cmake b/cmake/loki_transform.cmake index 933f4f408..5e73b4d6e 100644 --- a/cmake/loki_transform.cmake +++ b/cmake/loki_transform.cmake @@ -291,26 +291,6 @@ function( loki_transform_target ) list( APPEND _TRANSFORM_OPTIONS CPP ) endif() - if( _PAR_T_INLINE_MEMBERS ) - list( APPEND _TRANSFORM_OPTIONS INLINE_MEMBERS ) - endif() - - if( _PAR_T_RESOLVE_SEQUENCE_ASSOCIATION ) - list( APPEND _TRANSFORM_OPTIONS RESOLVE_SEQUENCE_ASSOCIATION ) - endif() - - if( _PAR_T_DERIVE_ARGUMENT_ARRAY_SHAPE ) - list( APPEND _TRANSFORM_OPTIONS DERIVE_ARGUMENT_ARRAY_SHAPE ) - endif() - - if( _PAR_T_TRIM_VECTOR_SECTIONS ) - list( APPEND _TRANSFORM_OPTIONS TRIM_VECTOR_SECTIONS ) - endif() - - if( _PAR_T_GLOBAL_VAR_OFFLOAD ) - list( APPEND _TRANSFORM_OPTIONS GLOBAL_VAR_OFFLOAD ) - endif() - loki_transform( COMMAND ${_PAR_T_COMMAND} OUTPUT ${LOKI_SOURCES_TO_APPEND} @@ -383,97 +363,6 @@ function( loki_transform_target ) endfunction() -############################################################################## -# .rst: -# -# loki_transform_convert -# ====================== -# -# Deprecated interface to loki-transform.py. Use loki_transform( COMMAND convert ) instead.:: -# -############################################################################## - -function( loki_transform_convert ) - - ecbuild_warn( "\ -loki_transform_convert() is deprecated and will be removed in a future version! -Please use - loki_transform( COMMAND convert [...] ) -or - loki_transform_target( COMMAND convert [...] ). -" - ) - - set( options - CPP DATA_OFFLOAD REMOVE_OPENMP ASSUME_DEVICEPTR GLOBAL_VAR_OFFLOAD - TRIM_VECTOR_SECTIONS REMOVE_DERIVED_ARGS INLINE_MEMBERS - RESOLVE_SEQUENCE_ASSOCIATION DERIVE_ARGUMENT_ARRAY_SHAPE - ) - set( oneValueArgs - MODE DIRECTIVE FRONTEND CONFIG PATH OUTPATH - ) - set( multiValueArgs - OUTPUT DEPENDS INCLUDES HEADERS DEFINITIONS OMNI_INCLUDE XMOD - ) - - cmake_parse_arguments( _PAR "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) - - if( _PAR_UNPARSED_ARGUMENTS ) - ecbuild_critical( "Unknown keywords given to loki_transform_convert(): \"${_PAR_UNPARSED_ARGUMENTS}\"") - endif() - - # - # Rewrite old argument names - # - - # PATH -> SOURCES - list( TRANSFORM ARGV REPLACE "^PATH$" "SOURCES" ) - - # OUTPATH -> BUILDDIR - list( TRANSFORM ARGV REPLACE "^OUTPATH$" "BUILDDIR" ) - - # - # Call loki_transform - # - loki_transform( COMMAND "convert" ${ARGV} ) - -endfunction() - -############################################################################## -# .rst: -# -# loki_transform_transpile -# ======================== -# -# **Removed:** Apply Loki transformation in transpile mode.:: -# -# loki_transform_transpile( -# ) -# -# ..warning:: -# loki_transform_transpile() was removed! -# -# Please use -# loki_transform( COMMAND convert [...] ) -# or -# loki_transform_target( COMMAND convert [...] ). -# -############################################################################## - -function( loki_transform_transpile ) - - ecbuild_critical( "\ -loki_transform_transpile() was removed! -Please use - loki_transform( COMMAND convert [...] ) -or - loki_transform_target( COMMAND convert [...] ). -" - ) - -endfunction() - - ############################################################################## # .rst: # diff --git a/cmake/loki_transform_helpers.cmake b/cmake/loki_transform_helpers.cmake index 07df7b7d7..4f7ac1601 100644 --- a/cmake/loki_transform_helpers.cmake +++ b/cmake/loki_transform_helpers.cmake @@ -84,41 +84,6 @@ macro( _loki_transform_parse_options ) list( APPEND _ARGS --cpp ) endif() - if( _PAR_DATA_OFFLOAD ) - list( APPEND _ARGS --data-offload ) - endif() - - if( _PAR_REMOVE_OPENMP ) - list( APPEND _ARGS --remove-openmp ) - endif() - - if( _PAR_ASSUME_DEVICEPTR ) - list( APPEND _ARGS --assume-deviceptr ) - endif() - - if( _PAR_GLOBAL_VAR_OFFLOAD ) - list( APPEND _ARGS --global-var-offload ) - endif() - - if( _PAR_TRIM_VECTOR_SECTIONS ) - list( APPEND _ARGS --trim-vector-sections ) - endif() - - if( _PAR_REMOVE_DERIVED_ARGS ) - list( APPEND _ARGS --remove-derived-args ) - endif() - - if( _PAR_INLINE_MEMBERS ) - list( APPEND _ARGS --inline-members ) - endif() - - if( _PAR_RESOLVE_SEQUENCE_ASSOCIATION ) - list( APPEND _ARGS --resolve-sequence-association ) - endif() - - if( _PAR_DERIVE_ARGUMENT_ARRAY_SHAPE ) - list( APPEND _ARGS --derive-argument-array-shape ) - endif() endmacro() diff --git a/scripts/loki_transform.py b/scripts/loki_transform.py index 77ec6a4fb..8951adb57 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -18,41 +18,12 @@ from loki import ( config as loki_config, Sourcefile, Frontend, as_tuple, - set_excepthook, auto_post_mortem_debugger, info, warning + set_excepthook, auto_post_mortem_debugger, info ) -from loki.batch import Pipeline, Scheduler, SchedulerConfig, ProcessingStrategy +from loki.batch import Scheduler, SchedulerConfig, ProcessingStrategy + +from loki.transformations.build_system import FileWriteTransformation -# Get generalized transformations provided by Loki -from loki.transformations.argument_shape import ( - ArgumentArrayShapeAnalysis, ExplicitArgumentArrayShapeTransformation -) -from loki.transformations.build_system import ( - DependencyTransformation, ModuleWrapTransformation, FileWriteTransformation -) -from loki.transformations.data_offload import ( - DataOffloadTransformation, GlobalVariableAnalysis, GlobalVarOffloadTransformation -) -from loki.transformations.transform_derived_types import DerivedTypeArgumentsTransformation -from loki.transformations.drhook import DrHookTransformation -from loki.transformations.idempotence import IdemTransformation -from loki.transformations.inline import InlineTransformation -from loki.transformations.pool_allocator import TemporariesPoolAllocatorTransformation -from loki.transformations.remove_code import RemoveCodeTransformation -from loki.transformations.sanitise import SanitiseTransformation -from loki.transformations.single_column import ( - ExtractSCATransformation, CLAWTransformation, SCCVectorPipeline, - SCCHoistPipeline, SCCStackPipeline, SCCRawStackPipeline, -) -from loki.transformations.transpile import ( - FortranCTransformation, FortranISOCWrapperTransformation -) -from loki.transformations.block_index_transformations import ( - LowerBlockIndexTransformation, InjectBlockIndexTransformation, - LowerBlockLoopTransformation -) -from loki.transformations.single_column.scc_low_level import ( - SCCLowLevelCufHoist, SCCLowLevelCufParametrise, SCCLowLevelHoist, SCCLowLevelParametrise -) @click.group() @click.option('--debug/--no-debug', default=False, show_default=True, @@ -76,8 +47,6 @@ def cli(debug): help='Path for additional header file(s).') @click.option('--cpp/--no-cpp', default=False, help='Trigger C-preprocessing of source files.') -@click.option('--directive', default='openacc', type=click.Choice(['openacc', 'openmp', 'none']), - help='Programming model directives to insert (default openacc)') @click.option('--include', '-I', type=click.Path(), multiple=True, help='Path for additional header file(s)') @click.option('--define', '-D', multiple=True, @@ -86,33 +55,8 @@ def cli(debug): help='Additional path for header files, specifically for OMNI') @click.option('--xmod', '-M', type=click.Path(), multiple=True, help='Path for additional .xmod file(s) for OMNI') -@click.option('--data-offload', is_flag=True, default=False, - help='Run transformation to insert custom data offload regions.') -@click.option('--remove-openmp', is_flag=True, default=False, - help='Removes existing OpenMP pragmas in "!$loki data" regions.') -@click.option('--assume-deviceptr', is_flag=True, default=False, - help='Mark the relevant arguments as true device-pointers in "!$loki data" regions.') @click.option('--frontend', default='fp', type=click.Choice(['fp', 'ofp', 'omni']), help='Frontend parser to use (default FP)') -@click.option('--trim-vector-sections', is_flag=True, default=False, - help='Trim vector loops in SCC transform to exclude scalar assignments.') -@click.option('--global-var-offload', is_flag=True, default=False, - help="Generate offload instructions for global vars imported via 'USE' statements.") -@click.option('--remove-derived-args/--no-remove-derived-args', default=False, - help="Remove derived-type arguments and replace with canonical arguments") -@click.option('--inline-members/--no-inline-members', default=False, - help='Inline member functions for SCC-class transformations.') -@click.option('--inline-marked/--no-inline-marked', default=True, - help='Inline pragma-marked subroutines for SCC-class transformations.') -@click.option('--resolve-sequence-association/--no-resolve-sequence-association', default=False, - help='Replace array arguments passed as scalars with arrays.') -@click.option('--resolve-sequence-association-inlined-calls/--no-resolve-sequence-association-inlined-calls', - help='Replace array arguments passed as scalars with arrays, but only in calls that are inlined.', - default=False) -@click.option('--derive-argument-array-shape/--no-derive-argument-array-shape', default=False, - help="Recursively derive explicit shape dimension for argument arrays") -@click.option('--eliminate-dead-code/--no-eliminate-dead-code', default=True, - help='Perform dead code elimination, where unreachable branches are trimmed from the code.') @click.option('--plan-file', type=click.Path(), default=None, help='Process pipeline in planning mode and generate CMake "plan" file.') @click.option('--callgraph', '-g', type=click.Path(), default=None, @@ -123,11 +67,9 @@ def cli(debug): type=click.Choice(['debug', 'detail', 'perf', 'info', 'warning', 'error']), help='Log level to output during batch processing') def convert( - mode, config, build, source, header, cpp, directive, include, define, omni_include, xmod, - data_offload, remove_openmp, assume_deviceptr, frontend, trim_vector_sections, - global_var_offload, remove_derived_args, inline_members, inline_marked, - resolve_sequence_association, resolve_sequence_association_inlined_calls, - derive_argument_array_shape, eliminate_dead_code, plan_file, callgraph, root, log_level + mode, config, build, source, header, cpp, include, define, + omni_include, xmod, frontend, plan_file, callgraph, root, + log_level ): """ Batch-processing mode for Fortran-to-Fortran transformations that @@ -154,8 +96,6 @@ def convert( # set default transformation mode in Scheduler config config.default['mode'] = mode - directive = None if directive.lower() == 'none' else directive.lower() - build_args = { 'preprocess': cpp, 'includes': include, @@ -189,260 +129,29 @@ def convert( # If requested, apply a custom pipeline from the scheduler config # Note that this new entry point will bypass all other default # behaviour and exit immediately after. - if mode in config.pipelines: - info(f'[Loki-transform] Applying custom pipeline {mode} from config:') - info(str(config.pipelines[mode])) - - scheduler.process(config.pipelines[mode], proc_strategy=processing_strategy) - - mode = mode.replace('-', '_') # Sanitize mode string - - # Write out all modified source files into the build directory - file_write_trafo = scheduler.config.transformations.get('FileWriteTransformation', None) - if not file_write_trafo: - file_write_trafo = FileWriteTransformation(cuf='cuf' in mode) - scheduler.process(transformation=file_write_trafo, proc_strategy=processing_strategy) - - if plan_file is not None: - scheduler.write_cmake_plan(plan_file, rootpath=root) - - if callgraph: - scheduler.callgraph(callgraph) - - return - - if plan_file is not None: - msg = '[Loki] ERROR: Plan mode requires a pipeline definition in the config file.\n' + if not mode in config.pipelines: + msg = '[Loki] ERROR: Pipeline or transformation mode not found in config file.\n' msg += '[Loki] Please provide a config file with configured transformation or pipelines instead.\n' sys.exit(msg) - # If we do not use a custom pipeline, it should be one of the internally supported ones - assert mode in [ - 'idem', 'c', 'idem-stack', 'sca', 'claw', 'scc', 'scc-hoist', 'scc-stack', - 'cuf-parametrise', 'cuf-hoist', 'cuf-dynamic', 'scc-raw-stack', - 'idem-lower', 'idem-lower-loop', 'cuda-parametrise', 'cuda-hoist' - ] - - # Add deprecation message to warn about future removal of non-config entry point. - # Once we're ready to force config-only mode, everything after this can go. - msg = '[Loki] [DEPRECATION WARNING] Custom entry points to loki-transform.py convert are deprecated.\n' - msg += '[Loki] Please provide a config file with configured transformation or pipelines instead.\n' - warning(msg) - - # Pull dimension definition from configuration - horizontal = scheduler.config.dimensions.get('horizontal', None) - vertical = scheduler.config.dimensions.get('vertical', None) - block_dim = scheduler.config.dimensions.get('block_dim', None) - - # First, remove all derived-type arguments; caller first! - if remove_derived_args: - scheduler.process( DerivedTypeArgumentsTransformation() ) - - # Re-write DR_HOOK labels for non-GPU paths - if 'scc' not in mode and 'cuda' not in mode : - scheduler.process( DrHookTransformation(suffix=mode, remove=False) ) - - # Perform general source removal of unwanted calls or code regions - # (do not perfrom Dead Code Elimination yet, inlining will do this.) - remove_code_trafo = scheduler.config.transformations.get('RemoveCodeTransformation', None) - if not remove_code_trafo: - remove_code_trafo = RemoveCodeTransformation( - remove_marked_regions=True, remove_dead_code=False, kernel_only=True, - call_names=('ABOR1', 'DR_HOOK'), intrinsic_names=('WRITE(NULOUT',) - ) - scheduler.process(transformation=remove_code_trafo) - - # Perform general source sanitisation steps to level the playing field - sanitise_trafo = scheduler.config.transformations.get('SanitiseTransformation', None) - if not sanitise_trafo: - sanitise_trafo = SanitiseTransformation( - resolve_sequence_association=resolve_sequence_association, - ) - scheduler.process(transformation=sanitise_trafo) + info(f'[Loki-transform] Applying custom pipeline {mode} from config:') + info(str(config.pipelines[mode])) - # Perform source-inlining either from CLI arguments or from config - inline_trafo = scheduler.config.transformations.get('InlineTransformation', None) - if not inline_trafo: - inline_trafo = InlineTransformation( - inline_internals=inline_members, inline_marked=inline_marked, - remove_dead_code=eliminate_dead_code, allowed_aliases=horizontal.index, - resolve_sequence_association=resolve_sequence_association_inlined_calls - ) - scheduler.process(transformation=inline_trafo) - - # Backward insert argument shapes (for surface routines) - if derive_argument_array_shape: - scheduler.process(transformation=ArgumentArrayShapeAnalysis()) - scheduler.process(transformation=ExplicitArgumentArrayShapeTransformation()) - - # Insert data offload regions for GPUs and remove OpenMP threading directives - if mode not in ['cuda-hoist', 'cuda-parametrise']: - use_claw_offload = True - if data_offload: - offload_transform = DataOffloadTransformation( - remove_openmp=remove_openmp, assume_deviceptr=assume_deviceptr - ) - scheduler.process(offload_transform) - use_claw_offload = not offload_transform.has_data_regions - - if global_var_offload: - scheduler.process(transformation=GlobalVariableAnalysis()) - scheduler.process(transformation=GlobalVarOffloadTransformation()) - - # Now we create and apply the main transformation pipeline - if mode == 'idem': - pipeline = IdemTransformation() - scheduler.process( pipeline ) - - if mode == 'idem-stack': - pipeline = Pipeline( - classes=(IdemTransformation, TemporariesPoolAllocatorTransformation), - block_dim=block_dim, horizontal=horizontal, directive='openmp', check_bounds=True - ) - scheduler.process( pipeline ) - - if mode == 'idem-lower': - pipeline = Pipeline( - classes=(IdemTransformation, - LowerBlockIndexTransformation, - InjectBlockIndexTransformation,), - block_dim=block_dim, directive='openmp', check_bounds=True, - horizontal=horizontal, vertical=vertical, - ) - scheduler.process( pipeline ) - - if mode == 'idem-lower-loop': - pipeline = Pipeline( - classes=(IdemTransformation, - LowerBlockIndexTransformation, - InjectBlockIndexTransformation, - LowerBlockLoopTransformation), - block_dim=block_dim, directive='openmp', check_bounds=True, - horizontal=horizontal, vertical=vertical, - ) - scheduler.process( pipeline ) - - if mode == 'sca': - pipeline = ExtractSCATransformation(horizontal=horizontal) - scheduler.process( pipeline ) - - if mode == 'claw': - pipeline = CLAWTransformation( - horizontal=horizontal, claw_data_offload=use_claw_offload - ) - scheduler.process( pipeline ) - - if mode == 'scc': - pipeline = scheduler.config.transformations.get('scc', None) - if not pipeline: - pipeline = SCCVectorPipeline( - horizontal=horizontal, vertical=vertical, - block_dim=block_dim, directive=directive, - trim_vector_sections=trim_vector_sections - ) - scheduler.process( pipeline ) - - if mode == 'scc-hoist': - pipeline = scheduler.config.transformations.get('scc-hoist', None) - if not pipeline: - pipeline = SCCHoistPipeline( - horizontal=horizontal, vertical=vertical, - block_dim=block_dim, directive=directive, - dim_vars=(vertical.size,) if vertical else None, - trim_vector_sections=trim_vector_sections - ) - scheduler.process( pipeline ) - - if mode == 'scc-stack': - pipeline = scheduler.config.transformations.get('scc-stack', None) - if not pipeline: - pipeline = SCCStackPipeline( - horizontal=horizontal, vertical=vertical, - block_dim=block_dim, directive=directive, - check_bounds=False, - trim_vector_sections=trim_vector_sections - ) - scheduler.process( pipeline ) - - if mode == 'scc-raw-stack': - pipeline = scheduler.config.transformations.get('scc-raw-stack', None) - if not pipeline: - pipeline = SCCRawStackPipeline( - horizontal=horizontal, - block_dim=block_dim, directive=directive, - check_bounds=False, - trim_vector_sections=trim_vector_sections, - ) - scheduler.process( pipeline ) - - if mode == 'cuf-hoist': - pipeline = scheduler.config.transformations.get('cuf-hoist', None) - if not pipeline: - pipeline = SCCLowLevelCufHoist(horizontal=horizontal, vertical=vertical, directive=directive, - trim_vector_sections=trim_vector_sections, - transformation_type='hoist', derived_types = ['TECLDP'], block_dim=block_dim, - dim_vars=(vertical.size,), as_kwarguments=True, remove_vector_section=True) - scheduler.process( pipeline ) - - if mode == 'cuf-parametrise': - pipeline = scheduler.config.transformations.get('cuf-parametrise', None) - if not pipeline: - dic2p = {'NLEV': 137} - pipeline = SCCLowLevelCufParametrise(horizontal=horizontal, vertical=vertical, directive=directive, - trim_vector_sections=trim_vector_sections, - transformation_type='parametrise', derived_types = ['TECLDP'], block_dim=block_dim, - dim_vars=(vertical.size,), as_kwarguments=True, dic2p=dic2p, remove_vector_section=True) - scheduler.process( pipeline ) - - if mode == 'cuda-hoist': - pipeline = scheduler.config.transformations.get('cuda-hoist', None) - if not pipeline: - pipeline = SCCLowLevelHoist(horizontal=horizontal, vertical=vertical, directive=directive, - trim_vector_sections=trim_vector_sections, - transformation_type='hoist', derived_types = ['TECLDP'], block_dim=block_dim, mode='cuda', - dim_vars=(vertical.size,), as_kwarguments=True, hoist_parameters=True, - ignore_modules=['parkind1'], all_derived_types=True) - scheduler.process( pipeline ) - - - if mode == 'cuda-parametrise': - pipeline = pipeline = scheduler.config.transformations.get('scc-raw-stack', None) - if not pipeline: - dic2p = {'NLEV': 137} - pipeline = SCCLowLevelParametrise(horizontal=horizontal, vertical=vertical, directive=directive, - trim_vector_sections=trim_vector_sections, - transformation_type='parametrise', derived_types = ['TECLDP'], block_dim=block_dim, mode='cuda', - dim_vars=(vertical.size,), as_kwarguments=True, hoist_parameters=True, - ignore_modules=['parkind1'], all_derived_types=True, dic2p=dic2p) - scheduler.process( pipeline ) + scheduler.process(config.pipelines[mode], proc_strategy=processing_strategy) mode = mode.replace('-', '_') # Sanitize mode string - if mode in ['c', 'cuda_parametrise', 'cuda_hoist']: - if mode == 'c': - f2c_transformation = FortranCTransformation() - f2c_wrapper = FortranISOCWrapperTransformation() - elif mode in ['cuda_parametrise', 'cuda_hoist']: - f2c_transformation = FortranCTransformation(language='cuda') - f2c_wrapper = FortranISOCWrapperTransformation(language='cuda', use_c_ptr=True) - else: - assert False - scheduler.process(f2c_transformation) - scheduler.process(f2c_wrapper) - build_args['output_dir'] = build - for h in definitions: - f2c_wrapper.apply(h, role='header', build_args=build_args) - # Housekeeping: Inject our re-named kernel and auto-wrapped it in a module - dependency = DependencyTransformation(suffix='_FC', module_suffix='_MOD') - scheduler.process(dependency) - else: - # Housekeeping: Inject our re-named kernel and auto-wrapped it in a module - scheduler.process( ModuleWrapTransformation(module_suffix='_MOD') ) - scheduler.process( DependencyTransformation(suffix=f'_{mode.upper()}', module_suffix='_MOD') ) # Write out all modified source files into the build directory - scheduler.process(transformation=FileWriteTransformation( - cuf='cuf' in mode, include_module_var_imports=global_var_offload - )) + file_write_trafo = scheduler.config.transformations.get('FileWriteTransformation', None) + if not file_write_trafo: + file_write_trafo = FileWriteTransformation(cuf='cuf' in mode) + scheduler.process(transformation=file_write_trafo, proc_strategy=processing_strategy) + + if plan_file is not None: + scheduler.write_cmake_plan(plan_file, rootpath=root) + + if callgraph: + scheduler.callgraph(callgraph) @cli.command('plan') @@ -458,8 +167,6 @@ def convert( help='Path to build directory for source generation.') @click.option('--root', type=click.Path(), default=None, help='Root path to which all paths are relative to.') -@click.option('--directive', default='openacc', type=click.Choice(['openacc', 'openmp', 'none']), - help='Programming model directives to insert (default openacc)') @click.option('--cpp/--no-cpp', default=False, help='Trigger C-preprocessing of source files.') @click.option('--frontend', default='fp', type=click.Choice(['fp', 'ofp', 'omni']), @@ -473,8 +180,8 @@ def convert( help='Log level to output during batch processing') @click.pass_context def plan( - ctx, mode, config, header, source, build, root, cpp, directive, - frontend, callgraph, plan_file, log_level + ctx, mode, config, header, source, build, root, cpp, + frontend, callgraph, plan_file, log_level ): """ Create a "plan", a schedule of files to inject and transform for a