From 9b46412fbd93cf9baf5d3d0111cffb45340dec34 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Thu, 21 Nov 2024 13:12:23 +0000 Subject: [PATCH] Started on: making Loki plan trafo (pipeline) aware --- loki/batch/item.py | 6 + loki/batch/scheduler.py | 128 +++++- loki/batch/transformation.py | 119 +++++ loki/sourcefile.py | 1 + .../build_system/file_write.py | 18 + loki/transformations/parametrise.py | 69 ++- scripts/loki_transform.py | 434 +++++++++++++++++- 7 files changed, 743 insertions(+), 32 deletions(-) diff --git a/loki/batch/item.py b/loki/batch/item.py index 1ce98917f..8bd1ad9d5 100644 --- a/loki/batch/item.py +++ b/loki/batch/item.py @@ -517,6 +517,12 @@ def path(self): """ return self.source.path + @property + def orig_path(self): + """ + The original filepath of the associated source file + """ + return self.source.orig_path class FileItem(Item): """ diff --git a/loki/batch/scheduler.py b/loki/batch/scheduler.py index a6d3fef98..74dacc491 100644 --- a/loki/batch/scheduler.py +++ b/loki/batch/scheduler.py @@ -508,6 +508,120 @@ def _get_definition_items(_item, sgraph_items): self._discover() self._parse_items() + def process_plan(self, transformation): + """ + """ + if isinstance(transformation, Transformation): + self.process_plan_transformation(transformation=transformation) + + elif isinstance(transformation, Pipeline): + self.process_plan_pipeline(pipeline=transformation) + + else: + error('[Loki::Scheduler] Batch processing requires Transformation or Pipeline object') + raise RuntimeError(f'[Loki] Could not batch process {transformation}') + + def process_plan_pipeline(self, pipeline): + """ + Process a given :any:`Pipeline` by applying its associated + transformations in turn. 
+ + Parameters + ---------- + pipeline : :any:`Pipeline` + The transformation pipeline to apply + """ + for transformation in pipeline.transformations: + self.process_plan_transformation(transformation) + + def process_plan_transformation(self, transformation): + """ + Process all :attr:`items` in the scheduler's graph + + By default, the traversal is performed in topological order, which + ensures that an item is processed before the items it depends upon + (e.g., via a procedure call). + This order can be reversed in the :any:`Transformation` manifest by + setting :any:`Transformation.reverse_traversal` to ``True``. + + The scheduler applies the transformation to the scope corresponding to + each item in the scheduler's graph, determined by the :any:`Item.scope_ir` + property. For example, for a :any:`ProcedureItem`, the transformation is + applied to the corresponding :any:`Subroutine` object. + + Optionally, the traversal can be performed on a source file level only, + if the transformation has set :any:`Transformation.traverse_file_graph` + to ``True``. This uses the :attr:`filegraph` to process the dependency tree. + If combined with a :any:`Transformation.item_filter`, only source files with + at least one object corresponding to an item of that type are processed. 
+ + Parameters + ---------- + transformation : :any:`Transformation` + The transformation to apply over the dependency tree + """ + def _get_definition_items(_item, sgraph_items): + # For backward-compatibility with the DependencyTransform and LinterTransformation + if not transformation.traverse_file_graph: + return None + + # Recursively obtain all definition items but exclude any that are not part of the original SGraph + items = () + for item in _item.create_definition_items(item_factory=self.item_factory, config=self.config): + # Recursion gives us only items that are included in the SGraph, or the parent scopes + # of items included in the SGraph + child_items = _get_definition_items(item, sgraph_items) + # If the current item has relevant children, or is included in the SGraph itself, we + # include it in the list of items + if child_items or item in sgraph_items: + if transformation.process_ignored_items or not item.is_ignored: + items += (item,) + child_items + return items + + trafo_name = transformation.__class__.__name__ + log = f'[Loki::Scheduler] Applied transformation <{trafo_name}>' + ' in {:.2f}s' + with Timer(logger=info, text=log): + + # Extract the graph iteration properties from the transformation + item_filter = as_tuple(transformation.item_filter) + if transformation.traverse_file_graph: + sgraph = self.sgraph + graph = sgraph.as_filegraph( + self.item_factory, self.config, item_filter=item_filter, + exclude_ignored=not transformation.process_ignored_items + ) + sgraph_items = sgraph.items + traversal = SFilter( + graph, reverse=transformation.reverse_traversal, + include_external=self.config.default.get('strict', True) + ) + else: + graph = self.sgraph + sgraph_items = graph.items + traversal = SFilter( + graph, item_filter=item_filter, reverse=transformation.reverse_traversal, + exclude_ignored=not transformation.process_ignored_items, + include_external=self.config.default.get('strict', True) + ) + + for _item in traversal: + if 
isinstance(_item, ExternalItem): + raise RuntimeError(f'Cannot apply {trafo_name} to {_item.name}: Item is marked as external.') + + transformation.apply_plan( + _item.scope_ir, role=_item.role, mode=_item.mode, + item=_item, targets=_item.targets, items=_get_definition_items(_item, sgraph_items), + successors=graph.successors(_item, item_filter=item_filter), + depths=graph.depths, build_args=self.build_args # , item_factory=self.item_factory + ) + + if transformation.renames_items: + self.rekey_item_cache() + + if transformation.creates_items: + self._discover() + self._parse_items() + def callgraph(self, path, with_file_graph=False, with_legend=False): """ Generate a callgraph visualization and dump to file. @@ -640,16 +754,18 @@ def write_cmake_plan(self, filepath, mode, buildpath, rootpath): if item.is_ignored: continue - sourcepath = item.path.resolve() - newsource = sourcepath.with_suffix(f'.{mode.lower()}.F90') + sourcepath = item.orig_path.resolve() + newsource = item.path.resolve() + # sourcepath = item.path.resolve() + # newsource = sourcepath.with_suffix(f'.{mode.lower()}.F90') if buildpath: newsource = buildpath/newsource.name - - # Make new CMake paths relative to source again + # + # # Make new CMake paths relative to source again if rootpath is not None: sourcepath = sourcepath.relative_to(rootpath) - - debug(f'Planning:: {item.name} (role={item.role}, mode={mode})') + # + # debug(f'Planning:: {item.name} (role={item.role}, mode={mode})') # Inject new object into the final binary libs if newsource not in sources_to_append: diff --git a/loki/batch/transformation.py b/loki/batch/transformation.py index 7b4454c8c..84ae0e9d3 100644 --- a/loki/batch/transformation.py +++ b/loki/batch/transformation.py @@ -412,3 +412,122 @@ def post_apply_module(self, module, rescope_symbols): # Ensure all objects in the IR are in the module's scope. if rescope_symbols: module.rescope_symbols() + + def plan_subroutine(self, source, **kwargs): + """ + ... 
+ """ + + def plan_module(self, source, **kwargs): + """ + ... + """ + + def plan_file(self, source, **kwargs): + """ + ... + """ + + def apply_plan(self, source, **kwargs): + """ + ... + """ + if isinstance(source, Sourcefile): + self.apply_plan_file(source, **kwargs) + + if isinstance(source, Subroutine): + self.apply_plan_subroutine(source, **kwargs) + + if isinstance(source, Module): + self.apply_plan_module(source, **kwargs) + + def apply_plan_file(self, sourcefile, **kwargs): + """ + """ + if not isinstance(sourcefile, Sourcefile): + raise TypeError('Transformation.apply_file can only be applied to Sourcefile object') + + # if sourcefile._incomplete: + # raise RuntimeError('Transformation.apply_file requires Sourcefile to be complete') + + item = kwargs.pop('item', None) + items = kwargs.pop('items', None) + role = kwargs.pop('role', None) + targets = kwargs.pop('targets', None) + + # Apply file-level transformations + self.plan_file(sourcefile, item=item, role=role, targets=targets, items=items, **kwargs) + + # Recurse to modules, if configured + if self.recurse_to_modules: + if items: + # Recursion into all module items in the current file + for item in items: + if isinstance(item, ModuleItem): + # Currently, we don't get the role for modules correct as 'driver' + # if the role overwrite in the config marks only specific procedures + # as driver, but everything else as kernel by default. This is in particular the + # case, if the ModuleWrapTransformation is applied to a driver routine. 
+ # For that reason, we set the role as unspecified (None) if not the role is + # universally equal throughout the module + item_role = item.role + definitions_roles = {_it.role for _it in items if _it.scope_name == item.name} + if definitions_roles != {item_role}: + item_role = None + + # Provide the list of items that belong to this module + item_items = tuple(_it for _it in items if _it.scope is item.ir) + + self.plan_module( + item.ir, item=item, role=item_role, targets=item.targets, items=item_items, **kwargs + ) + else: + for module in sourcefile.modules: + self.plan_module(module, item=item, role=role, targets=targets, items=items, **kwargs) + + # Recurse into procedures, if configured + if self.recurse_to_procedures: + if items: + # Recursion into all subroutine items in the current file + for item in items: + if isinstance(item, ProcedureItem): + self.plan_subroutine( + item.ir, item=item, role=item.role, targets=item.targets, **kwargs + ) + else: + for routine in sourcefile.all_subroutines: + self.plan_subroutine(routine, item=item, role=role, targets=targets, **kwargs) + + def apply_plan_subroutine(self, subroutine, **kwargs): + """ + """ + if not isinstance(subroutine, Subroutine): + raise TypeError('Transformation.apply_subroutine can only be applied to Subroutine object') + + # if subroutine._incomplete: + # raise RuntimeError('Transformation.apply_subroutine requires Subroutine to be complete') + + # Apply the actual transformation for subroutines + self.plan_subroutine(subroutine, **kwargs) + + # Recurse to internal procedures + if self.recurse_to_internal_procedures: + for routine in subroutine.subroutines: + self.apply_plan_subroutine(routine, **kwargs) + + def apply_plan_module(self, module, **kwargs): + """ + """ + if not isinstance(module, Module): + raise TypeError('Transformation.apply_module can only be applied to Module object') + + # if module._incomplete: + # raise RuntimeError('Transformation.apply_module requires Module to be complete') 
+ + # Apply the actual transformation for modules + self.plan_module(module, **kwargs) + + # Recurse to procedures contained in this module + if self.recurse_to_procedures: + for routine in module.subroutines: + self.apply_plan_subroutine(routine, **kwargs) diff --git a/loki/sourcefile.py b/loki/sourcefile.py index 5f5c1e1fa..8dc663f5b 100644 --- a/loki/sourcefile.py +++ b/loki/sourcefile.py @@ -64,6 +64,7 @@ class Sourcefile: def __init__(self, path, ir=None, ast=None, source=None, incomplete=False, parser_classes=None): self.path = Path(path) if path is not None else path + self.orig_path = self.path if ir is not None and not isinstance(ir, Section): ir = Section(body=ir) self.ir = ir diff --git a/loki/transformations/build_system/file_write.py b/loki/transformations/build_system/file_write.py index f9182d877..6f762f540 100644 --- a/loki/transformations/build_system/file_write.py +++ b/loki/transformations/build_system/file_write.py @@ -73,3 +73,21 @@ def transform_file(self, sourcefile, **kwargs): if build_args and (output_dir := build_args.get('output_dir', None)) is not None: sourcepath = Path(output_dir)/sourcepath.name sourcefile.write(path=sourcepath, cuf=self.cuf) + + def plan_file(self, sourcefile, **kwargs): + item = kwargs.get('item', None) + if not item and 'items' in kwargs: + if kwargs['items']: + item = kwargs['items'][0] + + if not item: + raise ValueError('No Item provided; required to determine file write path') + + _mode = item.mode if item.mode else 'loki' + _mode = _mode.replace('-', '_') # Sanitize mode string + + path = Path(item.path) + suffix = self.suffix if self.suffix else path.suffix + sourcepath = Path(item.path).with_suffix(f'.{_mode}{suffix}') + item.source.path = sourcepath + diff --git a/loki/transformations/parametrise.py b/loki/transformations/parametrise.py index a21123025..47b0b2cd0 100644 --- a/loki/transformations/parametrise.py +++ b/loki/transformations/parametrise.py @@ -82,6 +82,8 @@ 
scheduler.process(transformation=ParametriseTransformation(dic2p=dic2p, replace_by_value=True)) """ +from pathlib import Path +from loki import ProcedureItem from loki.batch import Transformation from loki.expression import symbols as sym from loki.ir import nodes as ir, Transformer, FindNodes @@ -90,9 +92,74 @@ from loki.transformations.inline import inline_constant_parameters -__all__ = ['ParametriseTransformation'] +__all__ = ['ParametriseTransformation', 'DuplicateKernel', 'RemoveKernel'] +class DuplicateKernel(Transformation): + + def __init__(self, kernels=None): + self.kernels = tuple(kernel.lower() for kernel in as_tuple(kernels)) + + def transform_subroutine(self, routine, **kwargs): + calls = FindNodes(ir.CallStatement).visit(routine.body) + call_map = {} + for call in calls: + if str(call.name).lower() in self.kernels: + call_map[call] = (call, call.clone()) + routine.body = Transformer(call_map).visit(routine.body) + + def plan_subroutine(self, routine, **kwargs): + item = kwargs.get('item', None) + if not item and 'items' in kwargs: + if kwargs['items']: + item = kwargs['items'][0] + + successors = as_tuple(kwargs.get('successors')) + + # this only renames, however we want to create a duplicated kernel and not only as duplicate call ... 
+ """ + for child in successors: + if not isinstance(child, ProcedureItem): + continue + print(f"plan_subroutine - child for {routine} : {child}") + if child.local_name.lower() in self.kernels: + path = Path(child.path) + suffix = path.suffix + child.source.path = Path(child.path).with_suffix(f'.duplicate{suffix}') + """ + +class RemoveKernel(Transformation): + + def __init__(self, kernels=None): + self.kernels = tuple(kernel.lower() for kernel in as_tuple(kernels)) + + def transform_subroutine(self, routine, **kwargs): + calls = FindNodes(ir.CallStatement).visit(routine.body) + call_map = {} + for call in calls: + if str(call.name).lower() in self.kernels: + call_map[call] = None + routine.body = Transformer(call_map).visit(routine.body) + + """ + def plan_subroutine(self, sourcefile, **kwargs): + item = kwargs.get('item', None) + if not item and 'items' in kwargs: + if kwargs['items']: + item = kwargs['items'][0] + + if not item: + raise ValueError('No Item provided; required to determine file write path') + + _mode = item.mode if item.mode else 'loki' + _mode = _mode.replace('-', '_') # Sanitize mode string + + path = Path(item.path) + suffix = self.suffix if self.suffix else path.suffix + sourcepath = Path(item.path).with_suffix(f'.{_mode}{suffix}') + item.source.path = sourcepath + """ + class ParametriseTransformation(Transformation): """ Parametrise variables with provided values. 
diff --git a/scripts/loki_transform.py b/scripts/loki_transform.py index f8e3b14b3..9d6c38756 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -418,51 +418,382 @@ def convert( cuf='cuf' in mode, include_module_var_imports=global_var_offload )) - -@cli.command('plan') -@click.option('--mode', '-m', default='sca', - type=click.Choice(['idem', 'idem-stack', 'sca', 'claw', 'scc', 'scc-hoist', 'scc-stack'])) -@click.option('--config', '-c', type=click.Path(), - help='Path to configuration file.') -@click.option('--header', '-I', type=click.Path(), multiple=True, - help='Path for additional header file(s).') -@click.option('--source', '-s', type=click.Path(), multiple=True, - help='Path to source files to transform.') -@click.option('--build', '-b', type=click.Path(), default=None, +@cli.command() +@click.option('--mode', '-m', default='idem', type=click.STRING, + help='Transformation mode, selecting which code transformations to apply.') +@click.option('--config', default=None, type=click.Path(), + help='Path to custom scheduler configuration file') +@click.option('--build', '-b', '--out-path', type=click.Path(), default=None, help='Path to build directory for source generation.') -@click.option('--root', type=click.Path(), default=None, - help='Root path to which all paths are relative to.') -@click.option('--directive', default='openacc', type=click.Choice(['openacc', 'openmp', 'none']), - help='Programming model directives to insert (default openacc)') +@click.option('--source', '-s', '--path', type=click.Path(), multiple=True, + help='Path to search during source exploration.') +@click.option('--header', '-h', type=click.Path(), multiple=True, + help='Path for additional header file(s).') @click.option('--cpp/--no-cpp', default=False, help='Trigger C-preprocessing of source files.') +@click.option('--directive', default='openacc', type=click.Choice(['openacc', 'openmp', 'none']), + help='Programming model directives to insert (default 
openacc)') +@click.option('--include', '-I', type=click.Path(), multiple=True, + help='Path for additional header file(s)') +@click.option('--define', '-D', multiple=True, + help='Additional symbol definitions for the C-preprocessor') +@click.option('--omni-include', type=click.Path(), multiple=True, + help='Additional path for header files, specifically for OMNI') +@click.option('--xmod', '-M', type=click.Path(), multiple=True, + help='Path for additional .xmod file(s) for OMNI') +@click.option('--data-offload', is_flag=True, default=False, + help='Run transformation to insert custom data offload regions.') +@click.option('--remove-openmp', is_flag=True, default=False, + help='Removes existing OpenMP pragmas in "!$loki data" regions.') +@click.option('--assume-deviceptr', is_flag=True, default=False, + help='Mark the relevant arguments as true device-pointers in "!$loki data" regions.') @click.option('--frontend', default='fp', type=click.Choice(['fp', 'ofp', 'omni']), help='Frontend parser to use (default FP)') +@click.option('--trim-vector-sections', is_flag=True, default=False, + help='Trim vector loops in SCC transform to exclude scalar assignments.') +@click.option('--global-var-offload', is_flag=True, default=False, + help="Generate offload instructions for global vars imported via 'USE' statements.") +@click.option('--remove-derived-args/--no-remove-derived-args', default=False, + help="Remove derived-type arguments and replace with canonical arguments") +@click.option('--inline-members/--no-inline-members', default=False, + help='Inline member functions for SCC-class transformations.') +@click.option('--inline-marked/--no-inline-marked', default=True, + help='Inline pragma-marked subroutines for SCC-class transformations.') +@click.option('--resolve-sequence-association/--no-resolve-sequence-association', default=False, + help='Replace array arguments passed as scalars with arrays.') 
+@click.option('--resolve-sequence-association-inlined-calls/--no-resolve-sequence-association-inlined-calls', + help='Replace array arguments passed as scalars with arrays, but only in calls that are inlined.', + default=False) +@click.option('--derive-argument-array-shape/--no-derive-argument-array-shape', default=False, + help="Recursively derive explicit shape dimension for argument arrays") +@click.option('--eliminate-dead-code/--no-eliminate-dead-code', default=True, + help='Perform dead code elimination, where unreachable branches are trimmed from the code.') +@click.option('--log-level', '-l', default='info', envvar='LOKI_LOGGING', + type=click.Choice(['debug', 'detail', 'perf', 'info', 'warning', 'error']), + help='Log level to output during batch processing') +@click.option('--root', type=click.Path(), default=None, + help='Root path to which all paths are relative to.') @click.option('--callgraph', '-g', type=click.Path(), default=None, help='Generate and display the subroutine callgraph.') @click.option('--plan-file', type=click.Path(), help='CMake "plan" file to generate.') -@click.option('--log-level', '-l', default='info', envvar='LOKI_LOGGING', - type=click.Choice(['debug', 'detail', 'perf', 'info', 'warning', 'error']), - help='Log level to output during batch processing') def plan( - mode, config, header, source, build, root, cpp, directive, - frontend, callgraph, plan_file, log_level + mode, config, build, source, header, cpp, directive, include, define, omni_include, xmod, + data_offload, remove_openmp, assume_deviceptr, frontend, trim_vector_sections, + global_var_offload, remove_derived_args, inline_members, inline_marked, + resolve_sequence_association, resolve_sequence_association_inlined_calls, + derive_argument_array_shape, eliminate_dead_code, log_level, root, + callgraph, plan_file ): +# def plan( +# mode, config, header, source, build, root, cpp, directive, +# frontend, callgraph, plan_file, log_level +# ): """ - Create a "plan", a 
schedule of files to inject and transform for a - given configuration. + Batch-processing mode for Fortran-to-Fortran transformations that + employs a :class:`Scheduler` to process large numbers of source + files. + + Based on the given "mode" string, configuration file, source file + paths and build arguments the :any:`Scheduler` will perform + automatic call-tree exploration and apply a set of + :any:`Transformation` objects to this call tree. """ loki_config['log-level'] = log_level - info(f'[Loki] Creating CMake plan file from config: {config}') + info(f'[Loki] Batch-processing source files using config: {config} ') + config = SchedulerConfig.from_file(config) - paths = [Path(s).resolve() for s in source] - paths += [Path(h).resolve().parent for h in header] + # set default transformation mode in Scheduler config + config.default['mode'] = mode + + directive = None if directive.lower() == 'none' else directive.lower() + + build_args = { + 'preprocess': cpp, + 'includes': include, + 'defines': define, + 'xmods': xmod, + 'omni_includes': omni_include, + } + + frontend = Frontend[frontend.upper()] + frontend_type = Frontend.FP if frontend == Frontend.OMNI else frontend + + # Note, in order to get function inlining correct, we need full knowledge + # of any imported symbols and functions. Since we cannot yet retro-fit that + # after creation, we need to make sure that the order of definitions can + # be used to create a coherent stack of type definitions. + # definitions with new scheduler not necessary anymore. 
However, "source" need to be adjusted + # in order to allow the scheduler to find the dependencies + definitions = [] + for h in header: + sfile = Sourcefile.from_file(filename=h, frontend=frontend_type, definitions=definitions, + **build_args) + definitions = definitions + list(sfile.definitions) + + # Create a scheduler to bulk-apply source transformations + paths = [Path(p).resolve() for p in as_tuple(source)] + paths += [Path(h).resolve().parent for h in as_tuple(header)] + # scheduler = Scheduler( + # paths=paths, config=config, frontend=frontend, definitions=definitions, output_dir=build, **build_args + # ) scheduler = Scheduler(paths=paths, config=config, frontend=frontend, full_parse=False, preprocess=cpp) + # If requested, apply a custom pipeline from the scheduler config + # Note that this new entry point will bypass all other default + # behaviour and exit immediately after. + if mode in config.pipelines: + info(f'[Loki-transform] Applying custom pipeline {mode} from config:') + info(str(config.pipelines[mode])) + + scheduler.process_plan( config.pipelines[mode] ) + + mode = mode.replace('-', '_') # Sanitize mode string + + # Write out all modified source files into the build directory + file_write_trafo = scheduler.config.transformations.get('FileWriteTransformation', None) + if not file_write_trafo: + file_write_trafo = FileWriteTransformation(cuf='cuf' in mode) + scheduler.process_plan(transformation=file_write_trafo) + + mode = mode.replace('-', '_') # Sanitize mode string + + # Construct the transformation plan as a set of CMake lists of source files + scheduler.write_cmake_plan(filepath=plan_file, mode=mode, buildpath=build, rootpath=root) + + # Output the resulting callgraph + if callgraph: + scheduler.callgraph(callgraph) + + return + + # If we do not use a custom pipeline, it should be one of the internally supported ones + assert mode in [ + 'idem', 'c', 'idem-stack', 'sca', 'claw', 'scc', 'scc-hoist', 'scc-stack', + 'cuf-parametrise', 
'cuf-hoist', 'cuf-dynamic', 'scc-raw-stack', + 'idem-lower', 'idem-lower-loop', 'cuda-parametrise', 'cuda-hoist' + ] + + # Add deprecation message to warn about future removal of non-config entry point. + # Once we're ready to force config-only mode, everything after this can go. + msg = '[Loki] [DEPRECATION WARNING] Custom entry points to loki-transform.py convert are deprecated.\n' + msg += '[Loki] Please provide a config file with configured transformation or pipelines instead.\n' + warning(msg) + + # Pull dimension definition from configuration + horizontal = scheduler.config.dimensions.get('horizontal', None) + vertical = scheduler.config.dimensions.get('vertical', None) + block_dim = scheduler.config.dimensions.get('block_dim', None) + + # First, remove all derived-type arguments; caller first! + if remove_derived_args: + scheduler.process_plan( DerivedTypeArgumentsTransformation() ) + + # Re-write DR_HOOK labels for non-GPU paths + if 'scc' not in mode and 'cuda' not in mode : + scheduler.process_plan( DrHookTransformation(suffix=mode, remove=False) ) + + # Perform general source removal of unwanted calls or code regions + # (do not perform Dead Code Elimination yet, inlining will do this.) 
+ remove_code_trafo = scheduler.config.transformations.get('RemoveCodeTransformation', None) + if not remove_code_trafo: + remove_code_trafo = RemoveCodeTransformation( + remove_marked_regions=True, remove_dead_code=False, kernel_only=True, + call_names=('ABOR1', 'DR_HOOK'), intrinsic_names=('WRITE(NULOUT',) + ) + scheduler.process_plan(transformation=remove_code_trafo) + + # Perform general source sanitisation steps to level the playing field + sanitise_trafo = scheduler.config.transformations.get('SanitiseTransformation', None) + if not sanitise_trafo: + sanitise_trafo = SanitiseTransformation( + resolve_sequence_association=resolve_sequence_association, + ) + scheduler.process_plan(transformation=sanitise_trafo) + + # Perform source-inlining either from CLI arguments or from config + inline_trafo = scheduler.config.transformations.get('InlineTransformation', None) + if not inline_trafo: + inline_trafo = InlineTransformation( + inline_internals=inline_members, inline_marked=inline_marked, + remove_dead_code=eliminate_dead_code, allowed_aliases=horizontal.index, + resolve_sequence_association=resolve_sequence_association_inlined_calls + ) + scheduler.process_plan(transformation=inline_trafo) + + # Backward insert argument shapes (for surface routines) + if derive_argument_array_shape: + scheduler.process_plan(transformation=ArgumentArrayShapeAnalysis()) + scheduler.process_plan(transformation=ExplicitArgumentArrayShapeTransformation()) + + # Insert data offload regions for GPUs and remove OpenMP threading directives + if mode not in ['cuda-hoist', 'cuda-parametrise']: + use_claw_offload = True + if data_offload: + offload_transform = DataOffloadTransformation( + remove_openmp=remove_openmp, assume_deviceptr=assume_deviceptr + ) + scheduler.process_plan(offload_transform) + use_claw_offload = not offload_transform.has_data_regions + + if global_var_offload: + scheduler.process_plan(transformation=GlobalVariableAnalysis()) + 
scheduler.process_plan(transformation=GlobalVarOffloadTransformation()) + + # Now we create and apply the main transformation pipeline + if mode == 'idem': + pipeline = IdemTransformation() + scheduler.process_plan( pipeline ) + + if mode == 'idem-stack': + pipeline = Pipeline( + classes=(IdemTransformation, TemporariesPoolAllocatorTransformation), + block_dim=block_dim, horizontal=horizontal, directive='openmp', check_bounds=True + ) + scheduler.process_plan( pipeline ) + + if mode == 'idem-lower': + pipeline = Pipeline( + classes=(IdemTransformation, + LowerBlockIndexTransformation, + InjectBlockIndexTransformation,), + block_dim=block_dim, directive='openmp', check_bounds=True, + horizontal=horizontal, vertical=vertical, + ) + scheduler.process_plan( pipeline ) + + if mode == 'idem-lower-loop': + pipeline = Pipeline( + classes=(IdemTransformation, + LowerBlockIndexTransformation, + InjectBlockIndexTransformation, + LowerBlockLoopTransformation), + block_dim=block_dim, directive='openmp', check_bounds=True, + horizontal=horizontal, vertical=vertical, + ) + scheduler.process_plan( pipeline ) + + if mode == 'sca': + pipeline = ExtractSCATransformation(horizontal=horizontal) + scheduler.process_plan( pipeline ) + + if mode == 'claw': + pipeline = CLAWTransformation( + horizontal=horizontal, claw_data_offload=use_claw_offload + ) + scheduler.process_plan( pipeline ) + + if mode == 'scc': + pipeline = scheduler.config.transformations.get('scc', None) + if not pipeline: + pipeline = SCCVectorPipeline( + horizontal=horizontal, vertical=vertical, + block_dim=block_dim, directive=directive, + trim_vector_sections=trim_vector_sections + ) + scheduler.process_plan( pipeline ) + + if mode == 'scc-hoist': + pipeline = scheduler.config.transformations.get('scc-hoist', None) + if not pipeline: + pipeline = SCCHoistPipeline( + horizontal=horizontal, vertical=vertical, + block_dim=block_dim, directive=directive, + dim_vars=(vertical.size,) if vertical else None, + 
trim_vector_sections=trim_vector_sections + ) + scheduler.process_plan( pipeline ) + + if mode == 'scc-stack': + pipeline = scheduler.config.transformations.get('scc-stack', None) + if not pipeline: + pipeline = SCCStackPipeline( + horizontal=horizontal, vertical=vertical, + block_dim=block_dim, directive=directive, + check_bounds=False, + trim_vector_sections=trim_vector_sections + ) + scheduler.process_plan( pipeline ) + + if mode == 'scc-raw-stack': + pipeline = scheduler.config.transformations.get('scc-raw-stack', None) + if not pipeline: + pipeline = SCCRawStackPipeline( + horizontal=horizontal, + block_dim=block_dim, directive=directive, + check_bounds=False, + trim_vector_sections=trim_vector_sections, + ) + scheduler.process_plan( pipeline ) + + if mode == 'cuf-hoist': + pipeline = scheduler.config.transformations.get('cuf-hoist', None) + if not pipeline: + pipeline = SCCLowLevelCufHoist(horizontal=horizontal, vertical=vertical, directive=directive, + trim_vector_sections=trim_vector_sections, + transformation_type='hoist', derived_types = ['TECLDP'], block_dim=block_dim, + dim_vars=(vertical.size,), as_kwarguments=True, remove_vector_section=True) + scheduler.process_plan( pipeline ) + + if mode == 'cuf-parametrise': + pipeline = scheduler.config.transformations.get('cuf-parametrise', None) + if not pipeline: + dic2p = {'NLEV': 137} + pipeline = SCCLowLevelCufParametrise(horizontal=horizontal, vertical=vertical, directive=directive, + trim_vector_sections=trim_vector_sections, + transformation_type='parametrise', derived_types = ['TECLDP'], block_dim=block_dim, + dim_vars=(vertical.size,), as_kwarguments=True, dic2p=dic2p, remove_vector_section=True) + scheduler.process_plan( pipeline ) + + if mode == 'cuda-hoist': + pipeline = scheduler.config.transformations.get('cuda-hoist', None) + if not pipeline: + pipeline = SCCLowLevelHoist(horizontal=horizontal, vertical=vertical, directive=directive, + trim_vector_sections=trim_vector_sections, + 
transformation_type='hoist', derived_types = ['TECLDP'], block_dim=block_dim, mode='cuda', + dim_vars=(vertical.size,), as_kwarguments=True, hoist_parameters=True, + ignore_modules=['parkind1'], all_derived_types=True) + scheduler.process_plan( pipeline ) + + + if mode == 'cuda-parametrise': + pipeline = scheduler.config.transformations.get('cuda-parametrise', None) + if not pipeline: + dic2p = {'NLEV': 137} + pipeline = SCCLowLevelParametrise(horizontal=horizontal, vertical=vertical, directive=directive, + trim_vector_sections=trim_vector_sections, + transformation_type='parametrise', derived_types = ['TECLDP'], block_dim=block_dim, mode='cuda', + dim_vars=(vertical.size,), as_kwarguments=True, hoist_parameters=True, + ignore_modules=['parkind1'], all_derived_types=True, dic2p=dic2p) + scheduler.process_plan( pipeline ) + + mode = mode.replace('-', '_') # Sanitize mode string + if mode in ['c', 'cuda_parametrise', 'cuda_hoist']: + if mode == 'c': + f2c_transformation = FortranCTransformation(path=build) + elif mode in ['cuda_parametrise', 'cuda_hoist']: + f2c_transformation = FortranCTransformation(path=build, language='cuda', use_c_ptr=True) + else: + assert False + scheduler.process_plan(f2c_transformation) + for h in definitions: + f2c_transformation.apply(h, role='header') + # Housekeeping: Inject our re-named kernel and auto-wrapped it in a module + dependency = DependencyTransformation(suffix='_FC', module_suffix='_MOD') + scheduler.process_plan(dependency) + else: + # Housekeeping: Inject our re-named kernel and auto-wrapped it in a module + scheduler.process_plan( ModuleWrapTransformation(module_suffix='_MOD') ) + scheduler.process_plan( DependencyTransformation(suffix=f'_{mode.upper()}', module_suffix='_MOD') ) + + # Write out all modified source files into the build directory + scheduler.process_plan(transformation=FileWriteTransformation( + cuf='cuf' in mode, include_module_var_imports=global_var_offload + )) + mode = mode.replace('-', '_') # 
Sanitize mode string # Construct the transformation plan as a set of CMake lists of source files @@ -472,6 +803,59 @@ def plan( if callgraph: scheduler.callgraph(callgraph) +# @cli.command('plan') +# @click.option('--mode', '-m', default='sca', +# type=click.Choice(['idem', 'idem-stack', 'sca', 'claw', 'scc', 'scc-hoist', 'scc-stack'])) +# @click.option('--config', '-c', type=click.Path(), +# help='Path to configuration file.') +# @click.option('--header', '-I', type=click.Path(), multiple=True, +# help='Path for additional header file(s).') +# @click.option('--source', '-s', type=click.Path(), multiple=True, +# help='Path to source files to transform.') +# @click.option('--build', '-b', type=click.Path(), default=None, +# help='Path to build directory for source generation.') +# @click.option('--root', type=click.Path(), default=None, +# help='Root path to which all paths are relative to.') +# @click.option('--directive', default='openacc', type=click.Choice(['openacc', 'openmp', 'none']), +# help='Programming model directives to insert (default openacc)') +# @click.option('--cpp/--no-cpp', default=False, +# help='Trigger C-preprocessing of source files.') +# @click.option('--frontend', default='fp', type=click.Choice(['fp', 'ofp', 'omni']), +# help='Frontend parser to use (default FP)') +# @click.option('--callgraph', '-g', type=click.Path(), default=None, +# help='Generate and display the subroutine callgraph.') +# @click.option('--plan-file', type=click.Path(), +# help='CMake "plan" file to generate.') +# @click.option('--log-level', '-l', default='info', envvar='LOKI_LOGGING', +# type=click.Choice(['debug', 'detail', 'perf', 'info', 'warning', 'error']), +# help='Log level to output during batch processing') +# def plan( +# mode, config, header, source, build, root, cpp, directive, +# frontend, callgraph, plan_file, log_level +# ): +# """ +# Create a "plan", a schedule of files to inject and transform for a +# given configuration. 
+# """ +# +# loki_config['log-level'] = log_level +# +# info(f'[Loki] Creating CMake plan file from config: {config}') +# config = SchedulerConfig.from_file(config) +# +# paths = [Path(s).resolve() for s in source] +# paths += [Path(h).resolve().parent for h in header] +# scheduler = Scheduler(paths=paths, config=config, frontend=frontend, full_parse=False, preprocess=cpp) +# +# mode = mode.replace('-', '_') # Sanitize mode string +# +# # Construct the transformation plan as a set of CMake lists of source files +# scheduler.write_cmake_plan(filepath=plan_file, mode=mode, buildpath=build, rootpath=root) +# +# # Output the resulting callgraph +# if callgraph: +# scheduler.callgraph(callgraph) + if __name__ == "__main__": cli() # pylint: disable=no-value-for-parameter