Merge pull request #347 from Andrew-Beggs-ECMWF/naab-loop-unroll-transform

Add loop unroll transformation
ecmwf-ifs · Jul 26, 2024 · 190bdfa · 190bdfa
2 parents bac8b3a + fa26ed3
commit 190bdfa
Show file tree

Hide file tree

Showing 3 changed files with 505 additions and 3 deletions.
diff --git a/AUTHORS.md b/AUTHORS.md
@@ -1,5 +1,6 @@
 # Authors and Contributors
 
+- A. Beggs (ECMWF)
 - R. Heilemann Myhre (Met Norway)
 - S. Karppinen (FMI)
 - P. Kiepas (École polytechnique/IPSL)

diff --git a/loki/transformations/tests/test_transform_loop.py b/loki/transformations/tests/test_transform_loop.py
@@ -6,6 +6,7 @@
 # nor does it submit to any jurisdiction.
 
 # pylint: disable=too-many-lines
+import itertools
 import pytest
 import numpy as np
 
@@ -20,7 +21,7 @@
 
 from loki.transformations.array_indexing import normalize_range_indexing
 from loki.transformations.transform_loop import (
-    loop_interchange, loop_fusion, loop_fission
+    loop_interchange, loop_fusion, loop_fission, loop_unroll
 )
 
 
@@ -1627,3 +1628,366 @@ def test_transform_loop_fusion_fission(tmp_path, frontend):
 
     clean_test(filepath)
     clean_test(fissioned_filepath)
+
+
+@pytest.mark.parametrize('frontend', available_frontends())
+def test_transform_loop_unroll(tmp_path, frontend):
+    """
+    Apply ``loop_unroll`` to a single pragma-annotated loop with literal
+    bounds and check that the loop is replaced by plain assignments.
+
+    The routine is compiled and run before and after the transformation
+    and both results are compared against the same Python reference sum.
+    """
+    fcode = """
+subroutine test_transform_loop_unroll(s)
+    implicit none
+    integer :: a
+    integer, intent(inout) :: s
+
+    !Loop A
+    !$loki loop-unroll
+    do a=1, 10
+        s = s + a + 1
+    end do
+
+end subroutine test_transform_loop_unroll
+ """
+    routine = Subroutine.from_source(fcode, frontend=frontend)
+    filepath = tmp_path / f'{routine.name}_{frontend}.f90'
+    function = jit_compile(routine, filepath=filepath, objname=routine.name)
+
+    # Test the reference solution
+    s = np.zeros(1)
+    function(s=s)
+    assert s == sum([x + 1 for x in range(1, 11)])
+
+    # Apply transformation
+    assert len(FindNodes(Loop).visit(routine.body)) == 1
+    loop_unroll(routine)
+    # Bounds 1..10 give ten iterations: no loop left, one assignment per iteration
+    assert len(FindNodes(Loop).visit(routine.body)) == 0 and len(FindNodes(Assignment).visit(routine.body)) == 10
+
+    unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90'
+    unrolled_function = jit_compile(routine, filepath=unrolled_filepath, objname=routine.name)
+
+    # Test transformation
+    s = np.zeros(1)
+    unrolled_function(s=s)
+    assert s == sum([x + 1 for x in range(1, 11)])
+
+    clean_test(filepath)
+    clean_test(unrolled_filepath)
+
+
+@pytest.mark.parametrize('frontend', available_frontends())
+def test_transform_loop_unroll_step(tmp_path, frontend):
+    """
+    Apply ``loop_unroll`` to a pragma-annotated loop with literal bounds
+    and a literal step of 2, and check the number of generated assignments.
+
+    The routine is compiled and run before and after the transformation
+    and both results are compared against the same Python reference sum.
+    """
+    fcode = """
+subroutine test_transform_loop_unroll_step(s)
+    implicit none
+    integer :: a
+    integer, intent(inout) :: s
+
+    !Loop A
+    !$loki loop-unroll
+    do a=1, 10, 2
+        s = s + a + 1
+    end do
+
+end subroutine test_transform_loop_unroll_step
+ """
+    routine = Subroutine.from_source(fcode, frontend=frontend)
+    filepath = tmp_path / f'{routine.name}_{frontend}.f90'
+    function = jit_compile(routine, filepath=filepath, objname=routine.name)
+
+    # Test the reference solution
+    s = np.zeros(1)
+    function(s=s)
+    assert s == sum([x + 1 for x in range(1, 11, 2)])
+
+    # Apply transformation
+    assert len(FindNodes(Loop).visit(routine.body)) == 1
+    loop_unroll(routine)
+    # Step 2 over 1..10 visits a = 1, 3, 5, 7, 9: five assignments, no loop left
+    assert len(FindNodes(Loop).visit(routine.body)) == 0 and len(FindNodes(Assignment).visit(routine.body)) == 5
+
+    unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90'
+    unrolled_function = jit_compile(routine, filepath=unrolled_filepath, objname=routine.name)
+
+    # Test transformation
+    s = np.zeros(1)
+    unrolled_function(s=s)
+    assert s == sum([x + 1 for x in range(1, 11, 2)])
+
+    clean_test(filepath)
+    clean_test(unrolled_filepath)
+
+
+@pytest.mark.parametrize('frontend', available_frontends())
+def test_transform_loop_unroll_non_literal_range(tmp_path, frontend):
+    """
+    Check that ``loop_unroll`` leaves a pragma-annotated loop in place
+    when its upper bound is a variable (``i``) rather than a literal.
+
+    The routine is still compiled and run before and after the call to
+    confirm that the result is unchanged.
+    """
+    fcode = """
+subroutine test_transform_loop_unroll_non_literal_range(s)
+    implicit none
+    integer :: a, i
+    integer, intent(inout) :: s
+
+    i = 10
+
+    !Loop A
+    !$loki loop-unroll
+    do a=1, i
+        s = s + a + 1
+    end do
+
+end subroutine test_transform_loop_unroll_non_literal_range
+ """
+    routine = Subroutine.from_source(fcode, frontend=frontend)
+    filepath = tmp_path / f'{routine.name}_{frontend}.f90'
+    function = jit_compile(routine, filepath=filepath, objname=routine.name)
+
+    # Test the reference solution
+    s = np.zeros(1)
+    function(s=s)
+    assert s == sum([x + 1 for x in range(1, 11)])
+
+    # Apply transformation
+    assert len(FindNodes(Loop).visit(routine.body)) == 1
+    loop_unroll(routine)
+    # Loop is expected to survive; the two assignments are ``i = 10`` and the loop body
+    assert len(FindNodes(Loop).visit(routine.body)) == 1 and len(FindNodes(Assignment).visit(routine.body)) == 2
+
+    unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90'
+    unrolled_function = jit_compile(routine, filepath=unrolled_filepath, objname=routine.name)
+
+    # Test transformation
+    s = np.zeros(1)
+    unrolled_function(s=s)
+    assert s == sum([x + 1 for x in range(1, 11)])
+
+    clean_test(filepath)
+    clean_test(unrolled_filepath)
+
+
+@pytest.mark.parametrize('frontend', available_frontends())
+def test_transform_loop_unroll_nested(tmp_path, frontend):
+    """
+    Apply ``loop_unroll`` to a two-level loop nest where only the outer
+    loop carries the ``!$loki loop-unroll`` pragma (no ``depth`` clause).
+
+    The test expects both loops to disappear, leaving one assignment per
+    ``(a, b)`` pair, and the compiled result to match the reference sum.
+    """
+    fcode = """
+subroutine test_transform_loop_unroll_nested(s)
+    implicit none
+    integer :: a, b
+    integer, intent(inout) :: s
+
+    !Loop A
+    !$loki loop-unroll
+    do a=1, 10
+        !Loop B
+        do b=1, 5
+            s = s + a + b + 1
+        end do
+    end do
+
+end subroutine test_transform_loop_unroll_nested
+ """
+    routine = Subroutine.from_source(fcode, frontend=frontend)
+    filepath = tmp_path / f'{routine.name}_{frontend}.f90'
+    function = jit_compile(routine, filepath=filepath, objname=routine.name)
+
+    # Test the reference solution
+    s = np.zeros(1)
+    function(s=s)
+    assert s == sum([a + b + 1 for (a, b) in itertools.product(range(1, 11), range(1, 6))])
+
+    # Apply transformation
+    assert len(FindNodes(Loop).visit(routine.body)) == 2
+    loop_unroll(routine)
+    # 10 outer x 5 inner iterations = 50 assignments, with no loops remaining
+    assert len(FindNodes(Loop).visit(routine.body)) == 0 and len(FindNodes(Assignment).visit(routine.body)) == 50
+
+    unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90'
+    unrolled_function = jit_compile(routine, filepath=unrolled_filepath, objname=routine.name)
+
+    # Test transformation
+    s = np.zeros(1)
+    unrolled_function(s=s)
+    assert s == sum([a + b + 1 for (a, b) in itertools.product(range(1, 11), range(1, 6))])
+
+    clean_test(filepath)
+    clean_test(unrolled_filepath)
+
+
+@pytest.mark.parametrize('frontend', available_frontends())
+def test_transform_loop_unroll_nested_restricted_depth(tmp_path, frontend):
+    """
+    Apply ``loop_unroll`` to a two-level loop nest whose outer loop is
+    annotated with ``!$loki loop-unroll depth(1)``.
+
+    The test expects ten loops and ten assignments afterwards, and the
+    compiled result to match the reference sum.
+    """
+    fcode = """
+subroutine test_transform_loop_unroll_nested_restricted_depth(s)
+    implicit none
+    integer :: a, b
+    integer, intent(inout) :: s
+
+    !Loop A
+    !$loki loop-unroll depth(1)
+    do a=1, 10
+        !Loop B
+        do b=1, 5
+            s = s + a + b + 1
+        end do
+    end do
+
+end subroutine test_transform_loop_unroll_nested_restricted_depth
+ """
+    routine = Subroutine.from_source(fcode, frontend=frontend)
+    filepath = tmp_path / f'{routine.name}_{frontend}.f90'
+    function = jit_compile(routine, filepath=filepath, objname=routine.name)
+
+    # Test the reference solution
+    s = np.zeros(1)
+    function(s=s)
+    assert s == sum([a + b + 1 for (a, b) in itertools.product(range(1, 11), range(1, 6))])
+
+    # Apply transformation
+    assert len(FindNodes(Loop).visit(routine.body)) == 2
+    loop_unroll(routine)
+    # Presumably one copy of loop B (with its single assignment) per unrolled iteration of loop A
+    assert len(FindNodes(Loop).visit(routine.body)) == 10 and len(FindNodes(Assignment).visit(routine.body)) == 10
+
+    unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90'
+    unrolled_function = jit_compile(routine, filepath=unrolled_filepath, objname=routine.name)
+
+    # Test transformation
+    s = np.zeros(1)
+    unrolled_function(s=s)
+    assert s == sum([a + b + 1 for (a, b) in itertools.product(range(1, 11), range(1, 6))])
+
+    clean_test(filepath)
+    clean_test(unrolled_filepath)
+
+
+@pytest.mark.parametrize('frontend', available_frontends())
+def test_transform_loop_unroll_nested_restricted_depth_unrollable(tmp_path, frontend):
+    """
+    Apply ``loop_unroll`` to a two-level loop nest whose outer loop is
+    annotated with ``depth(1)`` but has a variable upper bound (``i``),
+    while the inner loop has literal bounds.
+
+    The test expects one loop and six assignments afterwards, and the
+    compiled result to match the reference sum.
+    """
+    # NOTE(review): the Fortran subroutine below is named
+    # ``test_transform_loop_unroll_nested_restricted_depth`` and lacks the
+    # ``_unrollable`` suffix of this test function, so the ``routine.name``-based
+    # file and object names omit it too - looks like a copy-paste leftover, confirm.
+    fcode = """
+subroutine test_transform_loop_unroll_nested_restricted_depth(s)
+    implicit none
+    integer :: a, b, i
+    integer, intent(inout) :: s
+
+    i = 10
+
+    !Loop A
+    !$loki loop-unroll depth(1)
+    do a=1, i
+        !Loop B
+        do b=1, 5
+            s = s + a + b + 1
+        end do
+    end do
+
+end subroutine test_transform_loop_unroll_nested_restricted_depth
+ """
+    routine = Subroutine.from_source(fcode, frontend=frontend)
+    filepath = tmp_path / f'{routine.name}_{frontend}.f90'
+    function = jit_compile(routine, filepath=filepath, objname=routine.name)
+
+    # Test the reference solution
+    s = np.zeros(1)
+    function(s=s)
+    assert s == sum([a + b + 1 for (a, b) in itertools.product(range(1, 11), range(1, 6))])
+
+    # Apply transformation
+    assert len(FindNodes(Loop).visit(routine.body)) == 2
+    loop_unroll(routine)
+    # Presumably loop A is kept and loop B is unrolled inside it:
+    # ``i = 10`` plus five body assignments gives six - confirm against loop_unroll
+    assert len(FindNodes(Loop).visit(routine.body)) == 1 and len(FindNodes(Assignment).visit(routine.body)) == 6
+
+    unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90'
+    unrolled_function = jit_compile(routine, filepath=unrolled_filepath, objname=routine.name)
+
+    # Test transformation
+    s = np.zeros(1)
+    unrolled_function(s=s)
+    assert s == sum([a + b + 1 for (a, b) in itertools.product(range(1, 11), range(1, 6))])
+
+    clean_test(filepath)
+    clean_test(unrolled_filepath)
+
+
+@pytest.mark.parametrize('frontend', available_frontends())
+def test_transform_loop_unroll_nested_counters(tmp_path, frontend):
+    """
+    Apply ``loop_unroll`` to a loop nest in which the inner loop's upper
+    bound is the outer loop counter (``do b=1, a``).
+
+    The test expects both loops to disappear, leaving one assignment per
+    valid ``(a, b)`` pair, and the compiled result to match the reference sum.
+    """
+    fcode = """
+subroutine test_transform_loop_unroll_nested_counters(s)
+    implicit none
+
+    integer :: a, b
+    integer, intent(inout) :: s
+
+    !Loop A
+    !$loki loop-unroll
+    do a=1, 10
+        !Loop B
+        do b=1, a
+            s = s + a + b + 1
+        end do
+    end do
+
+end subroutine test_transform_loop_unroll_nested_counters
+ """
+    routine = Subroutine.from_source(fcode, frontend=frontend)
+    filepath = tmp_path / f'{routine.name}_{frontend}.f90'
+    function = jit_compile(routine, filepath=filepath, objname=routine.name)
+
+    # Test the reference solution
+    s = np.zeros(1)
+    function(s=s)
+    # One term per (a, b) pair with b <= a, mirroring the triangular iteration space of the nest
+    tuples = [a + b + 1 for (a, b) in itertools.product(range(1, 11), range(1, 11)) if b <= a]
+    assert s == sum(tuples)
+
+    # Apply transformation
+    assert len(FindNodes(Loop).visit(routine.body)) == 2
+    loop_unroll(routine)
+    # The number of assignments must equal the number of (a, b) pairs enumerated above
+    assert len(FindNodes(Loop).visit(routine.body)) == 0 and \
+           len(FindNodes(Assignment).visit(routine.body)) == len(tuples)
+
+    unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90'
+    unrolled_function = jit_compile(routine, filepath=unrolled_filepath, objname=routine.name)
+
+    # Test transformation
+    s = np.zeros(1)
+    unrolled_function(s=s)
+    assert s == sum([a + b + 1 for (a, b) in itertools.product(range(1, 11), range(1, 11)) if b <= a])
+
+    clean_test(filepath)
+    clean_test(unrolled_filepath)
+
+
+@pytest.mark.parametrize('frontend', available_frontends())
+def test_transform_loop_unroll_nested_neighbours(tmp_path, frontend):
+    """
+    Apply ``loop_unroll`` to an outer ``depth(1)`` loop containing two
+    sibling inner loops, of which only the first (loop B) carries its own
+    ``!$loki loop-unroll`` pragma.
+
+    The test expects ten loops and sixty assignments afterwards, and the
+    compiled result to match twice the single-nest reference sum.
+    """
+    fcode = """
+subroutine test_transform_loop_unroll_nested_neighbours(s)
+    implicit none
+
+    integer :: a, b, c
+    integer, intent(inout) :: s
+
+    !Loop A
+    !$loki loop-unroll depth(1)
+    do a=1, 10
+        !Loop B
+        !$loki loop-unroll
+        do b=1, 5
+            s = s + a + b + 1
+        end do
+        !Loop C
+        do c=1, 5
+            s = s + a + c + 1
+        end do
+    end do
+
+end subroutine test_transform_loop_unroll_nested_neighbours
+ """
+    routine = Subroutine.from_source(fcode, frontend=frontend)
+    filepath = tmp_path / f'{routine.name}_{frontend}.f90'
+    function = jit_compile(routine, filepath=filepath, objname=routine.name)
+
+    # Test the reference solution
+    s = np.zeros(1)
+    function(s=s)
+    # Loops B and C add the same terms over the same ranges, hence the factor of 2
+    assert s == 2 * sum([a + b + 1 for (a, b) in itertools.product(range(1, 11), range(1, 6))])
+    # Apply transformation
+    assert len(FindNodes(Loop).visit(routine.body)) == 3
+    loop_unroll(routine)
+    # Presumably 10 surviving copies of loop C (10 assignments) plus
+    # 10 x 5 assignments from the fully unrolled loop B = 60
+    assert len(FindNodes(Loop).visit(routine.body)) == 10 and len(FindNodes(Assignment).visit(routine.body)) == 60
+
+    unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90'
+    unrolled_function = jit_compile(routine, filepath=unrolled_filepath, objname=routine.name)
+
+    # Test transformation
+    s = np.zeros(1)
+    unrolled_function(s=s)
+    assert s == 2 * sum([a + b + 1 for (a, b) in itertools.product(range(1, 11), range(1, 6))])
+
+    clean_test(filepath)
+    clean_test(unrolled_filepath)