
Parallelize tests #651

Draft · wants to merge 29 commits into base: main

29 commits
aeb8987
initial commit for parallization; introced parallel_id to Runner argu…
dan-mm Jan 11, 2024
f5d6cc0
changed conftest.py test cleanup to be per actual test session, not w…
dan-mm Jan 12, 2024
0057a4e
CI jobs to use parallel tests
dan-mm Jan 12, 2024
6d2d1b1
fix for test_uri_local_dir
dan-mm Jan 12, 2024
5c60e38
add parallel_id to networks; revert uri test fix; try different metho…
dan-mm Jan 12, 2024
30137e8
fix network check test
dan-mm Jan 15, 2024
66b2987
Merge branch 'main' into parallelize-tests
dan-mm Jan 16, 2024
ec5356f
- conftest cleanup now applies only after all workers finish
dan-mm Jan 16, 2024
e41731b
update pytest command for workflow
dan-mm Jan 18, 2024
d93d5d3
update workflow test command ;split test action into regular gmt test…
dan-mm Jan 18, 2024
148517c
changed to | tee -a so we can see the output
dan-mm Jan 18, 2024
d5a2fb8
WIP parallelize rewrite to not edit runner.py, but use parallel_id on…
dan-mm Jan 29, 2024
0cb02f4
remove parallel_id from runner; move custom loader and supporting fun…
dan-mm Feb 8, 2024
fa3e708
- updated pytest runstring; setup tests to use new test_functions wit…
dan-mm Feb 8, 2024
c160fac
smoke tests now use temp directory instead of stress-application dire…
dan-mm Feb 9, 2024
d5564dc
parallelize the tmp docker image
dan-mm Feb 9, 2024
3f4d7df
serialize failing test
dan-mm Feb 9, 2024
c9f28d5
removed leftover unneeded fstring
dan-mm Feb 9, 2024
a67cdf3
fix typo in test_volume_loading_subdirectories_root failing test (san…
dan-mm Feb 9, 2024
365bc6e
don't run examples directory tests
dan-mm Feb 9, 2024
693fe74
debug statement
dan-mm Feb 9, 2024
309a330
- found tests that were being run without no_build flag when they sho…
dan-mm Feb 13, 2024
eb537e7
corrected workflow input check syntax
dan-mm Feb 13, 2024
d860a33
capitalization
dan-mm Feb 13, 2024
5c8f3f0
github workflow inputs arent real bools
dan-mm Feb 13, 2024
0d8f26a
fix test_jobs (improper cleanup after insert job test); cleanup test …
dan-mm Feb 16, 2024
5cbc0b0
updated tests Readme
dan-mm Feb 16, 2024
d5087ed
removed unneeded dummy cpu util provider; renamed RUN_NAME -> name fo…
dan-mm Feb 16, 2024
c71ca90
Merge branch 'main' into parallelize-tests
dan-mm Feb 16, 2024
22 changes: 14 additions & 8 deletions .github/actions/gmt-pytest/action.yml
Original file line number Diff line number Diff line change
@@ -9,10 +9,9 @@ inputs:
description: 'The root directory of the gmt repository'
required: false
default: '.'
tests-command:
description: 'The command to run the tests'
required: false
default: 'pytest'
run-examples-directory-tests:
description: 'Run tests for examples directory instead of regular gmt tests'
default: false
github-token:
description: 'pass in your secrets.GITHUB_TOKEN'
required: true
@@ -86,15 +85,22 @@ runs:
run: sleep 10s
shell: bash

# - name: Setup upterm session
# uses: lhotari/action-upterm@v1

- name: Run Tests
if: inputs.run-examples-directory-tests == 'false'
shell: bash
working-directory: ${{ inputs.gmt-directory }}/tests
run: |
source ../venv/bin/activate
python3 -m pytest -n auto -m "not serial" -rA | tee -a /tmp/test-results.txt
python3 -m pytest -m "serial" -rA | tee -a /tmp/test-results.txt

- name: Run Tests (examples directory)
if: inputs.run-examples-directory-tests == 'true'
shell: bash
working-directory: ${{ inputs.gmt-directory }}/tests
run: |
source ../venv/bin/activate
python3 -m ${{ inputs.tests-command }} -rA | tee /tmp/test-results.txt
python3 -m pytest ../../examples-directory/test/smoke_test.py -k "test_all_directories" -rA | tee -a /tmp/test-results.txt

- name: Display Results
shell: bash
1 change: 1 addition & 0 deletions .github/workflows/tests-bare-metal-main.yml
@@ -38,6 +38,7 @@ jobs:
with:
metrics-to-turn-off: 'Machine Sensors Debug MacOS'
github-token: ${{ secrets.GITHUB_TOKEN }}
run-examples-directory-tests: false

- name: Eco CI Energy Estimation - Get Measurement
uses: green-coding-solutions/eco-ci-energy-estimation@v2
1 change: 1 addition & 0 deletions .github/workflows/tests-eco-ci-energy-estimation.yaml
@@ -32,6 +32,7 @@ jobs:
with:
metrics-to-turn-off: '--categories RAPL Machine Sensors Debug CGroupV2 MacOS GPU --providers PsuEnergyAcSdiaMachineProvider'
github-token: ${{ secrets.GITHUB_TOKEN }}
run-examples-directory-tests: false

- name: Eco CI Energy Estimation - Get Measurement
uses: green-coding-solutions/eco-ci-energy-estimation@testing
1 change: 1 addition & 0 deletions .github/workflows/tests-vm-main.yml
@@ -36,6 +36,7 @@ jobs:
with:
metrics-to-turn-off: '--categories RAPL Machine Sensors Debug CGroupV2 MacOS GPU --providers PsuEnergyAcSdiaMachineProvider'
github-token: ${{ secrets.GITHUB_TOKEN }}
run-examples-directory-tests: false

- name: Eco CI Energy Estimation - Get Measurement
uses: green-coding-solutions/eco-ci-energy-estimation@v2
1 change: 1 addition & 0 deletions .github/workflows/tests-vm-pr.yml
@@ -27,6 +27,7 @@ jobs:
with:
metrics-to-turn-off: '--categories RAPL Machine Sensors Debug CGroupV2 MacOS GPU --providers PsuEnergyAcSdiaMachineProvider'
github-token: ${{ secrets.GITHUB_TOKEN }}
run-examples-directory-tests: false

- name: Eco CI Energy Estimation - Get Measurement
uses: green-coding-solutions/eco-ci-energy-estimation@v2
40 changes: 40 additions & 0 deletions lib/utils.py
@@ -2,6 +2,8 @@
import string
import subprocess
import psycopg
import os
from pathlib import Path

from lib.db import DB

@@ -61,3 +63,41 @@ def get_architecture():
if output == 'darwin':
return 'macos'
return output

# This function takes a path and a file and joins them while making sure that no one is trying to escape the
# path with `..`, symbolic links or similar.
# We always return the same error message including the path and file parameter, never `filename` as
# otherwise we might disclose if certain files exist or not.
def join_paths(path, path2, mode='file'):
filename = os.path.realpath(os.path.join(path, path2))

# If the original path is a symlink we need to resolve it.
path = os.path.realpath(path)

# This is a special case in which the file is '.'
if filename == path.rstrip('/'):
return filename

if not filename.startswith(path):
raise ValueError(f"{path2} must not be in folder above {path}")

# To double check we also check if it is in the files allow list

if mode == 'file':
folder_content = [str(item) for item in Path(path).rglob("*") if item.is_file()]
elif mode == 'directory':
folder_content = [str(item) for item in Path(path).rglob("*") if item.is_dir()]
else:
raise RuntimeError(f"Unknown mode supplied for join_paths: {mode}")

if filename not in folder_content:
raise ValueError(f"{mode.capitalize()} '{path2}' not in '{path}'")

# Another way to implement this. This is checking the third time but we want to be extra secure 👾
if Path(path).resolve(strict=True) not in Path(path, path2).resolve(strict=True).parents:
raise ValueError(f"{mode.capitalize()} '{path2}' not in folder '{path}'")

if os.path.exists(filename):
return filename

raise FileNotFoundError(f"{path2} in {path} not found")
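The traversal check at the heart of `join_paths` can be sketched in isolation. The following is a minimal, hypothetical standalone version (not the function from the diff): it resolves symlinks and `..` segments with `realpath` on both sides, then rejects any candidate that lands outside the resolved base. Note the `base + os.sep` suffix in the prefix check, which avoids treating `/base-other` as inside `/base`:

```python
import os
import tempfile

def safe_join(base, untrusted):
    """Minimal sketch of a traversal-safe join: resolve symlinks and
    '..' segments, then reject any result that escapes the base dir."""
    base = os.path.realpath(base)
    candidate = os.path.realpath(os.path.join(base, untrusted))
    # realpath collapses '..' and symlinks, so a prefix check suffices;
    # appending os.sep prevents '/base-other' matching '/base'
    if candidate != base and not candidate.startswith(base + os.sep):
        raise ValueError(f"{untrusted} escapes {base}")
    return candidate

base = tempfile.mkdtemp()
safe_join(base, "usage_scenario.yml")   # resolves inside base, accepted
try:
    safe_join(base, "../etc/passwd")    # collapses to a path outside base
except ValueError:
    print("traversal rejected")
```

The actual `join_paths` adds two further layers (an allow-list scan of the folder contents and a `Path.parents` check), which this sketch omits for brevity.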
44 changes: 44 additions & 0 deletions lib/yml_helpers.py
@@ -0,0 +1,44 @@
#pylint: disable=too-many-ancestors

import yaml
import os
from lib import utils

class Loader(yaml.SafeLoader):
def __init__(self, stream):
# We need to find our own root as the Loader is instantiated in PyYaml
self._root = os.path.split(stream.name)[0]
super().__init__(stream)

def include(self, node):
# We allow two types of includes
# !include <filename> => ScalarNode
# and
# !include <filename> <selector> => SequenceNode
if isinstance(node, yaml.nodes.ScalarNode):
nodes = [self.construct_scalar(node)]
elif isinstance(node, yaml.nodes.SequenceNode):
nodes = self.construct_sequence(node)
else:
raise ValueError("We don't support Mapping Nodes to date")

filename = utils.join_paths(self._root, nodes[0], 'file')

with open(filename, 'r', encoding='utf-8') as f:
# We want to enable a deep search for keys
def recursive_lookup(k, d):
if k in d:
return d[k]
for v in d.values():
if isinstance(v, dict):
return recursive_lookup(k, v)
return None

# We can use load here as the Loader extends SafeLoader
if len(nodes) == 1:
# There is no selector specified
return yaml.load(f, Loader)

return recursive_lookup(nodes[1], yaml.load(f, Loader))

Loader.add_constructor('!include', Loader.include)
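The selector form of `!include <filename> <selector>` relies on a depth-first key search over the loaded YAML document. The lookup can be exercised on a plain dict; this standalone sketch is a slightly hardened variant that keeps searching sibling branches when the first nested dict does not contain the key:

```python
def recursive_lookup(k, d):
    # Return the value for key k, searching nested dicts depth-first.
    if k in d:
        return d[k]
    for v in d.values():
        if isinstance(v, dict):
            result = recursive_lookup(k, v)
            if result is not None:
                return result
    return None

doc = {"services": {"web": {"image": "nginx", "ports": [80]}}}
print(recursive_lookup("image", doc))  # nginx
```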
1 change: 1 addition & 0 deletions requirements-dev.txt
@@ -6,6 +6,7 @@ pylint==3.0.3
fastapi==0.109.2
starlette>=0.32
anybadge==1.14.0
pytest-xdist==3.5.0

# just to clear the pylint errors for the files in /api
scipy==1.12.0
89 changes: 4 additions & 85 deletions runner.py
@@ -38,52 +38,13 @@
from lib.global_config import GlobalConfig
from lib.notes import Notes
from lib import system_checks
from lib.yml_helpers import Loader

from tools.machine import Machine

def arrows(text):
return f"\n\n>>>> {text} <<<<\n\n"

# This function takes a path and a file and joins them while making sure that no one is trying to escape the
# path with `..`, symbolic links or similar.
# We always return the same error message including the path and file parameter, never `filename` as
# otherwise we might disclose if certain files exist or not.
def join_paths(path, path2, mode='file'):
filename = os.path.realpath(os.path.join(path, path2))

# If the original path is a symlink we need to resolve it.
path = os.path.realpath(path)

# This is a special case in which the file is '.'
if filename == path.rstrip('/'):
return filename

if not filename.startswith(path):
raise ValueError(f"{path2} must not be in folder above {path}")

# To double check we also check if it is in the files allow list

if mode == 'file':
folder_content = [str(item) for item in Path(path).rglob("*") if item.is_file()]
elif mode == 'directory':
folder_content = [str(item) for item in Path(path).rglob("*") if item.is_dir()]
else:
raise RuntimeError(f"Unknown mode supplied for join_paths: {mode}")

if filename not in folder_content:
raise ValueError(f"{mode.capitalize()} '{path2}' not in '{path}'")

# Another way to implement this. This is checking the third time but we want to be extra secure 👾
if Path(path).resolve(strict=True) not in Path(path, path2).resolve(strict=True).parents:
raise ValueError(f"{mode.capitalize()} '{path2}' not in folder '{path}'")

if os.path.exists(filename):
return filename

raise FileNotFoundError(f"{path2} in {path} not found")



class Runner:
def __init__(self,
name, uri, uri_type, filename='usage_scenario.yml', branch=None,
@@ -241,47 +202,7 @@ def checkout_repository(self):
# Inspiration from https://github.com/tanbro/pyyaml-include which we can't use as it doesn't
# do security checking and has no option to select when imported
def load_yml_file(self):
#pylint: disable=too-many-ancestors
class Loader(yaml.SafeLoader):
def __init__(self, stream):
# We need to find our own root as the Loader is instantiated in PyYaml
self._root = os.path.split(stream.name)[0]
super().__init__(stream)

def include(self, node):
# We allow two types of includes
# !include <filename> => ScalarNode
# and
# !include <filename> <selector> => SequenceNode
if isinstance(node, yaml.nodes.ScalarNode):
nodes = [self.construct_scalar(node)]
elif isinstance(node, yaml.nodes.SequenceNode):
nodes = self.construct_sequence(node)
else:
raise ValueError("We don't support Mapping Nodes to date")

filename = join_paths(self._root, nodes[0], 'file')

with open(filename, 'r', encoding='utf-8') as f:
# We want to enable a deep search for keys
def recursive_lookup(k, d):
if k in d:
return d[k]
for v in d.values():
if isinstance(v, dict):
return recursive_lookup(k, v)
return None

# We can use load here as the Loader extends SafeLoader
if len(nodes) == 1:
# There is no selector specified
return yaml.load(f, Loader)

return recursive_lookup(nodes[1], yaml.load(f, Loader))

Loader.add_constructor('!include', Loader.include)

usage_scenario_file = join_paths(self._folder, self._original_filename, 'file')
usage_scenario_file = utils.join_paths(self._folder, self._original_filename, 'file')

# We set the working folder now to the actual location of the usage_scenario
if '/' in self._original_filename:
@@ -563,8 +484,8 @@ def build_docker_images(self):
self.__notes_helper.add_note({'note': f"Building {service['image']}", 'detail_name': '[NOTES]', 'timestamp': int(time.time_ns() / 1_000)})

# Make sure the context docker file exists and is not trying to escape some root. We don't need the returns
context_path = join_paths(self._folder, context, 'directory')
join_paths(context_path, dockerfile, 'file')
context_path = utils.join_paths(self._folder, context, 'directory')
utils.join_paths(context_path, dockerfile, 'file')

docker_build_command = ['docker', 'run', '--rm',
'-v', f"{self._folder}:/workspace:ro", # this is the folder where the usage_scenario is!
@@ -671,7 +592,6 @@ def setup_services(self):
# If so, change the order of the services accordingly.
services_ordered = self.order_services(services)
for service_name, service in services_ordered.items():

if 'container_name' in service:
container_name = service['container_name']
else:
@@ -817,7 +737,6 @@ def setup_services(self):
docker_run_string.append('--net')
docker_run_string.append(self.__networks[0])


if 'pause-after-phase' in service:
self.__services_to_pause_phase[service['pause-after-phase']] = self.__services_to_pause_phase.get(service['pause-after-phase'], []) + [container_name]

25 changes: 19 additions & 6 deletions tests/README.MD
@@ -23,12 +23,9 @@ run:

`python3 setup-test-env.py`

from the test directory. This will create a copy of the `config.yml` and docker `compose.yml` files that will be used in
from the test directory. This will create a copy of the docker `compose.yml` file that will be used in
the test containers. Please make sure that you have compiled all the metric providers and source code in lib. You can do
this automatically by using the `install.sh` command.

You will need to re-run this setup script if new metric providers are added or the config.yml is otherwise changed in a
significant way.
this automatically by using the `install_linux.sh` or `install_mac.sh` command.

## Running

@@ -42,9 +39,25 @@ There are a few scripts to make this easy.
`./run-tests.sh` will do everything - start the containers, run pytest, and then stop the containers.

The recommended workflow is to start the containers with the `./start-test-containers.sh` script, then in another shell window run the pytest suite using:

`pytest -n auto -m "not serial" --dist loadgroup && pytest -m "serial"`

and then stop the containers when your test run has finished.

Running a subset of tests with pytest is explained in the pytest documentation:
https://docs.pytest.org/en/7.2.x/how-to/usage.html

You can also do everything in one command using the `./run-tests.sh` script.


## Parallelization
We now support running the test suite in parallel using pytest-xdist. When writing tests it is important to note that not all tests can be parallelized, and those that cannot must be marked accordingly. For parallelization we use the functions in test_functions.py to set up the environment with unique container names, and to set up the runner with `setup_runner` so that its tmp folders are unique as well. If you bypass `setup_runner`, you still need to use the `parallelize_runner_folders` function to make sure the runner's internal directories are correct.

Any test that cannot be parallelized should be marked with:
`@pytest.mark.serial`

This includes any test that runs the runner through a subprocess, or otherwise creates a Runner instance without using either test_functions.setup_runner or test_functions.parallelize_runner_folders.

Tests that do not set `skip_system_checks` can be parallelized, but only if they are marked with:
`@pytest.mark.xdist_group(name="systems_checks")`

This makes all tests in that group run sequentially on the same worker (but in parallel with the rest of the suite). This is needed because a system check verifies during setup that the metric providers are not already running.
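A test file written for this scheme might look as follows. This is a hypothetical sketch, not code from the PR; the test names and bodies are placeholders:

```python
import pytest

@pytest.mark.serial
def test_runner_via_subprocess():
    # Runs the runner as a subprocess, so it cannot share tmp folders
    # with parallel workers and must run after the parallel pass.
    ...

@pytest.mark.xdist_group(name="systems_checks")
def test_with_system_checks():
    # Does not skip system checks; grouped so all such tests run on one
    # worker and never see each other's metric providers as already running.
    ...
```

With `pytest -n auto -m "not serial" --dist loadgroup`, the first test is excluded by the marker filter, while all tests sharing the `systems_checks` group land on the same worker; the follow-up `pytest -m "serial"` run then executes the serial tests one at a time.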