From f67729b584020eb4459d54eb35f2efb00ed5076e Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 19 Sep 2024 16:12:18 +0200
Subject: [PATCH 001/167] enable compile commands generation

---
 CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c94e0144..8e91d9b2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,8 @@
 cmake_minimum_required(VERSION 3.22)
 project(FIRESTARTER)
 
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
 include(cmake/GitSubmoduleUpdate.cmake)
 
 # set FIRESTARTER version

From f24a0b2031546ff58611c0312d74161303398088 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 19 Sep 2024 18:27:54 +0200
Subject: [PATCH 002/167] add .clang-format file based on LLVM style

---
 .clang-format | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 .clang-format

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 00000000..656a3655
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,7 @@
+---
+BasedOnStyle: LLVM
+Language: Cpp
+BreakConstructorInitializersBeforeComma: 'true'
+AllowShortFunctionsOnASingleLine: All
+PointerAlignment: Left
+ColumnLimit: 120
\ No newline at end of file

From 9732bdb59717274f666e9c1497289d1f9a0d7858 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 19 Sep 2024 18:28:44 +0200
Subject: [PATCH 003/167] clang-format

---
 include/firestarter/Cuda/Cuda.hpp             |   8 +-
 .../firestarter/DumpRegisterWorkerData.hpp    |  10 +-
 .../firestarter/Environment/CPUTopology.hpp   |  18 +-
 .../firestarter/Environment/Environment.hpp   |  21 +-
 .../Environment/Payload/Payload.hpp           |  56 +-
 .../Environment/Platform/PlatformConfig.hpp   |  42 +-
 .../Environment/Platform/RuntimeConfig.hpp    |  68 ++-
 .../Environment/X86/Payload/AVX512Payload.hpp |  24 +-
 .../Environment/X86/Payload/AVXPayload.hpp    |  24 +-
 .../Environment/X86/Payload/FMA4Payload.hpp   |  29 +-
 .../Environment/X86/Payload/FMAPayload.hpp    |  30 +-
 .../Environment/X86/Payload/SSE2Payload.hpp   |  24 +-
 .../Environment/X86/Payload/X86Payload.hpp    |  47 +-
 .../Environment/X86/Payload/ZENFMAPayload.hpp |  22 +-
 .../X86/Platform/BulldozerConfig.hpp          |  13 +-
 .../X86/Platform/HaswellConfig.hpp            |  13 +-
 .../X86/Platform/HaswellEPConfig.hpp          |  20 +-
 .../X86/Platform/KnightsLandingConfig.hpp     |  16 +-
 .../Environment/X86/Platform/NaplesConfig.hpp |  20 +-
 .../X86/Platform/NehalemConfig.hpp            |  16 +-
 .../X86/Platform/NehalemEPConfig.hpp          |  16 +-
 .../Environment/X86/Platform/RomeConfig.hpp   |  21 +-
 .../X86/Platform/SandyBridgeConfig.hpp        |  20 +-
 .../X86/Platform/SandyBridgeEPConfig.hpp      |  20 +-
 .../X86/Platform/SkylakeConfig.hpp            |  20 +-
 .../X86/Platform/SkylakeSPConfig.hpp          |  13 +-
 .../X86/Platform/X86PlatformConfig.hpp        |  27 +-
 .../Environment/X86/X86CPUTopology.hpp        |  25 +-
 .../Environment/X86/X86Environment.hpp        |  57 +-
 include/firestarter/ErrorDetectionStruct.hpp  |   8 +-
 include/firestarter/Firestarter.hpp           |  60 +--
 include/firestarter/Json/Summary.hpp          |   9 +-
 include/firestarter/LoadWorkerData.hpp        |  62 +--
 .../Logging/FirstWorkerThreadFilter.hpp       |  10 +-
 include/firestarter/Logging/Log.hpp           |  37 +-
 .../Measurement/MeasurementWorker.hpp         |  41 +-
 .../firestarter/Measurement/MetricInterface.h |  12 +-
 include/firestarter/Measurement/Summary.hpp   |  10 +-
 include/firestarter/Measurement/TimeValue.hpp |   7 +-
 include/firestarter/OneAPI/OneAPI.hpp         |   8 +-
 include/firestarter/Optimizer/Algorithm.hpp   |   5 +-
 .../firestarter/Optimizer/Algorithm/NSGA2.hpp |   6 +-
 include/firestarter/Optimizer/History.hpp     | 107 ++--
 .../firestarter/Optimizer/OptimizerWorker.hpp |  13 +-
 include/firestarter/Optimizer/Population.hpp  |  30 +-
 include/firestarter/Optimizer/Problem.hpp     |  13 +-
 .../Optimizer/Problem/CLIArgumentProblem.hpp  |  60 +--
 .../Optimizer/Util/MultiObjective.hpp         |  40 +-
 src/firestarter/Cuda/Cuda.cpp                 | 500 ++++++++----------
 src/firestarter/DumpRegisterWorker.cpp        |  37 +-
 src/firestarter/Environment/CPUTopology.cpp   |  73 +--
 src/firestarter/Environment/Environment.cpp   |  69 +--
 .../Environment/Payload/Payload.cpp           |  46 +-
 .../Environment/X86/Payload/AVX512Payload.cpp | 108 ++--
 .../Environment/X86/Payload/AVXPayload.cpp    | 133 ++---
 .../Environment/X86/Payload/FMA4Payload.cpp   | 185 +++----
 .../Environment/X86/Payload/FMAPayload.cpp    | 118 ++---
 .../Environment/X86/Payload/SSE2Payload.cpp   | 128 ++---
 .../Environment/X86/Payload/X86Payload.cpp    |  62 +--
 .../Environment/X86/Payload/ZENFMAPayload.cpp | 120 ++---
 .../Environment/X86/X86CPUTopology.cpp        |  21 +-
 .../Environment/X86/X86Environment.cpp        | 110 ++--
 src/firestarter/Firestarter.cpp               | 198 +++----
 src/firestarter/LoadWorker.cpp                | 150 +++---
 src/firestarter/Main.cpp                      | 176 +++---
 .../Measurement/MeasurementWorker.cpp         | 161 +++---
 .../Measurement/Metric/IPCEstimate.cpp        |  18 +-
 src/firestarter/Measurement/Metric/Perf.cpp   |  71 ++-
 src/firestarter/Measurement/Metric/RAPL.cpp   |  43 +-
 src/firestarter/Measurement/Summary.cpp       |  14 +-
 src/firestarter/OneAPI/OneAPI.cpp             | 190 +++----
 src/firestarter/Optimizer/Algorithm/NSGA2.cpp |  55 +-
 src/firestarter/Optimizer/OptimizerWorker.cpp |  26 +-
 src/firestarter/Optimizer/Population.cpp      |  24 +-
 .../Optimizer/Util/MultiObjective.cpp         | 112 ++--
 src/firestarter/WatchdogWorker.cpp            |  28 +-
 76 files changed, 1657 insertions(+), 2597 deletions(-)

diff --git a/include/firestarter/Cuda/Cuda.hpp b/include/firestarter/Cuda/Cuda.hpp
index a2f281d9..d7911eb4 100644
--- a/include/firestarter/Cuda/Cuda.hpp
+++ b/include/firestarter/Cuda/Cuda.hpp
@@ -34,13 +34,11 @@ class Cuda {
   std::condition_variable _waitForInitCv;
   std::mutex _waitForInitCvMutex;
 
-  static void initGpus(std::condition_variable &cv,
-                       volatile unsigned long long *loadVar, bool useFloat,
-                       bool useDouble, unsigned matrixSize, int gpus);
+  static void initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble,
+                       unsigned matrixSize, int gpus);
 
 public:
-  Cuda(volatile unsigned long long *loadVar, bool useFloat, bool useDouble,
-       unsigned matrixSize, int gpus);
+  Cuda(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus);
 
   ~Cuda() {
     if (_initThread.joinable()) {
diff --git a/include/firestarter/DumpRegisterWorkerData.hpp b/include/firestarter/DumpRegisterWorkerData.hpp
index f7b721d4..14ccc95f 100644
--- a/include/firestarter/DumpRegisterWorkerData.hpp
+++ b/include/firestarter/DumpRegisterWorkerData.hpp
@@ -21,22 +21,20 @@
 
 #pragma once
 
+#include <chrono>
 #include <firestarter/DumpRegisterStruct.hpp>
 #include <firestarter/LoadWorkerData.hpp>
 
-#include <chrono>
-
 #ifdef FIRESTARTER_DEBUG_FEATURES
 
 namespace firestarter {
 
 class DumpRegisterWorkerData {
 public:
-  DumpRegisterWorkerData(std::shared_ptr<LoadWorkerData> loadWorkerData,
-                         std::chrono::seconds dumpTimeDelta,
+  DumpRegisterWorkerData(std::shared_ptr<LoadWorkerData> loadWorkerData, std::chrono::seconds dumpTimeDelta,
                          std::string dumpFilePath)
-      : loadWorkerData(loadWorkerData), dumpTimeDelta(dumpTimeDelta) {
-
+      : loadWorkerData(loadWorkerData)
+      , dumpTimeDelta(dumpTimeDelta) {
     if (dumpFilePath.empty()) {
       char cwd[PATH_MAX];
       if (getcwd(cwd, sizeof(cwd)) != NULL) {
diff --git a/include/firestarter/Environment/CPUTopology.hpp b/include/firestarter/Environment/CPUTopology.hpp
index dcb61e96..af749d78 100644
--- a/include/firestarter/Environment/CPUTopology.hpp
+++ b/include/firestarter/Environment/CPUTopology.hpp
@@ -37,18 +37,16 @@ class CPUTopology {
   CPUTopology(std::string architecture);
   virtual ~CPUTopology();
 
-  unsigned numThreads() const {
-    return _numThreadsPerCore * _numCoresTotal;
-  }
+  unsigned numThreads() const { return _numThreadsPerCore * _numCoresTotal; }
   unsigned maxNumThreads() const;
   unsigned numThreadsPerCore() const { return _numThreadsPerCore; }
   unsigned numCoresTotal() const { return _numCoresTotal; }
   unsigned numPackages() const { return _numPackages; }
 
-  std::string const &architecture() const { return _architecture; }
-  virtual std::string const &vendor() const { return _vendor; }
-  virtual std::string const &processorName() const { return _processorName; }
-  virtual std::string const &model() const = 0;
+  std::string const& architecture() const { return _architecture; }
+  virtual std::string const& vendor() const { return _vendor; }
+  virtual std::string const& processorName() const { return _processorName; }
+  virtual std::string const& model() const = 0;
 
   // get the size of the L1i-cache in bytes
   unsigned instructionCacheSize() const { return _instructionCacheSize; }
@@ -56,7 +54,7 @@ class CPUTopology {
   // return the cpu clockrate in Hz
   virtual unsigned long long clockrate() const { return _clockrate; }
   // return the cpu features
-  virtual std::list<std::string> const &features() const = 0;
+  virtual std::list<std::string> const& features() const = 0;
 
   // get a timestamp
   virtual unsigned long long timestamp() const = 0;
@@ -66,10 +64,10 @@ class CPUTopology {
 
 protected:
   std::string scalingGovernor() const;
-  std::ostream &print(std::ostream &stream) const;
+  std::ostream& print(std::ostream& stream) const;
 
 private:
-  static std::stringstream getFileAsStream(std::string const &filePath);
+  static std::stringstream getFileAsStream(std::string const& filePath);
 
   unsigned _numThreadsPerCore;
   unsigned _numCoresTotal;
diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index c76dc073..24722dc3 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -21,18 +21,18 @@
 
 #pragma once
 
+#include <cassert>
 #include <firestarter/Environment/CPUTopology.hpp>
 #include <firestarter/Environment/Platform/PlatformConfig.hpp>
 #include <firestarter/Environment/Platform/RuntimeConfig.hpp>
-
-#include <cassert>
 #include <vector>
 
 namespace firestarter::environment {
 
 class Environment {
 public:
-  Environment(CPUTopology *topology) : _topology(topology) {}
+  Environment(CPUTopology* topology)
+      : _topology(topology) {}
   ~Environment() {
     delete this->_topology;
     if (_selectedConfig != nullptr) {
@@ -45,15 +45,14 @@ class Environment {
   void printThreadSummary();
 
   virtual void evaluateFunctions() = 0;
-  virtual int selectFunction(unsigned functionId,
-                             bool allowUnavailablePayload) = 0;
+  virtual int selectFunction(unsigned functionId, bool allowUnavailablePayload) = 0;
   virtual int selectInstructionGroups(std::string groups) = 0;
   virtual void printAvailableInstructionGroups() = 0;
   virtual void setLineCount(unsigned lineCount) = 0;
   virtual void printSelectedCodePathSummary() = 0;
   virtual void printFunctionSummary() = 0;
 
-  platform::RuntimeConfig &selectedConfig() const {
+  platform::RuntimeConfig& selectedConfig() const {
 #if defined(__clang__)
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wunused-value"
@@ -68,18 +67,16 @@ class Environment {
     return *_selectedConfig;
   }
 
-  unsigned long long requestedNumThreads() const {
-    return _requestedNumThreads;
-  }
+  unsigned long long requestedNumThreads() const { return _requestedNumThreads; }
 
-  CPUTopology const &topology() const {
+  CPUTopology const& topology() const {
     assert(_topology != nullptr);
     return *_topology;
   }
 
 protected:
-  platform::RuntimeConfig *_selectedConfig = nullptr;
-  CPUTopology *_topology = nullptr;
+  platform::RuntimeConfig* _selectedConfig = nullptr;
+  CPUTopology* _topology = nullptr;
 
 private:
   unsigned long long _requestedNumThreads;
diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index 40246ac0..f16d6879 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -31,8 +31,7 @@ namespace firestarter::environment::payload {
 class Payload {
 private:
   std::string _name;
-  unsigned getSequenceStartCount(const std::vector<std::string> &sequence,
-                                 const std::string start);
+  unsigned getSequenceStartCount(const std::vector<std::string>& sequence, const std::string start);
 
 protected:
   unsigned _flops;
@@ -44,44 +43,39 @@ class Payload {
   // number of used simd registers
   unsigned _registerCount;
 
-  std::vector<std::string> generateSequence(
-      const std::vector<std::pair<std::string, unsigned>> &proportion);
-  unsigned getL2SequenceCount(const std::vector<std::string> &sequence) {
+  std::vector<std::string> generateSequence(const std::vector<std::pair<std::string, unsigned>>& proportion);
+  unsigned getL2SequenceCount(const std::vector<std::string>& sequence) {
     return getSequenceStartCount(sequence, "L2");
   };
-  unsigned getL3SequenceCount(const std::vector<std::string> &sequence) {
+  unsigned getL3SequenceCount(const std::vector<std::string>& sequence) {
     return getSequenceStartCount(sequence, "L3");
   };
-  unsigned getRAMSequenceCount(const std::vector<std::string> &sequence) {
+  unsigned getRAMSequenceCount(const std::vector<std::string>& sequence) {
     return getSequenceStartCount(sequence, "RAM");
   };
 
-  unsigned
-  getNumberOfSequenceRepetitions(const std::vector<std::string> &sequence,
-                                 const unsigned numberOfLines) {
+  unsigned getNumberOfSequenceRepetitions(const std::vector<std::string>& sequence, const unsigned numberOfLines) {
     if (sequence.size() == 0) {
       return 0;
     }
     return numberOfLines / sequence.size();
   };
 
-  unsigned getL2LoopCount(const std::vector<std::string> &sequence,
-                          const unsigned numberOfLines, const unsigned size,
+  unsigned getL2LoopCount(const std::vector<std::string>& sequence, const unsigned numberOfLines, const unsigned size,
                           const unsigned threads);
-  unsigned getL3LoopCount(const std::vector<std::string> &sequence,
-                          const unsigned numberOfLines, const unsigned size,
+  unsigned getL3LoopCount(const std::vector<std::string>& sequence, const unsigned numberOfLines, const unsigned size,
                           const unsigned threads);
-  unsigned getRAMLoopCount(const std::vector<std::string> &sequence,
-                           const unsigned numberOfLines, const unsigned size,
+  unsigned getRAMLoopCount(const std::vector<std::string>& sequence, const unsigned numberOfLines, const unsigned size,
                            const unsigned threads);
 
 public:
   Payload(std::string name, unsigned registerSize, unsigned registerCount)
-      : _name(name), _registerSize(registerSize),
-        _registerCount(registerCount) {}
+      : _name(name)
+      , _registerSize(registerSize)
+      , _registerCount(registerCount) {}
   virtual ~Payload() {}
 
-  const std::string &name() const { return _name; }
+  const std::string& name() const { return _name; }
   unsigned flops() const { return _flops; }
   unsigned bytes() const { return _bytes; }
   unsigned instructions() const { return _instructions; }
@@ -90,24 +84,18 @@ class Payload {
 
   virtual bool isAvailable() const = 0;
 
-  virtual void lowLoadFunction(volatile unsigned long long *addrHigh,
-                               unsigned long long period) = 0;
+  virtual void lowLoadFunction(volatile unsigned long long* addrHigh, unsigned long long period) = 0;
 
-  virtual int compilePayload(
-      std::vector<std::pair<std::string, unsigned>> const &proportion,
-      unsigned instructionCacheSize,
-      std::list<unsigned> const &dataCacheBufferSize, unsigned ramBufferSize,
-      unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-      bool errorDetection) = 0;
+  virtual int compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion,
+                             unsigned instructionCacheSize, std::list<unsigned> const& dataCacheBufferSize,
+                             unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters,
+                             bool errorDetection) = 0;
   virtual std::list<std::string> getAvailableInstructions() const = 0;
-  virtual void init(unsigned long long *memoryAddr,
-                    unsigned long long bufferSize) = 0;
-  virtual unsigned long long
-  highLoadFunction(unsigned long long *addrMem,
-                   volatile unsigned long long *addrHigh,
-                   unsigned long long iterations) = 0;
+  virtual void init(unsigned long long* memoryAddr, unsigned long long bufferSize) = 0;
+  virtual unsigned long long highLoadFunction(unsigned long long* addrMem, volatile unsigned long long* addrHigh,
+                                              unsigned long long iterations) = 0;
 
-  virtual Payload *clone() const = 0;
+  virtual Payload* clone() const = 0;
 };
 
 } // namespace firestarter::environment::payload
diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp
index cbde3c68..b396d134 100644
--- a/include/firestarter/Environment/Platform/PlatformConfig.hpp
+++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp
@@ -21,10 +21,9 @@
 
 #pragma once
 
+#include <algorithm>
 #include <firestarter/Environment/Payload/Payload.hpp>
 #include <firestarter/Logging/Log.hpp>
-
-#include <algorithm>
 #include <initializer_list>
 #include <map>
 #include <sstream>
@@ -36,7 +35,7 @@ class PlatformConfig {
 private:
   std::string _name;
   std::list<unsigned> _threads;
-  payload::Payload *_payload;
+  payload::Payload* _payload;
 
 protected:
   unsigned _instructionCacheSize;
@@ -45,33 +44,31 @@ class PlatformConfig {
   unsigned _lines;
 
 public:
-  PlatformConfig(std::string name, std::list<unsigned> threads,
-                 unsigned instructionCacheSize,
-                 std::initializer_list<unsigned> dataCacheBufferSize,
-                 unsigned ramBufferSize, unsigned lines,
-                 payload::Payload *payload)
-      : _name(name), _threads(threads), _payload(payload),
-        _instructionCacheSize(instructionCacheSize),
-        _dataCacheBufferSize(dataCacheBufferSize),
-        _ramBufferSize(ramBufferSize), _lines(lines) {}
+  PlatformConfig(std::string name, std::list<unsigned> threads, unsigned instructionCacheSize,
+                 std::initializer_list<unsigned> dataCacheBufferSize, unsigned ramBufferSize, unsigned lines,
+                 payload::Payload* payload)
+      : _name(name)
+      , _threads(threads)
+      , _payload(payload)
+      , _instructionCacheSize(instructionCacheSize)
+      , _dataCacheBufferSize(dataCacheBufferSize)
+      , _ramBufferSize(ramBufferSize)
+      , _lines(lines) {}
   virtual ~PlatformConfig() { delete _payload; }
 
-  const std::string &name() const { return _name; }
+  const std::string& name() const { return _name; }
   unsigned instructionCacheSize() const { return _instructionCacheSize; }
-  const std::list<unsigned> &dataCacheBufferSize() const {
-    return _dataCacheBufferSize;
-  }
+  const std::list<unsigned>& dataCacheBufferSize() const { return _dataCacheBufferSize; }
   unsigned ramBufferSize() const { return _ramBufferSize; }
   unsigned lines() const { return _lines; }
-  payload::Payload const &payload() const { return *_payload; }
+  payload::Payload const& payload() const { return *_payload; }
 
   std::map<unsigned, std::string> getThreadMap() const {
     std::map<unsigned, std::string> threadMap;
 
-    for (auto const &thread : _threads) {
+    for (auto const& thread : _threads) {
       std::stringstream functionName;
-      functionName << "FUNC_" << name() << "_" << payload().name() << "_"
-                   << thread << "T";
+      functionName << "FUNC_" << name() << "_" << payload().name() << "_" << thread << "T";
       threadMap[thread] = functionName.str();
     }
 
@@ -82,13 +79,12 @@ class PlatformConfig {
 
   virtual bool isDefault() const = 0;
 
-  virtual std::vector<std::pair<std::string, unsigned>>
-  getDefaultPayloadSettings() const = 0;
+  virtual std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const = 0;
 
   std::string getDefaultPayloadSettingsString() const {
     std::stringstream ss;
 
-    for (auto const &[name, value] : this->getDefaultPayloadSettings()) {
+    for (auto const& [name, value] : this->getDefaultPayloadSettings()) {
       ss << name << ":" << value << ",";
     }
 
diff --git a/include/firestarter/Environment/Platform/RuntimeConfig.hpp b/include/firestarter/Environment/Platform/RuntimeConfig.hpp
index 2ed821ea..86946877 100644
--- a/include/firestarter/Environment/Platform/RuntimeConfig.hpp
+++ b/include/firestarter/Environment/Platform/RuntimeConfig.hpp
@@ -21,15 +21,14 @@
 
 #pragma once
 
-#include <firestarter/Environment/Platform/PlatformConfig.hpp>
-
 #include <cassert>
+#include <firestarter/Environment/Platform/PlatformConfig.hpp>
 
 namespace firestarter::environment::platform {
 
 class RuntimeConfig {
 private:
-  PlatformConfig const &_platformConfig;
+  PlatformConfig const& _platformConfig;
   std::unique_ptr<payload::Payload> _payload;
   unsigned _thread;
   std::vector<std::pair<std::string, unsigned>> _payloadSettings;
@@ -39,31 +38,34 @@ class RuntimeConfig {
   unsigned _lines;
 
 public:
-  RuntimeConfig(PlatformConfig const &platformConfig, unsigned thread,
-                unsigned detectedInstructionCacheSize)
-      : _platformConfig(platformConfig), _payload(nullptr), _thread(thread),
-        _payloadSettings(platformConfig.getDefaultPayloadSettings()),
-        _instructionCacheSize(platformConfig.instructionCacheSize()),
-        _dataCacheBufferSize(platformConfig.dataCacheBufferSize()),
-        _ramBufferSize(platformConfig.ramBufferSize()),
-        _lines(platformConfig.lines()) {
+  RuntimeConfig(PlatformConfig const& platformConfig, unsigned thread, unsigned detectedInstructionCacheSize)
+      : _platformConfig(platformConfig)
+      , _payload(nullptr)
+      , _thread(thread)
+      , _payloadSettings(platformConfig.getDefaultPayloadSettings())
+      , _instructionCacheSize(platformConfig.instructionCacheSize())
+      , _dataCacheBufferSize(platformConfig.dataCacheBufferSize())
+      , _ramBufferSize(platformConfig.ramBufferSize())
+      , _lines(platformConfig.lines()) {
     if (detectedInstructionCacheSize != 0) {
       this->_instructionCacheSize = detectedInstructionCacheSize;
     }
   };
 
-  RuntimeConfig(const RuntimeConfig &c)
-      : _platformConfig(c.platformConfig()),
-        _payload(c.platformConfig().payload().clone()), _thread(c.thread()),
-        _payloadSettings(c.payloadSettings()),
-        _instructionCacheSize(c.instructionCacheSize()),
-        _dataCacheBufferSize(c.dataCacheBufferSize()),
-        _ramBufferSize(c.ramBufferSize()), _lines(c.lines()) {}
+  RuntimeConfig(const RuntimeConfig& c)
+      : _platformConfig(c.platformConfig())
+      , _payload(c.platformConfig().payload().clone())
+      , _thread(c.thread())
+      , _payloadSettings(c.payloadSettings())
+      , _instructionCacheSize(c.instructionCacheSize())
+      , _dataCacheBufferSize(c.dataCacheBufferSize())
+      , _ramBufferSize(c.ramBufferSize())
+      , _lines(c.lines()) {}
 
   ~RuntimeConfig() { _payload.reset(); }
 
-  PlatformConfig const &platformConfig() const { return _platformConfig; }
-  payload::Payload &payload() const {
+  PlatformConfig const& platformConfig() const { return _platformConfig; }
+  payload::Payload& payload() const {
 #if defined(__clang__)
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wunused-value"
@@ -80,26 +82,21 @@ class RuntimeConfig {
     return *_payload;
   }
   unsigned thread() const { return _thread; }
-  const std::vector<std::pair<std::string, unsigned>> &payloadSettings() const {
-    return _payloadSettings;
-  }
+  const std::vector<std::pair<std::string, unsigned>>& payloadSettings() const { return _payloadSettings; }
   std::vector<std::string> payloadItems() const {
     std::vector<std::string> items;
-    for (auto const &pair : _payloadSettings) {
+    for (auto const& pair : _payloadSettings) {
       items.push_back(pair.first);
     }
     return items;
   }
 
   unsigned instructionCacheSize() const { return _instructionCacheSize; }
-  const std::list<unsigned> &dataCacheBufferSize() const {
-    return _dataCacheBufferSize;
-  }
+  const std::list<unsigned>& dataCacheBufferSize() const { return _dataCacheBufferSize; }
   unsigned ramBufferSize() const { return _ramBufferSize; }
   unsigned lines() const { return _lines; }
 
-  void setPayloadSettings(
-      std::vector<std::pair<std::string, unsigned>> const &payloadSettings) {
+  void setPayloadSettings(std::vector<std::pair<std::string, unsigned>> const& payloadSettings) {
     this->_payloadSettings = payloadSettings;
   }
 
@@ -107,20 +104,17 @@ class RuntimeConfig {
 
   void printCodePathSummary() const {
     log::info() << "\n"
-                << "  Taking " << platformConfig().payload().name()
-                << " path optimized for " << platformConfig().name() << " - "
-                << thread() << " thread(s) per core\n"
+                << "  Taking " << platformConfig().payload().name() << " path optimized for " << platformConfig().name()
+                << " - " << thread() << " thread(s) per core\n"
                 << "  Used buffersizes per thread:";
 
     if (instructionCacheSize() != 0) {
-      log::info() << "    - L1i-Cache: " << instructionCacheSize() / thread()
-                  << " Bytes";
+      log::info() << "    - L1i-Cache: " << instructionCacheSize() / thread() << " Bytes";
     }
 
     unsigned i = 1;
-    for (auto const &bytes : dataCacheBufferSize()) {
-      log::info() << "    - L" << i << "d-Cache: " << bytes / thread()
-                  << " Bytes";
+    for (auto const& bytes : dataCacheBufferSize()) {
+      log::info() << "    - L" << i << "d-Cache: " << bytes / thread() << " Bytes";
       i++;
     }
 
diff --git a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
index b23f1b97..e5fa736f 100644
--- a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
@@ -26,29 +26,23 @@
 namespace firestarter::environment::x86::payload {
 class AVX512Payload final : public X86Payload {
 public:
-  AVX512Payload(asmjit::CpuFeatures const &supportedFeatures)
-      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX512_F},
-                   "AVX512", 8, 32) {}
+  AVX512Payload(asmjit::CpuFeatures const& supportedFeatures)
+      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX512_F}, "AVX512", 8, 32) {}
 
-  int compilePayload(
-      std::vector<std::pair<std::string, unsigned>> const &proportion,
-      unsigned instructionCacheSize,
-      std::list<unsigned> const &dataCacheBufferSize, unsigned ramBufferSize,
-      unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-      bool errorDetection) override;
+  int compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion, unsigned instructionCacheSize,
+                     std::list<unsigned> const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread,
+                     unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override;
   std::list<std::string> getAvailableInstructions() const override;
-  void init(unsigned long long *memoryAddr,
-            unsigned long long bufferSize) override;
+  void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override;
 
-  firestarter::environment::payload::Payload *clone() const override {
+  firestarter::environment::payload::Payload* clone() const override {
     return new AVX512Payload(this->supportedFeatures());
   };
 
 private:
   const std::map<std::string, unsigned> instructionFlops = {
-      {"REG", 32},   {"L1_L", 32},  {"L1_BROADCAST", 16}, {"L1_S", 16},
-      {"L1_LS", 16}, {"L2_L", 32},  {"L2_S", 16},         {"L2_LS", 16},
-      {"L3_L", 32},  {"L3_S", 16},  {"L3_LS", 16},        {"L3_P", 16},
+      {"REG", 32},   {"L1_L", 32},  {"L1_BROADCAST", 16}, {"L1_S", 16}, {"L1_LS", 16}, {"L2_L", 32},
+      {"L2_S", 16},  {"L2_LS", 16}, {"L3_L", 32},         {"L3_S", 16}, {"L3_LS", 16}, {"L3_P", 16},
       {"RAM_L", 32}, {"RAM_S", 16}, {"RAM_LS", 16},       {"RAM_P", 16}};
 
   const std::map<std::string, unsigned> instructionMemory = {
diff --git a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
index 0a6e8014..d0e7b381 100644
--- a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
@@ -26,29 +26,23 @@
 namespace firestarter::environment::x86::payload {
 class AVXPayload final : public X86Payload {
 public:
-  AVXPayload(asmjit::CpuFeatures const &supportedFeatures)
-      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX}, "AVX",
-                   4, 16) {}
+  AVXPayload(asmjit::CpuFeatures const& supportedFeatures)
+      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX}, "AVX", 4, 16) {}
 
-  int compilePayload(
-      std::vector<std::pair<std::string, unsigned>> const &proportion,
-      unsigned instructionCacheSize,
-      std::list<unsigned> const &dataCacheBufferSize, unsigned ramBufferSize,
-      unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-      bool errorDetection) override;
+  int compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion, unsigned instructionCacheSize,
+                     std::list<unsigned> const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread,
+                     unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override;
   std::list<std::string> getAvailableInstructions() const override;
-  void init(unsigned long long *memoryAddr,
-            unsigned long long bufferSize) override;
+  void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override;
 
-  firestarter::environment::payload::Payload *clone() const override {
+  firestarter::environment::payload::Payload* clone() const override {
     return new AVXPayload(this->supportedFeatures());
   };
 
 private:
   const std::map<std::string, unsigned> instructionFlops = {
-      {"REG", 4},  {"L1_L", 4},  {"L1_S", 4},  {"L1_LS", 4},  {"L2_L", 4},
-      {"L2_S", 4}, {"L2_LS", 4}, {"L3_L", 4},  {"L3_S", 4},   {"L3_LS", 4},
-      {"L3_P", 4}, {"RAM_L", 4}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}};
+      {"REG", 4},  {"L1_L", 4},  {"L1_S", 4}, {"L1_LS", 4}, {"L2_L", 4},  {"L2_S", 4},   {"L2_LS", 4}, {"L3_L", 4},
+      {"L3_S", 4}, {"L3_LS", 4}, {"L3_P", 4}, {"RAM_L", 4}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}};
 
   const std::map<std::string, unsigned> instructionMemory = {
       {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}};
diff --git a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
index 47d8a778..6a1d3ee5 100644
--- a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
@@ -27,31 +27,24 @@ namespace firestarter::environment::x86::payload {
 
 class FMA4Payload final : public X86Payload {
 public:
-  FMA4Payload(asmjit::CpuFeatures const &supportedFeatures)
-      : X86Payload(
-            supportedFeatures,
-            {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4},
-            "FMA4", 4, 16) {}
-
-  int compilePayload(
-      std::vector<std::pair<std::string, unsigned>> const &proportion,
-      unsigned instructionCacheSize,
-      std::list<unsigned> const &dataCacheBufferSize, unsigned ramBufferSize,
-      unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-      bool errorDetection) override;
+  FMA4Payload(asmjit::CpuFeatures const& supportedFeatures)
+      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4}, "FMA4", 4,
+                   16) {}
+
+  int compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion, unsigned instructionCacheSize,
+                     std::list<unsigned> const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread,
+                     unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override;
   std::list<std::string> getAvailableInstructions() const override;
-  void init(unsigned long long *memoryAddr,
-            unsigned long long bufferSize) override;
+  void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override;
 
-  firestarter::environment::payload::Payload *clone() const override {
+  firestarter::environment::payload::Payload* clone() const override {
     return new FMA4Payload(this->supportedFeatures());
   };
 
 private:
   const std::map<std::string, unsigned> instructionFlops = {
-      {"REG", 8},  {"L1_L", 12}, {"L1_S", 8},  {"L1_LS", 8},  {"L2_L", 8},
-      {"L2_S", 4}, {"L2_LS", 4}, {"L3_L", 8},  {"L3_S", 4},   {"L3_LS", 4},
-      {"L3_P", 4}, {"RAM_L", 8}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}};
+      {"REG", 8},  {"L1_L", 12}, {"L1_S", 8}, {"L1_LS", 8}, {"L2_L", 8},  {"L2_S", 4},   {"L2_LS", 4}, {"L3_L", 8},
+      {"L3_S", 4}, {"L3_LS", 4}, {"L3_P", 4}, {"RAM_L", 8}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}};
 
   const std::map<std::string, unsigned> instructionMemory = {
       {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}};
diff --git a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
index 57ab455d..da6c2b5a 100644
--- a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
@@ -26,33 +26,25 @@
 namespace firestarter::environment::x86::payload {
 class FMAPayload final : public X86Payload {
 public:
-  FMAPayload(asmjit::CpuFeatures const &supportedFeatures)
-      : X86Payload(supportedFeatures,
-                   {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA},
-                   "FMA", 4, 16) {}
+  FMAPayload(asmjit::CpuFeatures const& supportedFeatures)
+      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA}, "FMA", 4, 16) {}
 
-  int compilePayload(
-      std::vector<std::pair<std::string, unsigned>> const &proportion,
-      unsigned instructionCacheSize,
-      std::list<unsigned> const &dataCacheBufferSize, unsigned ramBufferSize,
-      unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-      bool errorDetection) override;
+  int compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion, unsigned instructionCacheSize,
+                     std::list<unsigned> const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread,
+                     unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override;
   std::list<std::string> getAvailableInstructions() const override;
-  void init(unsigned long long *memoryAddr,
-            unsigned long long bufferSize) override;
+  void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override;
 
-  firestarter::environment::payload::Payload *clone() const override {
+  firestarter::environment::payload::Payload* clone() const override {
     return new FMAPayload(this->supportedFeatures());
   };
 
 private:
   const std::map<std::string, unsigned> instructionFlops = {
-      {"REG", 16},  {"L1_L", 16},     {"L1_2L", 16},      {"L1_S", 8},
-      {"L1_LS", 8}, {"L1_LS_256", 8}, {"L1_2LS_256", 16}, {"L2_L", 16},
-      {"L2_S", 8},  {"L2_LS", 8},     {"L2_LS_256", 8},   {"L2_2LS_256", 16},
-      {"L3_L", 16}, {"L3_S", 8},      {"L3_LS", 8},       {"L3_LS_256", 8},
-      {"L3_P", 8},  {"RAM_L", 16},    {"RAM_S", 8},       {"RAM_LS", 8},
-      {"RAM_P", 8}};
+      {"REG", 16},        {"L1_L", 16},  {"L1_2L", 16}, {"L1_S", 8},      {"L1_LS", 8},     {"L1_LS_256", 8},
+      {"L1_2LS_256", 16}, {"L2_L", 16},  {"L2_S", 8},   {"L2_LS", 8},     {"L2_LS_256", 8}, {"L2_2LS_256", 16},
+      {"L3_L", 16},       {"L3_S", 8},   {"L3_LS", 8},  {"L3_LS_256", 8}, {"L3_P", 8},      {"RAM_L", 16},
+      {"RAM_S", 8},       {"RAM_LS", 8}, {"RAM_P", 8}};
 
   const std::map<std::string, unsigned> instructionMemory = {
       {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}};
diff --git a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
index d02a28e9..d923c9b3 100644
--- a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
@@ -26,29 +26,23 @@
 namespace firestarter::environment::x86::payload {
 class SSE2Payload final : public X86Payload {
 public:
-  SSE2Payload(asmjit::CpuFeatures const &supportedFeatures)
-      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kSSE2},
-                   "SSE2", 2, 16) {}
+  SSE2Payload(asmjit::CpuFeatures const& supportedFeatures)
+      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kSSE2}, "SSE2", 2, 16) {}
 
-  int compilePayload(
-      std::vector<std::pair<std::string, unsigned>> const &proportion,
-      unsigned instructionCacheSize,
-      std::list<unsigned> const &dataCacheBufferSize, unsigned ramBufferSize,
-      unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-      bool errorDetection) override;
+  int compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion, unsigned instructionCacheSize,
+                     std::list<unsigned> const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread,
+                     unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override;
   std::list<std::string> getAvailableInstructions() const override;
-  void init(unsigned long long *memoryAddr,
-            unsigned long long bufferSize) override;
+  void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override;
 
-  firestarter::environment::payload::Payload *clone() const override {
+  firestarter::environment::payload::Payload* clone() const override {
     return new SSE2Payload(this->supportedFeatures());
   };
 
 private:
   const std::map<std::string, unsigned> instructionFlops = {
-      {"REG", 2},  {"L1_L", 2},  {"L1_S", 2},  {"L1_LS", 2},  {"L2_L", 2},
-      {"L2_S", 2}, {"L2_LS", 2}, {"L3_L", 2},  {"L3_S", 2},   {"L3_LS", 2},
-      {"L3_P", 2}, {"RAM_L", 2}, {"RAM_S", 2}, {"RAM_LS", 2}, {"RAM_P", 2}};
+      {"REG", 2},  {"L1_L", 2},  {"L1_S", 2}, {"L1_LS", 2}, {"L2_L", 2},  {"L2_S", 2},   {"L2_LS", 2}, {"L3_L", 2},
+      {"L3_S", 2}, {"L3_LS", 2}, {"L3_P", 2}, {"RAM_L", 2}, {"RAM_S", 2}, {"RAM_LS", 2}, {"RAM_P", 2}};
 
   const std::map<std::string, unsigned> instructionMemory = {
       {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}};
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index c0ebadc5..87d5e0be 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -21,13 +21,12 @@
 
 #pragma once
 
-#include <firestarter/Environment/Payload/Payload.hpp>
-#include <firestarter/Logging/Log.hpp>
+#include <asmjit/x86.h>
 
 #include <firestarter/DumpRegisterWorkerData.hpp>
+#include <firestarter/Environment/Payload/Payload.hpp>
 #include <firestarter/LoadWorkerData.hpp>
-
-#include <asmjit/x86.h>
+#include <firestarter/Logging/Log.hpp>
 
 #define INIT_BLOCKSIZE 1024
 
@@ -36,41 +35,34 @@ namespace firestarter::environment::x86::payload {
 class X86Payload : public environment::payload::Payload {
 private:
   // we can use this to check, if our platform support this payload
-  asmjit::CpuFeatures const &_supportedFeatures;
+  asmjit::CpuFeatures const& _supportedFeatures;
   std::list<asmjit::CpuFeatures::X86::Id> featureRequests;
 
 protected:
   //  asmjit::CodeHolder code;
   asmjit::JitRuntime rt;
   // typedef int (*LoadFunction)(firestarter::ThreadData *);
-  typedef unsigned long long (*LoadFunction)(unsigned long long *,
-                                             volatile unsigned long long *,
-                                             unsigned long long);
+  typedef unsigned long long (*LoadFunction)(unsigned long long*, volatile unsigned long long*, unsigned long long);
   LoadFunction loadFunction = nullptr;
 
-  asmjit::CpuFeatures const &supportedFeatures() const {
-    return this->_supportedFeatures;
-  }
+  asmjit::CpuFeatures const& supportedFeatures() const { return this->_supportedFeatures; }
 
   template <class IterReg, class VectorReg>
-  void emitErrorDetectionCode(asmjit::x86::Builder &cb, IterReg iter_reg,
-                              asmjit::x86::Gpq addrHigh_reg,
-                              asmjit::x86::Gpq pointer_reg,
-                              asmjit::x86::Gpq temp_reg,
-                              asmjit::x86::Gpq temp_reg2);
+  void emitErrorDetectionCode(asmjit::x86::Builder& cb, IterReg iter_reg, asmjit::x86::Gpq addrHigh_reg,
+                              asmjit::x86::Gpq pointer_reg, asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2);
 
 public:
-  X86Payload(asmjit::CpuFeatures const &supportedFeatures,
-             std::initializer_list<asmjit::CpuFeatures::X86::Id> featureRequests,
-             std::string name, unsigned registerSize, unsigned registerCount)
-      : Payload(name, registerSize, registerCount),
-        _supportedFeatures(supportedFeatures),
-        featureRequests(featureRequests) {}
+  X86Payload(asmjit::CpuFeatures const& supportedFeatures,
+             std::initializer_list<asmjit::CpuFeatures::X86::Id> featureRequests, std::string name,
+             unsigned registerSize, unsigned registerCount)
+      : Payload(name, registerSize, registerCount)
+      , _supportedFeatures(supportedFeatures)
+      , featureRequests(featureRequests) {}
 
   bool isAvailable() const override {
     bool available = true;
 
-    for (auto const &feature : featureRequests) {
+    for (auto const& feature : featureRequests) {
       available &= this->_supportedFeatures.has(feature);
     }
 
@@ -84,18 +76,15 @@ class X86Payload : public environment::payload::Payload {
 #endif
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Woverloaded-virtual"
-  void init(unsigned long long *memoryAddr, unsigned long long bufferSize,
-            double firstValue, double lastValue);
+  void init(unsigned long long* memoryAddr, unsigned long long bufferSize, double firstValue, double lastValue);
 #pragma GCC diagnostic pop
 #if defined(__clang__)
 #pragma clang diagnostic pop
 #endif
   // use cpuid and usleep as low load
-  void lowLoadFunction(volatile unsigned long long *addrHigh,
-                       unsigned long long period) override;
+  void lowLoadFunction(volatile unsigned long long* addrHigh, unsigned long long period) override;
 
-  unsigned long long highLoadFunction(unsigned long long *addrMem,
-                                      volatile unsigned long long *addrHigh,
+  unsigned long long highLoadFunction(unsigned long long* addrMem, volatile unsigned long long* addrHigh,
                                       unsigned long long iterations) override;
 };
 
diff --git a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
index a1776f37..7254cb55 100644
--- a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
@@ -26,23 +26,17 @@
 namespace firestarter::environment::x86::payload {
 class ZENFMAPayload final : public X86Payload {
 public:
-  ZENFMAPayload(asmjit::CpuFeatures const &supportedFeatures)
-      : X86Payload(
-            supportedFeatures,
-            {asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA},
-            "ZENFMA", 4, 16) {}
+  ZENFMAPayload(asmjit::CpuFeatures const& supportedFeatures)
+      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA},
+                   "ZENFMA", 4, 16) {}
 
-  int compilePayload(
-      std::vector<std::pair<std::string, unsigned>> const &proportion,
-      unsigned instructionCacheSize,
-      std::list<unsigned> const &dataCacheBufferSize, unsigned ramBufferSize,
-      unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-      bool errorDetection) override;
+  int compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion, unsigned instructionCacheSize,
+                     std::list<unsigned> const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread,
+                     unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override;
   std::list<std::string> getAvailableInstructions() const override;
-  void init(unsigned long long *memoryAddr,
-            unsigned long long bufferSize) override;
+  void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override;
 
-  firestarter::environment::payload::Payload *clone() const override {
+  firestarter::environment::payload::Payload* clone() const override {
     return new ZENFMAPayload(this->supportedFeatures());
   };
 
diff --git a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
index 12a922b9..4cc4b811 100644
--- a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
@@ -26,17 +26,12 @@
 
 namespace firestarter::environment::x86::platform {
 class BulldozerConfig final : public X86PlatformConfig {
-
 public:
-  BulldozerConfig(asmjit::CpuFeatures const &supportedFeatures,
-                  unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("BLD_OPTERON", 21, {1, 2, 3}, {1}, 0,
-                          {16384, 1048576, 786432}, 104857600, 1536, family,
-                          model, threads,
-                          new payload::FMA4Payload(supportedFeatures)) {}
+  BulldozerConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
+      : X86PlatformConfig("BLD_OPTERON", 21, {1, 2, 3}, {1}, 0, {16384, 1048576, 786432}, 104857600, 1536, family,
+                          model, threads, new payload::FMA4Payload(supportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>>
-  getDefaultPayloadSettings() const override {
+  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
     return std::vector<std::pair<std::string, unsigned>>(
         {{"RAM_L", 1}, {"L3_L", 1}, {"L2_LS", 5}, {"L1_L", 90}, {"REG", 45}});
   }
diff --git a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
index f079ec18..5b30d6a0 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
@@ -26,17 +26,12 @@
 
 namespace firestarter::environment::x86::platform {
 class HaswellConfig final : public X86PlatformConfig {
-
 public:
-  HaswellConfig(asmjit::CpuFeatures const &supportedFeatures, unsigned family,
-                unsigned model, unsigned threads)
-      : X86PlatformConfig("HSW_COREI", 6, {60, 61, 69, 70, 71}, {1, 2}, 0,
-                          {32768, 262144, 1572864}, 104857600, 1536, family,
-                          model, threads,
-                          new payload::FMAPayload(supportedFeatures)) {}
+  HaswellConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
+      : X86PlatformConfig("HSW_COREI", 6, {60, 61, 69, 70, 71}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
+                          family, model, threads, new payload::FMAPayload(supportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>>
-  getDefaultPayloadSettings() const override {
+  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
     return std::vector<std::pair<std::string, unsigned>>(
         {{"RAM_L", 2}, {"L3_LS", 3}, {"L2_LS", 9}, {"L1_LS", 90}, {"REG", 40}});
   }
diff --git a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
index df5a1927..106dd0e3 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
@@ -26,22 +26,14 @@
 
 namespace firestarter::environment::x86::platform {
 class HaswellEPConfig final : public X86PlatformConfig {
-
 public:
-  HaswellEPConfig(asmjit::CpuFeatures const &supportedFeatures,
-                  unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("HSW_XEONEP", 6, {63, 79}, {1, 2}, 0,
-                          {32768, 262144, 2621440}, 104857600, 1536, family,
-                          model, threads,
-                          new payload::FMAPayload(supportedFeatures)) {}
+  HaswellEPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
+      : X86PlatformConfig("HSW_XEONEP", 6, {63, 79}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, family,
+                          model, threads, new payload::FMAPayload(supportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>>
-  getDefaultPayloadSettings() const override {
-    return std::vector<std::pair<std::string, unsigned>>({{"RAM_L", 8},
-                                                          {"L3_LS", 1},
-                                                          {"L2_LS", 29},
-                                                          {"L1_LS", 100},
-                                                          {"REG", 100}});
+  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+    return std::vector<std::pair<std::string, unsigned>>(
+        {{"RAM_L", 8}, {"L3_LS", 1}, {"L2_LS", 29}, {"L1_LS", 100}, {"REG", 100}});
   }
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
index de520c56..709ef934 100644
--- a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
@@ -26,19 +26,13 @@
 
 namespace firestarter::environment::x86::platform {
 class KnightsLandingConfig final : public X86PlatformConfig {
-
 public:
-  KnightsLandingConfig(asmjit::CpuFeatures const &supportedFeatures,
-                       unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("KNL_XEONPHI", 6, {87}, {4}, 0,
-                          {32768, 524288, 236279125}, 26214400, 1536, family,
-                          model, threads,
-                          new payload::AVX512Payload(supportedFeatures)) {}
+  KnightsLandingConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
+      : X86PlatformConfig("KNL_XEONPHI", 6, {87}, {4}, 0, {32768, 524288, 236279125}, 26214400, 1536, family, model,
+                          threads, new payload::AVX512Payload(supportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>>
-  getDefaultPayloadSettings() const override {
-    return std::vector<std::pair<std::string, unsigned>>(
-        {{"RAM_P", 3}, {"L2_S", 8}, {"L1_L", 40}, {"REG", 10}});
+  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+    return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 3}, {"L2_S", 8}, {"L1_L", 40}, {"REG", 10}});
   }
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
index 0ad94682..5ad0a065 100644
--- a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
@@ -26,22 +26,14 @@
 
 namespace firestarter::environment::x86::platform {
 class NaplesConfig final : public X86PlatformConfig {
-
 public:
-  NaplesConfig(asmjit::CpuFeatures const &supportedFeatures, unsigned family,
-               unsigned model, unsigned threads)
-      : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24}, {1, 2}, 0,
-                          {65536, 524288, 2097152}, 104857600, 1536, family,
-                          model, threads,
-                          new payload::ZENFMAPayload(supportedFeatures)) {}
+  NaplesConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
+      : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24}, {1, 2}, 0, {65536, 524288, 2097152}, 104857600, 1536, family,
+                          model, threads, new payload::ZENFMAPayload(supportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>>
-  getDefaultPayloadSettings() const override {
-    return std::vector<std::pair<std::string, unsigned>>({{"RAM_L", 3},
-                                                          {"L3_L", 14},
-                                                          {"L2_L", 75},
-                                                          {"L1_LS", 81},
-                                                          {"REG", 100}});
+  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+    return std::vector<std::pair<std::string, unsigned>>(
+        {{"RAM_L", 3}, {"L3_L", 14}, {"L2_L", 75}, {"L1_LS", 81}, {"REG", 100}});
   }
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
index da7764d4..3f0748de 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
@@ -26,19 +26,13 @@
 
 namespace firestarter::environment::x86::platform {
 class NehalemConfig final : public X86PlatformConfig {
-
 public:
-  NehalemConfig(asmjit::CpuFeatures const &supportedFeatures, unsigned family,
-                unsigned model, unsigned threads)
-      : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23}, {1, 2}, 0,
-                          {32768, 262144, 1572864}, 104857600, 1536, family,
-                          model, threads,
-                          new payload::SSE2Payload(supportedFeatures)) {}
+  NehalemConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
+      : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, family,
+                          model, threads, new payload::SSE2Payload(supportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>>
-  getDefaultPayloadSettings() const override {
-    return std::vector<std::pair<std::string, unsigned>>(
-        {{"RAM_P", 1}, {"L1_LS", 70}, {"REG", 2}});
+  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+    return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 1}, {"L1_LS", 70}, {"REG", 2}});
   }
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
index 06ac2f64..a738fb7f 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
@@ -26,19 +26,13 @@
 
 namespace firestarter::environment::x86::platform {
 class NehalemEPConfig final : public X86PlatformConfig {
-
 public:
-  NehalemEPConfig(asmjit::CpuFeatures const &supportedFeatures,
-                  unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("NHM_XEONEP", 6, {26, 44}, {1, 2}, 0,
-                          {32768, 262144, 2097152}, 104857600, 1536, family,
-                          model, threads,
-                          new payload::SSE2Payload(supportedFeatures)) {}
+  NehalemEPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
+      : X86PlatformConfig("NHM_XEONEP", 6, {26, 44}, {1, 2}, 0, {32768, 262144, 2097152}, 104857600, 1536, family,
+                          model, threads, new payload::SSE2Payload(supportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>>
-  getDefaultPayloadSettings() const override {
-    return std::vector<std::pair<std::string, unsigned>>(
-        {{"RAM_P", 1}, {"L1_LS", 60}, {"REG", 2}});
+  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+    return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 1}, {"L1_LS", 60}, {"REG", 2}});
   }
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
index f7569bf4..230d91ba 100644
--- a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
@@ -26,23 +26,14 @@
 
 namespace firestarter::environment::x86::platform {
 class RomeConfig final : public X86PlatformConfig {
-
 public:
-  RomeConfig(asmjit::CpuFeatures const &supportedFeatures, unsigned family,
-             unsigned model, unsigned threads)
-      : X86PlatformConfig("ZEN_2_EPYC", 23, {49}, {1, 2}, 0,
-                          {32768, 524288, 2097152}, 104857600, 1536, family,
-                          model, threads,
-                          new payload::FMAPayload(supportedFeatures)) {}
+  RomeConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
+      : X86PlatformConfig("ZEN_2_EPYC", 23, {49}, {1, 2}, 0, {32768, 524288, 2097152}, 104857600, 1536, family, model,
+                          threads, new payload::FMAPayload(supportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>>
-  getDefaultPayloadSettings() const override {
-    return std::vector<std::pair<std::string, unsigned>>({{"RAM_L", 10},
-                                                          {"L3_L", 25},
-                                                          {"L2_L", 91},
-                                                          {"L1_2LS_256", 72},
-                                                          {"L1_LS_256", 82},
-                                                          {"REG", 75}});
+  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+    return std::vector<std::pair<std::string, unsigned>>(
+        {{"RAM_L", 10}, {"L3_L", 25}, {"L2_L", 91}, {"L1_2LS_256", 72}, {"L1_LS_256", 82}, {"REG", 75}});
   }
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
index 7e928c1f..a58e193a 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
@@ -26,22 +26,14 @@
 
 namespace firestarter::environment::x86::platform {
 class SandyBridgeConfig final : public X86PlatformConfig {
-
 public:
-  SandyBridgeConfig(asmjit::CpuFeatures const &supportedFeatures,
-                    unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("SNB_COREI", 6, {42, 58}, {1, 2}, 0,
-                          {32768, 262144, 1572864}, 104857600, 1536, family,
-                          model, threads,
-                          new payload::AVXPayload(supportedFeatures)) {}
+  SandyBridgeConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
+      : X86PlatformConfig("SNB_COREI", 6, {42, 58}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, family, model,
+                          threads, new payload::AVXPayload(supportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>>
-  getDefaultPayloadSettings() const override {
-    return std::vector<std::pair<std::string, unsigned>>({{"RAM_L", 2},
-                                                          {"L3_LS", 4},
-                                                          {"L2_LS", 10},
-                                                          {"L1_LS", 90},
-                                                          {"REG", 45}});
+  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+    return std::vector<std::pair<std::string, unsigned>>(
+        {{"RAM_L", 2}, {"L3_LS", 4}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 45}});
   }
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
index cb7fcb43..3f4f6303 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
@@ -27,22 +27,14 @@
 
 namespace firestarter::environment::x86::platform {
 class SandyBridgeEPConfig final : public X86PlatformConfig {
-
 public:
-  SandyBridgeEPConfig(asmjit::CpuFeatures const &supportedFeatures,
-                      unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("SNB_XEONEP", 6, {45, 62}, {1, 2}, 0,
-                          {32768, 262144, 2621440}, 104857600, 1536, family,
-                          model, threads,
-                          new payload::AVXPayload(supportedFeatures)) {}
+  SandyBridgeEPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
+      : X86PlatformConfig("SNB_XEONEP", 6, {45, 62}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, family,
+                          model, threads, new payload::AVXPayload(supportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>>
-  getDefaultPayloadSettings() const override {
-    return std::vector<std::pair<std::string, unsigned>>({{"RAM_L", 3},
-                                                          {"L3_LS", 2},
-                                                          {"L2_LS", 10},
-                                                          {"L1_LS", 90},
-                                                          {"REG", 30}});
+  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+    return std::vector<std::pair<std::string, unsigned>>(
+        {{"RAM_L", 3}, {"L3_LS", 2}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 30}});
   }
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
index aec85be8..c533c3a5 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
@@ -27,22 +27,14 @@
 
 namespace firestarter::environment::x86::platform {
 class SkylakeConfig final : public X86PlatformConfig {
-
 public:
-  SkylakeConfig(asmjit::CpuFeatures const &supportedFeatures, unsigned family,
-                unsigned model, unsigned threads)
-      : X86PlatformConfig("SKL_COREI", 6, {78, 94}, {1, 2}, 0,
-                          {32768, 262144, 1572864}, 104857600, 1536, family,
-                          model, threads,
-                          new payload::FMAPayload(supportedFeatures)) {}
+  SkylakeConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
+      : X86PlatformConfig("SKL_COREI", 6, {78, 94}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, family, model,
+                          threads, new payload::FMAPayload(supportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>>
-  getDefaultPayloadSettings() const override {
-    return std::vector<std::pair<std::string, unsigned>>({{"RAM_L", 3},
-                                                          {"L3_LS_256", 5},
-                                                          {"L2_LS_256", 18},
-                                                          {"L1_2LS_256", 78},
-                                                          {"REG", 40}});
+  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+    return std::vector<std::pair<std::string, unsigned>>(
+        {{"RAM_L", 3}, {"L3_LS_256", 5}, {"L2_LS_256", 18}, {"L1_2LS_256", 78}, {"REG", 40}});
   }
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
index be767d0b..8243d9d6 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
@@ -26,17 +26,12 @@
 
 namespace firestarter::environment::x86::platform {
 class SkylakeSPConfig final : public X86PlatformConfig {
-
 public:
-  SkylakeSPConfig(asmjit::CpuFeatures const &supportedFeatures,
-                  unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("SKL_XEONEP", 6, {85}, {1, 2}, 0,
-                          {32768, 1048576, 1441792}, 1048576000, 1536, family,
-                          model, threads,
-                          new payload::AVX512Payload(supportedFeatures)) {}
+  SkylakeSPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
+      : X86PlatformConfig("SKL_XEONEP", 6, {85}, {1, 2}, 0, {32768, 1048576, 1441792}, 1048576000, 1536, family, model,
+                          threads, new payload::AVX512Payload(supportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>>
-  getDefaultPayloadSettings() const override {
+  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_S", 3},
                                                           {"RAM_P", 1},
                                                           {"L3_S", 1},
diff --git a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
index 45956f38..648346d8 100644
--- a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
@@ -35,23 +35,20 @@ class X86PlatformConfig : public environment::platform::PlatformConfig {
   unsigned _currentThreads;
 
 public:
-  X86PlatformConfig(std::string name, unsigned family,
-                    std::initializer_list<unsigned> models,
-                    std::initializer_list<unsigned> threads,
-                    unsigned instructionCacheSize,
-                    std::initializer_list<unsigned> dataCacheBufferSize,
-                    unsigned ramBuffersize, unsigned lines,
-                    unsigned currentFamily, unsigned currentModel,
-                    unsigned currentThreads, payload::X86Payload *payload)
-      : PlatformConfig(name, threads, instructionCacheSize, dataCacheBufferSize,
-                       ramBuffersize, lines, payload),
-        _family(family), _models(models), _currentFamily(currentFamily),
-        _currentModel(currentModel), _currentThreads(currentThreads) {}
+  X86PlatformConfig(std::string name, unsigned family, std::initializer_list<unsigned> models,
+                    std::initializer_list<unsigned> threads, unsigned instructionCacheSize,
+                    std::initializer_list<unsigned> dataCacheBufferSize, unsigned ramBuffersize, unsigned lines,
+                    unsigned currentFamily, unsigned currentModel, unsigned currentThreads,
+                    payload::X86Payload* payload)
+      : PlatformConfig(name, threads, instructionCacheSize, dataCacheBufferSize, ramBuffersize, lines, payload)
+      , _family(family)
+      , _models(models)
+      , _currentFamily(currentFamily)
+      , _currentModel(currentModel)
+      , _currentThreads(currentThreads) {}
 
   bool isDefault() const override {
-    return _family == _currentFamily &&
-           (std::find(_models.begin(), _models.end(), _currentModel) !=
-            _models.end()) &&
+    return _family == _currentFamily && (std::find(_models.begin(), _models.end(), _currentModel) != _models.end()) &&
            isAvailable();
   }
 };
diff --git a/include/firestarter/Environment/X86/X86CPUTopology.hpp b/include/firestarter/Environment/X86/X86CPUTopology.hpp
index 44a02dc2..fa3b033f 100644
--- a/include/firestarter/Environment/X86/X86CPUTopology.hpp
+++ b/include/firestarter/Environment/X86/X86CPUTopology.hpp
@@ -21,28 +21,23 @@
 
 #pragma once
 
-#include <firestarter/Environment/CPUTopology.hpp>
-
 #include <asmjit/asmjit.h>
 
+#include <firestarter/Environment/CPUTopology.hpp>
+
 namespace firestarter::environment::x86 {
 
 class X86CPUTopology final : public CPUTopology {
 public:
   X86CPUTopology();
 
-  friend std::ostream &operator<<(std::ostream &stream,
-                                  X86CPUTopology const &cpuTopology);
+  friend std::ostream& operator<<(std::ostream& stream, X86CPUTopology const& cpuTopology);
 
-  std::list<std::string> const &features() const override {
-    return this->featureList;
-  }
-  const asmjit::CpuFeatures& featuresAsmjit() const{
-    return this->cpuInfo.features();
-  }
+  std::list<std::string> const& features() const override { return this->featureList; }
+  const asmjit::CpuFeatures& featuresAsmjit() const { return this->cpuInfo.features(); }
 
-  std::string const &vendor() const override { return this->_vendor; }
-  std::string const &model() const override { return this->_model; }
+  std::string const& vendor() const override { return this->_vendor; }
+  std::string const& model() const override { return this->_model; }
 
   unsigned long long clockrate() const override;
 
@@ -55,8 +50,7 @@ class X86CPUTopology final : public CPUTopology {
 private:
   bool hasRdtsc() const { return this->_hasRdtsc; }
   bool hasInvariantRdtsc() const { return this->_hasInvariantRdtsc; }
-  void cpuid(unsigned long long *a, unsigned long long *b,
-             unsigned long long *c, unsigned long long *d) const;
+  void cpuid(unsigned long long* a, unsigned long long* b, unsigned long long* c, unsigned long long* d) const;
 
   asmjit::CpuInfo cpuInfo;
   std::list<std::string> featureList;
@@ -67,8 +61,7 @@ class X86CPUTopology final : public CPUTopology {
   std::string _model;
 };
 
-inline std::ostream &operator<<(std::ostream &stream,
-                                X86CPUTopology const &cpuTopology) {
+inline std::ostream& operator<<(std::ostream& stream, X86CPUTopology const& cpuTopology) {
   return cpuTopology.print(stream);
 }
 
diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp
index 11ad940e..b0e3aa8d 100644
--- a/include/firestarter/Environment/X86/X86Environment.hpp
+++ b/include/firestarter/Environment/X86/X86Environment.hpp
@@ -21,9 +21,9 @@
 
 #pragma once
 
-#include <firestarter/Environment/Environment.hpp>
-#include <firestarter/Environment/X86/X86CPUTopology.hpp>
+#include <asmjit/asmjit.h>
 
+#include <firestarter/Environment/Environment.hpp>
 #include <firestarter/Environment/X86/Platform/BulldozerConfig.hpp>
 #include <firestarter/Environment/X86/Platform/HaswellConfig.hpp>
 #include <firestarter/Environment/X86/Platform/HaswellEPConfig.hpp>
@@ -37,39 +37,35 @@
 #include <firestarter/Environment/X86/Platform/SkylakeConfig.hpp>
 #include <firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp>
 #include <firestarter/Environment/X86/Platform/X86PlatformConfig.hpp>
-
-#include <asmjit/asmjit.h>
-
+#include <firestarter/Environment/X86/X86CPUTopology.hpp>
 #include <functional>
 
-#define REGISTER(NAME)                                                         \
-  [](asmjit::CpuFeatures const &supportedFeatures, unsigned family,          \
-     unsigned model, unsigned threads) -> platform::X86PlatformConfig * {      \
-    return new platform::NAME(supportedFeatures, family, model, threads);      \
+#define REGISTER(NAME)                                                                                                 \
+  [](asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model,                                    \
+     unsigned threads) -> platform::X86PlatformConfig* {                                                               \
+    return new platform::NAME(supportedFeatures, family, model, threads);                                              \
   }
 
 namespace firestarter::environment::x86 {
 
 class X86Environment final : public Environment {
 public:
-  X86Environment() : Environment(new X86CPUTopology()) {}
+  X86Environment()
+      : Environment(new X86CPUTopology()) {}
 
   ~X86Environment() {
-    for (auto const &config : platformConfigs) {
+    for (auto const& config : platformConfigs) {
       delete config;
     }
-    for (auto const &config : fallbackPlatformConfigs) {
+    for (auto const& config : fallbackPlatformConfigs) {
       delete config;
     }
   }
 
-  X86CPUTopology const &topology() {
-    return *reinterpret_cast<X86CPUTopology *>(this->_topology);
-  }
+  X86CPUTopology const& topology() { return *reinterpret_cast<X86CPUTopology*>(this->_topology); }
 
   void evaluateFunctions() override;
-  int selectFunction(unsigned functionId,
-                     bool allowUnavailablePayload) override;
+  int selectFunction(unsigned functionId, bool allowUnavailablePayload) override;
   int selectInstructionGroups(std::string groups) override;
   void printAvailableInstructionGroups() override;
   void setLineCount(unsigned lineCount) override;
@@ -77,24 +73,19 @@ class X86Environment final : public Environment {
   void printFunctionSummary() override;
 
 private:
-  // The available function IDs are generated by iterating through this list of
-  // PlatformConfig. Add new PlatformConfig at the bottom to maintain stable
-  // IDs.
-  const std::list<std::function<platform::X86PlatformConfig *(
-      asmjit::CpuFeatures const &, unsigned, unsigned, unsigned)>>
-      platformConfigsCtor = {
-          REGISTER(KnightsLandingConfig), REGISTER(SkylakeConfig),
-          REGISTER(SkylakeSPConfig),      REGISTER(HaswellConfig),
-          REGISTER(HaswellEPConfig),      REGISTER(SandyBridgeConfig),
-          REGISTER(SandyBridgeEPConfig),  REGISTER(NehalemConfig),
-          REGISTER(NehalemEPConfig),      REGISTER(BulldozerConfig),
-          REGISTER(NaplesConfig),         REGISTER(RomeConfig)};
+  // The available function IDs are generated by iterating through this list
+  // of PlatformConfig. Add new PlatformConfig at the bottom to maintain
+  // stable IDs.
+  const std::list<std::function<platform::X86PlatformConfig*(asmjit::CpuFeatures const&, unsigned, unsigned, unsigned)>>
+      platformConfigsCtor = {REGISTER(KnightsLandingConfig), REGISTER(SkylakeConfig),   REGISTER(SkylakeSPConfig),
+                             REGISTER(HaswellConfig),        REGISTER(HaswellEPConfig), REGISTER(SandyBridgeConfig),
+                             REGISTER(SandyBridgeEPConfig),  REGISTER(NehalemConfig),   REGISTER(NehalemEPConfig),
+                             REGISTER(BulldozerConfig),      REGISTER(NaplesConfig),    REGISTER(RomeConfig)};
 
-  std::list<platform::X86PlatformConfig *> platformConfigs;
+  std::list<platform::X86PlatformConfig*> platformConfigs;
 
   // List of fallback PlatformConfig. Add one for each x86 extension.
-  const std::list<std::function<platform::X86PlatformConfig *(
-      asmjit::CpuFeatures const &, unsigned, unsigned, unsigned)>>
+  const std::list<std::function<platform::X86PlatformConfig*(asmjit::CpuFeatures const&, unsigned, unsigned, unsigned)>>
       fallbackPlatformConfigsCtor = {
           REGISTER(SkylakeSPConfig),   // AVX512
           REGISTER(BulldozerConfig),   // FMA4
@@ -103,7 +94,7 @@ class X86Environment final : public Environment {
           REGISTER(NehalemConfig)      // SSE2
       };
 
-  std::list<platform::X86PlatformConfig *> fallbackPlatformConfigs;
+  std::list<platform::X86PlatformConfig*> fallbackPlatformConfigs;
 
 #undef REGISTER
 };
diff --git a/include/firestarter/ErrorDetectionStruct.hpp b/include/firestarter/ErrorDetectionStruct.hpp
index 38bcbc6a..4ed2e9fa 100644
--- a/include/firestarter/ErrorDetectionStruct.hpp
+++ b/include/firestarter/ErrorDetectionStruct.hpp
@@ -28,14 +28,14 @@ struct ErrorDetectionStruct {
   // one ptr (8B)
 
   // the pointer to 16B of communication
-  volatile unsigned long long *communicationLeft;
+  volatile unsigned long long* communicationLeft;
   volatile unsigned long long localsLeft[4];
-  // if this variable is not 0, an error occured in the comparison with the left
-  // thread.
+  // if this variable is not 0, an error occured in the comparison with the
+  // left thread.
   volatile unsigned long long errorLeft;
   volatile unsigned long long paddingLeft[2];
 
-  volatile unsigned long long *communicationRight;
+  volatile unsigned long long* communicationRight;
   volatile unsigned long long localsRight[4];
   // if this variable is not 0, an error occured in the comparison with the
   // right thread.
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 31347dd2..cb0218f0 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -29,8 +29,6 @@
 #include <firestarter/OneAPI/OneAPI.hpp>
 #endif
 
-
-
 #include <firestarter/Constants.hpp>
 
 #if defined(linux) || defined(__linux__)
@@ -43,8 +41,7 @@
 #include <firestarter/DumpRegisterWorkerData.hpp>
 #include <firestarter/LoadWorkerData.hpp>
 
-#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||            \
-    defined(_M_X64)
+#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
 #include <firestarter/Environment/X86/X86Environment.hpp>
 #endif
 
@@ -66,28 +63,18 @@ namespace firestarter {
 
 class Firestarter {
 public:
-  Firestarter(const int argc, const char **argv,
-              std::chrono::seconds const &timeout, unsigned loadPercent,
-              std::chrono::microseconds const &period,
-              unsigned requestedNumThreads, std::string const &cpuBind,
-              bool printFunctionSummary, unsigned functionId,
-              bool listInstructionGroups, std::string const &instructionGroups,
-              unsigned lineCount, bool allowUnavailablePayload,
-              bool dumpRegisters,
-              std::chrono::seconds const &dumpRegistersTimeDelta,
-              std::string const &dumpRegistersOutpath, bool errorDetection,
-              int gpus, unsigned gpuMatrixSize, bool gpuUseFloat,
-              bool gpuUseDouble, bool listMetrics, bool measurement,
-              std::chrono::milliseconds const &startDelta,
-              std::chrono::milliseconds const &stopDelta,
-              std::chrono::milliseconds const &measurementInterval,
-              std::vector<std::string> const &metricPaths,
-              std::vector<std::string> const &stdinMetrics, bool optimize,
-              std::chrono::seconds const &preheat,
-              std::string const &optimizationAlgorithm,
-              std::vector<std::string> const &optimizationMetrics,
-              std::chrono::seconds const &evaluationDuration,
-              unsigned individuals, std::string const &optimizeOutfile,
+  Firestarter(const int argc, const char** argv, std::chrono::seconds const& timeout, unsigned loadPercent,
+              std::chrono::microseconds const& period, unsigned requestedNumThreads, std::string const& cpuBind,
+              bool printFunctionSummary, unsigned functionId, bool listInstructionGroups,
+              std::string const& instructionGroups, unsigned lineCount, bool allowUnavailablePayload,
+              bool dumpRegisters, std::chrono::seconds const& dumpRegistersTimeDelta,
+              std::string const& dumpRegistersOutpath, bool errorDetection, int gpus, unsigned gpuMatrixSize,
+              bool gpuUseFloat, bool gpuUseDouble, bool listMetrics, bool measurement,
+              std::chrono::milliseconds const& startDelta, std::chrono::milliseconds const& stopDelta,
+              std::chrono::milliseconds const& measurementInterval, std::vector<std::string> const& metricPaths,
+              std::vector<std::string> const& stdinMetrics, bool optimize, std::chrono::seconds const& preheat,
+              std::string const& optimizationAlgorithm, std::vector<std::string> const& optimizationMetrics,
+              std::chrono::seconds const& evaluationDuration, unsigned individuals, std::string const& optimizeOutfile,
               unsigned generations, double nsga2_cr, double nsga2_m);
 
   ~Firestarter();
@@ -96,7 +83,7 @@ class Firestarter {
 
 private:
   const int _argc;
-  const char **_argv;
+  const char** _argv;
   const std::chrono::seconds _timeout;
   const unsigned _loadPercent;
   std::chrono::microseconds _load;
@@ -123,13 +110,10 @@ class Firestarter {
   const double _nsga2_cr;
   const double _nsga2_m;
 
-#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||            \
-    defined(_M_X64)
-  environment::x86::X86Environment *_environment = nullptr;
+#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+  environment::x86::X86Environment* _environment = nullptr;
 
-  environment::x86::X86Environment &environment() const {
-    return *_environment;
-  }
+  environment::x86::X86Environment& environment() const { return *_environment; }
 #else
 #error "FIRESTARTER is not implemented for this ISA"
 #endif
@@ -158,14 +142,11 @@ class Firestarter {
   void signalWork() { signalLoadWorkers(THREAD_WORK); };
 
   // WatchdogWorker.cpp
-  int watchdogWorker(std::chrono::microseconds period,
-                     std::chrono::microseconds load,
-                     std::chrono::seconds timeout);
+  int watchdogWorker(std::chrono::microseconds period, std::chrono::microseconds load, std::chrono::seconds timeout);
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
   // DumpRegisterWorker.cpp
-  int initDumpRegisterWorker(std::chrono::seconds dumpTimeDelta,
-                             std::string dumpFilePath);
+  int initDumpRegisterWorker(std::chrono::seconds dumpTimeDelta, std::string dumpFilePath);
   void joinDumpRegisterWorker();
 #endif
 
@@ -191,8 +172,7 @@ class Firestarter {
   // variable to control the load of the threads
   inline static volatile unsigned long long loadVar = LOAD_LOW;
 
-  std::vector<std::pair<std::thread, std::shared_ptr<LoadWorkerData>>>
-      loadThreads;
+  std::vector<std::pair<std::thread, std::shared_ptr<LoadWorkerData>>> loadThreads;
 
   std::vector<std::shared_ptr<unsigned long long>> errorCommunication;
 
diff --git a/include/firestarter/Json/Summary.hpp b/include/firestarter/Json/Summary.hpp
index 540c4aed..d9a923cc 100644
--- a/include/firestarter/Json/Summary.hpp
+++ b/include/firestarter/Json/Summary.hpp
@@ -25,14 +25,13 @@
 
 namespace nlohmann {
 template <> struct adl_serializer<firestarter::measurement::Summary> {
-  static firestarter::measurement::Summary from_json(const json &j) {
+  static firestarter::measurement::Summary from_json(const json& j) {
     return {j["num_timepoints"].get<size_t>(),
-            std::chrono::milliseconds(
-                j["duration"].get<std::chrono::milliseconds::rep>()),
-            j["average"].get<double>(), j["stddev"].get<double>()};
+            std::chrono::milliseconds(j["duration"].get<std::chrono::milliseconds::rep>()), j["average"].get<double>(),
+            j["stddev"].get<double>()};
   }
 
-  static void to_json(json &j, firestarter::measurement::Summary s) {
+  static void to_json(json& j, firestarter::measurement::Summary s) {
     j = json::object();
 
     j["num_timepoints"] = s.num_timepoints;
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index ec70476f..78b11b80 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -21,17 +21,15 @@
 
 #pragma once
 
+#include <atomic>
 #include <firestarter/Constants.hpp>
 #include <firestarter/DumpRegisterStruct.hpp>
 #include <firestarter/Environment/Environment.hpp>
 #include <firestarter/ErrorDetectionStruct.hpp>
-
-#include <atomic>
 #include <memory>
 #include <mutex>
 
-#define PAD_SIZE(size, align)                                                  \
-  align *(int)std::ceil((double)size / (double)align)
+#define PAD_SIZE(size, align) align*(int)std::ceil((double)size / (double)align)
 
 #if defined(__APPLE__)
 #define ALIGNED_MALLOC(size, align) aligned_alloc(align, PAD_SIZE(size, align))
@@ -40,12 +38,10 @@
 #define ALIGNED_MALLOC(size, align) _mm_malloc(PAD_SIZE(size, align), align)
 #define ALIGNED_FREE _mm_free
 #elif defined(_MSC_VER)
-#define ALIGNED_MALLOC(size, align)                                            \
-  _aligned_malloc(PAD_SIZE(size, align), align)
+#define ALIGNED_MALLOC(size, align) _aligned_malloc(PAD_SIZE(size, align), align)
 #define ALIGNED_FREE _aligned_free
 #else
-#define ALIGNED_MALLOC(size, align)                                            \
-  std::aligned_alloc(align, PAD_SIZE(size, align))
+#define ALIGNED_MALLOC(size, align) std::aligned_alloc(align, PAD_SIZE(size, align))
 #define ALIGNED_FREE std::free
 #endif
 
@@ -53,25 +49,22 @@ namespace firestarter {
 
 class LoadWorkerData {
 public:
-  LoadWorkerData(int id, environment::Environment &environment,
-                 volatile unsigned long long *loadVar,
-                 unsigned long long period, bool dumpRegisters,
-                 bool errorDetection)
-      : addrHigh(loadVar), period(period), dumpRegisters(dumpRegisters),
-        errorDetection(errorDetection), _id(id), _environment(environment),
-        _config(new environment::platform::RuntimeConfig(
-            environment.selectedConfig())) {
+  LoadWorkerData(int id, environment::Environment& environment, volatile unsigned long long* loadVar,
+                 unsigned long long period, bool dumpRegisters, bool errorDetection)
+      : addrHigh(loadVar)
+      , period(period)
+      , dumpRegisters(dumpRegisters)
+      , errorDetection(errorDetection)
+      , _id(id)
+      , _environment(environment)
+      , _config(new environment::platform::RuntimeConfig(environment.selectedConfig())) {
     // use REGISTER_MAX_NUM cache lines for the dumped registers
     // and another cache line for the control variable.
-    // as we are doing aligned moves we only have the option to waste a whole
-    // cacheline
-    addrOffset = dumpRegisters
-                     ? sizeof(DumpRegisterStruct) / sizeof(unsigned long long)
-                     : 0;
+    // as we are doing aligned moves we only have the option to waste a
+    // whole cacheline
+    addrOffset = dumpRegisters ? sizeof(DumpRegisterStruct) / sizeof(unsigned long long) : 0;
 
-    addrOffset += errorDetection ? sizeof(ErrorDetectionStruct) /
-                                       sizeof(unsigned long long)
-                                 : 0;
+    addrOffset += errorDetection ? sizeof(ErrorDetectionStruct) / sizeof(unsigned long long) : 0;
   }
 
   ~LoadWorkerData() {
@@ -81,27 +74,26 @@ class LoadWorkerData {
     }
   }
 
-  void setErrorCommunication(
-      std::shared_ptr<unsigned long long> communicationLeft,
-      std::shared_ptr<unsigned long long> communicationRight) {
+  void setErrorCommunication(std::shared_ptr<unsigned long long> communicationLeft,
+                             std::shared_ptr<unsigned long long> communicationRight) {
     this->communicationLeft = communicationLeft;
     this->communicationRight = communicationRight;
   }
 
   int id() const { return _id; }
-  environment::Environment &environment() const { return _environment; }
-  environment::platform::RuntimeConfig &config() const { return *_config; }
+  environment::Environment& environment() const { return _environment; }
+  environment::platform::RuntimeConfig& config() const { return *_config; }
 
-  const ErrorDetectionStruct *errorDetectionStruct() const {
-    return reinterpret_cast<ErrorDetectionStruct *>(addrMem - addrOffset);
+  const ErrorDetectionStruct* errorDetectionStruct() const {
+    return reinterpret_cast<ErrorDetectionStruct*>(addrMem - addrOffset);
   }
 
   int comm = THREAD_WAIT;
   bool ack = false;
   std::mutex mutex;
-  unsigned long long *addrMem = nullptr;
+  unsigned long long* addrMem = nullptr;
   unsigned long long addrOffset;
-  volatile unsigned long long *addrHigh;
+  volatile unsigned long long* addrHigh;
   unsigned long long buffersizeMem;
   unsigned long long iterations = 0;
   // save the last iteration count when switching payloads
@@ -121,8 +113,8 @@ class LoadWorkerData {
 
 private:
   int _id;
-  environment::Environment &_environment;
-  environment::platform::RuntimeConfig *_config;
+  environment::Environment& _environment;
+  environment::platform::RuntimeConfig* _config;
 };
 
 } // namespace firestarter
diff --git a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
index af8b7ff1..a91e1228 100644
--- a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
+++ b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
@@ -23,7 +23,6 @@
 
 #include <nitro/log/log.hpp>
 #include <nitro/log/severity.hpp>
-
 #include <thread>
 
 namespace firestarter {
@@ -34,13 +33,10 @@ template <typename Record> class FirstWorkerThreadFilter {
 public:
   typedef Record record_type;
 
-  static void setFirstThread(std::thread::id newFirstThread) {
-    firstThread = newFirstThread;
-  }
+  static void setFirstThread(std::thread::id newFirstThread) { firstThread = newFirstThread; }
 
-  bool filter(Record &r) const {
-    return r.std_thread_id() == firstThread ||
-           r.severity() >= nitro::log::severity_level::error;
+  bool filter(Record& r) const {
+    return r.std_thread_id() == firstThread || r.severity() >= nitro::log::severity_level::error;
   }
 
 private:
diff --git a/include/firestarter/Logging/Log.hpp b/include/firestarter/Logging/Log.hpp
index f5b613c0..74cc3e1a 100644
--- a/include/firestarter/Logging/Log.hpp
+++ b/include/firestarter/Logging/Log.hpp
@@ -22,21 +22,17 @@
 #pragma once
 
 #include <firestarter/Logging/FirstWorkerThreadFilter.hpp>
-
-#include <nitro/log/log.hpp>
-#include <nitro/log/severity.hpp>
-
+#include <iomanip>
+#include <ios>
+#include <iostream>
 #include <nitro/log/attribute/message.hpp>
 #include <nitro/log/attribute/severity.hpp>
 #include <nitro/log/attribute/std_thread_id.hpp>
 #include <nitro/log/attribute/timestamp.hpp>
-
 #include <nitro/log/filter/and_filter.hpp>
 #include <nitro/log/filter/severity_filter.hpp>
-
-#include <iomanip>
-#include <ios>
-#include <iostream>
+#include <nitro/log/log.hpp>
+#include <nitro/log/severity.hpp>
 #include <sstream>
 #include <string>
 
@@ -46,8 +42,7 @@ namespace logging {
 
 class StdOut {
 public:
-  void sink(nitro::log::severity_level severity,
-            const std::string &formatted_record) {
+  void sink(nitro::log::severity_level severity, const std::string& formatted_record) {
     switch (severity) {
     case nitro::log::severity_level::warn:
     case nitro::log::severity_level::error:
@@ -61,13 +56,12 @@ class StdOut {
   }
 };
 
-using record = nitro::log::record<
-    nitro::log::severity_attribute, nitro::log::message_attribute,
-    nitro::log::timestamp_attribute, nitro::log::std_thread_id_attribute>;
+using record = nitro::log::record<nitro::log::severity_attribute, nitro::log::message_attribute,
+                                  nitro::log::timestamp_attribute, nitro::log::std_thread_id_attribute>;
 
 template <typename Record> class formater {
 public:
-  std::string format(Record &r) {
+  std::string format(Record& r) {
     std::stringstream s;
 
     switch (r.severity()) {
@@ -93,21 +87,16 @@ template <typename Record> class formater {
   }
 };
 
-template <typename Record>
-using filter = nitro::log::filter::severity_filter<Record>;
+template <typename Record> using filter = nitro::log::filter::severity_filter<Record>;
 
 template <typename Record>
-using workerFilter =
-    nitro::log::filter::and_filter<filter<Record>,
-                                   FirstWorkerThreadFilter<Record>>;
+using workerFilter = nitro::log::filter::and_filter<filter<Record>, FirstWorkerThreadFilter<Record>>;
 
 } // namespace logging
 
-using log = nitro::log::logger<logging::record, logging::formater,
-                               firestarter::logging::StdOut, logging::filter>;
+using log = nitro::log::logger<logging::record, logging::formater, firestarter::logging::StdOut, logging::filter>;
 
 using workerLog =
-    nitro::log::logger<logging::record, logging::formater,
-                       firestarter::logging::StdOut, logging::workerFilter>;
+    nitro::log::logger<logging::record, logging::formater, firestarter::logging::StdOut, logging::workerFilter>;
 
 } // namespace firestarter
diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp
index 4fc8a6a1..c115a476 100644
--- a/include/firestarter/Measurement/MeasurementWorker.hpp
+++ b/include/firestarter/Measurement/MeasurementWorker.hpp
@@ -21,11 +21,10 @@
 
 #pragma once
 
+#include <chrono>
 #include <firestarter/Logging/Log.hpp>
 #include <firestarter/Measurement/Summary.hpp>
 #include <firestarter/Measurement/TimeValue.hpp>
-
-#include <chrono>
 #include <map>
 #include <mutex>
 
@@ -34,12 +33,10 @@ extern "C" {
 #include <firestarter/Measurement/Metric/Perf.h>
 #include <firestarter/Measurement/Metric/RAPL.h>
 #include <firestarter/Measurement/MetricInterface.h>
-
 #include <pthread.h>
 }
 
-void insertCallback(void *cls, const char *metricName, int64_t timeSinceEpoch,
-                    double value);
+void insertCallback(void* cls, const char* metricName, int64_t timeSinceEpoch, double value);
 
 namespace firestarter::measurement {
 
@@ -48,17 +45,16 @@ class MeasurementWorker {
   pthread_t workerThread;
   pthread_t stdinThread;
 
-  std::vector<metric_interface_t *> metrics = {
-      &rapl_metric, &perf_ipc_metric, &perf_freq_metric, &ipc_estimate_metric};
+  std::vector<metric_interface_t*> metrics = {&rapl_metric, &perf_ipc_metric, &perf_freq_metric, &ipc_estimate_metric};
 
   std::mutex values_mutex;
   std::map<std::string, std::vector<TimeValue>> values = {};
 
-  static int *dataAcquisitionWorker(void *measurementWorker);
+  static int* dataAcquisitionWorker(void* measurementWorker);
 
-  static int *stdinDataAcquisitionWorker(void *measurementWorker);
+  static int* stdinDataAcquisitionWorker(void* measurementWorker);
 
-  const metric_interface_t *findMetricByName(std::string metricName);
+  const metric_interface_t* findMetricByName(std::string metricName);
 
   std::chrono::milliseconds updateInterval;
 
@@ -70,46 +66,39 @@ class MeasurementWorker {
   std::string availableMetricsString;
 
 #ifndef FIRESTARTER_LINK_STATIC
-  std::vector<void *> _metricDylibs = {};
+  std::vector<void*> _metricDylibs = {};
 #endif
 
   std::vector<std::string> _stdinMetrics = {};
 
 public:
   // creates the worker thread
-  MeasurementWorker(std::chrono::milliseconds updateInterval,
-                    unsigned long long numThreads,
-                    std::vector<std::string> const &metricDylibs,
-                    std::vector<std::string> const &stdinMetrics);
+  MeasurementWorker(std::chrono::milliseconds updateInterval, unsigned long long numThreads,
+                    std::vector<std::string> const& metricDylibs, std::vector<std::string> const& stdinMetrics);
 
   // stops the worker threads
   ~MeasurementWorker();
 
-  std::string const &availableMetrics() const {
-    return this->availableMetricsString;
-  }
+  std::string const& availableMetrics() const { return this->availableMetricsString; }
 
-  std::vector<std::string> const &stdinMetrics() { return _stdinMetrics; }
+  std::vector<std::string> const& stdinMetrics() { return _stdinMetrics; }
 
   // returns a list of metrics
   std::vector<std::string> metricNames();
 
   // setup the selected metrics
   // returns a vector with the names of inialized metrics
-  std::vector<std::string>
-  initMetrics(std::vector<std::string> const &metricNames);
+  std::vector<std::string> initMetrics(std::vector<std::string> const& metricNames);
 
   // callback function for metrics
-  void insertCallback(const char *metricName, int64_t timeSinceEpoch,
-                      double value);
+  void insertCallback(const char* metricName, int64_t timeSinceEpoch, double value);
 
   // start the measurement
   void startMeasurement();
 
   // get the measurement values begining from measurement start until now.
-  std::map<std::string, Summary> getValues(
-      std::chrono::milliseconds startDelta = std::chrono::milliseconds::zero(),
-      std::chrono::milliseconds stopDelta = std::chrono::milliseconds::zero());
+  std::map<std::string, Summary> getValues(std::chrono::milliseconds startDelta = std::chrono::milliseconds::zero(),
+                                           std::chrono::milliseconds stopDelta = std::chrono::milliseconds::zero());
 };
 
 } // namespace firestarter::measurement
diff --git a/include/firestarter/Measurement/MetricInterface.h b/include/firestarter/Measurement/MetricInterface.h
index dbea19e8..c0c1c58b 100644
--- a/include/firestarter/Measurement/MetricInterface.h
+++ b/include/firestarter/Measurement/MetricInterface.h
@@ -44,13 +44,13 @@ typedef struct {
 // load it during runtime.
 typedef struct {
   // the name of the metric
-  const char *name;
+  const char* name;
 
   // metric type with bitfield from metric_type_t
   metric_type_t type;
 
   // the unit of the metric
-  const char *unit;
+  const char* unit;
 
   uint64_t callback_time;
 
@@ -69,17 +69,15 @@ typedef struct {
   // Get a reading of the metric
   // Return EXIT_SUCCESS if we got a new value.
   // Set this function pointer to NULL if METRIC_INSERT_CALLBACK is specified.
-  int32_t (*get_reading)(double *value);
+  int32_t (*get_reading)(double* value);
 
   // Get error in case return code not being EXIT_SUCCESS
-  const char *(*get_error)(void);
+  const char* (*get_error)(void);
 
   // If METRIC_INSERT_CALLBACK is set in the type, this function will be passed
   // a callback and the first argument for the callback.
   // Further arguments of callback are the metric name, an unix timestamp (time
   // since epoch) and a metric value.
-  int32_t (*register_insert_callback)(void (*)(void *, const char *, int64_t,
-                                               double),
-                                      void *);
+  int32_t (*register_insert_callback)(void (*)(void*, const char*, int64_t, double), void*);
 
 } metric_interface_t;
diff --git a/include/firestarter/Measurement/Summary.hpp b/include/firestarter/Measurement/Summary.hpp
index 23f819f0..7f0d7899 100644
--- a/include/firestarter/Measurement/Summary.hpp
+++ b/include/firestarter/Measurement/Summary.hpp
@@ -21,9 +21,8 @@
 
 #pragma once
 
-#include <firestarter/Measurement/TimeValue.hpp>
-
 #include <chrono>
+#include <firestarter/Measurement/TimeValue.hpp>
 #include <nlohmann/json.hpp>
 #include <vector>
 
@@ -34,17 +33,14 @@ extern "C" {
 namespace firestarter::measurement {
 
 struct Summary {
-
   size_t num_timepoints;
   std::chrono::milliseconds duration;
 
   double average;
   double stddev;
 
-  static Summary calculate(std::vector<TimeValue>::iterator begin,
-                           std::vector<TimeValue>::iterator end,
-                           metric_type_t metricType,
-                           unsigned long long numThreads);
+  static Summary calculate(std::vector<TimeValue>::iterator begin, std::vector<TimeValue>::iterator end,
+                           metric_type_t metricType, unsigned long long numThreads);
 };
 
 } // namespace firestarter::measurement
diff --git a/include/firestarter/Measurement/TimeValue.hpp b/include/firestarter/Measurement/TimeValue.hpp
index eae3de23..bf9377c9 100644
--- a/include/firestarter/Measurement/TimeValue.hpp
+++ b/include/firestarter/Measurement/TimeValue.hpp
@@ -26,12 +26,11 @@
 namespace firestarter::measurement {
 
 struct TimeValue {
-
   TimeValue() = default;
 
-  constexpr TimeValue(std::chrono::high_resolution_clock::time_point t,
-                      double v)
-      : time(t), value(v){};
+  constexpr TimeValue(std::chrono::high_resolution_clock::time_point t, double v)
+      : time(t)
+      , value(v){};
 
   std::chrono::high_resolution_clock::time_point time;
   double value;
diff --git a/include/firestarter/OneAPI/OneAPI.hpp b/include/firestarter/OneAPI/OneAPI.hpp
index cf939388..0ed1844c 100644
--- a/include/firestarter/OneAPI/OneAPI.hpp
+++ b/include/firestarter/OneAPI/OneAPI.hpp
@@ -34,13 +34,11 @@ class OneAPI {
   std::condition_variable _waitForInitCv;
   std::mutex _waitForInitCvMutex;
 
-  static void initGpus(std::condition_variable &cv,
-                       volatile unsigned long long *loadVar, bool useFloat,
-                       bool useDouble, unsigned matrixSize, int gpus);
+  static void initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble,
+                       unsigned matrixSize, int gpus);
 
 public:
-  OneAPI(volatile unsigned long long *loadVar, bool useFloat, bool useDouble,
-       unsigned matrixSize, int gpus);
+  OneAPI(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus);
 
   ~OneAPI() {
     if (_initThread.joinable()) {
diff --git a/include/firestarter/Optimizer/Algorithm.hpp b/include/firestarter/Optimizer/Algorithm.hpp
index 14009183..d9186322 100644
--- a/include/firestarter/Optimizer/Algorithm.hpp
+++ b/include/firestarter/Optimizer/Algorithm.hpp
@@ -30,10 +30,9 @@ class Algorithm {
   Algorithm() {}
   virtual ~Algorithm() {}
 
-  virtual void checkPopulation(Population const &pop,
-                               std::size_t populationSize) = 0;
+  virtual void checkPopulation(Population const& pop, std::size_t populationSize) = 0;
 
-  virtual Population evolve(Population &pop) = 0;
+  virtual Population evolve(Population& pop) = 0;
 };
 
 } // namespace firestarter::optimizer
diff --git a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
index c1825f73..a144bb05 100644
--- a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
+++ b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
@@ -30,11 +30,9 @@ class NSGA2 : public Algorithm {
   NSGA2(unsigned gen, double cr, double m);
   ~NSGA2() {}
 
-  void checkPopulation(firestarter::optimizer::Population const &pop,
-                       std::size_t populationSize) override;
+  void checkPopulation(firestarter::optimizer::Population const& pop, std::size_t populationSize) override;
 
-  firestarter::optimizer::Population
-  evolve(firestarter::optimizer::Population &pop) override;
+  firestarter::optimizer::Population evolve(firestarter::optimizer::Population& pop) override;
 
 private:
   unsigned _gen;
diff --git a/include/firestarter/Optimizer/History.hpp b/include/firestarter/Optimizer/History.hpp
index 9dec066d..2922301f 100644
--- a/include/firestarter/Optimizer/History.hpp
+++ b/include/firestarter/Optimizer/History.hpp
@@ -21,15 +21,14 @@
 
 #pragma once
 
-#include <firestarter/Json/Summary.hpp>
-#include <firestarter/Logging/Log.hpp>
-#include <firestarter/Measurement/Summary.hpp>
-#include <firestarter/Optimizer/Individual.hpp>
-
 #include <algorithm>
 #include <cassert>
 #include <cstring>
 #include <ctime>
+#include <firestarter/Json/Summary.hpp>
+#include <firestarter/Logging/Log.hpp>
+#include <firestarter/Measurement/Summary.hpp>
+#include <firestarter/Optimizer/Individual.hpp>
 #include <fstream>
 #include <iomanip>
 #include <iostream>
@@ -48,18 +47,14 @@ struct History {
 private:
   // https://stackoverflow.com/questions/17074324/how-can-i-sort-two-vectors-in-the-same-way-with-criteria-that-uses-only-one-of/17074810#17074810
   template <typename T, typename Compare>
-  inline static std::vector<std::size_t>
-  sortPermutation(const std::vector<T> &vec, Compare &compare) {
+  inline static std::vector<std::size_t> sortPermutation(const std::vector<T>& vec, Compare& compare) {
     std::vector<std::size_t> p(vec.size());
     std::iota(p.begin(), p.end(), 0);
-    std::sort(p.begin(), p.end(), [&](std::size_t i, std::size_t j) {
-      return compare(vec[i], vec[j]);
-    });
+    std::sort(p.begin(), p.end(), [&](std::size_t i, std::size_t j) { return compare(vec[i], vec[j]); });
     return p;
   }
 
-  inline static void padding(std::stringstream &ss, std::size_t width,
-                             std::size_t taken, char c) {
+  inline static void padding(std::stringstream& ss, std::size_t width, std::size_t taken, char c) {
     for (std::size_t i = 0; i < (std::max)(width, taken) - taken; ++i) {
       ss << c;
     }
@@ -69,24 +64,18 @@ struct History {
   inline static std::size_t MIN_COLUMN_WIDTH = 10;
 
   inline static std::vector<Individual> _x = {};
-  inline static std::vector<
-      std::map<std::string, firestarter::measurement::Summary>>
-      _f = {};
+  inline static std::vector<std::map<std::string, firestarter::measurement::Summary>> _f = {};
 
 public:
-  inline static void append(
-      std::vector<unsigned> const &ind,
-      std::map<std::string, firestarter::measurement::Summary> const &metric) {
+  inline static void append(std::vector<unsigned> const& ind,
+                            std::map<std::string, firestarter::measurement::Summary> const& metric) {
     _x.push_back(ind);
     _f.push_back(metric);
   }
 
-  inline static std::optional<
-      std::map<std::string, firestarter::measurement::Summary>>
-  find(std::vector<unsigned> const &individual) {
-    auto findEqual = [individual](auto const &ind) {
-      return ind == individual;
-    };
+  inline static std::optional<std::map<std::string, firestarter::measurement::Summary>>
+  find(std::vector<unsigned> const& individual) {
+    auto findEqual = [individual](auto const& ind) { return ind == individual; };
     auto ind = std::find_if(_x.begin(), _x.end(), findEqual);
     if (ind == _x.end()) {
       return {};
@@ -95,25 +84,22 @@ struct History {
     return _f[dist];
   }
 
-  inline static void
-  printBest(std::vector<std::string> const &optimizationMetrics,
-            std::vector<std::string> const &payloadItems) {
+  inline static void printBest(std::vector<std::string> const& optimizationMetrics,
+                               std::vector<std::string> const& payloadItems) {
     // TODO: print paretto front
 
     // print the best 20 individuals for each metric in a format
     // where the user can give it to --run-instruction-groups directly
     std::map<std::string, std::size_t> columnWidth;
 
-    for (auto const &metric : optimizationMetrics) {
+    for (auto const& metric : optimizationMetrics) {
       columnWidth[metric] = (std::max)(metric.size(), MIN_COLUMN_WIDTH);
       firestarter::log::trace() << metric << ": " << columnWidth[metric];
     }
 
-    for (auto const &metric : optimizationMetrics) {
-      using SummaryMap =
-          std::map<std::string, firestarter::measurement::Summary>;
-      auto compareIndividual = [&metric](SummaryMap const &mapA,
-                                         SummaryMap const &mapB) {
+    for (auto const& metric : optimizationMetrics) {
+      using SummaryMap = std::map<std::string, firestarter::measurement::Summary>;
+      auto compareIndividual = [&metric](SummaryMap const& mapA, SummaryMap const& mapB) {
         auto summaryA = mapA.find(metric);
         auto summaryB = mapB.find(metric);
 
@@ -132,25 +118,24 @@ struct History {
 
       auto perm = sortPermutation(_f, compareIndividual);
 
-      auto formatIndividual =
-          [&payloadItems](std::vector<unsigned> const &individual) {
-            std::string result = "";
-            assert(payloadItems.size() == individual.size());
+      auto formatIndividual = [&payloadItems](std::vector<unsigned> const& individual) {
+        std::string result = "";
+        assert(payloadItems.size() == individual.size());
 
-            for (std::size_t i = 0; i < individual.size(); ++i) {
-              // skip zero values
-              if (individual[i] == 0) {
-                continue;
-              }
+        for (std::size_t i = 0; i < individual.size(); ++i) {
+          // skip zero values
+          if (individual[i] == 0) {
+            continue;
+          }
 
-              if (result.size() != 0) {
-                result += ",";
-              }
-              result += payloadItems[i] + ":" + std::to_string(individual[i]);
-            }
+          if (result.size() != 0) {
+            result += ",";
+          }
+          result += payloadItems[i] + ":" + std::to_string(individual[i]);
+        }
 
-            return result;
-          };
+        return result;
+      };
 
       auto begin = perm.begin();
       auto end = perm.end();
@@ -177,7 +162,7 @@ struct History {
       secondLine << "  ";
       padding(secondLine, (std::max)(max, ind.size()), 0, '-');
 
-      for (auto const &metric : optimizationMetrics) {
+      for (auto const& metric : optimizationMetrics) {
         auto width = columnWidth[metric];
 
         firstLine << " | ";
@@ -203,7 +188,7 @@ struct History {
         ss << "  " << ind;
         padding(ss, max, ind.size(), ' ');
 
-        for (auto const &metric : optimizationMetrics) {
+        for (auto const& metric : optimizationMetrics) {
           auto width = columnWidth[metric];
           std::string value;
 
@@ -230,26 +215,24 @@ struct History {
       firestarter::log::info() << ss.str();
     }
 
-    firestarter::log::info()
-        << "To run FIRESTARTER with the best individual of a given metric "
-           "use the command line argument "
-           "`--run-instruction-groups=INDIVIDUAL`";
+    firestarter::log::info() << "To run FIRESTARTER with the best individual of a given metric "
+                                "use the command line argument "
+                                "`--run-instruction-groups=INDIVIDUAL`";
   }
 
-  inline static void save(std::string const &path, std::string const &startTime,
-                          std::vector<std::string> const &payloadItems,
-                          const int argc, const char **argv) {
+  inline static void save(std::string const& path, std::string const& startTime,
+                          std::vector<std::string> const& payloadItems, const int argc, const char** argv) {
     using json = nlohmann::json;
 
     json j = json::object();
 
     j["individuals"] = json::array();
-    for (auto const &ind : _x) {
+    for (auto const& ind : _x) {
       j["individuals"].push_back(ind);
     }
 
     j["metrics"] = json::array();
-    for (auto const &eval : _f) {
+    for (auto const& eval : _f) {
       j["metrics"].push_back(eval);
     }
 
@@ -269,7 +252,7 @@ struct History {
 
     // save the payload items
     j["payloadItems"] = json::array();
-    for (auto const &item : payloadItems) {
+    for (auto const& item : payloadItems) {
       j["payloadItems"].push_back(item);
     }
 
@@ -286,7 +269,7 @@ struct History {
 
     std::string outpath = path;
     if (outpath.empty()) {
-      char *pwd = get_current_dir_name();
+      char* pwd = get_current_dir_name();
       if (pwd) {
         outpath = pwd;
         free(pwd);
diff --git a/include/firestarter/Optimizer/OptimizerWorker.hpp b/include/firestarter/Optimizer/OptimizerWorker.hpp
index 90eb80a5..816f4882 100644
--- a/include/firestarter/Optimizer/OptimizerWorker.hpp
+++ b/include/firestarter/Optimizer/OptimizerWorker.hpp
@@ -19,10 +19,9 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <chrono>
 #include <firestarter/Optimizer/Algorithm.hpp>
 #include <firestarter/Optimizer/Population.hpp>
-
-#include <chrono>
 #include <memory>
 
 extern "C" {
@@ -33,11 +32,9 @@ namespace firestarter::optimizer {
 
 class OptimizerWorker {
 public:
-  OptimizerWorker(
-      std::unique_ptr<firestarter::optimizer::Algorithm> &&algorithm,
-      firestarter::optimizer::Population &population,
-      std::string const &optimizationAlgorithm, unsigned individuals,
-      std::chrono::seconds const &preheat);
+  OptimizerWorker(std::unique_ptr<firestarter::optimizer::Algorithm>&& algorithm,
+                  firestarter::optimizer::Population& population, std::string const& optimizationAlgorithm,
+                  unsigned individuals, std::chrono::seconds const& preheat);
 
   ~OptimizerWorker() {}
 
@@ -46,7 +43,7 @@ class OptimizerWorker {
   void kill();
 
 private:
-  static void *optimizerThread(void *optimizerWorker);
+  static void* optimizerThread(void* optimizerWorker);
 
   std::unique_ptr<firestarter::optimizer::Algorithm> _algorithm;
   firestarter::optimizer::Population _population;
diff --git a/include/firestarter/Optimizer/Population.hpp b/include/firestarter/Optimizer/Population.hpp
index b02f451d..3bf3ac38 100644
--- a/include/firestarter/Optimizer/Population.hpp
+++ b/include/firestarter/Optimizer/Population.hpp
@@ -22,11 +22,10 @@
 #ifndef FIRESTARTER_OPTIMIZER_POPULATION_HPP
 #define FIRESTARTER_OPTIMIZER_POPULATION_HPP
 
+#include <cstring>
 #include <firestarter/Optimizer/History.hpp>
 #include <firestarter/Optimizer/Individual.hpp>
 #include <firestarter/Optimizer/Problem.hpp>
-
-#include <cstring>
 #include <memory>
 #include <optional>
 #include <random>
@@ -40,13 +39,17 @@ class Population {
   // Construct a population from a problem.
   Population() = default;
 
-  Population(std::shared_ptr<Problem> &&problem)
-      : _problem(std::move(problem)), gen(rd()) {}
+  Population(std::shared_ptr<Problem>&& problem)
+      : _problem(std::move(problem))
+      , gen(rd()) {}
 
-  Population(Population &pop)
-      : _problem(pop._problem), _x(pop._x), _f(pop._f), gen(rd()) {}
+  Population(Population& pop)
+      : _problem(pop._problem)
+      , _x(pop._x)
+      , _f(pop._f)
+      , gen(rd()) {}
 
-  Population &operator=(Population const &pop) {
+  Population& operator=(Population const& pop) {
     _problem = std::move(pop._problem);
     _x = pop._x;
     _f = pop._f;
@@ -62,10 +65,9 @@ class Population {
   std::size_t size() const;
 
   // add one individual to the population. fitness will be evaluated.
-  void append(Individual const &ind);
+  void append(Individual const& ind);
 
-  void insert(std::size_t idx, Individual const &ind,
-              std::vector<double> const &fit);
+  void insert(std::size_t idx, Individual const& ind, std::vector<double> const& fit);
 
   // get a random individual inside bounds of problem
   Individual getRandomIndividual();
@@ -74,14 +76,14 @@ class Population {
   // return nothing in case of mutli-objective.
   std::optional<Individual> bestIndividual() const;
 
-  Problem const &problem() const { return *_problem; }
+  Problem const& problem() const { return *_problem; }
 
-  std::vector<Individual> const &x() const { return _x; }
-  std::vector<std::vector<double>> const &f() const { return _f; }
+  std::vector<Individual> const& x() const { return _x; }
+  std::vector<std::vector<double>> const& f() const { return _f; }
 
 private:
   // add one individual to the population with a fitness.
-  void append(Individual const &ind, std::vector<double> const &fit);
+  void append(Individual const& ind, std::vector<double> const& fit);
 
   // our problem.
   std::shared_ptr<Problem> _problem;
diff --git a/include/firestarter/Optimizer/Problem.hpp b/include/firestarter/Optimizer/Problem.hpp
index f88b0bc3..009b4d01 100644
--- a/include/firestarter/Optimizer/Problem.hpp
+++ b/include/firestarter/Optimizer/Problem.hpp
@@ -21,10 +21,9 @@
 
 #pragma once
 
+#include <cstring>
 #include <firestarter/Measurement/Summary.hpp>
 #include <firestarter/Optimizer/Individual.hpp>
-
-#include <cstring>
 #include <map>
 #include <tuple>
 #include <vector>
@@ -33,16 +32,14 @@ namespace firestarter::optimizer {
 
 class Problem {
 public:
-  Problem() : _fevals(0) {}
+  Problem()
+      : _fevals(0) {}
   virtual ~Problem() {}
 
   // return the fitness for an individual
-  virtual std::map<std::string, firestarter::measurement::Summary>
-  metrics(Individual const &individual) = 0;
+  virtual std::map<std::string, firestarter::measurement::Summary> metrics(Individual const& individual) = 0;
 
-  virtual std::vector<double>
-  fitness(std::map<std::string, firestarter::measurement::Summary> const
-              &summaries) = 0;
+  virtual std::vector<double> fitness(std::map<std::string, firestarter::measurement::Summary> const& summaries) = 0;
 
   // get the bounds of the problem
   virtual std::vector<std::tuple<unsigned, unsigned>> getBounds() const = 0;
diff --git a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
index 1ca0de58..f24ae2f2 100644
--- a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
+++ b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
@@ -21,11 +21,10 @@
 
 #pragma once
 
-#include <firestarter/Optimizer/Problem.hpp>
-
 #include <cassert>
 #include <chrono>
 #include <cmath>
+#include <firestarter/Optimizer/Problem.hpp>
 #include <functional>
 #include <memory>
 #include <thread>
@@ -35,28 +34,26 @@
 namespace firestarter::optimizer::problem {
 
 class CLIArgumentProblem final : public firestarter::optimizer::Problem {
-
 public:
-  CLIArgumentProblem(
-      std::function<void(std::vector<std::pair<std::string, unsigned>> const &)>
-          &&changePayloadFunction,
-      std::shared_ptr<firestarter::measurement::MeasurementWorker> const
-          &measurementWorker,
-      std::vector<std::string> const &metrics, std::chrono::seconds timeout,
-      std::chrono::milliseconds startDelta, std::chrono::milliseconds stopDelta,
-      std::vector<std::string> const &instructionGroups)
-      : _changePayloadFunction(changePayloadFunction),
-        _measurementWorker(measurementWorker), _metrics(metrics),
-        _timeout(timeout), _startDelta(startDelta), _stopDelta(stopDelta),
-        _instructionGroups(instructionGroups) {
+  CLIArgumentProblem(std::function<void(std::vector<std::pair<std::string, unsigned>> const&)>&& changePayloadFunction,
+                     std::shared_ptr<firestarter::measurement::MeasurementWorker> const& measurementWorker,
+                     std::vector<std::string> const& metrics, std::chrono::seconds timeout,
+                     std::chrono::milliseconds startDelta, std::chrono::milliseconds stopDelta,
+                     std::vector<std::string> const& instructionGroups)
+      : _changePayloadFunction(changePayloadFunction)
+      , _measurementWorker(measurementWorker)
+      , _metrics(metrics)
+      , _timeout(timeout)
+      , _startDelta(startDelta)
+      , _stopDelta(stopDelta)
+      , _instructionGroups(instructionGroups) {
     assert(_metrics.size() != 0);
   }
 
   ~CLIArgumentProblem() {}
 
   // return all available metrics for the individual
-  std::map<std::string, firestarter::measurement::Summary>
-  metrics(std::vector<unsigned> const &individual) override {
+  std::map<std::string, firestarter::measurement::Summary> metrics(std::vector<unsigned> const& individual) override {
     // increment evaluation idx
     _fevals++;
 
@@ -71,32 +68,29 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem {
     _changePayloadFunction(payload);
 
     // start the measurement
-    // NOTE: starting the measurement must happen after switching to not mess up
-    // ipc-estimate metric
+    // NOTE: starting the measurement must happen after switching to not
+    // mess up ipc-estimate metric
     _measurementWorker->startMeasurement();
 
     // wait for the measurement to finish
     std::this_thread::sleep_for(_timeout);
 
     // FIXME: this is an ugly workaround for the ipc-estimate metric
-    // changeing the payload triggers a write of the iteration counter of the
-    // last payload, which we use to estimate the ipc.
+    // changeing the payload triggers a write of the iteration counter of
+    // the last payload, which we use to estimate the ipc.
     _changePayloadFunction(payload);
 
     // return the results
     return _measurementWorker->getValues(_startDelta, _stopDelta);
   }
 
-  std::vector<double> fitness(
-      std::map<std::string, firestarter::measurement::Summary> const &summaries)
-      override {
+  std::vector<double> fitness(std::map<std::string, firestarter::measurement::Summary> const& summaries) override {
     std::vector<double> values = {};
 
-    for (auto const &metricName : _metrics) {
-      auto findName = [metricName](auto const &summary) {
+    for (auto const& metricName : _metrics) {
+      auto findName = [metricName](auto const& summary) {
         auto invertedName = "-" + summary.first;
-        return metricName.compare(summary.first) == 0 ||
-               metricName.compare(invertedName) == 0;
+        return metricName.compare(summary.first) == 0 || metricName.compare(invertedName) == 0;
       };
 
       auto it = std::find_if(summaries.begin(), summaries.end(), findName);
@@ -121,8 +115,8 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem {
 
   // get the bounds of the problem
   std::vector<std::tuple<unsigned, unsigned>> getBounds() const override {
-    std::vector<std::tuple<unsigned, unsigned>> vec(
-        _instructionGroups.size(), std::make_tuple<unsigned, unsigned>(0, 100));
+    std::vector<std::tuple<unsigned, unsigned>> vec(_instructionGroups.size(),
+                                                    std::make_tuple<unsigned, unsigned>(0, 100));
 
     return vec;
   }
@@ -131,10 +125,8 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem {
   std::size_t getNobjs() const override { return _metrics.size(); }
 
 private:
-  std::function<void(std::vector<std::pair<std::string, unsigned>> const &)>
-      _changePayloadFunction;
-  std::shared_ptr<firestarter::measurement::MeasurementWorker>
-      _measurementWorker;
+  std::function<void(std::vector<std::pair<std::string, unsigned>> const&)> _changePayloadFunction;
+  std::shared_ptr<firestarter::measurement::MeasurementWorker> _measurementWorker;
   std::vector<std::string> _metrics;
   std::chrono::seconds _timeout;
   std::chrono::milliseconds _startDelta;
diff --git a/include/firestarter/Optimizer/Util/MultiObjective.hpp b/include/firestarter/Optimizer/Util/MultiObjective.hpp
index 00701bfd..da61bf73 100644
--- a/include/firestarter/Optimizer/Util/MultiObjective.hpp
+++ b/include/firestarter/Optimizer/Util/MultiObjective.hpp
@@ -22,7 +22,6 @@
 #pragma once
 
 #include <firestarter/Optimizer/Individual.hpp>
-
 #include <random>
 #include <utility>
 #include <vector>
@@ -33,37 +32,28 @@ bool less_than_f(double a, double b);
 
 bool greater_than_f(double a, double b);
 
-bool pareto_dominance(const std::vector<double> &obj1,
-                      const std::vector<double> &obj2);
+bool pareto_dominance(const std::vector<double>& obj1, const std::vector<double>& obj2);
 
-std::tuple<std::vector<std::vector<std::size_t>>,
-           std::vector<std::vector<std::size_t>>, std::vector<std::size_t>,
+std::tuple<std::vector<std::vector<std::size_t>>, std::vector<std::vector<std::size_t>>, std::vector<std::size_t>,
            std::vector<std::size_t>>
-fast_non_dominated_sorting(const std::vector<std::vector<double>> &points);
+fast_non_dominated_sorting(const std::vector<std::vector<double>>& points);
 
-std::vector<double>
-crowding_distance(const std::vector<std::vector<double>> &non_dom_front);
+std::vector<double> crowding_distance(const std::vector<std::vector<double>>& non_dom_front);
 
-std::vector<double>::size_type mo_tournament_selection(
-    std::vector<double>::size_type idx1, std::vector<double>::size_type idx2,
-    const std::vector<std::vector<double>::size_type> &non_domination_rank,
-    const std::vector<double> &crowding_d, std::mt19937 &mt);
+std::vector<double>::size_type
+mo_tournament_selection(std::vector<double>::size_type idx1, std::vector<double>::size_type idx2,
+                        const std::vector<std::vector<double>::size_type>& non_domination_rank,
+                        const std::vector<double>& crowding_d, std::mt19937& mt);
 
-std::pair<firestarter::optimizer::Individual,
-          firestarter::optimizer::Individual>
-sbx_crossover(const firestarter::optimizer::Individual &parent1,
-              const firestarter::optimizer::Individual &parent2,
-              const double p_cr, std::mt19937 &mt);
+std::pair<firestarter::optimizer::Individual, firestarter::optimizer::Individual>
+sbx_crossover(const firestarter::optimizer::Individual& parent1, const firestarter::optimizer::Individual& parent2,
+              const double p_cr, std::mt19937& mt);
 
-void polynomial_mutation(
-    firestarter::optimizer::Individual &child,
-    const std::vector<std::tuple<unsigned, unsigned>> &bounds, const double p_m,
-    std::mt19937 &mt);
+void polynomial_mutation(firestarter::optimizer::Individual& child,
+                         const std::vector<std::tuple<unsigned, unsigned>>& bounds, const double p_m, std::mt19937& mt);
 
-std::vector<std::size_t>
-select_best_N_mo(const std::vector<std::vector<double>> &input_f,
-                 std::size_t N);
+std::vector<std::size_t> select_best_N_mo(const std::vector<std::vector<double>>& input_f, std::size_t N);
 
-std::vector<double> ideal(const std::vector<std::vector<double>> &points);
+std::vector<double> ideal(const std::vector<std::vector<double>>& points);
 
 } // namespace firestarter::optimizer::util
diff --git a/src/firestarter/Cuda/Cuda.cpp b/src/firestarter/Cuda/Cuda.cpp
index e5abece9..8a17021f 100644
--- a/src/firestarter/Cuda/Cuda.cpp
+++ b/src/firestarter/Cuda/Cuda.cpp
@@ -31,29 +31,29 @@
 #include <firestarter/Logging/Log.hpp>
 
 #ifdef FIRESTARTER_BUILD_CUDA
-  #include <cublas_v2.h>
-  #include <cuda.h>
-  #include <cuda_runtime_api.h>
-  #include <curand_kernel.h>
-  #define FS_ACCEL_PREFIX_LC_LONG cuda
-  #define FS_ACCEL_PREFIX_LC cu
-  #define FS_ACCEL_PREFIX_UC CU
-  #define FS_ACCEL_PREFIX_UC_LONG CUDA
-  #define FS_ACCEL_STRING "CUDA"
+#include <cublas_v2.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <curand_kernel.h>
+#define FS_ACCEL_PREFIX_LC_LONG cuda
+#define FS_ACCEL_PREFIX_LC cu
+#define FS_ACCEL_PREFIX_UC CU
+#define FS_ACCEL_PREFIX_UC_LONG CUDA
+#define FS_ACCEL_STRING "CUDA"
 #else
-  #ifdef FIRESTARTER_BUILD_HIP
-    #include <hipblas/hipblas.h>
-    #include <hip/hip_runtime.h>
-    #include <hip/hip_runtime_api.h>
-    #include <hiprand_kernel.h>
-  #define FS_ACCEL_PREFIX_LC_LONG hip
-  #define FS_ACCEL_PREFIX_LC hip
-  #define FS_ACCEL_PREFIX_UC HIP
-  #define FS_ACCEL_PREFIX_UC_LONG HIP
-  #define FS_ACCEL_STRING "HIP"
-  #else
-    #error "Attempting to compile file but neither CUDA nor HIP is used"
-  #endif
+#ifdef FIRESTARTER_BUILD_HIP
+#include <hip/hip_runtime.h>
+#include <hip/hip_runtime_api.h>
+#include <hipblas/hipblas.h>
+#include <hiprand_kernel.h>
+#define FS_ACCEL_PREFIX_LC_LONG hip
+#define FS_ACCEL_PREFIX_LC hip
+#define FS_ACCEL_PREFIX_UC HIP
+#define FS_ACCEL_PREFIX_UC_LONG HIP
+#define FS_ACCEL_STRING "HIP"
+#else
+#error "Attempting to compile file but neither CUDA nor HIP is used"
+#endif
 #endif
 #define CONCAT_(prefix, suffix) prefix##suffix
 /// Concatenate `prefix, suffix` into `prefixsuffix`
@@ -66,71 +66,66 @@
 #include <atomic>
 #include <type_traits>
 
-#define ACCELL_SAFE_CALL(cuerr, dev_index)                                       \
-  accell_safe_call(cuerr, dev_index, __FILE__, __LINE__)
+#define ACCELL_SAFE_CALL(cuerr, dev_index) accell_safe_call(cuerr, dev_index, __FILE__, __LINE__)
 #define SEED 123
 
 using namespace firestarter::cuda;
 
 // CUDA error checking
-static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC_LONG,Error_t) cuerr, int dev_index,
-                                  const char *file, const int line) {
-  if (cuerr != CONCAT(FS_ACCEL_PREFIX_LC_LONG,Success) && cuerr != 1) {
-    firestarter::log::error()
-        << FS_ACCEL_STRING" error at " << file << ":" << line << ": error code = " << cuerr
-        << " (" << CONCAT(FS_ACCEL_PREFIX_LC_LONG,GetErrorString)(cuerr)
-        << "), device index: " << dev_index;
+static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC_LONG, Error_t) cuerr, int dev_index, const char* file,
+                                    const int line) {
+  if (cuerr != CONCAT(FS_ACCEL_PREFIX_LC_LONG, Success) && cuerr != 1) {
+    firestarter::log::error() << FS_ACCEL_STRING " error at " << file << ":" << line << ": error code = " << cuerr
+                              << " (" << CONCAT(FS_ACCEL_PREFIX_LC_LONG, GetErrorString)(cuerr)
+                              << "), device index: " << dev_index;
     exit(cuerr);
   }
 
   return;
 }
 
-static const char *_accellGetErrorEnum(CONCAT(FS_ACCEL_PREFIX_LC,blasStatus_t) error) {
+static const char* _accellGetErrorEnum(CONCAT(FS_ACCEL_PREFIX_LC, blasStatus_t) error) {
   switch (error) {
-  case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_SUCCESS):
-    return FS_ACCEL_STRING"blas status: success";
-  case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_NOT_INITIALIZED):
-    return FS_ACCEL_STRING"blas status: not initialized";
-  case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_ALLOC_FAILED):
-    return FS_ACCEL_STRING"blas status: alloc failed";
-  case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_INVALID_VALUE):
-    return FS_ACCEL_STRING"blas status: invalid value";
-  case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_ARCH_MISMATCH):
-    return FS_ACCEL_STRING"blas status: arch mismatch";
-  case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_MAPPING_ERROR):
-    return FS_ACCEL_STRING"blas status: mapping error";
-  case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_EXECUTION_FAILED):
-    return FS_ACCEL_STRING"blas status: execution failed";
-  case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_INTERNAL_ERROR):
-    return FS_ACCEL_STRING"blas status: internal error";
-  case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_NOT_SUPPORTED):
-    return FS_ACCEL_STRING"blas status: not supported";
+  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_SUCCESS):
+    return FS_ACCEL_STRING "blas status: success";
+  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_NOT_INITIALIZED):
+    return FS_ACCEL_STRING "blas status: not initialized";
+  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_ALLOC_FAILED):
+    return FS_ACCEL_STRING "blas status: alloc failed";
+  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_INVALID_VALUE):
+    return FS_ACCEL_STRING "blas status: invalid value";
+  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_ARCH_MISMATCH):
+    return FS_ACCEL_STRING "blas status: arch mismatch";
+  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_MAPPING_ERROR):
+    return FS_ACCEL_STRING "blas status: mapping error";
+  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_EXECUTION_FAILED):
+    return FS_ACCEL_STRING "blas status: execution failed";
+  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_INTERNAL_ERROR):
+    return FS_ACCEL_STRING "blas status: internal error";
+  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_NOT_SUPPORTED):
+    return FS_ACCEL_STRING "blas status: not supported";
 #ifdef FIRESTARTER_BUILD_CUDA
-  case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_LICENSE_ERROR):
-    return FS_ACCEL_STRING"blas status: license error";
+  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_LICENSE_ERROR):
+    return FS_ACCEL_STRING "blas status: license error";
 #endif
 #ifdef FIRESTARTER_BUILD_HIP
-    case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_UNKNOWN):
-      return FS_ACCEL_STRING"blas status: unknown";
-    case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_HANDLE_IS_NULLPTR):
-      return FS_ACCEL_STRING"blas status: handle is null pointer";
-    case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_INVALID_ENUM):
-      return FS_ACCEL_STRING"blas status: invalid enum";
+  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_UNKNOWN):
+    return FS_ACCEL_STRING "blas status: unknown";
+  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_HANDLE_IS_NULLPTR):
+    return FS_ACCEL_STRING "blas status: handle is null pointer";
+  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_INVALID_ENUM):
+    return FS_ACCEL_STRING "blas status: invalid enum";
 #endif
   }
 
-
   return "<unknown>";
 }
 
-static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC,blasStatus_t) cuerr, int dev_index,
-                                  const char *file, const int line) {
-  if (cuerr != CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_SUCCESS)) {
-    firestarter::log::error()
-        << FS_ACCEL_STRING"BLAS error at " << file << ":" << line
-        << ": error code = " << cuerr << " (" << _accellGetErrorEnum(cuerr)
-        << "), device index: " << dev_index;
+static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC, blasStatus_t) cuerr, int dev_index, const char* file,
+                                    const int line) {
+  if (cuerr != CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_SUCCESS)) {
+    firestarter::log::error() << FS_ACCEL_STRING "BLAS error at " << file << ":" << line << ": error code = " << cuerr
+                              << " (" << _accellGetErrorEnum(cuerr) << "), device index: " << dev_index;
     exit(cuerr);
   }
 
@@ -138,16 +133,15 @@ static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC,blasStatus_t) cuer
 }
 
 #ifdef FIRESTARTER_BUILD_CUDA
-static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_UC,result) cuerr, int dev_index,
-                                  const char *file, const int line) {
-  if (cuerr != CONCAT(FS_ACCEL_PREFIX_UC_LONG,_SUCCESS)) {
-    const char *errorString;
+static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_UC, result) cuerr, int dev_index, const char* file,
+                                    const int line) {
+  if (cuerr != CONCAT(FS_ACCEL_PREFIX_UC_LONG, _SUCCESS)) {
+    const char* errorString;
 
-    ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,GetErrorName)(cuerr, &errorString), dev_index);
+    ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, GetErrorName)(cuerr, &errorString), dev_index);
 
-    firestarter::log::error()
-        << FS_ACCEL_STRING" error at " << file << ":" << line << ": error code = " << cuerr
-        << " (" << errorString << "), device index: " << dev_index;
+    firestarter::log::error() << FS_ACCEL_STRING " error at " << file << ":" << line << ": error code = " << cuerr
+                              << " (" << errorString << "), device index: " << dev_index;
     exit(cuerr);
   }
 
@@ -155,50 +149,48 @@ static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_UC,result) cuerr, int
 }
 #endif
 
-static const char *_accellrandGetErrorEnum(CONCAT(FS_ACCEL_PREFIX_LC,randStatus_t) cuerr) {
+static const char* _accellrandGetErrorEnum(CONCAT(FS_ACCEL_PREFIX_LC, randStatus_t) cuerr) {
   switch (cuerr) {
-    case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_SUCCESS):
-      return FS_ACCEL_STRING"rand status: success";
-    case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_VERSION_MISMATCH):
-      return FS_ACCEL_STRING"rand status: version mismatch";
-    case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_NOT_INITIALIZED):
-      return FS_ACCEL_STRING"rand status: not initialized";
-    case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_ALLOCATION_FAILED):
-      return FS_ACCEL_STRING"rand status: allocation failed";
-    case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_TYPE_ERROR):
-      return FS_ACCEL_STRING"rand status: type error";
-    case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_OUT_OF_RANGE):
-      return FS_ACCEL_STRING"rand status: out of range";
-    case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_LENGTH_NOT_MULTIPLE):
-      return FS_ACCEL_STRING"rand status: length not multiple";
-    case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_DOUBLE_PRECISION_REQUIRED):
-      return FS_ACCEL_STRING"rand status: double precision required";
-    case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_LAUNCH_FAILURE):
-      return FS_ACCEL_STRING"rand status: launch failure";
-    case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_PREEXISTING_FAILURE):
-      return FS_ACCEL_STRING"rand status: preexisting failure";
-    case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_INITIALIZATION_FAILED):
-      return FS_ACCEL_STRING"rand status: initialization failed";
-    case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_ARCH_MISMATCH):
-      return FS_ACCEL_STRING"rand status: arch mismatch";
-    case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_INTERNAL_ERROR):
-      return FS_ACCEL_STRING"rand status: internal error";
+  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_SUCCESS):
+    return FS_ACCEL_STRING "rand status: success";
+  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_VERSION_MISMATCH):
+    return FS_ACCEL_STRING "rand status: version mismatch";
+  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_NOT_INITIALIZED):
+    return FS_ACCEL_STRING "rand status: not initialized";
+  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_ALLOCATION_FAILED):
+    return FS_ACCEL_STRING "rand status: allocation failed";
+  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_TYPE_ERROR):
+    return FS_ACCEL_STRING "rand status: type error";
+  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_OUT_OF_RANGE):
+    return FS_ACCEL_STRING "rand status: out of range";
+  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_LENGTH_NOT_MULTIPLE):
+    return FS_ACCEL_STRING "rand status: length not multiple";
+  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_DOUBLE_PRECISION_REQUIRED):
+    return FS_ACCEL_STRING "rand status: double precision required";
+  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_LAUNCH_FAILURE):
+    return FS_ACCEL_STRING "rand status: launch failure";
+  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_PREEXISTING_FAILURE):
+    return FS_ACCEL_STRING "rand status: preexisting failure";
+  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_INITIALIZATION_FAILED):
+    return FS_ACCEL_STRING "rand status: initialization failed";
+  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_ARCH_MISMATCH):
+    return FS_ACCEL_STRING "rand status: arch mismatch";
+  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_INTERNAL_ERROR):
+    return FS_ACCEL_STRING "rand status: internal error";
 #ifdef FIRESTARTER_BUILD_HIP
-  case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_NOT_IMPLEMENTED):
-      return FS_ACCEL_STRING"rand status: not implemented";
+  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_NOT_IMPLEMENTED):
+    return FS_ACCEL_STRING "rand status: not implemented";
 #endif
   }
 
   return "<unknown>";
 }
 
-static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC,randStatus_t) cuerr, int dev_index,
-                                  const char *file, const int line) {
-  if (cuerr != CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_SUCCESS)) {
-    firestarter::log::error()
-        << FS_ACCEL_STRING"RAND error at " << file << ":" << line
-        << ": error code = " << cuerr << " (" << _accellrandGetErrorEnum(cuerr)
-        << "), device index: " << dev_index;
+static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC, randStatus_t) cuerr, int dev_index, const char* file,
+                                    const int line) {
+  if (cuerr != CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_SUCCESS)) {
+    firestarter::log::error() << FS_ACCEL_STRING "RAND error at " << file << ":" << line << ": error code = " << cuerr
+                              << " (" << _accellrandGetErrorEnum(cuerr) << "), device index: " << dev_index;
     exit(cuerr);
   }
 
@@ -226,8 +218,8 @@ static int get_precision(int useDouble, struct hipDeviceProp_t properties) {
 #endif
 #endif
 #if (CUDART_VERSION >= 8000)
-// read precision ratio (dp/sp) of GPU to choose the right variant for maximum
-// workload
+  // read precision ratio (dp/sp) of GPU to choose the right variant for maximum
+  // workload
   if (useDouble == 2 && properties.singleToDoublePrecisionPerfRatio > 3) {
     return 0;
   } else if (useDouble) {
@@ -237,7 +229,7 @@ static int get_precision(int useDouble, struct hipDeviceProp_t properties) {
   }
 }
 #else
-// as precision ratio is not supported return default/user input value
+  // as precision ratio is not supported return default/user input value
   (void)properties;
 
   if (useDouble) {
@@ -263,9 +255,8 @@ static int get_precision(int device_index, int useDouble) {
   ACCELL_SAFE_CALL(hipSetDevice(device_index), device_index);
 #endif
 #endif
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,MemGetInfo)(&memory_avail, &memory_total), device_index);
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG,GetDeviceProperties)(&properties, device_index),
-                 device_index);
+  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, MemGetInfo)(&memory_avail, &memory_total), device_index);
+  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG, GetDeviceProperties)(&properties, device_index), device_index);
 
   useDouble = get_precision(useDouble, properties);
 
@@ -273,14 +264,12 @@ static int get_precision(int device_index, int useDouble) {
   // the user wants to compute DP on a SP-only-Card.
   if (useDouble && properties.major <= 1 && properties.minor <= 2) {
     std::stringstream ss;
-    ss << FS_ACCEL_STRING" GPU " << device_index << ": " << properties.name << " ";
+    ss << FS_ACCEL_STRING " GPU " << device_index << ": " << properties.name << " ";
 
-    firestarter::log::error()
-        << ss.str() << "Doesn't support double precision.\n"
-        << ss.str() << "Compute Capability: " << properties.major << "."
-        << properties.minor << ". Requiered for double precision: >=1.3\n"
-        << ss.str()
-        << "Stressing with single precision instead. Maybe use -f parameter.";
+    firestarter::log::error() << ss.str() << "Doesn't support double precision.\n"
+                              << ss.str() << "Compute Capability: " << properties.major << "." << properties.minor
+                              << ". Requiered for double precision: >=1.3\n"
+                              << ss.str() << "Stressing with single precision instead. Maybe use -f parameter.";
 
     useDouble = 0;
   }
@@ -305,65 +294,45 @@ static int get_msize(int device_index, int useDouble) {
 
   ACCELL_SAFE_CALL(cuCtxDestroy(context), device_index);
 
-  return round_up(
-      (int)(0.8 * sqrt(((memory_avail) /
-                        ((useDouble ? sizeof(double) : sizeof(float)) * 3)))),
-      1024); // a multiple of 1024 works always well
+  return round_up((int)(0.8 * sqrt(((memory_avail) / ((useDouble ? sizeof(double) : sizeof(float)) * 3)))),
+                  1024); // a multiple of 1024 works always well
 }
 #endif
 
-static CONCAT(FS_ACCEL_PREFIX_LC,blasStatus_t) gemm(
-                            CONCAT(FS_ACCEL_PREFIX_LC,blasHandle_t) handle,
-                            CONCAT(FS_ACCEL_PREFIX_LC,blasOperation_t) transa,
-                            CONCAT(FS_ACCEL_PREFIX_LC,blasOperation_t) transb,
-                            int &m, int &n, int &k,
-                            const float *alpha, const float *A, int &lda,
-                            const float *B, int &ldb, const float *beta,
-                            float *C, int &ldc) {
-  return CONCAT(FS_ACCEL_PREFIX_LC,blasSgemm)(handle, transa, transb, m, n, k,
-                                              alpha, A, lda, B, ldb,
-                                              beta, C, ldc);
+static CONCAT(FS_ACCEL_PREFIX_LC, blasStatus_t)
+    gemm(CONCAT(FS_ACCEL_PREFIX_LC, blasHandle_t) handle, CONCAT(FS_ACCEL_PREFIX_LC, blasOperation_t) transa,
+         CONCAT(FS_ACCEL_PREFIX_LC, blasOperation_t) transb, int& m, int& n, int& k, const float* alpha, const float* A,
+         int& lda, const float* B, int& ldb, const float* beta, float* C, int& ldc) {
+  return CONCAT(FS_ACCEL_PREFIX_LC, blasSgemm)(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-static CONCAT(FS_ACCEL_PREFIX_LC,blasStatus_t) gemm(
-                            CONCAT(FS_ACCEL_PREFIX_LC,blasHandle_t) handle,
-                            CONCAT(FS_ACCEL_PREFIX_LC,blasOperation_t) transa,
-                            CONCAT(FS_ACCEL_PREFIX_LC,blasOperation_t) transb,
-                            int &m, int &n, int &k,
-                            const double *alpha, const double *A, int &lda,
-                            const double *B, int &ldb, const double *beta,
-                            double *C, int &ldc) {
-  return CONCAT(FS_ACCEL_PREFIX_LC,blasDgemm)(handle, transa, transb, m, n, k,
-                                              alpha, A, lda, B, ldb,
-                                              beta, C, ldc);
+static CONCAT(FS_ACCEL_PREFIX_LC, blasStatus_t)
+    gemm(CONCAT(FS_ACCEL_PREFIX_LC, blasHandle_t) handle, CONCAT(FS_ACCEL_PREFIX_LC, blasOperation_t) transa,
+         CONCAT(FS_ACCEL_PREFIX_LC, blasOperation_t) transb, int& m, int& n, int& k, const double* alpha,
+         const double* A, int& lda, const double* B, int& ldb, const double* beta, double* C, int& ldc) {
+  return CONCAT(FS_ACCEL_PREFIX_LC, blasDgemm)(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
 }
 
-static CONCAT(FS_ACCEL_PREFIX_LC,randStatus_t) generateUniform(
-                            CONCAT(FS_ACCEL_PREFIX_LC,randGenerator_t) generator,
-                            float *outputPtr, size_t num) {
-  return CONCAT(FS_ACCEL_PREFIX_LC,randGenerateUniform)(generator, outputPtr, num);
+static CONCAT(FS_ACCEL_PREFIX_LC, randStatus_t)
+    generateUniform(CONCAT(FS_ACCEL_PREFIX_LC, randGenerator_t) generator, float* outputPtr, size_t num) {
+  return CONCAT(FS_ACCEL_PREFIX_LC, randGenerateUniform)(generator, outputPtr, num);
 }
 
-static CONCAT(FS_ACCEL_PREFIX_LC,randStatus_t) generateUniform(
-                            CONCAT(FS_ACCEL_PREFIX_LC,randGenerator_t) generator,
-                            double *outputPtr, size_t num) {
-  return CONCAT(FS_ACCEL_PREFIX_LC,randGenerateUniformDouble)(generator, outputPtr, num);
+static CONCAT(FS_ACCEL_PREFIX_LC, randStatus_t)
+    generateUniform(CONCAT(FS_ACCEL_PREFIX_LC, randGenerator_t) generator, double* outputPtr, size_t num) {
+  return CONCAT(FS_ACCEL_PREFIX_LC, randGenerateUniformDouble)(generator, outputPtr, num);
 }
 
 // GPU index. Used to pin this thread to the GPU.
 template <typename T>
-static void create_load(std::condition_variable &waitForInitCv,
-                        std::mutex &waitForInitCvMutex, int device_index,
-                        std::atomic<int> &initCount,
-                        volatile unsigned long long *loadVar, int matrixSize) {
-  static_assert(
-      std::is_same<T, float>::value || std::is_same<T, double>::value,
-      "create_load<T>: Template argument T must be either float or double");
+static void create_load(std::condition_variable& waitForInitCv, std::mutex& waitForInitCvMutex, int device_index,
+                        std::atomic<int>& initCount, volatile unsigned long long* loadVar, int matrixSize) {
+  static_assert(std::is_same<T, float>::value || std::is_same<T, double>::value,
+                "create_load<T>: Template argument T must be either float or double");
 
   int iterations, i;
 
-  firestarter::log::trace() << "Starting CUDA/HIP with given matrix size "
-                            << matrixSize;
+  firestarter::log::trace() << "Starting CUDA/HIP with given matrix size " << matrixSize;
 
   size_t size_use = 0;
   if (matrixSize > 0) {
@@ -387,43 +356,35 @@ static void create_load(std::condition_variable &waitForInitCv,
   // reserving the GPU and initializing cublas
 
   firestarter::log::trace() << "Getting " FS_ACCEL_STRING " device nr. " << device_index;
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,DeviceGet)(&device, device_index), device_index);
+  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, DeviceGet)(&device, device_index), device_index);
 
 #ifdef FIRESTARTER_BUILD_CUDA
-  firestarter::log::trace() << "Creating " FS_ACCEL_STRING " context for computation on device nr. "
-                     << device_index;
+  firestarter::log::trace() << "Creating " FS_ACCEL_STRING " context for computation on device nr. " << device_index;
   ACCELL_SAFE_CALL(cuCtxCreate(&context, 0, device), device_index);
 
-  firestarter::log::trace() << "Set created " FS_ACCEL_STRING " context on device nr. "
-                     << device_index;
+  firestarter::log::trace() << "Set created " FS_ACCEL_STRING " context on device nr. " << device_index;
   ACCELL_SAFE_CALL(cuCtxSetCurrent(context), device_index);
 #else
 #ifdef FIRESTARTER_BUILD_HIP
-  firestarter::log::trace() << "Creating " FS_ACCEL_STRING " Stream for computation on device nr. "
-                     << device_index;
+  firestarter::log::trace() << "Creating " FS_ACCEL_STRING " Stream for computation on device nr. " << device_index;
   ACCELL_SAFE_CALL(hipSetDevice(device_index), device_index);
   ACCELL_SAFE_CALL(hipStreamCreate(&stream), device_index);
 #endif
 #endif
 
-  firestarter::log::trace() << "Create " FS_ACCEL_STRING " Blas on device nr. "
-                     << device_index;
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,blasCreate)(&cublas), device_index);
+  firestarter::log::trace() << "Create " FS_ACCEL_STRING " Blas on device nr. " << device_index;
+  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, blasCreate)(&cublas), device_index);
 
   firestarter::log::trace() << "Get " FS_ACCEL_STRING " device properties (e.g., support for double)"
-                     << " on device nr. "
-                     << device_index;
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG,GetDeviceProperties)(&properties, device_index),
-                 device_index);
+                            << " on device nr. " << device_index;
+  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG, GetDeviceProperties)(&properties, device_index), device_index);
 
   // getting information about the GPU memory
   size_t memory_avail, memory_total;
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,MemGetInfo)(&memory_avail, &memory_total), device_index);
+  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, MemGetInfo)(&memory_avail, &memory_total), device_index);
 
-  firestarter::log::trace() << "Get " FS_ACCEL_STRING " Memory info on device nr. "
-                     << device_index
-                     <<": " << memory_avail << " B avail. from "
-                     << memory_total << " B total";
+  firestarter::log::trace() << "Get " FS_ACCEL_STRING " Memory info on device nr. " << device_index << ": "
+                            << memory_avail << " B avail. from " << memory_total << " B total";
 
   // defining memory pointers
 #ifdef FIRESTARTER_BUILD_CUDA
@@ -449,81 +410,55 @@ static void create_load(std::condition_variable &waitForInitCv,
   memory_size = sizeof(T) * size_use * size_use;
   iterations = (use_bytes - 2 * memory_size) / memory_size; // = 1;
 
-  firestarter::log::trace()
-      << "Allocating " FS_ACCEL_STRING " memory on device nr. "
-      << device_index;
+  firestarter::log::trace() << "Allocating " FS_ACCEL_STRING " memory on device nr. " << device_index;
 
   // allocating memory on the GPU
 #ifdef FIRESTARTER_BUILD_CUDA
   ACCELL_SAFE_CALL(cuMemAlloc(&a_data_ptr, memory_size), device_index);
   ACCELL_SAFE_CALL(cuMemAlloc(&b_data_ptr, memory_size), device_index);
-  ACCELL_SAFE_CALL(cuMemAlloc(&c_data_ptr, iterations * memory_size),
-                 device_index);
+  ACCELL_SAFE_CALL(cuMemAlloc(&c_data_ptr, iterations * memory_size), device_index);
 #else
 #ifdef FIRESTARTER_BUILD_HIP
   ACCELL_SAFE_CALL(hipMalloc(&a_data_ptr, memory_size), device_index);
   ACCELL_SAFE_CALL(hipMalloc(&b_data_ptr, memory_size), device_index);
-  ACCELL_SAFE_CALL(hipMalloc(&c_data_ptr, iterations * memory_size),
-                 device_index);
+  ACCELL_SAFE_CALL(hipMalloc(&c_data_ptr, iterations * memory_size), device_index);
 #endif
 #endif
 
-  firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. "
-                     << device_index
-                     <<". A: " << a_data_ptr << "(Size: "
-                     << memory_size << "B)"
-                     << "\n";
-
-  firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. "
-                     << device_index
-                     <<". B: " << b_data_ptr << "(Size: "
-                     << memory_size << "B)"
-                     << "\n";
-  firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. "
-                     << device_index
-                     <<". C: " << c_data_ptr << "(Size: "
-                     << iterations * memory_size << "B)"
-                     << "\n";
-
-  firestarter::log::trace() << "Initializing " FS_ACCEL_STRING " matrices a, b on device nr. "
-                            << device_index
-                            << ". Using "
-                            << size_use * size_use
-                            << " elements of size "
-                            << sizeof(T) << " Byte";
+  firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. " << device_index
+                            << ". A: " << a_data_ptr << "(Size: " << memory_size << "B)"
+                            << "\n";
+
+  firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. " << device_index
+                            << ". B: " << b_data_ptr << "(Size: " << memory_size << "B)"
+                            << "\n";
+  firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. " << device_index
+                            << ". C: " << c_data_ptr << "(Size: " << iterations * memory_size << "B)"
+                            << "\n";
+
+  firestarter::log::trace() << "Initializing " FS_ACCEL_STRING " matrices a, b on device nr. " << device_index
+                            << ". Using " << size_use * size_use << " elements of size " << sizeof(T) << " Byte";
   // initialize matrix A and B on the GPU with random values
-  CONCAT(FS_ACCEL_PREFIX_LC,randGenerator_t) random_gen;
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,randCreateGenerator)(
-                              &random_gen,
-                              CONCAT(FS_ACCEL_PREFIX_UC,RAND_RNG_PSEUDO_DEFAULT)),
-                  device_index);
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,randSetPseudoRandomGeneratorSeed)(
-                              random_gen, SEED),
-                   device_index);
+  CONCAT(FS_ACCEL_PREFIX_LC, randGenerator_t) random_gen;
   ACCELL_SAFE_CALL(
-      generateUniform(random_gen, (T *)a_data_ptr, size_use * size_use),
+      CONCAT(FS_ACCEL_PREFIX_LC, randCreateGenerator)(&random_gen, CONCAT(FS_ACCEL_PREFIX_UC, RAND_RNG_PSEUDO_DEFAULT)),
       device_index);
-  ACCELL_SAFE_CALL(
-      generateUniform(random_gen, (T *)b_data_ptr, size_use * size_use),
-      device_index);
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,randDestroyGenerator)(random_gen),
-                   device_index);
+  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, randSetPseudoRandomGeneratorSeed)(random_gen, SEED), device_index);
+  ACCELL_SAFE_CALL(generateUniform(random_gen, (T*)a_data_ptr, size_use * size_use), device_index);
+  ACCELL_SAFE_CALL(generateUniform(random_gen, (T*)b_data_ptr, size_use * size_use), device_index);
+  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, randDestroyGenerator)(random_gen), device_index);
 
   // initialize c_data_ptr with copies of A
   for (i = 0; i < iterations; i++) {
-      firestarter::log::trace() << "Initializing " FS_ACCEL_STRING " matrix c-"
-                                << i
-                                << " by copying "
-                                << memory_size
-                                << " byte from "
-                                << a_data_ptr
-                                << " to "
-                                << c_data_ptr + (size_t)(i * size_use * size_use * (float)sizeof(T)/(float)sizeof(c_data_ptr))
-                                << "\n";
-    ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,MemcpyDtoD)(
-                                c_data_ptr + (size_t)(i * size_use * size_use * (float)sizeof(T)/(float)sizeof(c_data_ptr)),
-                                a_data_ptr, memory_size),
-                   device_index);
+    firestarter::log::trace() << "Initializing " FS_ACCEL_STRING " matrix c-" << i << " by copying " << memory_size
+                              << " byte from " << a_data_ptr << " to "
+                              << c_data_ptr +
+                                     (size_t)(i * size_use * size_use * (float)sizeof(T) / (float)sizeof(c_data_ptr))
+                              << "\n";
+    ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, MemcpyDtoD)(
+                         c_data_ptr + (size_t)(i * size_use * size_use * (float)sizeof(T) / (float)sizeof(c_data_ptr)),
+                         a_data_ptr, memory_size),
+                     device_index);
   }
 
   // save gpuvar->init_count and sys.out
@@ -531,15 +466,12 @@ static void create_load(std::condition_variable &waitForInitCv,
     std::lock_guard<std::mutex> lk(waitForInitCvMutex);
 
 #define TO_MB(x) (unsigned long)(x / 1024 / 1024)
-  firestarter::log::info()
-      << "   GPU " << device_index << "\n"
-      << "    name:           " << properties.name << "\n"
-      << "    memory:         " << TO_MB(memory_avail) << "/"
-      << TO_MB(memory_total) << " MiB available (using " << TO_MB(use_bytes)
-      << " MiB)\n"
-      << "    matrix size:    " << size_use << "\n"
-      << "    used precision: "
-      << ((sizeof(T) == sizeof(double)) ? "double" : "single");
+    firestarter::log::info() << "   GPU " << device_index << "\n"
+                             << "    name:           " << properties.name << "\n"
+                             << "    memory:         " << TO_MB(memory_avail) << "/" << TO_MB(memory_total)
+                             << " MiB available (using " << TO_MB(use_bytes) << " MiB)\n"
+                             << "    matrix size:    " << size_use << "\n"
+                             << "    used precision: " << ((sizeof(T) == sizeof(double)) ? "double" : "single");
 #undef TO_MB
 
     initCount++;
@@ -553,17 +485,12 @@ static void create_load(std::condition_variable &waitForInitCv,
   // actual stress begins here
   while (*loadVar != LOAD_STOP) {
     for (i = 0; i < iterations; i++) {
-      ACCELL_SAFE_CALL(gemm(
-                          cublas,
-                          CONCAT(FS_ACCEL_PREFIX_UC,BLAS_OP_N),
-                          CONCAT(FS_ACCEL_PREFIX_UC,BLAS_OP_N),
-                          size_use_i, size_use_i,
-                          size_use_i, &alpha, (const T *)a_data_ptr, size_use_i,
-                          (const T *)b_data_ptr, size_use_i, &beta,
-                          (T *)c_data_ptr + i * size_use * size_use, size_use_i),
-                     device_index);
-      ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG,DeviceSynchronize)(),
+      ACCELL_SAFE_CALL(gemm(cublas, CONCAT(FS_ACCEL_PREFIX_UC, BLAS_OP_N), CONCAT(FS_ACCEL_PREFIX_UC, BLAS_OP_N),
+                            size_use_i, size_use_i, size_use_i, &alpha, (const T*)a_data_ptr, size_use_i,
+                            (const T*)b_data_ptr, size_use_i, &beta, (T*)c_data_ptr + i * size_use * size_use,
+                            size_use_i),
                        device_index);
+      ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG, DeviceSynchronize)(), device_index);
     }
   }
 
@@ -578,20 +505,18 @@ static void create_load(std::condition_variable &waitForInitCv,
   ACCELL_SAFE_CALL(hipFree(c_data_ptr), device_index);
 #endif
 #endif
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,blasDestroy)(cublas), device_index);
+  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, blasDestroy)(cublas), device_index);
 #ifdef FIRESTARTER_BUILD_CUDA
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,CtxDestroy)(context), device_index);
+  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, CtxDestroy)(context), device_index);
 #else
 #ifdef FIRESTARTER_BUILD_HIP
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,StreamDestroy)(stream), device_index);
+  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, StreamDestroy)(stream), device_index);
 #endif
 #endif
 }
 
-Cuda::Cuda(volatile unsigned long long *loadVar, bool useFloat, bool useDouble,
-           unsigned matrixSize, int gpus) {
-  std::thread t(Cuda::initGpus, std::ref(_waitForInitCv), loadVar, useFloat,
-                useDouble, matrixSize, gpus);
+Cuda::Cuda(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) {
+  std::thread t(Cuda::initGpus, std::ref(_waitForInitCv), loadVar, useFloat, useDouble, matrixSize, gpus);
   _initThread = std::move(t);
 
   std::unique_lock<std::mutex> lk(_waitForInitCvMutex);
@@ -599,14 +524,13 @@ Cuda::Cuda(volatile unsigned long long *loadVar, bool useFloat, bool useDouble,
   _waitForInitCv.wait(lk);
 }
 
-void Cuda::initGpus(std::condition_variable &cv,
-                    volatile unsigned long long *loadVar, bool useFloat,
-                    bool useDouble, unsigned matrixSize, int gpus) {
+void Cuda::initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble,
+                    unsigned matrixSize, int gpus) {
   std::condition_variable waitForInitCv;
   std::mutex waitForInitCvMutex;
 
   if (gpus) {
-    ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,Init)(0), -1);
+    ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, Init)(0), -1);
     int devCount;
 #ifdef FIRESTARTER_BUILD_CUDA
     ACCELL_SAFE_CALL(cuDeviceGetCount(&devCount), -1);
@@ -641,12 +565,10 @@ void Cuda::initGpus(std::condition_variable &cv,
       }
 
       if (gpus > devCount) {
-        firestarter::log::warn()
-            << "You requested more " FS_ACCEL_STRING " devices than available. "
-               "Maybe you set " FS_ACCEL_STRING "_VISIBLE_DEVICES?";
-        firestarter::log::warn()
-            << "FIRESTARTER will use " << devCount << " of the requested "
-            << gpus << " " FS_ACCEL_STRING " device(s)";
+        firestarter::log::warn() << "You requested more " FS_ACCEL_STRING " devices than available. "
+                                    "Maybe you set " FS_ACCEL_STRING "_VISIBLE_DEVICES?";
+        firestarter::log::warn() << "FIRESTARTER will use " << devCount << " of the requested " << gpus
+                                 << " " FS_ACCEL_STRING " device(s)";
         gpus = devCount;
       }
 
@@ -659,14 +581,12 @@ void Cuda::initGpus(std::condition_variable &cv,
           int precision = get_precision(i, use_double);
 
           if (precision) {
-            std::thread t(create_load<double>, std::ref(waitForInitCv),
-                          std::ref(waitForInitCvMutex), i, std::ref(initCount),
-                          loadVar, (int)matrixSize);
+            std::thread t(create_load<double>, std::ref(waitForInitCv), std::ref(waitForInitCvMutex), i,
+                          std::ref(initCount), loadVar, (int)matrixSize);
             gpuThreads.push_back(std::move(t));
           } else {
-            std::thread t(create_load<float>, std::ref(waitForInitCv),
-                          std::ref(waitForInitCvMutex), i, std::ref(initCount),
-                          loadVar, (int)matrixSize);
+            std::thread t(create_load<float>, std::ref(waitForInitCv), std::ref(waitForInitCvMutex), i,
+                          std::ref(initCount), loadVar, (int)matrixSize);
             gpuThreads.push_back(std::move(t));
           }
         }
@@ -682,19 +602,17 @@ void Cuda::initGpus(std::condition_variable &cv,
       cv.notify_all();
 
       /* join computation threads */
-      for (auto &t : gpuThreads) {
+      for (auto& t : gpuThreads) {
         t.join();
       }
     } else {
-      firestarter::log::info()
-          << "    - No " FS_ACCEL_STRING " devices. Just stressing CPU(s). Maybe use "
-             "FIRESTARTER instead of FIRESTARTER_" FS_ACCEL_STRING "?";
+      firestarter::log::info() << "    - No " FS_ACCEL_STRING " devices. Just stressing CPU(s). Maybe use "
+                                  "FIRESTARTER instead of FIRESTARTER_" FS_ACCEL_STRING "?";
       cv.notify_all();
     }
   } else {
-    firestarter::log::info()
-        << "    --gpus 0 is set. Just stressing CPU(s). Maybe use "
-           "FIRESTARTER instead of FIRESTARTER_" FS_ACCEL_STRING "?";
+    firestarter::log::info() << "    --gpus 0 is set. Just stressing CPU(s). Maybe use "
+                                "FIRESTARTER instead of FIRESTARTER_" FS_ACCEL_STRING "?";
     cv.notify_all();
   }
 }
diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp
index 3f7ab6a9..c5d7b34e 100644
--- a/src/firestarter/DumpRegisterWorker.cpp
+++ b/src/firestarter/DumpRegisterWorker.cpp
@@ -55,24 +55,18 @@ static std::string registerNameBySize(unsigned registerSize) {
 }
 } // namespace
 
-int Firestarter::initDumpRegisterWorker(std::chrono::seconds dumpTimeDelta,
-                                        std::string dumpFilePath) {
+int Firestarter::initDumpRegisterWorker(std::chrono::seconds dumpTimeDelta, std::string dumpFilePath) {
 
-  auto data = std::make_unique<DumpRegisterWorkerData>(
-      this->loadThreads.begin()->second, dumpTimeDelta, dumpFilePath);
+  auto data = std::make_unique<DumpRegisterWorkerData>(this->loadThreads.begin()->second, dumpTimeDelta, dumpFilePath);
 
-  this->dumpRegisterWorkerThread =
-      std::thread(Firestarter::dumpRegisterWorker, std::move(data));
+  this->dumpRegisterWorkerThread = std::thread(Firestarter::dumpRegisterWorker, std::move(data));
 
   return EXIT_SUCCESS;
 }
 
-void Firestarter::joinDumpRegisterWorker() {
-  this->dumpRegisterWorkerThread.join();
-}
+void Firestarter::joinDumpRegisterWorker() { this->dumpRegisterWorkerThread.join(); }
 
-void Firestarter::dumpRegisterWorker(
-    std::unique_ptr<DumpRegisterWorkerData> data) {
+void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> data) {
 
   pthread_setname_np(pthread_self(), "DumpRegWorker");
 
@@ -81,21 +75,16 @@ void Firestarter::dumpRegisterWorker(
   std::string registerPrefix = registerNameBySize(registerSize);
   auto offset = sizeof(DumpRegisterStruct) / sizeof(unsigned long long);
 
-  auto dumpRegisterStruct = reinterpret_cast<DumpRegisterStruct *>(
-      data->loadWorkerData->addrMem - offset);
+  auto dumpRegisterStruct = reinterpret_cast<DumpRegisterStruct*>(data->loadWorkerData->addrMem - offset);
 
-  auto dumpVar = reinterpret_cast<volatile unsigned long long *>(
-      &dumpRegisterStruct->dumpVar);
+  auto dumpVar = reinterpret_cast<volatile unsigned long long*>(&dumpRegisterStruct->dumpVar);
   // memory of simd variables is before the padding
-  volatile unsigned long long *dumpMemAddr =
-      dumpRegisterStruct->padding - registerCount * registerSize;
+  volatile unsigned long long* dumpMemAddr = dumpRegisterStruct->padding - registerCount * registerSize;
 
   // TODO: maybe use aligned_malloc to make memcpy more efficient and don't
   // interrupt the workload as much?
-  unsigned long long *last = reinterpret_cast<unsigned long long *>(
-      malloc(sizeof(unsigned long long) * offset));
-  unsigned long long *current = reinterpret_cast<unsigned long long *>(
-      malloc(sizeof(unsigned long long) * offset));
+  unsigned long long* last = reinterpret_cast<unsigned long long*>(malloc(sizeof(unsigned long long) * offset));
+  unsigned long long* current = reinterpret_cast<unsigned long long*>(malloc(sizeof(unsigned long long) * offset));
 
   if (last == nullptr || current == nullptr) {
     log::error() << "Malloc failed in Firestarter::dumpRegisterWorker";
@@ -143,8 +132,7 @@ void Firestarter::dumpRegisterWorker(
     }
 
     // copy the register content to minimize the interruption of the load worker
-    std::memcpy(current, (void *)dumpMemAddr,
-                sizeof(unsigned long long) * offset);
+    std::memcpy(current, (void*)dumpMemAddr, sizeof(unsigned long long) * offset);
 
     // skip the first output, as we first have to get some valid values for last
     if (!skipFirst) {
@@ -162,8 +150,7 @@ void Firestarter::dumpRegisterWorker(
 
         for (auto j = 0; j < registerSize; j++) {
           auto index = registerSize * i + j;
-          auto hd = static_cast<unsigned long long>(
-              hammingDistance(current[index], last[index]));
+          auto hd = static_cast<unsigned long long>(hammingDistance(current[index], last[index]));
 
           dumpFile << hd;
           if (j != registerSize - 1) {
diff --git a/src/firestarter/Environment/CPUTopology.cpp b/src/firestarter/Environment/CPUTopology.cpp
index d7fb4bf0..a21bd9b8 100644
--- a/src/firestarter/Environment/CPUTopology.cpp
+++ b/src/firestarter/Environment/CPUTopology.cpp
@@ -32,18 +32,18 @@ extern "C" {
 
 using namespace firestarter::environment;
 
-std::ostream &CPUTopology::print(std::ostream &stream) const {
+std::ostream& CPUTopology::print(std::ostream& stream) const {
   stream << "  system summary:\n"
          << "    number of processors:        " << this->numPackages() << "\n"
          << "    number of cores (total)):    " << this->numCoresTotal() << "\n"
-         << "  (this includes only cores in the cgroup)"  << "\n"
-         << "    number of threads per core:  " << this->numThreadsPerCore()
+         << "  (this includes only cores in the cgroup)"
          << "\n"
+         << "    number of threads per core:  " << this->numThreadsPerCore() << "\n"
          << "    total number of threads:     " << this->numThreads() << "\n\n";
 
   std::stringstream ss;
 
-  for (auto const &ent : this->features()) {
+  for (auto const& ent : this->features()) {
     ss << ent << " ";
   }
 
@@ -52,20 +52,18 @@ std::ostream &CPUTopology::print(std::ostream &stream) const {
          << "    vendor:             " << this->vendor() << "\n"
          << "    processor-name:     " << this->processorName() << "\n"
          << "    model:              " << this->model() << "\n"
-         << "    frequency:          " << this->clockrate() / 1000000
-         << " MHz\n"
+         << "    frequency:          " << this->clockrate() / 1000000 << " MHz\n"
          << "    supported features: " << ss.str() << "\n"
          << "    Caches:";
 
   std::vector<hwloc_obj_type_t> caches = {
-      HWLOC_OBJ_L1CACHE,  HWLOC_OBJ_L1ICACHE, HWLOC_OBJ_L2CACHE,
-      HWLOC_OBJ_L2ICACHE, HWLOC_OBJ_L3CACHE,  HWLOC_OBJ_L3ICACHE,
-      HWLOC_OBJ_L4CACHE,  HWLOC_OBJ_L5CACHE,
+      HWLOC_OBJ_L1CACHE, HWLOC_OBJ_L1ICACHE, HWLOC_OBJ_L2CACHE, HWLOC_OBJ_L2ICACHE,
+      HWLOC_OBJ_L3CACHE, HWLOC_OBJ_L3ICACHE, HWLOC_OBJ_L4CACHE, HWLOC_OBJ_L5CACHE,
   };
 
   std::vector<std::string> cacheStrings = {};
 
-  for (hwloc_obj_type_t const &cache : caches) {
+  for (hwloc_obj_type_t const& cache : caches) {
     int width;
     char string[128];
     int shared;
@@ -93,8 +91,8 @@ std::ostream &CPUTopology::print(std::ostream &stream) const {
         break;
       }
 
-      ss << " Cache, " << cacheObj->attr->cache.size / 1024 << " KiB, "
-         << cacheObj->attr->cache.linesize << " B Cacheline, ";
+      ss << " Cache, " << cacheObj->attr->cache.size / 1024 << " KiB, " << cacheObj->attr->cache.linesize
+         << " B Cacheline, ";
 
       switch (cacheObj->attr->cache.associativity) {
       case -1:
@@ -131,8 +129,7 @@ CPUTopology::CPUTopology(std::string architecture)
   hwloc_topology_init(&this->topology);
 
   // do not filter icaches
-  hwloc_topology_set_cache_types_filter(this->topology,
-                                        HWLOC_TYPE_FILTER_KEEP_ALL);
+  hwloc_topology_set_cache_types_filter(this->topology, HWLOC_TYPE_FILTER_KEEP_ALL);
 
   hwloc_topology_load(this->topology);
 
@@ -162,7 +159,7 @@ CPUTopology::CPUTopology(std::string architecture)
     this->_numPackages = hwloc_get_nbobjs_by_depth(this->topology, depth);
   }
 
-    log::trace() << "Number of Packages:" << this->_numPackages;
+  log::trace() << "Number of Packages:" << this->_numPackages;
   // get number of cores per package
   depth = hwloc_get_type_depth(this->topology, HWLOC_OBJ_CORE);
 
@@ -170,9 +167,8 @@ CPUTopology::CPUTopology(std::string architecture)
     this->_numCoresTotal = 1;
     log::warn() << "Could not get number of cores";
   } else {
-    this->_numCoresTotal =
-        hwloc_get_nbobjs_by_depth(this->topology, depth);
-    if ( this->_numCoresTotal == 0 ) {
+    this->_numCoresTotal = hwloc_get_nbobjs_by_depth(this->topology, depth);
+    if (this->_numCoresTotal == 0) {
       log::warn() << "Could not get number of cores";
       this->_numCoresTotal = 1;
     }
@@ -186,10 +182,8 @@ CPUTopology::CPUTopology(std::string architecture)
     this->_numThreadsPerCore = 1;
     log::warn() << "Could not get number of threads";
   } else {
-    this->_numThreadsPerCore =
-        hwloc_get_nbobjs_by_depth(this->topology, depth) /
-        this->_numCoresTotal ;
-    if ( this->_numThreadsPerCore == 0 ) {
+    this->_numThreadsPerCore = hwloc_get_nbobjs_by_depth(this->topology, depth) / this->_numCoresTotal;
+    if (this->_numThreadsPerCore == 0) {
       log::warn() << "Could not get number of threads per core";
       this->_numThreadsPerCore = 1;
     }
@@ -233,30 +227,17 @@ CPUTopology::CPUTopology(std::string architecture)
   if (clockrate == "0") {
     firestarter::log::warn() << "Can't determine clockrate from /proc/cpuinfo";
   } else {
-    firestarter::log::trace()
-        << "Clockrate from /proc/cpuinfo is " << clockrate;
+    firestarter::log::trace() << "Clockrate from /proc/cpuinfo is " << clockrate;
     this->_clockrate = 1e6 * std::stoi(clockrate);
   }
 
   auto governor = this->scalingGovernor();
   if (!governor.empty()) {
 
-    auto scalingCurFreq =
-        this->getFileAsStream(
-                "/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq")
-            .str();
-    auto cpuinfoCurFreq =
-        this->getFileAsStream(
-                "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq")
-            .str();
-    auto scalingMaxFreq =
-        this->getFileAsStream(
-                "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq")
-            .str();
-    auto cpuinfoMaxFreq =
-        this->getFileAsStream(
-                "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq")
-            .str();
+    auto scalingCurFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq").str();
+    auto cpuinfoCurFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq").str();
+    auto scalingMaxFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq").str();
+    auto cpuinfoMaxFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq").str();
 
     if (governor.compare("performance") || governor.compare("powersave")) {
       if (scalingCurFreq.empty()) {
@@ -322,15 +303,14 @@ CPUTopology::CPUTopology(std::string architecture)
   int width = hwloc_get_nbobjs_by_type(this->topology, HWLOC_OBJ_L1ICACHE);
 
   if (width >= 1) {
-    hwloc_obj_t cacheObj =
-        hwloc_get_obj_by_type(this->topology, HWLOC_OBJ_L1ICACHE, 0);
+    hwloc_obj_t cacheObj = hwloc_get_obj_by_type(this->topology, HWLOC_OBJ_L1ICACHE, 0);
     this->_instructionCacheSize = cacheObj->attr->cache.size;
   }
 }
 
 CPUTopology::~CPUTopology() { hwloc_topology_destroy(this->topology); }
 
-std::stringstream CPUTopology::getFileAsStream(std::string const &filePath) {
+std::stringstream CPUTopology::getFileAsStream(std::string const& filePath) {
   std::ifstream file(filePath);
   std::stringstream ss;
 
@@ -345,9 +325,7 @@ std::stringstream CPUTopology::getFileAsStream(std::string const &filePath) {
 }
 
 std::string CPUTopology::scalingGovernor() const {
-  return this
-      ->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor")
-      .str();
+  return this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor").str();
 }
 
 int CPUTopology::getCoreIdFromPU(unsigned pu) const {
@@ -424,8 +402,7 @@ unsigned CPUTopology::maxNumThreads() const {
 
   // Find CPUs per kind
   for (int kind_index = 0; kind_index < nr_cpukinds; kind_index++) {
-    int result = hwloc_cpukinds_get_info(this->topology, kind_index, bitmap,
-                                         NULL, NULL, NULL, 0);
+    int result = hwloc_cpukinds_get_info(this->topology, kind_index, bitmap, NULL, NULL, NULL, 0);
     if (result) {
       log::warn() << "Could not get information for CPU kind " << kind_index;
     }
diff --git a/src/firestarter/Environment/Environment.cpp b/src/firestarter/Environment/Environment.cpp
index d827ee83..34022c93 100644
--- a/src/firestarter/Environment/Environment.cpp
+++ b/src/firestarter/Environment/Environment.cpp
@@ -28,8 +28,7 @@
 
 using namespace firestarter::environment;
 
-#if (defined(linux) || defined(__linux__)) &&                                  \
-    defined(FIRESTARTER_THREAD_AFFINITY)
+#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
 
 extern "C" {
 #include <sched.h>
@@ -37,23 +36,23 @@ extern "C" {
 
 // this code is from the C version of FIRESTARTER
 // TODO: replace this with cpu affinity of hwloc
-#define ADD_CPU_SET(cpu, cpuset)                                               \
-  do {                                                                         \
-    if (this->cpuAllowed(cpu)) {                                               \
-      CPU_SET(cpu, &cpuset);                                                   \
-    } else {                                                                   \
-      if (cpu >= this->topology().numThreads()) {                              \
-        log::error() << "The given bind argument (-b/--bind) includes CPU "    \
-                     << cpu << " that is not available on this system.";       \
-      } else {                                                                 \
-        log::error() << "The given bind argument (-b/--bind) cannot "          \
-                        "be implemented with the cpuset given from the OS\n"   \
-                     << "This can be caused by the taskset tool, cgroups, "    \
-                        "the batch system, or similar mechanisms.\n"           \
-                     << "Please fix the argument to match the restrictions.";  \
-      }                                                                        \
-      return EACCES;                                                           \
-    }                                                                          \
+#define ADD_CPU_SET(cpu, cpuset)                                                                                       \
+  do {                                                                                                                 \
+    if (this->cpuAllowed(cpu)) {                                                                                       \
+      CPU_SET(cpu, &cpuset);                                                                                           \
+    } else {                                                                                                           \
+      if (cpu >= this->topology().numThreads()) {                                                                      \
+        log::error() << "The given bind argument (-b/--bind) includes CPU " << cpu                                     \
+                     << " that is not available on this system.";                                                      \
+      } else {                                                                                                         \
+        log::error() << "The given bind argument (-b/--bind) cannot "                                                  \
+                        "be implemented with the cpuset given from the OS\n"                                           \
+                     << "This can be caused by the taskset tool, cgroups, "                                            \
+                        "the batch system, or similar mechanisms.\n"                                                   \
+                     << "Please fix the argument to match the restrictions.";                                          \
+      }                                                                                                                \
+      return EACCES;                                                                                                   \
+    }                                                                                                                  \
   } while (0)
 
 int Environment::cpuSet(unsigned id) {
@@ -78,20 +77,16 @@ int Environment::cpuAllowed(unsigned id) {
 }
 #endif
 
-int Environment::evaluateCpuAffinity(unsigned requestedNumThreads,
-                                     std::string cpuBind) {
-#if not((defined(linux) || defined(__linux__)) &&                              \
-        defined(FIRESTARTER_THREAD_AFFINITY))
+int Environment::evaluateCpuAffinity(unsigned requestedNumThreads, std::string cpuBind) {
+#if not((defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY))
   (void)cpuBind;
 #endif
 
-  if (requestedNumThreads > 0 &&
-      requestedNumThreads > this->topology().numThreads()) {
+  if (requestedNumThreads > 0 && requestedNumThreads > this->topology().numThreads()) {
     log::warn() << "Not enough CPUs for requested number of threads";
   }
 
-#if (defined(linux) || defined(__linux__)) &&                                  \
-    defined(FIRESTARTER_THREAD_AFFINITY)
+#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
   cpu_set_t cpuset;
 
   CPU_ZERO(&cpuset);
@@ -161,8 +156,7 @@ int Environment::evaluateCpuAffinity(unsigned requestedNumThreads,
           s = 1;
         }
         if (y < x) {
-          log::error() << "y has to be >= x in x-y expressions of CPU list: "
-                       << token;
+          log::error() << "y has to be >= x in x-y expressions of CPU list: " << token;
           return EXIT_FAILURE;
         }
         for (unsigned long i = x; i <= y; i += s) {
@@ -185,8 +179,7 @@ int Environment::evaluateCpuAffinity(unsigned requestedNumThreads,
     log::error() << "Found no usable CPUs!";
     return 127;
   }
-#if (defined(linux) || defined(__linux__)) &&                                  \
-    defined(FIRESTARTER_THREAD_AFFINITY)
+#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
   else {
     for (unsigned i = 0; i < this->topology().maxNumThreads(); i++) {
       if (CPU_ISSET(i, &cpuset)) {
@@ -208,20 +201,18 @@ int Environment::evaluateCpuAffinity(unsigned requestedNumThreads,
 void Environment::printThreadSummary() {
   log::info() << "\n  using " << this->requestedNumThreads() << " threads";
 
-#if (defined(linux) || defined(__linux__)) &&                                  \
-    defined(FIRESTARTER_THREAD_AFFINITY)
+#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
   bool printCoreIdInfo = false;
   size_t i = 0;
 
   std::vector<unsigned> cpuBind(this->cpuBind);
   cpuBind.resize(this->requestedNumThreads());
-  for (auto const &bind : cpuBind) {
+  for (auto const& bind : cpuBind) {
     int coreId = this->topology().getCoreIdFromPU(bind);
     int pkgId = this->topology().getPkgIdFromPU(bind);
 
     if (coreId != -1 && pkgId != -1) {
-      log::info() << "    - Thread " << i << " run on CPU " << bind << ", core "
-                  << coreId << " in package: " << pkgId;
+      log::info() << "    - Thread " << i << " run on CPU " << bind << ", core " << coreId << " in package: " << pkgId;
       printCoreIdInfo = true;
     }
 
@@ -229,8 +220,7 @@ void Environment::printThreadSummary() {
   }
 
   if (printCoreIdInfo) {
-    log::info()
-        << "  The cores are numbered using the logical_index from hwloc.";
+    log::info() << "  The cores are numbered using the logical_index from hwloc.";
   }
 #endif
 }
@@ -241,8 +231,7 @@ int Environment::setCpuAffinity(unsigned thread) {
     return EXIT_FAILURE;
   }
 
-#if (defined(linux) || defined(__linux__)) &&                                  \
-    defined(FIRESTARTER_THREAD_AFFINITY)
+#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
   this->cpuSet(this->cpuBind.at(thread));
 #endif
 
diff --git a/src/firestarter/Environment/Payload/Payload.cpp b/src/firestarter/Environment/Payload/Payload.cpp
index 68cfc547..5cda6abc 100644
--- a/src/firestarter/Environment/Payload/Payload.cpp
+++ b/src/firestarter/Environment/Payload/Payload.cpp
@@ -26,12 +26,10 @@
 
 using namespace firestarter::environment::payload;
 
-unsigned
-Payload::getSequenceStartCount(const std::vector<std::string> &sequence,
-                               const std::string start) {
+unsigned Payload::getSequenceStartCount(const std::vector<std::string>& sequence, const std::string start) {
   unsigned i = 0;
 
-  for (const auto &item : sequence) {
+  for (const auto& item : sequence) {
     if (0 == item.rfind(start, 0)) {
       i++;
     }
@@ -40,13 +38,10 @@ Payload::getSequenceStartCount(const std::vector<std::string> &sequence,
   return i;
 }
 
-std::vector<std::string> Payload::generateSequence(
-    std::vector<std::pair<std::string, unsigned>> const &proportions) {
+std::vector<std::string> Payload::generateSequence(std::vector<std::pair<std::string, unsigned>> const& proportions) {
   std::vector<std::pair<std::string, unsigned>> prop = proportions;
 
-  prop.erase(std::remove_if(prop.begin(), prop.end(),
-                            [](auto const &pair) { return pair.second == 0; }),
-             prop.end());
+  prop.erase(std::remove_if(prop.begin(), prop.end(), [](auto const& pair) { return pair.second == 0; }), prop.end());
 
   std::vector<std::string> sequence = {};
 
@@ -62,8 +57,7 @@ std::vector<std::string> Payload::generateSequence(
   for (++it; it != prop.end(); ++it) {
     for (unsigned i = 0; i < it->second; i++) {
       insertIt = sequence.begin();
-      std::advance(insertIt, 1 + floor(i * (sequence.size() + it->second - i) /
-                                       (float)it->second));
+      std::advance(insertIt, 1 + floor(i * (sequence.size() + it->second - i) / (float)it->second));
       sequence.insert(insertIt, it->first);
     }
   }
@@ -71,38 +65,32 @@ std::vector<std::string> Payload::generateSequence(
   return sequence;
 }
 
-unsigned Payload::getL2LoopCount(const std::vector<std::string> &sequence,
-                                 const unsigned numberOfLines,
+unsigned Payload::getL2LoopCount(const std::vector<std::string>& sequence, const unsigned numberOfLines,
                                  const unsigned size, const unsigned threads) {
   if (this->getL2SequenceCount(sequence) == 0) {
     return 0;
   }
-  return (0.8 * size / 64 / threads /
-          (this->getL2SequenceCount(sequence) *
-           this->getNumberOfSequenceRepetitions(sequence,
-                                                numberOfLines / threads)));
+  return (
+      0.8 * size / 64 / threads /
+      (this->getL2SequenceCount(sequence) * this->getNumberOfSequenceRepetitions(sequence, numberOfLines / threads)));
 }
 
-unsigned Payload::getL3LoopCount(const std::vector<std::string> &sequence,
-                                 const unsigned numberOfLines,
+unsigned Payload::getL3LoopCount(const std::vector<std::string>& sequence, const unsigned numberOfLines,
                                  const unsigned size, const unsigned threads) {
   if (this->getL3SequenceCount(sequence) == 0) {
     return 0;
   }
-  return (0.8 * size / 64 / threads /
-          (this->getL3SequenceCount(sequence) *
-           this->getNumberOfSequenceRepetitions(sequence,
-                                                numberOfLines / threads)));
+  return (
+      0.8 * size / 64 / threads /
+      (this->getL3SequenceCount(sequence) * this->getNumberOfSequenceRepetitions(sequence, numberOfLines / threads)));
 }
 
-unsigned Payload::getRAMLoopCount(const std::vector<std::string> &sequence,
-                                  const unsigned numberOfLines,
+unsigned Payload::getRAMLoopCount(const std::vector<std::string>& sequence, const unsigned numberOfLines,
                                   const unsigned size, const unsigned threads) {
   if (this->getRAMSequenceCount(sequence) == 0) {
     return 0;
   }
-  return (1.0 * size / 64 / threads /
-          (this->getRAMSequenceCount(sequence) *
-           this->getNumberOfSequenceRepetitions(sequence,
-                                                numberOfLines / threads)));
+  return (
+      1.0 * size / 64 / threads /
+      (this->getRAMSequenceCount(sequence) * this->getNumberOfSequenceRepetitions(sequence, numberOfLines / threads)));
 }
diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index 9316ed39..2c23d1c4 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -25,29 +25,25 @@ using namespace firestarter::environment::x86::payload;
 using namespace asmjit;
 using namespace asmjit::x86;
 
-int AVX512Payload::compilePayload(
-    std::vector<std::pair<std::string, unsigned>> const &proportion,
-    unsigned instructionCacheSize,
-    std::list<unsigned> const &dataCacheBufferSize, unsigned ramBufferSize,
-    unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-    bool errorDetection) {
+int AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion,
+                                  unsigned instructionCacheSize, std::list<unsigned> const& dataCacheBufferSize,
+                                  unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters,
+                                  bool errorDetection) {
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
   auto sequence = this->generateSequence(proportion);
-  auto repetitions =
-      this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
+  auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
 
   // compute count of flops and memory access for performance report
   unsigned flops = 0;
   unsigned bytes = 0;
 
-  for (const auto &item : sequence) {
+  for (const auto& item : sequence) {
     auto it = this->instructionFlops.find(item);
 
     if (it == this->instructionFlops.end()) {
-      workerLog::error() << "Instruction group " << item << " undefined in "
-                         << name() << ".";
+      workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
@@ -75,12 +71,9 @@ int AVX512Payload::compilePayload(
   auto ram_size = ramBufferSize / thread;
 
   // calculate the reset counters for the buffers
-  auto l2_loop_count =
-      getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
-  auto l3_loop_count =
-      getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
-  auto ram_loop_count =
-      getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
+  auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
+  auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
+  auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
 
   CodeHolder code;
   code.init(this->rt.environment());
@@ -90,9 +83,8 @@ int AVX512Payload::compilePayload(
   }
 
   Builder cb(&code);
-  cb.addDiagnosticOptions(
-    asmjit::DiagnosticOptions::kValidateAssembler | 
-    asmjit::DiagnosticOptions::kValidateIntermediate );
+  cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
+                          asmjit::DiagnosticOptions::kValidateIntermediate);
 
   auto pointer_reg = rax;
   auto l1_addr = rbx;
@@ -116,8 +108,7 @@ int AVX512Payload::compilePayload(
   auto ram_reg = zmm30;
 
   FuncDetail func;
-  func.init(FuncSignatureT<unsigned long long, unsigned long long *,
-                           volatile unsigned long long *, unsigned long long>(
+  func.init(FuncSignatureT<unsigned long long, unsigned long long*, volatile unsigned long long*, unsigned long long>(
                 CallConvId::kCDecl),
             this->rt.environment());
 
@@ -132,10 +123,9 @@ int AVX512Payload::compilePayload(
     frame.addDirtyRegs(Mm(i));
   }
   // make all other used registers dirty except RAX
-  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg,
-                     l3_count_reg, ram_count_reg, temp_reg, offset_reg,
-                     addrHigh_reg, iter_reg, ram_addr);
-  for (const auto &reg : shift_reg) {
+  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg,
+                     offset_reg, addrHigh_reg, iter_reg, ram_addr);
+  for (const auto& reg : shift_reg) {
     frame.addDirtyRegs(reg);
   }
 
@@ -161,7 +151,7 @@ int AVX512Payload::compilePayload(
   cb.mov(offset_reg,
          Imm(64)); // increment after each cache/memory access
   // Initialize registers for shift operations
-  for (auto const &reg : shift_reg32) {
+  for (auto const& reg : shift_reg32) {
     cb.mov(reg, Imm(0xAAAAAAAA));
   }
   // Initialize AVX512-Registers for FMA Operations
@@ -183,23 +173,14 @@ int AVX512Payload::compilePayload(
   cb.mov(ram_addr, pointer_reg);
   cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer
   cb.mov(l2_count_reg, Imm(l2_loop_count));
-  workerLog::trace() << "reset counter for L2-buffer with "
-                     << l2_loop_count
-                     << " cache line accesses per loop ("
-		     << l2_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop ("
+                     << l2_size / 1024 << ") KiB";
   cb.mov(l3_count_reg, Imm(l3_loop_count));
-  workerLog::trace() << "reset counter for L3-buffer with "
-                     << l3_loop_count
-                     << " cache line accesses per loop ("
-		     << l3_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop ("
+                     << l3_size / 1024 << ") KiB";
   cb.mov(ram_count_reg, Imm(ram_loop_count));
-  workerLog::trace() << "reset counter for RAM-buffer with "
-                     << ram_loop_count
-                     << " cache line accesses per loop ("
-		     << ram_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop ("
+                     << ram_size / 1024 << ") KiB";
 
   cb.align(AlignMode::kCode, 64);
 
@@ -213,13 +194,13 @@ int AVX512Payload::compilePayload(
   auto mov_src = mov_dst + 1;
   unsigned l1_offset = 0;
 
-#define L1_INCREMENT()                                                         \
-  l1_offset += 64;                                                             \
-  if (l1_offset < l1_size * 0.5) {                                             \
-    cb.add(l1_addr, offset_reg);                                               \
-  } else {                                                                     \
-    l1_offset = 0;                                                             \
-    cb.mov(l1_addr, pointer_reg);                                              \
+#define L1_INCREMENT()                                                                                                 \
+  l1_offset += 64;                                                                                                     \
+  if (l1_offset < l1_size * 0.5) {                                                                                     \
+    cb.add(l1_addr, offset_reg);                                                                                       \
+  } else {                                                                                                             \
+    l1_offset = 0;                                                                                                     \
+    cb.mov(l1_addr, pointer_reg);                                                                                      \
   }
 
 #define L2_INCREMENT() cb.add(l2_addr, offset_reg)
@@ -229,12 +210,11 @@ int AVX512Payload::compilePayload(
 #define RAM_INCREMENT() cb.add(ram_addr, offset_reg)
 
   for (unsigned count = 0; count < repetitions; count++) {
-    for (const auto &item : sequence) {
+    for (const auto& item : sequence) {
       if (item == "REG") {
         cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2);
         cb.vfmadd231pd(Zmm(mov_dst), zmm2, zmm1);
-        cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs],
-                temp_reg);
+        cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], temp_reg);
         mov_dst++;
       } else if (item == "L1_L") {
         cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2);
@@ -297,8 +277,7 @@ int AVX512Payload::compilePayload(
         cb.prefetcht2(ptr(ram_addr));
         RAM_INCREMENT();
       } else {
-        workerLog::error() << "Instruction group " << item << " not found in "
-                           << this->name() << ".";
+        workerLog::error() << "Instruction group " << item << " not found in " << this->name() << ".";
         return EXIT_FAILURE;
       }
 
@@ -378,9 +357,7 @@ int AVX512Payload::compilePayload(
 
     // dump all the ymm register
     for (int i = 0; i < (int)this->registerCount(); i++) {
-      cb.vmovapd(
-          zmmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)),
-          Zmm(i));
+      cb.vmovapd(zmmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Zmm(i));
     }
 
     // set read flag
@@ -390,8 +367,7 @@ int AVX512Payload::compilePayload(
   }
 
   if (errorDetection) {
-    this->emitErrorDetectionCode<decltype(iter_reg), Zmm>(
-        cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
+    this->emitErrorDetectionCode<decltype(iter_reg), Zmm>(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
   }
 
   cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH));
@@ -410,8 +386,7 @@ int AVX512Payload::compilePayload(
 
   Error err = this->rt.add(&this->loadFunction, &code);
   if (err) {
-    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in "
-                       << __FILE__ << " at " << __LINE__;
+    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
   }
 
@@ -424,8 +399,7 @@ int AVX512Payload::compilePayload(
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size
-                       << " Bytes (" << instructionCachePercentage
+    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
     workerLog::trace() << "Sequence size: " << sequence.size();
     workerLog::trace() << "Repetition count: " << repetitions;
@@ -437,14 +411,12 @@ int AVX512Payload::compilePayload(
 std::list<std::string> AVX512Payload::getAvailableInstructions() const {
   std::list<std::string> instructions;
 
-  transform(this->instructionFlops.begin(), this->instructionFlops.end(),
-            back_inserter(instructions),
-            [](const auto &item) { return item.first; });
+  transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions),
+            [](const auto& item) { return item.first; });
 
   return instructions;
 }
 
-void AVX512Payload::init(unsigned long long *memoryAddr,
-                         unsigned long long bufferSize) {
+void AVX512Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) {
   X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
index b6899025..c925f538 100644
--- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
@@ -29,28 +29,24 @@ using namespace firestarter::environment::x86::payload;
 using namespace asmjit;
 using namespace asmjit::x86;
 
-int AVXPayload::compilePayload(
-    std::vector<std::pair<std::string, unsigned>> const &proportion,
-    unsigned instructionCacheSize,
-    std::list<unsigned> const &dataCacheBufferSize, unsigned ramBufferSize,
-    unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-    bool errorDetection) {
+int AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion,
+                               unsigned instructionCacheSize, std::list<unsigned> const& dataCacheBufferSize,
+                               unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters,
+                               bool errorDetection) {
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
   auto sequence = this->generateSequence(proportion);
-  auto repetitions =
-      this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
+  auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
 
   // compute count of flops and memory access for performance report
   unsigned flops = 0;
   unsigned bytes = 0;
 
-  for (const auto &item : sequence) {
+  for (const auto& item : sequence) {
     auto it = this->instructionFlops.find(item);
 
     if (it == this->instructionFlops.end()) {
-      workerLog::error() << "Instruction group " << item << " undefined in "
-                         << name() << ".";
+      workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
@@ -78,12 +74,9 @@ int AVXPayload::compilePayload(
   auto ram_size = ramBufferSize / thread;
 
   // calculate the reset counters for the buffers
-  auto l2_loop_count =
-      getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
-  auto l3_loop_count =
-      getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
-  auto ram_loop_count =
-      getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
+  auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
+  auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
+  auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
 
   CodeHolder code;
   code.init(this->rt.environment());
@@ -93,9 +86,8 @@ int AVXPayload::compilePayload(
   }
 
   Builder cb(&code);
-  cb.addDiagnosticOptions(
-    asmjit::DiagnosticOptions::kValidateAssembler | 
-    asmjit::DiagnosticOptions::kValidateIntermediate );
+  cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
+                          asmjit::DiagnosticOptions::kValidateIntermediate);
 
   auto pointer_reg = rax;
   auto l1_addr = rbx;
@@ -115,8 +107,7 @@ int AVXPayload::compilePayload(
   auto trans_regs = 6;
 
   FuncDetail func;
-  func.init(FuncSignatureT<unsigned long long, unsigned long long *,
-                           volatile unsigned long long *, unsigned long long>(
+  func.init(FuncSignatureT<unsigned long long, unsigned long long*, volatile unsigned long long*, unsigned long long>(
                 CallConvId::kCDecl),
             this->rt.environment());
 
@@ -132,9 +123,8 @@ int AVXPayload::compilePayload(
     frame.addDirtyRegs(Mm(i));
   }
   // make all other used registers dirty except RAX
-  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg,
-                     l3_count_reg, ram_count_reg, temp_reg, temp_reg2,
-                     offset_reg, addrHigh_reg, iter_reg);
+  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg,
+                     temp_reg2, offset_reg, addrHigh_reg, iter_reg);
 
   FuncArgsAssignment args(&func);
   args.assignAll(pointer_reg, addrHigh_reg, iter_reg);
@@ -185,8 +175,7 @@ int AVXPayload::compilePayload(
     }
     cb.pinsrq(Xmm(trans_start), temp_reg, Imm(0));
     cb.pinsrq(Xmm(trans_start), temp_reg, Imm(1));
-    cb.vinsertf128(Ymm(trans_start), Ymm(trans_start), Xmm(trans_start),
-                   Imm(1));
+    cb.vinsertf128(Ymm(trans_start), Ymm(trans_start), Xmm(trans_start), Imm(1));
     for (int i = trans_start + 1; i <= trans_end; i++) {
       if (i % 2 == 0) {
         cb.shr(temp_reg, Imm(4));
@@ -207,23 +196,14 @@ int AVXPayload::compilePayload(
   cb.mov(ram_addr, pointer_reg);
   cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer
   cb.mov(l2_count_reg, Imm(l2_loop_count));
-  workerLog::trace() << "reset counter for L2-buffer with "
-                     << l2_loop_count
-                     << " cache line accesses per loop ("
-		     << l2_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop ("
+                     << l2_size / 1024 << ") KiB";
   cb.mov(l3_count_reg, Imm(l3_loop_count));
-  workerLog::trace() << "reset counter for L3-buffer with "
-                     << l3_loop_count
-                     << " cache line accesses per loop ("
-		     << l3_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop ("
+                     << l3_size / 1024 << ") KiB";
   cb.mov(ram_count_reg, Imm(ram_loop_count));
-  workerLog::trace() << "reset counter for RAM-buffer with "
-                     << ram_loop_count
-                     << " cache line accesses per loop ("
-		     << ram_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop ("
+                     << ram_size / 1024 << ") KiB";
 
   cb.align(AlignMode::kCode, 64);
 
@@ -237,13 +217,13 @@ int AVXPayload::compilePayload(
   auto mov_src = mov_dst + 1;
   unsigned l1_offset = 0;
 
-#define L1_INCREMENT()                                                         \
-  l1_offset += 64;                                                             \
-  if (l1_offset < l1_size * 0.5) {                                             \
-    cb.add(l1_addr, offset_reg);                                               \
-  } else {                                                                     \
-    l1_offset = 0;                                                             \
-    cb.mov(l1_addr, pointer_reg);                                              \
+#define L1_INCREMENT()                                                                                                 \
+  l1_offset += 64;                                                                                                     \
+  if (l1_offset < l1_size * 0.5) {                                                                                     \
+    cb.add(l1_addr, offset_reg);                                                                                       \
+  } else {                                                                                                             \
+    l1_offset = 0;                                                                                                     \
+    cb.mov(l1_addr, pointer_reg);                                                                                      \
   }
 
 #define L2_INCREMENT() cb.add(l2_addr, offset_reg);
@@ -253,19 +233,15 @@ int AVXPayload::compilePayload(
 #define RAM_INCREMENT() cb.add(ram_addr, offset_reg)
 
   for (unsigned count = 0; count < repetitions; count++) {
-    for (const auto &item : sequence) {
+    for (const auto& item : sequence) {
       if (item == "REG") {
-        cb.vaddpd(
-            Ymm(add_dest), Ymm(add_dest),
-            Ymm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
+        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
         cb.vmovdqa(Ymm(mov_dst), Ymm(mov_src));
       } else if (item == "L1_L") {
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32));
         L1_INCREMENT();
       } else if (item == "L1_S") {
-        cb.vaddpd(
-            Ymm(add_dest), Ymm(add_dest),
-            Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
+        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.vmovapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest));
         L1_INCREMENT();
         this->_instructions++;
@@ -278,9 +254,7 @@ int AVXPayload::compilePayload(
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l2_addr, 64));
         L2_INCREMENT();
       } else if (item == "L2_S") {
-        cb.vaddpd(
-            Ymm(add_dest), Ymm(add_dest),
-            Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
+        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.vmovapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest));
         L2_INCREMENT();
         this->_instructions++;
@@ -293,9 +267,7 @@ int AVXPayload::compilePayload(
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l3_addr, 64));
         L3_INCREMENT();
       } else if (item == "L3_S") {
-        cb.vaddpd(
-            Ymm(add_dest), Ymm(add_dest),
-            Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
+        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
         L3_INCREMENT();
         this->_instructions++;
@@ -313,9 +285,7 @@ int AVXPayload::compilePayload(
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(ram_addr, 64));
         RAM_INCREMENT();
       } else if (item == "RAM_S") {
-        cb.vaddpd(
-            Ymm(add_dest), Ymm(add_dest),
-            Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
+        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
         RAM_INCREMENT();
         this->_instructions++;
@@ -330,19 +300,16 @@ int AVXPayload::compilePayload(
         RAM_INCREMENT();
         this->_instructions++;
       } else {
-        workerLog::error() << "Instruction group " << item << " not found in "
-                           << this->name() << ".";
+        workerLog::error() << "Instruction group " << item << " not found in " << this->name() << ".";
         return EXIT_FAILURE;
       }
 
       if (shift_regs > 1) {
         this->_instructions++;
         if (left) {
-          cb.psrlw(Mm(shift_start + (shift_dst - shift_start + 3) % shift_regs),
-                   Mm(shift_dst));
+          cb.psrlw(Mm(shift_start + (shift_dst - shift_start + 3) % shift_regs), Mm(shift_dst));
         } else {
-          cb.psllw(Mm(shift_start + (shift_dst - shift_start + 3) % shift_regs),
-                   Mm(shift_dst));
+          cb.psllw(Mm(shift_start + (shift_dst - shift_start + 3) % shift_regs), Mm(shift_dst));
         }
       }
 
@@ -420,9 +387,7 @@ int AVXPayload::compilePayload(
 
     // dump all the ymm register
     for (int i = 0; i < (int)this->registerCount(); i++) {
-      cb.vmovapd(
-          ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)),
-          Ymm(i));
+      cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i));
     }
 
     // set read flag
@@ -432,8 +397,7 @@ int AVXPayload::compilePayload(
   }
 
   if (errorDetection) {
-    this->emitErrorDetectionCode<decltype(iter_reg), Ymm>(
-        cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
+    this->emitErrorDetectionCode<decltype(iter_reg), Ymm>(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
   }
 
   cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH));
@@ -452,8 +416,7 @@ int AVXPayload::compilePayload(
 
   Error err = this->rt.add(&this->loadFunction, &code);
   if (err) {
-    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in "
-                       << __FILE__ << " at " << __LINE__;
+    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
   }
 
@@ -466,8 +429,7 @@ int AVXPayload::compilePayload(
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size
-                       << " Bytes (" << instructionCachePercentage
+    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
     workerLog::trace() << "Sequence size: " << sequence.size();
     workerLog::trace() << "Repetition count: " << repetitions;
@@ -479,15 +441,12 @@ int AVXPayload::compilePayload(
 std::list<std::string> AVXPayload::getAvailableInstructions() const {
   std::list<std::string> instructions;
 
-  transform(this->instructionFlops.begin(), this->instructionFlops.end(),
-            back_inserter(instructions),
-            [](const auto &item) { return item.first; });
+  transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions),
+            [](const auto& item) { return item.first; });
 
   return instructions;
 }
 
-void AVXPayload::init(unsigned long long *memoryAddr,
-                      unsigned long long bufferSize) {
-  X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10,
-                   1.654738925401e-15);
+void AVXPayload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) {
+  X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10, 1.654738925401e-15);
 }
diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
index 32e81752..1e5ffa85 100644
--- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
@@ -29,28 +29,24 @@ using namespace firestarter::environment::x86::payload;
 using namespace asmjit;
 using namespace asmjit::x86;
 
-int FMA4Payload::compilePayload(
-    std::vector<std::pair<std::string, unsigned>> const &proportion,
-    unsigned instructionCacheSize,
-    std::list<unsigned> const &dataCacheBufferSize, unsigned ramBufferSize,
-    unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-    bool errorDetection) {
+int FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion,
+                                unsigned instructionCacheSize, std::list<unsigned> const& dataCacheBufferSize,
+                                unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters,
+                                bool errorDetection) {
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
   auto sequence = this->generateSequence(proportion);
-  auto repetitions =
-      this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
+  auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
 
   // compute count of flops and memory access for performance report
   unsigned flops = 0;
   unsigned bytes = 0;
 
-  for (const auto &item : sequence) {
+  for (const auto& item : sequence) {
     auto it = this->instructionFlops.find(item);
 
     if (it == this->instructionFlops.end()) {
-      workerLog::error() << "Instruction group " << item << " undefined in "
-                         << name() << ".";
+      workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
@@ -78,12 +74,9 @@ int FMA4Payload::compilePayload(
   auto ram_size = ramBufferSize / thread;
 
   // calculate the reset counters for the buffers
-  auto l2_loop_count =
-      getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
-  auto l3_loop_count =
-      getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
-  auto ram_loop_count =
-      getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
+  auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
+  auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
+  auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
 
   CodeHolder code;
   code.init(this->rt.environment());
@@ -93,9 +86,8 @@ int FMA4Payload::compilePayload(
   }
 
   Builder cb(&code);
-  cb.addDiagnosticOptions(
-    asmjit::DiagnosticOptions::kValidateAssembler | 
-    asmjit::DiagnosticOptions::kValidateIntermediate );
+  cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
+                          asmjit::DiagnosticOptions::kValidateIntermediate);
 
   auto pointer_reg = rax;
   auto l1_addr = rbx;
@@ -119,8 +111,7 @@ int FMA4Payload::compilePayload(
   auto ram_reg = xmm15;
 
   FuncDetail func;
-  func.init(FuncSignatureT<unsigned long long, unsigned long long *,
-                           volatile unsigned long long *, unsigned long long>(
+  func.init(FuncSignatureT<unsigned long long, unsigned long long*, volatile unsigned long long*, unsigned long long>(
                 CallConvId::kCDecl),
             this->rt.environment());
 
@@ -135,10 +126,9 @@ int FMA4Payload::compilePayload(
     frame.addDirtyRegs(Mm(i));
   }
   // make all other used registers dirty except RAX
-  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg,
-                     l3_count_reg, ram_count_reg, temp_reg, temp_reg2,
-                     offset_reg, addrHigh_reg, iter_reg, ram_addr);
-  for (const auto &reg : shift_reg) {
+  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg,
+                     temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr);
+  for (const auto& reg : shift_reg) {
     frame.addDirtyRegs(reg);
   }
 
@@ -164,7 +154,7 @@ int FMA4Payload::compilePayload(
   cb.mov(offset_reg,
          Imm(64)); // increment after each cache/memory access
   // Initialize registers for shift operations
-  for (auto const &reg : shift_reg32) {
+  for (auto const& reg : shift_reg32) {
     cb.mov(reg, Imm(0xAAAAAAAA));
   }
   // Initialize AVX-Registers for FMA4 Operations
@@ -185,23 +175,14 @@ int FMA4Payload::compilePayload(
   cb.mov(ram_addr, pointer_reg);
   cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer
   cb.mov(l2_count_reg, Imm(l2_loop_count));
-  workerLog::trace() << "reset counter for L2-buffer with "
-                     << l2_loop_count
-                     << " cache line accesses per loop ("
-		     << l2_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop ("
+                     << l2_size / 1024 << ") KiB";
   cb.mov(l3_count_reg, Imm(l3_loop_count));
-  workerLog::trace() << "reset counter for L3-buffer with "
-                     << l3_loop_count
-                     << " cache line accesses per loop ("
-		     << l3_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop ("
+                     << l3_size / 1024 << ") KiB";
   cb.mov(ram_count_reg, Imm(ram_loop_count));
-  workerLog::trace() << "reset counter for RAM-buffer with "
-                     << ram_loop_count
-                     << " cache line accesses per loop ("
-		     << ram_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop ("
+                     << ram_size / 1024 << ") KiB";
 
   cb.align(AlignMode::kCode, 64);
 
@@ -215,13 +196,13 @@ int FMA4Payload::compilePayload(
   auto mov_src = mov_dst + 1;
   unsigned l1_offset = 0;
 
-#define L1_INCREMENT()                                                         \
-  l1_offset += 64;                                                             \
-  if (l1_offset < l1_size * 0.5) {                                             \
-    cb.add(l1_addr, offset_reg);                                               \
-  } else {                                                                     \
-    l1_offset = 0;                                                             \
-    cb.mov(l1_addr, pointer_reg);                                              \
+#define L1_INCREMENT()                                                                                                 \
+  l1_offset += 64;                                                                                                     \
+  if (l1_offset < l1_size * 0.5) {                                                                                     \
+    cb.add(l1_addr, offset_reg);                                                                                       \
+  } else {                                                                                                             \
+    l1_offset = 0;                                                                                                     \
+    cb.mov(l1_addr, pointer_reg);                                                                                      \
   }
 
 #define L2_INCREMENT() cb.add(l2_addr, offset_reg);
@@ -231,101 +212,80 @@ int FMA4Payload::compilePayload(
 #define RAM_INCREMENT() cb.add(ram_addr, offset_reg)
 
   for (unsigned count = 0; count < repetitions; count++) {
-    for (const auto &item : sequence) {
+    for (const auto& item : sequence) {
       if (item == "REG") {
-        cb.vfmaddpd(
-            Xmm(add_dest), Xmm(add_dest), xmm0,
-            Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        cb.vfmaddpd(
-            Xmm(mov_dst), Xmm(mov_dst), xmm1,
-            Xmm(add_start + (add_dest - add_start + add_regs + 2) % add_regs));
-        cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs],
-                temp_reg);
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
+                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
+        cb.vfmaddpd(Xmm(mov_dst), Xmm(mov_dst), xmm1,
+                    Xmm(add_start + (add_dest - add_start + add_regs + 2) % add_regs));
+        cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], temp_reg);
         mov_dst++;
       } else if (item == "L1_L") {
-        cb.vfmaddpd(
-            Xmm(add_dest), Xmm(add_dest), xmm0,
-            Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm1,
-                    ymmword_ptr(l1_addr, 32));
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
+                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
+        cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm1, ymmword_ptr(l1_addr, 32));
         L1_INCREMENT();
       } else if (item == "L1_S") {
         cb.vmovapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest));
-        cb.vfmaddpd(
-            Ymm(add_dest), Ymm(add_dest), ymm0,
-            Ymm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
+        cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm0,
+                    Ymm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
         L1_INCREMENT();
       } else if (item == "L1_LS") {
         cb.vmovapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest));
-        cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm0,
-                    ymmword_ptr(l1_addr, 32));
+        cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 32));
         L1_INCREMENT();
       } else if (item == "L2_L") {
-        cb.vfmaddpd(
-            Xmm(add_dest), Xmm(add_dest), xmm0,
-            Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm1,
-                    xmmword_ptr(l2_addr, 64));
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
+                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm1, xmmword_ptr(l2_addr, 64));
         L2_INCREMENT();
       } else if (item == "L2_S") {
         cb.vmovapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest));
-        cb.vfmaddpd(
-            Xmm(add_dest), Xmm(add_dest), xmm0,
-            Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
+                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
         L2_INCREMENT();
       } else if (item == "L2_LS") {
         cb.vmovapd(xmmword_ptr(l2_addr, 96), Xmm(add_dest));
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
-                    xmmword_ptr(l2_addr, 64));
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l2_addr, 64));
         L2_INCREMENT();
       } else if (item == "L3_L") {
-        cb.vfmaddpd(
-            Xmm(add_dest), Xmm(add_dest), xmm0,
-            Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm1,
-                    xmmword_ptr(l3_addr, 64));
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
+                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm1, xmmword_ptr(l3_addr, 64));
         L3_INCREMENT();
       } else if (item == "L3_S") {
         cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
-        cb.vfmaddpd(
-            Xmm(add_dest), Xmm(add_dest), xmm0,
-            Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
+                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
         L3_INCREMENT();
       } else if (item == "L3_LS") {
         cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
-                    xmmword_ptr(l3_addr, 64));
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l3_addr, 64));
         L3_INCREMENT();
       } else if (item == "L3_P") {
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
-                    xmmword_ptr(l1_addr, 32));
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l1_addr, 32));
         cb.prefetcht2(ptr(l3_addr));
         L3_INCREMENT();
       } else if (item == "RAM_L") {
-        cb.vfmaddpd(
-            Xmm(add_dest), Xmm(add_dest), xmm0,
-            Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
+                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
         cb.vfmaddpd(ram_reg, ram_reg, xmm1, xmmword_ptr(ram_addr, 64));
         RAM_INCREMENT();
       } else if (item == "RAM_S") {
         cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
-        cb.vfmaddpd(
-            Xmm(add_dest), Xmm(add_dest), xmm0,
-            Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
+                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
         RAM_INCREMENT();
       } else if (item == "RAM_LS") {
         cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
-                    xmmword_ptr(ram_addr, 32));
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(ram_addr, 32));
         RAM_INCREMENT();
       } else if (item == "RAM_P") {
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
-                    xmmword_ptr(l1_addr, 32));
+        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l1_addr, 32));
         cb.prefetcht2(ptr(ram_addr));
         RAM_INCREMENT();
       } else {
-        workerLog::error() << "Instruction group " << item << " not found in "
-                           << this->name() << ".";
+        workerLog::error() << "Instruction group " << item << " not found in " << this->name() << ".";
         return EXIT_FAILURE;
       }
 
@@ -405,9 +365,7 @@ int FMA4Payload::compilePayload(
 
     // dump all the ymm register
     for (int i = 0; i < (int)this->registerCount(); i++) {
-      cb.vmovapd(
-          ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)),
-          Ymm(i));
+      cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i));
     }
 
     // set read flag
@@ -417,8 +375,7 @@ int FMA4Payload::compilePayload(
   }
 
   if (errorDetection) {
-    this->emitErrorDetectionCode<decltype(iter_reg), Ymm>(
-        cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
+    this->emitErrorDetectionCode<decltype(iter_reg), Ymm>(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
   }
 
   cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH));
@@ -437,8 +394,7 @@ int FMA4Payload::compilePayload(
 
   Error err = this->rt.add(&this->loadFunction, &code);
   if (err) {
-    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in "
-                       << __FILE__ << " at " << __LINE__;
+    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
   }
 
@@ -451,8 +407,7 @@ int FMA4Payload::compilePayload(
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size
-                       << " Bytes (" << instructionCachePercentage
+    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
     workerLog::trace() << "Sequence size: " << sequence.size();
     workerLog::trace() << "Repetition count: " << repetitions;
@@ -464,14 +419,12 @@ int FMA4Payload::compilePayload(
 std::list<std::string> FMA4Payload::getAvailableInstructions() const {
   std::list<std::string> instructions;
 
-  transform(this->instructionFlops.begin(), this->instructionFlops.end(),
-            back_inserter(instructions),
-            [](const auto &item) { return item.first; });
+  transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions),
+            [](const auto& item) { return item.first; });
 
   return instructions;
 }
 
-void FMA4Payload::init(unsigned long long *memoryAddr,
-                       unsigned long long bufferSize) {
+void FMA4Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) {
   X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
index e3087c01..3a432bfb 100644
--- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
@@ -29,28 +29,24 @@ using namespace firestarter::environment::x86::payload;
 using namespace asmjit;
 using namespace asmjit::x86;
 
-int FMAPayload::compilePayload(
-    std::vector<std::pair<std::string, unsigned>> const &proportion,
-    unsigned instructionCacheSize,
-    std::list<unsigned> const &dataCacheBufferSize, unsigned ramBufferSize,
-    unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-    bool errorDetection) {
+int FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion,
+                               unsigned instructionCacheSize, std::list<unsigned> const& dataCacheBufferSize,
+                               unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters,
+                               bool errorDetection) {
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
   auto sequence = this->generateSequence(proportion);
-  auto repetitions =
-      this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
+  auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
 
   // compute count of flops and memory access for performance report
   unsigned flops = 0;
   unsigned bytes = 0;
 
-  for (const auto &item : sequence) {
+  for (const auto& item : sequence) {
     auto it = this->instructionFlops.find(item);
 
     if (it == this->instructionFlops.end()) {
-      workerLog::error() << "Instruction group " << item << " undefined in "
-                         << name() << ".";
+      workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
@@ -78,12 +74,9 @@ int FMAPayload::compilePayload(
   auto ram_size = ramBufferSize / thread;
 
   // calculate the reset counters for the buffers
-  auto l2_loop_count =
-      getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
-  auto l3_loop_count =
-      getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
-  auto ram_loop_count =
-      getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
+  auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
+  auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
+  auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
 
   CodeHolder code;
   code.init(this->rt.environment());
@@ -93,9 +86,8 @@ int FMAPayload::compilePayload(
   }
 
   Builder cb(&code);
-  cb.addDiagnosticOptions(
-    asmjit::DiagnosticOptions::kValidateAssembler | 
-    asmjit::DiagnosticOptions::kValidateIntermediate );
+  cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
+                          asmjit::DiagnosticOptions::kValidateIntermediate);
 
   auto pointer_reg = rax;
   auto l1_addr = rbx;
@@ -119,8 +111,7 @@ int FMAPayload::compilePayload(
   auto ram_reg = ymm15;
 
   FuncDetail func;
-  func.init(FuncSignatureT<unsigned long long, unsigned long long *,
-                           volatile unsigned long long *, unsigned long long>(
+  func.init(FuncSignatureT<unsigned long long, unsigned long long*, volatile unsigned long long*, unsigned long long>(
                 CallConvId::kCDecl),
             this->rt.environment());
 
@@ -135,10 +126,9 @@ int FMAPayload::compilePayload(
     frame.addDirtyRegs(Mm(i));
   }
   // make all other used registers dirty except RAX
-  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg,
-                     l3_count_reg, ram_count_reg, temp_reg, temp_reg2,
-                     offset_reg, addrHigh_reg, iter_reg, ram_addr);
-  for (const auto &reg : shift_reg) {
+  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg,
+                     temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr);
+  for (const auto& reg : shift_reg) {
     frame.addDirtyRegs(reg);
   }
 
@@ -164,7 +154,7 @@ int FMAPayload::compilePayload(
   cb.mov(offset_reg,
          Imm(64)); // increment after each cache/memory access
   // Initialize registers for shift operations
-  for (auto const &reg : shift_reg32) {
+  for (auto const& reg : shift_reg32) {
     cb.mov(reg, Imm(0xAAAAAAAA));
   }
   // Initialize AVX-Registers for FMA Operations
@@ -186,23 +176,14 @@ int FMAPayload::compilePayload(
   cb.mov(ram_addr, pointer_reg);
   cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer
   cb.mov(l2_count_reg, Imm(l2_loop_count));
-  workerLog::trace() << "reset counter for L2-buffer with "
-                     << l2_loop_count
-                     << " cache line accesses per loop ("
-		     << l2_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop ("
+                     << l2_size / 1024 << ") KiB";
   cb.mov(l3_count_reg, Imm(l3_loop_count));
-  workerLog::trace() << "reset counter for L3-buffer with "
-                     << l3_loop_count
-                     << " cache line accesses per loop ("
-		     << l3_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop ("
+                     << l3_size / 1024 << ") KiB";
   cb.mov(ram_count_reg, Imm(ram_loop_count));
-  workerLog::trace() << "reset counter for RAM-buffer with "
-                     << ram_loop_count
-                     << " cache line accesses per loop ("
-		     << ram_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop ("
+                     << ram_size / 1024 << ") KiB";
 
   cb.align(AlignMode::kCode, 64);
 
@@ -216,22 +197,22 @@ int FMAPayload::compilePayload(
   auto mov_src = mov_dst + 1;
   unsigned l1_offset = 0;
 
-#define L1_INCREMENT_TIMES(n)                                                  \
-  l1_offset += n * 64;                                                         \
-  if (l1_offset < l1_size * 0.5) {                                             \
-    cb.add(l1_addr, offset_reg);                                               \
-  } else {                                                                     \
-    l1_offset = 0;                                                             \
-    cb.mov(l1_addr, pointer_reg);                                              \
+#define L1_INCREMENT_TIMES(n)                                                                                          \
+  l1_offset += n * 64;                                                                                                 \
+  if (l1_offset < l1_size * 0.5) {                                                                                     \
+    cb.add(l1_addr, offset_reg);                                                                                       \
+  } else {                                                                                                             \
+    l1_offset = 0;                                                                                                     \
+    cb.mov(l1_addr, pointer_reg);                                                                                      \
   }
 
 #define L1_INCREMENT() L1_INCREMENT_TIMES(1)
 
-#define L2_INCREMENT_TIMES(n)                                                  \
-  if (n == 1) {                                                                \
-    cb.add(l2_addr, offset_reg);                                               \
-  } else {                                                                     \
-    cb.add(l2_addr, n * 64);                                                   \
+#define L2_INCREMENT_TIMES(n)                                                                                          \
+  if (n == 1) {                                                                                                        \
+    cb.add(l2_addr, offset_reg);                                                                                       \
+  } else {                                                                                                             \
+    cb.add(l2_addr, n * 64);                                                                                           \
   }
 
 #define L2_INCREMENT() L2_INCREMENT_TIMES(1)
@@ -241,12 +222,11 @@ int FMAPayload::compilePayload(
 #define RAM_INCREMENT() cb.add(ram_addr, offset_reg)
 
   for (unsigned count = 0; count < repetitions; count++) {
-    for (const auto &item : sequence) {
+    for (const auto& item : sequence) {
       if (item == "REG") {
         cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2);
         cb.vfmadd231pd(Ymm(mov_dst), ymm2, ymm1);
-        cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs],
-                temp_reg);
+        cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], temp_reg);
         mov_dst++;
       } else if (item == "L1_L") {
         cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2);
@@ -331,8 +311,7 @@ int FMAPayload::compilePayload(
         cb.prefetcht2(ptr(ram_addr));
         RAM_INCREMENT();
       } else {
-        workerLog::error() << "Instruction group " << item << " not found in "
-                           << this->name() << ".";
+        workerLog::error() << "Instruction group " << item << " not found in " << this->name() << ".";
         return EXIT_FAILURE;
       }
 
@@ -414,9 +393,7 @@ int FMAPayload::compilePayload(
 
     // dump all the ymm register
     for (int i = 0; i < (int)this->registerCount(); i++) {
-      cb.vmovapd(
-          ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)),
-          Ymm(i));
+      cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i));
     }
 
     // set read flag
@@ -426,8 +403,7 @@ int FMAPayload::compilePayload(
   }
 
   if (errorDetection) {
-    this->emitErrorDetectionCode<decltype(iter_reg), Ymm>(
-        cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
+    this->emitErrorDetectionCode<decltype(iter_reg), Ymm>(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
   }
 
   cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH));
@@ -446,8 +422,7 @@ int FMAPayload::compilePayload(
 
   Error err = this->rt.add(&this->loadFunction, &code);
   if (err) {
-    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in "
-                       << __FILE__ << " at " << __LINE__;
+    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
   }
 
@@ -460,8 +435,7 @@ int FMAPayload::compilePayload(
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size
-                       << " Bytes (" << instructionCachePercentage
+    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
     workerLog::trace() << "Sequence size: " << sequence.size();
     workerLog::trace() << "Repetition count: " << repetitions;
@@ -473,14 +447,12 @@ int FMAPayload::compilePayload(
 std::list<std::string> FMAPayload::getAvailableInstructions() const {
   std::list<std::string> instructions;
 
-  transform(this->instructionFlops.begin(), this->instructionFlops.end(),
-            back_inserter(instructions),
-            [](const auto &item) { return item.first; });
+  transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions),
+            [](const auto& item) { return item.first; });
 
   return instructions;
 }
 
-void FMAPayload::init(unsigned long long *memoryAddr,
-                      unsigned long long bufferSize) {
+void FMAPayload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) {
   X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index d22880d1..d3d0147f 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -29,28 +29,24 @@ using namespace firestarter::environment::x86::payload;
 using namespace asmjit;
 using namespace asmjit::x86;
 
-int SSE2Payload::compilePayload(
-    std::vector<std::pair<std::string, unsigned>> const &proportion,
-    unsigned instructionCacheSize,
-    std::list<unsigned> const &dataCacheBufferSize, unsigned ramBufferSize,
-    unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-    bool errorDetection) {
+int SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion,
+                                unsigned instructionCacheSize, std::list<unsigned> const& dataCacheBufferSize,
+                                unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters,
+                                bool errorDetection) {
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
   auto sequence = this->generateSequence(proportion);
-  auto repetitions =
-      this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
+  auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
 
   // compute count of flops and memory access for performance report
   unsigned flops = 0;
   unsigned bytes = 0;
 
-  for (const auto &item : sequence) {
+  for (const auto& item : sequence) {
     auto it = this->instructionFlops.find(item);
 
     if (it == this->instructionFlops.end()) {
-      workerLog::error() << "Instruction group " << item << " undefined in "
-                         << name() << ".";
+      workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
@@ -78,12 +74,9 @@ int SSE2Payload::compilePayload(
   auto ram_size = ramBufferSize / thread;
 
   // calculate the reset counters for the buffers
-  auto l2_loop_count =
-      getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
-  auto l3_loop_count =
-      getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
-  auto ram_loop_count =
-      getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
+  auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
+  auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
+  auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
 
   CodeHolder code;
   code.init(this->rt.environment());
@@ -93,9 +86,8 @@ int SSE2Payload::compilePayload(
   }
 
   Builder cb(&code);
-  cb.addDiagnosticOptions(
-    asmjit::DiagnosticOptions::kValidateAssembler | 
-    asmjit::DiagnosticOptions::kValidateIntermediate );
+  cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
+                          asmjit::DiagnosticOptions::kValidateIntermediate);
 
   auto pointer_reg = rax;
   auto l1_addr = rbx;
@@ -115,8 +107,7 @@ int SSE2Payload::compilePayload(
   auto trans_regs = 2;
 
   FuncDetail func;
-  func.init(FuncSignatureT<unsigned long long, unsigned long long *,
-                           volatile unsigned long long *, unsigned long long>(
+  func.init(FuncSignatureT<unsigned long long, unsigned long long*, volatile unsigned long long*, unsigned long long>(
                 CallConvId::kCDecl),
             this->rt.environment());
 
@@ -132,9 +123,8 @@ int SSE2Payload::compilePayload(
     frame.addDirtyRegs(Mm(i));
   }
   // make all other used registers dirty except RAX
-  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg,
-                     l3_count_reg, ram_count_reg, temp_reg, temp_reg2,
-                     offset_reg, addrHigh_reg, iter_reg);
+  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg,
+                     temp_reg2, offset_reg, addrHigh_reg, iter_reg);
 
   FuncArgsAssignment args(&func);
   args.assignAll(pointer_reg, addrHigh_reg, iter_reg);
@@ -204,23 +194,14 @@ int SSE2Payload::compilePayload(
   cb.mov(ram_addr, pointer_reg);
   cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer
   cb.mov(l2_count_reg, Imm(l2_loop_count));
-  workerLog::trace() << "reset counter for L2-buffer with "
-                     << l2_loop_count
-                     << " cache line accesses per loop ("
-		     << l2_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop ("
+                     << l2_size / 1024 << ") KiB";
   cb.mov(l3_count_reg, Imm(l3_loop_count));
-  workerLog::trace() << "reset counter for L3-buffer with "
-                     << l3_loop_count
-                     << " cache line accesses per loop ("
-		     << l3_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop ("
+                     << l3_size / 1024 << ") KiB";
   cb.mov(ram_count_reg, Imm(ram_loop_count));
-  workerLog::trace() << "reset counter for RAM-buffer with "
-                     << ram_loop_count
-                     << " cache line accesses per loop ("
-		     << ram_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop ("
+                     << ram_size / 1024 << ") KiB";
 
   cb.align(AlignMode::kCode, 64);
 
@@ -233,13 +214,13 @@ int SSE2Payload::compilePayload(
   auto mov_src = mov_dst + 1;
   unsigned l1_offset = 0;
 
-#define L1_INCREMENT()                                                         \
-  l1_offset += 64;                                                             \
-  if (l1_offset < l1_size * 0.5) {                                             \
-    cb.add(l1_addr, offset_reg);                                               \
-  } else {                                                                     \
-    l1_offset = 0;                                                             \
-    cb.mov(l1_addr, pointer_reg);                                              \
+#define L1_INCREMENT()                                                                                                 \
+  l1_offset += 64;                                                                                                     \
+  if (l1_offset < l1_size * 0.5) {                                                                                     \
+    cb.add(l1_addr, offset_reg);                                                                                       \
+  } else {                                                                                                             \
+    l1_offset = 0;                                                                                                     \
+    cb.mov(l1_addr, pointer_reg);                                                                                      \
   }
 
 #define L2_INCREMENT() cb.add(l2_addr, offset_reg);
@@ -249,19 +230,15 @@ int SSE2Payload::compilePayload(
 #define RAM_INCREMENT() cb.add(ram_addr, offset_reg)
 
   for (unsigned count = 0; count < repetitions; count++) {
-    for (const auto &item : sequence) {
+    for (const auto& item : sequence) {
       if (item == "REG") {
-        cb.addpd(
-            Xmm(add_dest),
-            Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
+        cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
         cb.movdqa(Xmm(mov_dst), Xmm(mov_src));
       } else if (item == "L1_L") {
         cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32));
         L1_INCREMENT();
       } else if (item == "L1_S") {
-        cb.addpd(
-            Xmm(add_dest),
-            Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
+        cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.movapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest));
         L1_INCREMENT();
         this->_instructions++;
@@ -274,9 +251,7 @@ int SSE2Payload::compilePayload(
         cb.addpd(Xmm(add_dest), xmmword_ptr(l2_addr, 64));
         L2_INCREMENT();
       } else if (item == "L2_S") {
-        cb.addpd(
-            Xmm(add_dest),
-            Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
+        cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.movapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest));
         L2_INCREMENT();
         this->_instructions++;
@@ -289,9 +264,7 @@ int SSE2Payload::compilePayload(
         cb.addpd(Xmm(add_dest), xmmword_ptr(l3_addr, 64));
         L3_INCREMENT();
       } else if (item == "L3_S") {
-        cb.addpd(
-            Xmm(add_dest),
-            Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
+        cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.movapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
         L3_INCREMENT();
         this->_instructions++;
@@ -309,9 +282,7 @@ int SSE2Payload::compilePayload(
         cb.addpd(Xmm(add_dest), xmmword_ptr(ram_addr, 64));
         RAM_INCREMENT();
       } else if (item == "RAM_S") {
-        cb.addpd(
-            Xmm(add_dest),
-            Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
+        cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.movapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
         RAM_INCREMENT();
         this->_instructions++;
@@ -326,16 +297,13 @@ int SSE2Payload::compilePayload(
         RAM_INCREMENT();
         this->_instructions++;
       } else {
-        workerLog::error() << "Instruction group " << item << " not found in "
-                           << this->name() << ".";
+        workerLog::error() << "Instruction group " << item << " not found in " << this->name() << ".";
         return EXIT_FAILURE;
       }
 
       if (mov_regs > 0) {
         this->_instructions++;
-        cb.movq(
-            Mm(mov_start + (movq_dst - mov_start + mov_regs - 1) % mov_regs),
-            Mm(movq_dst));
+        cb.movq(Mm(mov_start + (movq_dst - mov_start + mov_regs - 1) % mov_regs), Mm(movq_dst));
       }
 
       add_dest++;
@@ -411,9 +379,7 @@ int SSE2Payload::compilePayload(
 
     // dump all the xmm register
     for (int i = 0; i < (int)this->registerCount(); i++) {
-      cb.movapd(
-          xmmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)),
-          Xmm(i));
+      cb.movapd(xmmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Xmm(i));
     }
 
     // set read flag
@@ -423,8 +389,7 @@ int SSE2Payload::compilePayload(
   }
 
   if (errorDetection) {
-    this->emitErrorDetectionCode<decltype(iter_reg), Xmm>(
-        cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
+    this->emitErrorDetectionCode<decltype(iter_reg), Xmm>(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
   }
 
   cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH));
@@ -443,8 +408,7 @@ int SSE2Payload::compilePayload(
 
   Error err = this->rt.add(&this->loadFunction, &code);
   if (err) {
-    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in "
-                       << __FILE__ << " at " << __LINE__;
+    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
   }
 
@@ -457,8 +421,7 @@ int SSE2Payload::compilePayload(
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size
-                       << " Bytes (" << instructionCachePercentage
+    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
     workerLog::trace() << "Sequence size: " << sequence.size();
     workerLog::trace() << "Repetition count: " << repetitions;
@@ -470,15 +433,12 @@ int SSE2Payload::compilePayload(
 std::list<std::string> SSE2Payload::getAvailableInstructions() const {
   std::list<std::string> instructions;
 
-  transform(this->instructionFlops.begin(), this->instructionFlops.end(),
-            back_inserter(instructions),
-            [](const auto &item) { return item.first; });
+  transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions),
+            [](const auto& item) { return item.first; });
 
   return instructions;
 }
 
-void SSE2Payload::init(unsigned long long *memoryAddr,
-                       unsigned long long bufferSize) {
-  X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10,
-                   1.654738925401e-15);
+void SSE2Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) {
+  X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10, 1.654738925401e-15);
 }
diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index 42a2fa5b..8d85dc2d 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -32,8 +32,7 @@
 
 using namespace firestarter::environment::x86::payload;
 
-void X86Payload::lowLoadFunction(volatile unsigned long long *addrHigh,
-                                 unsigned long long period) {
+void X86Payload::lowLoadFunction(volatile unsigned long long* addrHigh, unsigned long long period) {
   int nap;
 #ifdef _MSC_VER
   std::array<int, 4> cpuid;
@@ -70,46 +69,36 @@ void X86Payload::lowLoadFunction(volatile unsigned long long *addrHigh,
   }
 }
 
-void X86Payload::init(unsigned long long *memoryAddr,
-                      unsigned long long bufferSize, double firstValue,
+void X86Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize, double firstValue,
                       double lastValue) {
   unsigned long long i = 0;
 
   for (; i < INIT_BLOCKSIZE; i++)
-    *((double *)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * firstValue;
+    *((double*)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * firstValue;
   for (; i <= bufferSize - INIT_BLOCKSIZE; i += INIT_BLOCKSIZE)
-    std::memcpy(memoryAddr + i, memoryAddr + i - INIT_BLOCKSIZE,
-                sizeof(unsigned long long) * INIT_BLOCKSIZE);
+    std::memcpy(memoryAddr + i, memoryAddr + i - INIT_BLOCKSIZE, sizeof(unsigned long long) * INIT_BLOCKSIZE);
   for (; i < bufferSize; i++)
-    *((double *)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * lastValue;
+    *((double*)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * lastValue;
 }
 
-unsigned long long
-X86Payload::highLoadFunction(unsigned long long *addrMem,
-                             volatile unsigned long long *addrHigh,
-                             unsigned long long iterations) {
+unsigned long long X86Payload::highLoadFunction(unsigned long long* addrMem, volatile unsigned long long* addrHigh,
+                                                unsigned long long iterations) {
   return this->loadFunction(addrMem, addrHigh, iterations);
 }
 
 // add MM regs to dirty regs
 // zmm31 is used for backup if VectorReg is of type asmjit::x86::Zmm
 template <class IterReg, class VectorReg>
-void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder &cb,
-                                        IterReg iter_reg,
-                                        asmjit::x86::Gpq addrHigh_reg,
-                                        asmjit::x86::Gpq pointer_reg,
-                                        asmjit::x86::Gpq temp_reg,
+void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder& cb, IterReg iter_reg, asmjit::x86::Gpq addrHigh_reg,
+                                        asmjit::x86::Gpq pointer_reg, asmjit::x86::Gpq temp_reg,
                                         asmjit::x86::Gpq temp_reg2) {
   // we don't want anything to break... so we use asserts for everything that
   // could break it
-  static_assert(std::is_base_of<asmjit::x86::Vec, VectorReg>::value,
-                "VectorReg must be of asmjit::asmjit::x86::Vec");
-  static_assert(std::is_same<asmjit::x86::Xmm, VectorReg>::value ||
-                    std::is_same<asmjit::x86::Ymm, VectorReg>::value ||
+  static_assert(std::is_base_of<asmjit::x86::Vec, VectorReg>::value, "VectorReg must be of asmjit::asmjit::x86::Vec");
+  static_assert(std::is_same<asmjit::x86::Xmm, VectorReg>::value || std::is_same<asmjit::x86::Ymm, VectorReg>::value ||
                     std::is_same<asmjit::x86::Zmm, VectorReg>::value,
                 "VectorReg ist not of any supported type");
-  static_assert(std::is_same<asmjit::x86::Mm, IterReg>::value ||
-                    std::is_same<asmjit::x86::Gpq, IterReg>::value,
+  static_assert(std::is_same<asmjit::x86::Mm, IterReg>::value || std::is_same<asmjit::x86::Gpq, IterReg>::value,
                 "IterReg is not of any supported type");
 
   if constexpr (std::is_same<asmjit::x86::Mm, IterReg>::value) {
@@ -281,8 +270,7 @@ void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder &cb,
     cb.movq(temp_reg2, asmjit::x86::Mm(4));
     cb.pinsrq(asmjit::x86::xmm0, temp_reg2, asmjit::Imm(1));
 
-    cb.vinsertf128(asmjit::x86::ymm0, asmjit::x86::ymm0, asmjit::x86::xmm0,
-                   asmjit::Imm(1));
+    cb.vinsertf128(asmjit::x86::ymm0, asmjit::x86::ymm0, asmjit::x86::xmm0, asmjit::Imm(1));
 
     cb.movq(temp_reg2, asmjit::x86::Mm(7));
     cb.movq(asmjit::x86::xmm0, temp_reg2);
@@ -463,24 +451,16 @@ void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder &cb,
   cb.bind(SkipErrorDetection);
 }
 
-template void
-X86Payload::emitErrorDetectionCode<asmjit::x86::Gpq, asmjit::x86::Xmm>(
-    asmjit::x86::Builder &cb, asmjit::x86::Gpq iter_reg,
-    asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg,
+template void X86Payload::emitErrorDetectionCode<asmjit::x86::Gpq, asmjit::x86::Xmm>(
+    asmjit::x86::Builder& cb, asmjit::x86::Gpq iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg,
     asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2);
-template void
-X86Payload::emitErrorDetectionCode<asmjit::x86::Gpq, asmjit::x86::Ymm>(
-    asmjit::x86::Builder &cb, asmjit::x86::Gpq iter_reg,
-    asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg,
+template void X86Payload::emitErrorDetectionCode<asmjit::x86::Gpq, asmjit::x86::Ymm>(
+    asmjit::x86::Builder& cb, asmjit::x86::Gpq iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg,
     asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2);
 
-template void
-X86Payload::emitErrorDetectionCode<asmjit::x86::Mm, asmjit::x86::Ymm>(
-    asmjit::x86::Builder &cb, asmjit::x86::Mm iter_reg,
-    asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg,
+template void X86Payload::emitErrorDetectionCode<asmjit::x86::Mm, asmjit::x86::Ymm>(
+    asmjit::x86::Builder& cb, asmjit::x86::Mm iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg,
     asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2);
-template void
-X86Payload::emitErrorDetectionCode<asmjit::x86::Mm, asmjit::x86::Zmm>(
-    asmjit::x86::Builder &cb, asmjit::x86::Mm iter_reg,
-    asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg,
+template void X86Payload::emitErrorDetectionCode<asmjit::x86::Mm, asmjit::x86::Zmm>(
+    asmjit::x86::Builder& cb, asmjit::x86::Mm iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg,
     asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2);
diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
index 9e99ca2d..b933dcd1 100644
--- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
@@ -29,28 +29,24 @@ using namespace firestarter::environment::x86::payload;
 using namespace asmjit;
 using namespace asmjit::x86;
 
-int ZENFMAPayload::compilePayload(
-    std::vector<std::pair<std::string, unsigned>> const &proportion,
-    unsigned instructionCacheSize,
-    std::list<unsigned> const &dataCacheBufferSize, unsigned ramBufferSize,
-    unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-    bool errorDetection) {
+int ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion,
+                                  unsigned instructionCacheSize, std::list<unsigned> const& dataCacheBufferSize,
+                                  unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters,
+                                  bool errorDetection) {
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
   auto sequence = this->generateSequence(proportion);
-  auto repetitions =
-      this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
+  auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
 
   // compute count of flops and memory access for performance report
   unsigned flops = 0;
   unsigned bytes = 0;
 
-  for (const auto &item : sequence) {
+  for (const auto& item : sequence) {
     auto it = this->instructionFlops.find(item);
 
     if (it == this->instructionFlops.end()) {
-      workerLog::error() << "Instruction group " << item << " undefined in "
-                         << name() << ".";
+      workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
@@ -78,12 +74,9 @@ int ZENFMAPayload::compilePayload(
   auto ram_size = ramBufferSize / thread;
 
   // calculate the reset counters for the buffers
-  auto l2_loop_count =
-      getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
-  auto l3_loop_count =
-      getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
-  auto ram_loop_count =
-      getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
+  auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
+  auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
+  auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
 
   CodeHolder code;
   code.init(this->rt.environment());
@@ -93,9 +86,8 @@ int ZENFMAPayload::compilePayload(
   }
 
   Builder cb(&code);
-  cb.addDiagnosticOptions(
-    asmjit::DiagnosticOptions::kValidateAssembler | 
-    asmjit::DiagnosticOptions::kValidateIntermediate );
+  cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
+                          asmjit::DiagnosticOptions::kValidateIntermediate);
 
   auto pointer_reg = rax;
   auto l1_addr = rbx;
@@ -116,8 +108,7 @@ int ZENFMAPayload::compilePayload(
   auto ram_reg = ymm15;
 
   FuncDetail func;
-  func.init(FuncSignatureT<unsigned long long, unsigned long long *,
-                           volatile unsigned long long *, unsigned long long>(
+  func.init(FuncSignatureT<unsigned long long, unsigned long long*, volatile unsigned long long*, unsigned long long>(
                 CallConvId::kCDecl),
             this->rt.environment());
 
@@ -132,10 +123,9 @@ int ZENFMAPayload::compilePayload(
     frame.addDirtyRegs(Mm(i));
   }
   // make all other used registers dirty except RAX
-  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg,
-                     l3_count_reg, ram_count_reg, temp_reg, temp_reg2,
-                     offset_reg, addrHigh_reg, iter_reg, ram_addr);
-  for (const auto &reg : shift_reg) {
+  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg,
+                     temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr);
+  for (const auto& reg : shift_reg) {
     frame.addDirtyRegs(reg);
   }
 
@@ -161,7 +151,7 @@ int ZENFMAPayload::compilePayload(
   cb.mov(offset_reg,
          Imm(64)); // increment after each cache/memory access
   // Initialize registers for shift operations
-  for (auto const &reg : shift_reg) {
+  for (auto const& reg : shift_reg) {
     cb.mov(reg, Imm(0xAAAAAAAAAAAAAAAA));
   }
   // Initialize AVX-Registers for FMA Operations
@@ -190,23 +180,14 @@ int ZENFMAPayload::compilePayload(
   cb.mov(ram_addr, pointer_reg);
   cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer
   cb.mov(l2_count_reg, Imm(l2_loop_count));
-  workerLog::trace() << "reset counter for L2-buffer with "
-                     << l2_loop_count
-                     << " cache line accesses per loop ("
-		     << l2_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop ("
+                     << l2_size / 1024 << ") KiB";
   cb.mov(l3_count_reg, Imm(l3_loop_count));
-  workerLog::trace() << "reset counter for L3-buffer with "
-                     << l3_loop_count
-                     << " cache line accesses per loop ("
-		     << l3_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop ("
+                     << l3_size / 1024 << ") KiB";
   cb.mov(ram_count_reg, Imm(ram_loop_count));
-  workerLog::trace() << "reset counter for RAM-buffer with "
-                     << ram_loop_count
-                     << " cache line accesses per loop ("
-		     << ram_size/1024
-                     << ") KiB";
+  workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop ("
+                     << ram_size / 1024 << ") KiB";
 
   cb.align(AlignMode::kCode, 64);
 
@@ -219,13 +200,13 @@ int ZENFMAPayload::compilePayload(
   auto add_dest = add_regs_start;
   unsigned l1_offset = 0;
 
-#define L1_INCREMENT()                                                         \
-  l1_offset += 64;                                                             \
-  if (l1_offset < l1_size * 0.5) {                                             \
-    cb.add(l1_addr, offset_reg);                                               \
-  } else {                                                                     \
-    l1_offset = 0;                                                             \
-    cb.mov(l1_addr, pointer_reg);                                              \
+#define L1_INCREMENT()                                                                                                 \
+  l1_offset += 64;                                                                                                     \
+  if (l1_offset < l1_size * 0.5) {                                                                                     \
+    cb.add(l1_addr, offset_reg);                                                                                       \
+  } else {                                                                                                             \
+    l1_offset = 0;                                                                                                     \
+    cb.mov(l1_addr, pointer_reg);                                                                                      \
   }
 
 #define L2_INCREMENT() cb.add(l2_addr, offset_reg);
@@ -235,7 +216,7 @@ int ZENFMAPayload::compilePayload(
 #define RAM_INCREMENT() cb.add(ram_addr, offset_reg)
 
   for (unsigned count = 0; count < repetitions; count++) {
-    for (const auto &item : sequence) {
+    for (const auto& item : sequence) {
 
       // swap second and third param of fma instruction to force bitchanges on
       // the pipes to its execution units
@@ -251,8 +232,7 @@ int ZENFMAPayload::compilePayload(
 
       if (item == "REG") {
         cb.vfmadd231pd(Ymm(add_dest), secondParam, thirdParam);
-        cb.xor_(temp_reg,
-                shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]);
+        cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]);
         if (left) {
           cb.shr(shift_reg[shift_pos], Imm(1));
         } else {
@@ -264,28 +244,23 @@ int ZENFMAPayload::compilePayload(
         L1_INCREMENT();
       } else if (item == "L2_L") {
         cb.vfmadd231pd(Ymm(add_dest), secondParam, ymmword_ptr(l2_addr, 64));
-        cb.xor_(temp_reg,
-                shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]);
+        cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]);
         L2_INCREMENT();
       } else if (item == "L3_L") {
         cb.vfmadd231pd(Ymm(add_dest), secondParam, ymmword_ptr(l3_addr, 64));
-        cb.xor_(temp_reg,
-                shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]);
+        cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]);
         L3_INCREMENT();
       } else if (item == "RAM_L") {
         cb.vfmadd231pd(Ymm(ram_reg), secondParam, ymmword_ptr(ram_addr, 32));
-        cb.xor_(temp_reg,
-                shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]);
+        cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]);
         RAM_INCREMENT();
       } else {
-        workerLog::error() << "Instruction group " << item << " not found in "
-                           << this->name() << ".";
+        workerLog::error() << "Instruction group " << item << " not found in " << this->name() << ".";
         return EXIT_FAILURE;
       }
 
       // make sure the shifts do could end up shifting out the data one end.
-      if (itemCount < (int)(sequence.size() * repetitions -
-                            (sequence.size() * repetitions) % 4)) {
+      if (itemCount < (int)(sequence.size() * repetitions - (sequence.size() * repetitions) % 4)) {
         switch (itemCount % 4) {
         case 0:
           cb.vpsrlq(Xmm(13), Xmm(13), Imm(1));
@@ -369,9 +344,7 @@ int ZENFMAPayload::compilePayload(
 
     // dump all the ymm register
     for (int i = 0; i < (int)this->registerCount(); i++) {
-      cb.vmovapd(
-          ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)),
-          Ymm(i));
+      cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i));
     }
 
     // set read flag
@@ -381,8 +354,7 @@ int ZENFMAPayload::compilePayload(
   }
 
   if (errorDetection) {
-    this->emitErrorDetectionCode<decltype(iter_reg), Ymm>(
-        cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
+    this->emitErrorDetectionCode<decltype(iter_reg), Ymm>(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
   }
 
   cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH));
@@ -401,8 +373,7 @@ int ZENFMAPayload::compilePayload(
 
   Error err = this->rt.add(&this->loadFunction, &code);
   if (err) {
-    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in "
-                       << __FILE__ << " at " << __LINE__;
+    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
   }
 
@@ -415,8 +386,7 @@ int ZENFMAPayload::compilePayload(
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size
-                       << " Bytes (" << instructionCachePercentage
+    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
     workerLog::trace() << "Sequence size: " << sequence.size();
     workerLog::trace() << "Repetition count: " << repetitions;
@@ -428,14 +398,12 @@ int ZENFMAPayload::compilePayload(
 std::list<std::string> ZENFMAPayload::getAvailableInstructions() const {
   std::list<std::string> instructions;
 
-  transform(this->instructionFlops.begin(), this->instructionFlops.end(),
-            back_inserter(instructions),
-            [](const auto &item) { return item.first; });
+  transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions),
+            [](const auto& item) { return item.first; });
 
   return instructions;
 }
 
-void ZENFMAPayload::init(unsigned long long *memoryAddr,
-                         unsigned long long bufferSize) {
+void ZENFMAPayload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) {
   X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
diff --git a/src/firestarter/Environment/X86/X86CPUTopology.cpp b/src/firestarter/Environment/X86/X86CPUTopology.cpp
index 8b8abe2b..6e7eb288 100644
--- a/src/firestarter/Environment/X86/X86CPUTopology.cpp
+++ b/src/firestarter/Environment/X86/X86CPUTopology.cpp
@@ -34,12 +34,12 @@
 using namespace firestarter::environment::x86;
 
 X86CPUTopology::X86CPUTopology()
-    : CPUTopology("x86_64"), cpuInfo(asmjit::CpuInfo::host()),
-      _vendor(this->cpuInfo.vendor()) {
+    : CPUTopology("x86_64")
+    , cpuInfo(asmjit::CpuInfo::host())
+    , _vendor(this->cpuInfo.vendor()) {
 
   std::stringstream ss;
-  ss << "Family " << this->familyId() << ", Model " << this->modelId()
-     << ", Stepping " << this->stepping();
+  ss << "Family " << this->familyId() << ", Model " << this->modelId() << ", Stepping " << this->stepping();
   this->_model = ss.str();
 
   for (int i = 0; i <= (int)asmjit::CpuFeatures::X86::Id::kMaxValue; i++) {
@@ -152,8 +152,7 @@ unsigned long long X86CPUTopology::clockrate() const {
   }
 
   /* non invariant TSCs can be used if CPUs run at fixed frequency */
-  if (!this->hasInvariantRdtsc() && governor.compare("performance") &&
-      governor.compare("powersave")) {
+  if (!this->hasInvariantRdtsc() && governor.compare("performance") && governor.compare("powersave")) {
     return CPUTopology::clockrate();
   }
 
@@ -181,8 +180,7 @@ unsigned long long X86CPUTopology::clockrate() const {
       end_time = Clock::now();
       end2_tsc = this->timestamp();
 
-      time_diff =
-          std::chrono::duration_cast<ticks>(end_time - start_time).count();
+      time_diff = std::chrono::duration_cast<ticks>(end_time - start_time).count();
     } while (0 == time_diff);
 
     clock_lower_bound = (((end1_tsc - start2_tsc) * 1000000) / (time_diff));
@@ -190,8 +188,7 @@ unsigned long long X86CPUTopology::clockrate() const {
 
     // if both values differ significantly, the measurement could have been
     // interrupted between 2 rdtsc's
-    if (((double)clock_lower_bound > (((double)clock_upper_bound) * 0.999)) &&
-        ((time_diff) > 2000)) {
+    if (((double)clock_lower_bound > (((double)clock_upper_bound) * 0.999)) && ((time_diff) > 2000)) {
       num_measurements++;
       clock = (clock_lower_bound + clock_upper_bound) / 2;
       if (clockrate == 0)
@@ -230,8 +227,8 @@ unsigned long long X86CPUTopology::timestamp() const {
 #endif
 }
 
-void X86CPUTopology::cpuid(unsigned long long *a, unsigned long long *b,
-                           unsigned long long *c, unsigned long long *d) const {
+void X86CPUTopology::cpuid(unsigned long long* a, unsigned long long* b, unsigned long long* c,
+                           unsigned long long* d) const {
 #ifndef _MSC_VER
   unsigned long long reg_a, reg_b, reg_c, reg_d;
 
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index d981358d..b923fbf4 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -31,48 +31,42 @@ using namespace firestarter::environment::x86;
 void X86Environment::evaluateFunctions() {
   for (auto ctor : this->platformConfigsCtor) {
     // add asmjit for model and family detection
-    this->platformConfigs.push_back(
-        ctor(this->topology().featuresAsmjit(), this->topology().familyId(),
-             this->topology().modelId(), this->topology().numThreadsPerCore()));
+    this->platformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(),
+                                         this->topology().modelId(), this->topology().numThreadsPerCore()));
   }
 
   for (auto ctor : this->fallbackPlatformConfigsCtor) {
-    this->fallbackPlatformConfigs.push_back(
-        ctor(this->topology().featuresAsmjit(), this->topology().familyId(),
-             this->topology().modelId(), this->topology().numThreadsPerCore()));
+    this->fallbackPlatformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(),
+                                                 this->topology().modelId(), this->topology().numThreadsPerCore()));
   }
 }
 
-int X86Environment::selectFunction(unsigned functionId,
-                                   bool allowUnavailablePayload) {
+int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePayload) {
   unsigned id = 1;
   std::string defaultPayloadName("");
 
   // if functionId is 0 get the default or fallback
   for (auto config : this->platformConfigs) {
-    for (auto const &[thread, functionName] : config->getThreadMap()) {
+    for (auto const& [thread, functionName] : config->getThreadMap()) {
       // the selected function
       if (id == functionId) {
         if (!config->isAvailable()) {
-          log::error() << "Function " << functionId << " (\"" << functionName
-                       << "\") requires " << config->payload().name()
-                       << ", which is not supported by the processor.";
+          log::error() << "Function " << functionId << " (\"" << functionName << "\") requires "
+                       << config->payload().name() << ", which is not supported by the processor.";
           if (!allowUnavailablePayload) {
             return EXIT_FAILURE;
           }
         }
         // found function
-        this->_selectedConfig =
-            new ::firestarter::environment::platform::RuntimeConfig(
-                *config, thread, this->topology().instructionCacheSize());
+        this->_selectedConfig = new ::firestarter::environment::platform::RuntimeConfig(
+            *config, thread, this->topology().instructionCacheSize());
         return EXIT_SUCCESS;
       }
       // default function
       if (0 == functionId && config->isDefault()) {
         if (thread == this->topology().numThreadsPerCore()) {
-          this->_selectedConfig =
-              new ::firestarter::environment::platform::RuntimeConfig(
-                  *config, thread, this->topology().instructionCacheSize());
+          this->_selectedConfig = new ::firestarter::environment::platform::RuntimeConfig(
+              *config, thread, this->topology().instructionCacheSize());
           return EXIT_SUCCESS;
         } else {
           defaultPayloadName = config->payload().name();
@@ -88,8 +82,7 @@ int X86Environment::selectFunction(unsigned functionId,
     if (!defaultPayloadName.empty()) {
       // default payload available, but number of threads per core is not
       // supported
-      log::warn() << "No " << defaultPayloadName << " code path for "
-                  << this->topology().numThreadsPerCore()
+      log::warn() << "No " << defaultPayloadName << " code path for " << this->topology().numThreadsPerCore()
                   << " threads per core!";
     }
     log::warn() << this->topology().vendor() << " " << this->topology().model()
@@ -102,7 +95,7 @@ int X86Environment::selectFunction(unsigned functionId,
       if (config->isAvailable()) {
         auto selectedThread = 0;
         auto selectedFunctionName = std::string("");
-        for (auto const &[thread, functionName] : config->getThreadMap()) {
+        for (auto const& [thread, functionName] : config->getThreadMap()) {
           if (thread == this->topology().numThreadsPerCore()) {
             selectedThread = thread;
             selectedFunctionName = functionName;
@@ -112,12 +105,9 @@ int X86Environment::selectFunction(unsigned functionId,
           selectedThread = config->getThreadMap().begin()->first;
           selectedFunctionName = config->getThreadMap().begin()->second;
         }
-        this->_selectedConfig =
-            new ::firestarter::environment::platform::RuntimeConfig(
-                *config, selectedThread,
-                this->topology().instructionCacheSize());
-        log::warn() << "Using function " << selectedFunctionName
-                    << " as fallback.\n"
+        this->_selectedConfig = new ::firestarter::environment::platform::RuntimeConfig(
+            *config, selectedThread, this->topology().instructionCacheSize());
+        log::warn() << "Using function " << selectedFunctionName << " as fallback.\n"
                     << "You can use the parameter --function to try other "
                        "functions.";
         return EXIT_SUCCESS;
@@ -130,18 +120,14 @@ int X86Environment::selectFunction(unsigned functionId,
     return EXIT_FAILURE;
   }
 
-  log::error() << "unknown function id: " << functionId
-               << ", see --avail for available ids";
+  log::error() << "unknown function id: " << functionId << ", see --avail for available ids";
   return EXIT_FAILURE;
 }
 
 int X86Environment::selectInstructionGroups(std::string groups) {
   const std::string delimiter = ",";
   const std::regex re("^(\\w+):(\\d+)$");
-  const auto availableInstructionGroups = this->selectedConfig()
-                                              .platformConfig()
-                                              .payload()
-                                              .getAvailableInstructions();
+  const auto availableInstructionGroups = this->selectedConfig().platformConfig().payload().getAvailableInstructions();
 
   std::stringstream ss(groups);
   std::vector<std::pair<std::string, unsigned>> payloadSettings = {};
@@ -152,29 +138,25 @@ int X86Environment::selectInstructionGroups(std::string groups) {
     std::getline(ss, token, ',');
 
     if (std::regex_match(token, m, re)) {
-      if (std::find(availableInstructionGroups.begin(),
-                    availableInstructionGroups.end(),
-                    m[1].str()) == availableInstructionGroups.end()) {
-        log::error()
-            << "Invalid instruction-group: " << m[1].str()
-            << "\n       --run-instruction-groups format: multiple INST:VAL "
-               "pairs comma-seperated";
+      if (std::find(availableInstructionGroups.begin(), availableInstructionGroups.end(), m[1].str()) ==
+          availableInstructionGroups.end()) {
+        log::error() << "Invalid instruction-group: " << m[1].str()
+                     << "\n       --run-instruction-groups format: multiple INST:VAL "
+                        "pairs comma-seperated";
         return EXIT_FAILURE;
       }
       int num = std::stoul(m[2].str());
       if (num == 0) {
-        log::error()
-            << "instruction-group VAL may not contain number 0"
-            << "\n       --run-instruction-groups format: multiple INST:VAL "
-               "pairs comma-seperated";
+        log::error() << "instruction-group VAL may not contain number 0"
+                     << "\n       --run-instruction-groups format: multiple INST:VAL "
+                        "pairs comma-seperated";
         return EXIT_FAILURE;
       }
       payloadSettings.push_back(std::make_pair(m[1].str(), num));
     } else {
-      log::error()
-          << "Invalid symbols in instruction-group: " << token
-          << "\n       --run-instruction-groups format: multiple INST:VAL "
-             "pairs comma-seperated";
+      log::error() << "Invalid symbols in instruction-group: " << token
+                   << "\n       --run-instruction-groups format: multiple INST:VAL "
+                      "pairs comma-seperated";
       return EXIT_FAILURE;
     }
   }
@@ -189,10 +171,7 @@ int X86Environment::selectInstructionGroups(std::string groups) {
 void X86Environment::printAvailableInstructionGroups() {
   std::stringstream ss;
 
-  for (auto const &item : this->selectedConfig()
-                              .platformConfig()
-                              .payload()
-                              .getAvailableInstructions()) {
+  for (auto const& item : this->selectedConfig().platformConfig().payload().getAvailableInstructions()) {
     ss << item << ",";
   }
 
@@ -202,18 +181,13 @@ void X86Environment::printAvailableInstructionGroups() {
   }
 
   log::info() << " available instruction-groups for payload "
-              << this->selectedConfig().platformConfig().payload().name()
-              << ":\n"
+              << this->selectedConfig().platformConfig().payload().name() << ":\n"
               << "  " << s;
 }
 
-void X86Environment::setLineCount(unsigned lineCount) {
-  this->selectedConfig().setLineCount(lineCount);
-}
+void X86Environment::setLineCount(unsigned lineCount) { this->selectedConfig().setLineCount(lineCount); }
 
-void X86Environment::printSelectedCodePathSummary() {
-  this->selectedConfig().printCodePathSummary();
-}
+void X86Environment::printSelectedCodePathSummary() { this->selectedConfig().printCodePathSummary(); }
 
 void X86Environment::printFunctionSummary() {
   log::info() << " available load-functions:\n"
@@ -226,16 +200,14 @@ void X86Environment::printFunctionSummary() {
 
   unsigned id = 1;
 
-  for (auto const &config : this->platformConfigs) {
-    for (auto const &[thread, functionName] : config->getThreadMap()) {
-      const char *available = config->isAvailable() ? "yes" : "no";
-      const char *fmt = "  %4u | %-30s | %-24s | %s";
-      int sz =
-          std::snprintf(nullptr, 0, fmt, id, functionName.c_str(), available,
-                        config->getDefaultPayloadSettingsString().c_str());
+  for (auto const& config : this->platformConfigs) {
+    for (auto const& [thread, functionName] : config->getThreadMap()) {
+      const char* available = config->isAvailable() ? "yes" : "no";
+      const char* fmt = "  %4u | %-30s | %-24s | %s";
+      int sz = std::snprintf(nullptr, 0, fmt, id, functionName.c_str(), available,
+                             config->getDefaultPayloadSettingsString().c_str());
       std::vector<char> buf(sz + 1);
-      std::snprintf(&buf[0], buf.size(), fmt, id, functionName.c_str(),
-                    available,
+      std::snprintf(&buf[0], buf.size(), fmt, id, functionName.c_str(), available,
                     config->getDefaultPayloadSettingsString().c_str());
       log::info() << std::string(&buf[0]);
       id++;
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 5fb58ad4..7dd511f5 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -40,41 +40,47 @@ extern "C" {
 
 using namespace firestarter;
 
-Firestarter::Firestarter(
-    const int argc, const char **argv, std::chrono::seconds const &timeout,
-    unsigned loadPercent, std::chrono::microseconds const &period,
-    unsigned requestedNumThreads, std::string const &cpuBind,
-    bool printFunctionSummary, unsigned functionId, bool listInstructionGroups,
-    std::string const &instructionGroups, unsigned lineCount,
-    bool allowUnavailablePayload, bool dumpRegisters,
-    std::chrono::seconds const &dumpRegistersTimeDelta,
-    std::string const &dumpRegistersOutpath, bool errorDetection, int gpus,
-    unsigned gpuMatrixSize, bool gpuUseFloat, bool gpuUseDouble,
-    bool listMetrics, bool measurement,
-    std::chrono::milliseconds const &startDelta,
-    std::chrono::milliseconds const &stopDelta,
-    std::chrono::milliseconds const &measurementInterval,
-    std::vector<std::string> const &metricPaths,
-    std::vector<std::string> const &stdinMetrics, bool optimize,
-    std::chrono::seconds const &preheat,
-    std::string const &optimizationAlgorithm,
-    std::vector<std::string> const &optimizationMetrics,
-    std::chrono::seconds const &evaluationDuration, unsigned individuals,
-    std::string const &optimizeOutfile, unsigned generations, double nsga2_cr,
-    double nsga2_m)
-    : _argc(argc), _argv(argv), _timeout(timeout), _loadPercent(loadPercent),
-      _period(period), _dumpRegisters(dumpRegisters),
-      _dumpRegistersTimeDelta(dumpRegistersTimeDelta),
-      _dumpRegistersOutpath(dumpRegistersOutpath),
-      _errorDetection(errorDetection), _gpus(gpus),
-      _gpuMatrixSize(gpuMatrixSize), _gpuUseFloat(gpuUseFloat),
-      _gpuUseDouble(gpuUseDouble), _startDelta(startDelta),
-      _stopDelta(stopDelta), _measurement(measurement), _optimize(optimize),
-      _preheat(preheat), _optimizationAlgorithm(optimizationAlgorithm),
-      _optimizationMetrics(optimizationMetrics),
-      _evaluationDuration(evaluationDuration), _individuals(individuals),
-      _optimizeOutfile(optimizeOutfile), _generations(generations),
-      _nsga2_cr(nsga2_cr), _nsga2_m(nsga2_m) {
+Firestarter::Firestarter(const int argc, const char** argv, std::chrono::seconds const& timeout, unsigned loadPercent,
+                         std::chrono::microseconds const& period, unsigned requestedNumThreads,
+                         std::string const& cpuBind, bool printFunctionSummary, unsigned functionId,
+                         bool listInstructionGroups, std::string const& instructionGroups, unsigned lineCount,
+                         bool allowUnavailablePayload, bool dumpRegisters,
+                         std::chrono::seconds const& dumpRegistersTimeDelta, std::string const& dumpRegistersOutpath,
+                         bool errorDetection, int gpus, unsigned gpuMatrixSize, bool gpuUseFloat, bool gpuUseDouble,
+                         bool listMetrics, bool measurement, std::chrono::milliseconds const& startDelta,
+                         std::chrono::milliseconds const& stopDelta,
+                         std::chrono::milliseconds const& measurementInterval,
+                         std::vector<std::string> const& metricPaths, std::vector<std::string> const& stdinMetrics,
+                         bool optimize, std::chrono::seconds const& preheat, std::string const& optimizationAlgorithm,
+                         std::vector<std::string> const& optimizationMetrics,
+                         std::chrono::seconds const& evaluationDuration, unsigned individuals,
+                         std::string const& optimizeOutfile, unsigned generations, double nsga2_cr, double nsga2_m)
+    : _argc(argc)
+    , _argv(argv)
+    , _timeout(timeout)
+    , _loadPercent(loadPercent)
+    , _period(period)
+    , _dumpRegisters(dumpRegisters)
+    , _dumpRegistersTimeDelta(dumpRegistersTimeDelta)
+    , _dumpRegistersOutpath(dumpRegistersOutpath)
+    , _errorDetection(errorDetection)
+    , _gpus(gpus)
+    , _gpuMatrixSize(gpuMatrixSize)
+    , _gpuUseFloat(gpuUseFloat)
+    , _gpuUseDouble(gpuUseDouble)
+    , _startDelta(startDelta)
+    , _stopDelta(stopDelta)
+    , _measurement(measurement)
+    , _optimize(optimize)
+    , _preheat(preheat)
+    , _optimizationAlgorithm(optimizationAlgorithm)
+    , _optimizationMetrics(optimizationMetrics)
+    , _evaluationDuration(evaluationDuration)
+    , _individuals(individuals)
+    , _optimizeOutfile(optimizeOutfile)
+    , _generations(generations)
+    , _nsga2_cr(nsga2_cr)
+    , _nsga2_m(nsga2_m) {
   int returnCode;
 
   _load = (_period * _loadPercent) / 100;
@@ -90,18 +96,15 @@ Firestarter::Firestarter(
   (void)stdinMetrics;
 #endif
 
-#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||            \
-    defined(_M_X64)
+#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
   this->_environment = new environment::x86::X86Environment();
 #endif
 
-  if (EXIT_SUCCESS != (returnCode = this->environment().evaluateCpuAffinity(
-                           requestedNumThreads, cpuBind))) {
+  if (EXIT_SUCCESS != (returnCode = this->environment().evaluateCpuAffinity(requestedNumThreads, cpuBind))) {
     std::exit(returnCode);
   }
 
-#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||            \
-    defined(_M_X64)
+#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
   // Error detection uses crc32 instruction added by the SSE4.2 extension to x86
   if (_errorDetection) {
     if (!_environment->topology().featuresAsmjit().has(asmjit::CpuFeatures::X86::kSSE4_2)) {
@@ -112,10 +115,9 @@ Firestarter::Firestarter(
 #endif
 
   if (_errorDetection && this->environment().requestedNumThreads() < 2) {
-    throw std::invalid_argument(
-        "Option --error-detection must run with 2 or more threads. Number of "
-        "threads is " +
-        std::to_string(this->environment().requestedNumThreads()) + "\n");
+    throw std::invalid_argument("Option --error-detection must run with 2 or more threads. Number of "
+                                "threads is " +
+                                std::to_string(this->environment().requestedNumThreads()) + "\n");
   }
 
   this->environment().evaluateFunctions();
@@ -125,8 +127,7 @@ Firestarter::Firestarter(
     std::exit(EXIT_SUCCESS);
   }
 
-  if (EXIT_SUCCESS != (returnCode = this->environment().selectFunction(
-                           functionId, allowUnavailablePayload))) {
+  if (EXIT_SUCCESS != (returnCode = this->environment().selectFunction(functionId, allowUnavailablePayload))) {
     std::exit(returnCode);
   }
 
@@ -136,9 +137,7 @@ Firestarter::Firestarter(
   }
 
   if (!instructionGroups.empty()) {
-    if (EXIT_SUCCESS !=
-        (returnCode =
-             this->environment().selectInstructionGroups(instructionGroups))) {
+    if (EXIT_SUCCESS != (returnCode = this->environment().selectInstructionGroups(instructionGroups))) {
       std::exit(returnCode);
     }
   }
@@ -150,8 +149,7 @@ Firestarter::Firestarter(
 #if defined(linux) || defined(__linux__)
   if (_measurement || listMetrics || _optimize) {
     _measurementWorker = std::make_shared<measurement::MeasurementWorker>(
-        measurementInterval, this->environment().requestedNumThreads(),
-        metricPaths, stdinMetrics);
+        measurementInterval, this->environment().requestedNumThreads(), metricPaths, stdinMetrics);
 
     if (listMetrics) {
       log::info() << _measurementWorker->availableMetrics();
@@ -168,23 +166,19 @@ Firestarter::Firestarter(
     }
 
     // check if selected metrics are initialized
-    for (auto const &optimizationMetric : optimizationMetrics) {
-      auto nameEqual = [optimizationMetric](auto const &name) {
+    for (auto const& optimizationMetric : optimizationMetrics) {
+      auto nameEqual = [optimizationMetric](auto const& name) {
         auto invertedName = "-" + name;
-        return name.compare(optimizationMetric) == 0 ||
-               invertedName.compare(optimizationMetric) == 0;
+        return name.compare(optimizationMetric) == 0 || invertedName.compare(optimizationMetric) == 0;
       };
       // metric name is not found
       if (std::find_if(all.begin(), all.end(), nameEqual) == all.end()) {
-        log::error() << "Metric \"" << optimizationMetric
-                     << "\" does not exist.";
+        log::error() << "Metric \"" << optimizationMetric << "\" does not exist.";
         std::exit(EXIT_FAILURE);
       }
       // metric has not initialized properly
-      if (std::find_if(initialized.begin(), initialized.end(), nameEqual) ==
-          initialized.end()) {
-        log::error() << "Metric \"" << optimizationMetric
-                     << "\" failed to initialize.";
+      if (std::find_if(initialized.begin(), initialized.end(), nameEqual) == initialized.end()) {
+        log::error() << "Metric \"" << optimizationMetric << "\" failed to initialize.";
         std::exit(EXIT_FAILURE);
       }
     }
@@ -192,23 +186,23 @@ Firestarter::Firestarter(
 
   if (_optimize) {
     auto applySettings = std::bind(
-        [this](std::vector<std::pair<std::string, unsigned>> const &setting) {
+        [this](std::vector<std::pair<std::string, unsigned>> const& setting) {
           using Clock = std::chrono::high_resolution_clock;
           auto start = Clock::now();
 
-          for (auto &thread : this->loadThreads) {
+          for (auto& thread : this->loadThreads) {
             auto td = thread.second;
 
             td->config().setPayloadSettings(setting);
           }
 
-          for (auto const &thread : this->loadThreads) {
+          for (auto const& thread : this->loadThreads) {
             auto td = thread.second;
 
             td->mutex.lock();
           }
 
-          for (auto const &thread : this->loadThreads) {
+          for (auto const& thread : this->loadThreads) {
             auto td = thread.second;
 
             td->comm = THREAD_SWITCH;
@@ -217,7 +211,7 @@ Firestarter::Firestarter(
 
           this->loadVar = LOAD_SWITCH;
 
-          for (auto const &thread : this->loadThreads) {
+          for (auto const& thread : this->loadThreads) {
             auto td = thread.second;
             bool ack;
 
@@ -239,7 +233,7 @@ Firestarter::Firestarter(
           unsigned long long startTimestamp = 0xffffffffffffffff;
           unsigned long long stopTimestamp = 0;
 
-          for (auto const &thread : this->loadThreads) {
+          for (auto const& thread : this->loadThreads) {
             auto td = thread.second;
 
             if (startTimestamp > td->lastStartTsc) {
@@ -250,46 +244,33 @@ Firestarter::Firestarter(
             }
           }
 
-          for (auto const &thread : this->loadThreads) {
+          for (auto const& thread : this->loadThreads) {
             auto td = thread.second;
-            ipc_estimate_metric_insert(
-                (double)td->lastIterations *
-                (double)this->loadThreads.front()
-                    .second->config()
-                    .payload()
-                    .instructions() /
-                (double)(stopTimestamp - startTimestamp));
+            ipc_estimate_metric_insert((double)td->lastIterations *
+                                       (double)this->loadThreads.front().second->config().payload().instructions() /
+                                       (double)(stopTimestamp - startTimestamp));
           }
 
           auto end = Clock::now();
 
           log::trace() << "Switching payload took "
-                       << std::chrono::duration_cast<std::chrono::milliseconds>(
-                              end - start)
-                              .count()
-                       << "ms";
+                       << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms";
         },
         std::placeholders::_1);
 
-    auto prob =
-        std::make_shared<firestarter::optimizer::problem::CLIArgumentProblem>(
-            std::move(applySettings), _measurementWorker, _optimizationMetrics,
-            _evaluationDuration, _startDelta, _stopDelta,
-            this->environment().selectedConfig().payloadItems());
+    auto prob = std::make_shared<firestarter::optimizer::problem::CLIArgumentProblem>(
+        std::move(applySettings), _measurementWorker, _optimizationMetrics, _evaluationDuration, _startDelta,
+        _stopDelta, this->environment().selectedConfig().payloadItems());
 
     _population = firestarter::optimizer::Population(std::move(prob));
 
     if (_optimizationAlgorithm == "NSGA2") {
-      _algorithm = std::make_unique<firestarter::optimizer::algorithm::NSGA2>(
-          _generations, _nsga2_cr, _nsga2_m);
+      _algorithm = std::make_unique<firestarter::optimizer::algorithm::NSGA2>(_generations, _nsga2_cr, _nsga2_m);
     } else {
-      throw std::invalid_argument("Algorithm " + _optimizationAlgorithm +
-                                  " unknown.");
+      throw std::invalid_argument("Algorithm " + _optimizationAlgorithm + " unknown.");
     }
 
-    _algorithm->checkPopulation(
-        static_cast<firestarter::optimizer::Population const &>(_population),
-        _individuals);
+    _algorithm->checkPopulation(static_cast<firestarter::optimizer::Population const&>(_population), _individuals);
   }
 #endif
 
@@ -299,8 +280,7 @@ Firestarter::Firestarter(
 
   // setup thread with either high or low load configured at the start
   // low loads has to know the length of the period
-  if (EXIT_SUCCESS != (returnCode = this->initLoadWorkers((_loadPercent == 0),
-                                                          _period.count()))) {
+  if (EXIT_SUCCESS != (returnCode = this->initLoadWorkers((_loadPercent == 0), _period.count()))) {
     std::exit(returnCode);
   }
 
@@ -328,16 +308,13 @@ void Firestarter::mainThread() {
   this->environment().printThreadSummary();
 
 #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
-  _cuda = std::make_unique<cuda::Cuda>(&this->loadVar, _gpuUseFloat,
-                                       _gpuUseDouble, _gpuMatrixSize, _gpus);
+  _cuda = std::make_unique<cuda::Cuda>(&this->loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus);
 #endif
 
 #ifdef FIRESTARTER_BUILD_ONEAPI
-  _oneapi = std::make_unique<oneapi::OneAPI>(&this->loadVar, _gpuUseFloat,
-                                       _gpuUseDouble, _gpuMatrixSize, _gpus);
+  _oneapi = std::make_unique<oneapi::OneAPI>(&this->loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus);
 #endif
 
-
 #if defined(linux) || defined(__linux__)
   // if measurement is enabled, start it here
   if (_measurement) {
@@ -350,8 +327,7 @@ void Firestarter::mainThread() {
 #ifdef FIRESTARTER_DEBUG_FEATURES
   if (_dumpRegisters) {
     int returnCode;
-    if (EXIT_SUCCESS != (returnCode = this->initDumpRegisterWorker(
-                             _dumpRegistersTimeDelta, _dumpRegistersOutpath))) {
+    if (EXIT_SUCCESS != (returnCode = this->initDumpRegisterWorker(_dumpRegistersTimeDelta, _dumpRegistersOutpath))) {
       std::exit(returnCode);
     }
   }
@@ -366,20 +342,17 @@ void Firestarter::mainThread() {
     auto startTime = optimizer::History::getTime();
 
     Firestarter::_optimizer = std::make_unique<optimizer::OptimizerWorker>(
-        std::move(_algorithm), _population, _optimizationAlgorithm,
-        _individuals, _preheat);
+        std::move(_algorithm), _population, _optimizationAlgorithm, _individuals, _preheat);
 
     // wait here until optimizer thread terminates
     Firestarter::_optimizer->join();
 
     auto payloadItems = this->environment().selectedConfig().payloadItems();
 
-    firestarter::optimizer::History::save(_optimizeOutfile, startTime,
-                                          payloadItems, _argc, _argv);
+    firestarter::optimizer::History::save(_optimizeOutfile, startTime, payloadItems, _argc, _argv);
 
     // print the best 20 according to each metric
-    firestarter::optimizer::History::printBest(_optimizationMetrics,
-                                               payloadItems);
+    firestarter::optimizer::History::printBest(_optimizationMetrics, payloadItems);
 
     // stop all the load threads
     std::raise(SIGTERM);
@@ -403,11 +376,9 @@ void Firestarter::mainThread() {
   if (_measurement) {
     // TODO: clear this up
     log::info() << "metric,num_timepoints,duration_ms,average,stddev";
-    for (auto const &[name, sum] :
-         _measurementWorker->getValues(_startDelta, _stopDelta)) {
-      log::info() << std::quoted(name) << "," << sum.num_timepoints << ","
-                  << sum.duration.count() << "," << sum.average << ","
-                  << sum.stddev;
+    for (auto const& [name, sum] : _measurementWorker->getValues(_startDelta, _stopDelta)) {
+      log::info() << std::quoted(name) << "," << sum.num_timepoints << "," << sum.duration.count() << "," << sum.average
+                  << "," << sum.stddev;
     }
   }
 #endif
@@ -420,8 +391,7 @@ void Firestarter::mainThread() {
 void Firestarter::setLoad(unsigned long long value) {
   // signal load change to workers
   Firestarter::loadVar = value;
-#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||            \
-    defined(_M_X64)
+#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
 #ifndef _MSC_VER
   __asm__ __volatile__("mfence;");
 #else
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index 3c922cf6..53323187 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -43,7 +43,7 @@ extern "C" {
 
 using namespace firestarter;
 
-auto aligned_free_deleter = [](void *p) { ALIGNED_FREE(p); };
+auto aligned_free_deleter = [](void* p) { ALIGNED_FREE(p); };
 
 int Firestarter::initLoadWorkers(bool lowLoad, unsigned long long period) {
   int returnCode;
@@ -62,38 +62,32 @@ int Firestarter::initLoadWorkers(bool lowLoad, unsigned long long period) {
   // communication pointers and add these to the threaddata
   if (_errorDetection) {
     for (unsigned long long i = 0; i < numThreads; i++) {
-      auto commPtr = reinterpret_cast<unsigned long long *>(
-          ALIGNED_MALLOC(2 * sizeof(unsigned long long), 64));
+      auto commPtr = reinterpret_cast<unsigned long long*>(ALIGNED_MALLOC(2 * sizeof(unsigned long long), 64));
       assert(commPtr);
-      this->errorCommunication.push_back(
-          std::shared_ptr<unsigned long long>(commPtr, aligned_free_deleter));
-      log::debug() << "Threads " << (i + numThreads - 1) % numThreads << " and "
-                   << i << " commPtr = 0x" << std::setfill('0')
-                   << std::setw(sizeof(unsigned long long) * 2) << std::hex
+      this->errorCommunication.push_back(std::shared_ptr<unsigned long long>(commPtr, aligned_free_deleter));
+      log::debug() << "Threads " << (i + numThreads - 1) % numThreads << " and " << i << " commPtr = 0x"
+                   << std::setfill('0') << std::setw(sizeof(unsigned long long) * 2) << std::hex
                    << (unsigned long long)commPtr;
     }
   }
 
   for (unsigned long long i = 0; i < numThreads; i++) {
-    auto td = std::make_shared<LoadWorkerData>(i, this->environment(),
-                                               &this->loadVar, period,
-                                               _dumpRegisters, _errorDetection);
+    auto td = std::make_shared<LoadWorkerData>(i, this->environment(), &this->loadVar, period, _dumpRegisters,
+                                               _errorDetection);
 
     if (_errorDetection) {
       // distribute pointers for error deteciton. (set threads in a ring)
       // give this thread the left pointer i and right pointer (i+1) %
       // requestedNumThreads().
-      td->setErrorCommunication(this->errorCommunication[i],
-                                this->errorCommunication[(i + 1) % numThreads]);
+      td->setErrorCommunication(this->errorCommunication[i], this->errorCommunication[(i + 1) % numThreads]);
     }
 
-    auto dataCacheSizeIt =
-        td->config().platformConfig().dataCacheBufferSize().begin();
+    auto dataCacheSizeIt = td->config().platformConfig().dataCacheBufferSize().begin();
     auto ramBufferSize = td->config().platformConfig().ramBufferSize();
 
-    td->buffersizeMem = (*dataCacheSizeIt + *std::next(dataCacheSizeIt, 1) +
-                         *std::next(dataCacheSizeIt, 2) + ramBufferSize) /
-                        td->config().thread() / sizeof(unsigned long long);
+    td->buffersizeMem =
+        (*dataCacheSizeIt + *std::next(dataCacheSizeIt, 1) + *std::next(dataCacheSizeIt, 2) + ramBufferSize) /
+        td->config().thread() / sizeof(unsigned long long);
 
     // create the thread
     std::thread t(Firestarter::loadThreadWorker, td);
@@ -102,8 +96,7 @@ int Firestarter::initLoadWorkers(bool lowLoad, unsigned long long period) {
 
     if (i == 0) {
       // only show error for all worker threads except first.
-      firestarter::logging::FirstWorkerThreadFilter<
-          firestarter::logging::record>::setFirstThread(t.get_id());
+      firestarter::logging::FirstWorkerThreadFilter<firestarter::logging::record>::setFirstThread(t.get_id());
     }
 
     this->loadThreads.push_back(std::make_pair(std::move(t), td));
@@ -118,20 +111,20 @@ void Firestarter::signalLoadWorkers(int comm) {
   bool ack;
 
   // start the work
-  for (auto const &thread : this->loadThreads) {
+  for (auto const& thread : this->loadThreads) {
     auto td = thread.second;
 
     td->mutex.lock();
   }
 
-  for (auto const &thread : this->loadThreads) {
+  for (auto const& thread : this->loadThreads) {
     auto td = thread.second;
 
     td->comm = comm;
     td->mutex.unlock();
   }
 
-  for (auto const &thread : this->loadThreads) {
+  for (auto const& thread : this->loadThreads) {
     auto td = thread.second;
 
     do {
@@ -148,7 +141,7 @@ void Firestarter::signalLoadWorkers(int comm) {
 
 void Firestarter::joinLoadWorkers() {
   // wait for threads after watchdog has requested termination
-  for (auto &thread : this->loadThreads) {
+  for (auto& thread : this->loadThreads) {
     thread.first.join();
   }
 }
@@ -160,8 +153,7 @@ void Firestarter::printThreadErrorReport() {
     std::vector<bool> errors(maxSize, false);
 
     for (decltype(maxSize) i = 0; i < maxSize; i++) {
-      auto errorDetectionStruct =
-          this->loadThreads[i].second->errorDetectionStruct();
+      auto errorDetectionStruct = this->loadThreads[i].second->errorDetectionStruct();
 
       if (errorDetectionStruct->errorLeft) {
         errors[(i + maxSize - 1) % maxSize] = true;
@@ -173,10 +165,8 @@ void Firestarter::printThreadErrorReport() {
 
     for (decltype(maxSize) i = 0; i < maxSize; i++) {
       if (errors[i]) {
-        log::fatal()
-            << "Data mismatch between Threads " << i << " and "
-            << (i + 1) % maxSize
-            << ".\n       This may be caused by bit-flips in the hardware.";
+        log::fatal() << "Data mismatch between Threads " << i << " and " << (i + 1) % maxSize
+                     << ".\n       This may be caused by bit-flips in the hardware.";
       }
     }
   }
@@ -191,7 +181,7 @@ void Firestarter::printPerformanceReport() {
 
   log::debug() << "\nperformance report:\n";
 
-  for (auto const &thread : this->loadThreads) {
+  for (auto const& thread : this->loadThreads) {
     auto td = thread.second;
 
     log::debug() << "Thread " << td->id() << ": " << td->iterations
@@ -207,39 +197,33 @@ void Firestarter::printPerformanceReport() {
     iterations += td->iterations;
   }
 
-  double runtime = (double)(stopTimestamp - startTimestamp) /
-                   (double)this->environment().topology().clockrate();
+  double runtime = (double)(stopTimestamp - startTimestamp) / (double)this->environment().topology().clockrate();
   double gFlops =
-      (double)this->loadThreads.front().second->config().payload().flops() *
-      0.000000001 * (double)iterations / runtime;
+      (double)this->loadThreads.front().second->config().payload().flops() * 0.000000001 * (double)iterations / runtime;
   double bandwidth =
-      (double)this->loadThreads.front().second->config().payload().bytes() *
-      0.000000001 * (double)iterations / runtime;
+      (double)this->loadThreads.front().second->config().payload().bytes() * 0.000000001 * (double)iterations / runtime;
 
   // insert values for ipc-estimate metric
   // if we are on linux
 #if defined(linux) || defined(__linux__)
   if (_measurement) {
-    for (auto const &thread : this->loadThreads) {
+    for (auto const& thread : this->loadThreads) {
       auto td = thread.second;
       ipc_estimate_metric_insert((double)td->iterations *
-                                 (double)this->loadThreads.front()
-                                     .second->config()
-                                     .payload()
-                                     .instructions() /
+                                 (double)this->loadThreads.front().second->config().payload().instructions() /
                                  (double)(stopTimestamp - startTimestamp));
     }
   }
 #endif
 
   // format runtime, gflops and bandwidth %.2f
-  const char *fmt = "%.2f";
+  const char* fmt = "%.2f";
   int size;
 
-#define FORMAT(input)                                                          \
-  size = std::snprintf(nullptr, 0, fmt, input);                                \
-  std::vector<char> input##Vector(size + 1);                                   \
-  std::snprintf(&input##Vector[0], input##Vector.size(), fmt, input);          \
+#define FORMAT(input)                                                                                                  \
+  size = std::snprintf(nullptr, 0, fmt, input);                                                                        \
+  std::vector<char> input##Vector(size + 1);                                                                           \
+  std::snprintf(&input##Vector[0], input##Vector.size(), fmt, input);                                                  \
   auto input##String = std::string(&input##Vector[0])
 
   FORMAT(runtime);
@@ -248,21 +232,19 @@ void Firestarter::printPerformanceReport() {
 
 #undef FORMAT
 
-  log::debug()
-      << "\n"
-      << "total iterations: " << iterations << "\n"
-      << "runtime: " << runtimeString << " seconds ("
-      << stopTimestamp - startTimestamp << " cycles)\n"
-      << "\n"
-      << "estimated floating point performance: " << gFlopsString << " GFLOPS\n"
-      << "estimated memory bandwidth*: " << bandwidthString << " GB/s\n"
-      << "\n"
-      << "* this estimate is highly unreliable if --function is used in order "
-         "to "
-         "select\n"
-      << "  a function that is not optimized for your architecture, or if "
-         "FIRESTARTER is\n"
-      << "  executed on an unsupported architecture!";
+  log::debug() << "\n"
+               << "total iterations: " << iterations << "\n"
+               << "runtime: " << runtimeString << " seconds (" << stopTimestamp - startTimestamp << " cycles)\n"
+               << "\n"
+               << "estimated floating point performance: " << gFlopsString << " GFLOPS\n"
+               << "estimated memory bandwidth*: " << bandwidthString << " GB/s\n"
+               << "\n"
+               << "* this estimate is highly unreliable if --function is used in order "
+                  "to "
+                  "select\n"
+               << "  a function that is not optimized for your architecture, or if "
+                  "FIRESTARTER is\n"
+               << "  executed on an unsupported architecture!";
 }
 
 void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
@@ -296,36 +278,30 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
       td->environment().setCpuAffinity(td->id());
 
       // compile payload
-      td->config().payload().compilePayload(
-          td->config().payloadSettings(), td->config().instructionCacheSize(),
-          td->config().dataCacheBufferSize(), td->config().ramBufferSize(),
-          td->config().thread(), td->config().lines(), td->dumpRegisters,
-          td->errorDetection);
+      td->config().payload().compilePayload(td->config().payloadSettings(), td->config().instructionCacheSize(),
+                                            td->config().dataCacheBufferSize(), td->config().ramBufferSize(),
+                                            td->config().thread(), td->config().lines(), td->dumpRegisters,
+                                            td->errorDetection);
 
       // allocate memory
       // if we should dump some registers, we use the first part of the memory
       // for them.
-      td->addrMem =
-          reinterpret_cast<unsigned long long *>(ALIGNED_MALLOC(
-              (td->buffersizeMem + td->addrOffset) * sizeof(unsigned long long),
-              64)) +
-          td->addrOffset;
+      td->addrMem = reinterpret_cast<unsigned long long*>(
+                        ALIGNED_MALLOC((td->buffersizeMem + td->addrOffset) * sizeof(unsigned long long), 64)) +
+                    td->addrOffset;
 
       // exit application on error
       if (td->addrMem - td->addrOffset == nullptr) {
-        workerLog::error() << "Could not allocate memory for CPU load thread "
-                           << td->id() << "\n";
+        workerLog::error() << "Could not allocate memory for CPU load thread " << td->id() << "\n";
         exit(ENOMEM);
       }
 
       if (td->dumpRegisters) {
-        reinterpret_cast<DumpRegisterStruct *>(td->addrMem - td->addrOffset)
-            ->dumpVar = DumpVariable::Wait;
+        reinterpret_cast<DumpRegisterStruct*>(td->addrMem - td->addrOffset)->dumpVar = DumpVariable::Wait;
       }
 
       if (td->errorDetection) {
-        auto errorDetectionStruct = reinterpret_cast<ErrorDetectionStruct *>(
-            td->addrMem - td->addrOffset);
+        auto errorDetectionStruct = reinterpret_cast<ErrorDetectionStruct*>(td->addrMem - td->addrOffset);
 
         std::memset(errorDetectionStruct, 0, sizeof(ErrorDetectionStruct));
 
@@ -334,8 +310,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
         errorDetectionStruct->communicationRight = td->communicationRight.get();
 
         // do first touch memset 0 for the communication pointers
-        std::memset((void *)errorDetectionStruct->communicationLeft, 0,
-                    sizeof(unsigned long long) * 2);
+        std::memset((void*)errorDetectionStruct->communicationLeft, 0, sizeof(unsigned long long) * 2);
       }
 
       // call init function
@@ -354,11 +329,9 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
         VT_USER_START("HIGH_LOAD_FUNC");
 #endif
 #ifdef ENABLE_SCOREP
-        SCOREP_USER_REGION_BY_NAME_BEGIN("HIGH",
-                                         SCOREP_USER_REGION_TYPE_COMMON);
+        SCOREP_USER_REGION_BY_NAME_BEGIN("HIGH", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
-        td->iterations = td->config().payload().highLoadFunction(
-            td->addrMem, td->addrHigh, td->iterations);
+        td->iterations = td->config().payload().highLoadFunction(td->addrMem, td->addrHigh, td->iterations);
 
         // call low load function
 #ifdef ENABLE_VTRACING
@@ -393,11 +366,10 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
       break;
     case THREAD_SWITCH:
       // compile payload
-      td->config().payload().compilePayload(
-          td->config().payloadSettings(), td->config().instructionCacheSize(),
-          td->config().dataCacheBufferSize(), td->config().ramBufferSize(),
-          td->config().thread(), td->config().lines(), td->dumpRegisters,
-          td->errorDetection);
+      td->config().payload().compilePayload(td->config().payloadSettings(), td->config().instructionCacheSize(),
+                                            td->config().dataCacheBufferSize(), td->config().ramBufferSize(),
+                                            td->config().thread(), td->config().lines(), td->dumpRegisters,
+                                            td->errorDetection);
 
       // call init function
       td->config().payload().init(td->addrMem, td->buffersizeMem);
diff --git a/src/firestarter/Main.cpp b/src/firestarter/Main.cpp
index fa96740f..5076d812 100644
--- a/src/firestarter/Main.cpp
+++ b/src/firestarter/Main.cpp
@@ -27,16 +27,16 @@
 #include <string>
 
 struct Config {
-  inline static const std::vector<std::pair<std::string, std::string>>
-      optionsMap = {{"information", "Information Options:\n"},
-                    {"general", "General Options:\n"},
-                    {"specialized-workloads", "Specialized workloads:\n"},
+  inline static const std::vector<std::pair<std::string, std::string>> optionsMap = {
+      {"information", "Information Options:\n"},
+      {"general", "General Options:\n"},
+      {"specialized-workloads", "Specialized workloads:\n"},
 #ifdef FIRESTARTER_DEBUG_FEATURES
-                    {"debug", "Debugging:\n"},
+      {"debug", "Debugging:\n"},
 #endif
 #if defined(linux) || defined(__linux__)
-                    {"measurement", "Measurement:\n"},
-                    {"optimization", "Optimization:\n"}
+      {"measurement", "Measurement:\n"},
+      {"optimization", "Optimization:\n"}
 #endif
   };
 
@@ -84,48 +84,41 @@ struct Config {
   double nsga2_cr;
   double nsga2_m;
 
-  Config(int argc, const char **argv);
+  Config(int argc, const char** argv);
 };
 
 void print_copyright() {
-  firestarter::log::info()
-      << "This program is free software: you can redistribute it and/or "
-         "modify\n"
-      << "it under the terms of the GNU General Public License as published "
-         "by\n"
-      << "the Free Software Foundation, either version 3 of the License, or\n"
-      << "(at your option) any later version.\n"
-      << "\n"
-      << "You should have received a copy of the GNU General Public License\n"
-      << "along with this program.  If not, see "
-         "<http://www.gnu.org/licenses/>.\n";
+  firestarter::log::info() << "This program is free software: you can redistribute it and/or "
+                              "modify\n"
+                           << "it under the terms of the GNU General Public License as published "
+                              "by\n"
+                           << "the Free Software Foundation, either version 3 of the License, or\n"
+                           << "(at your option) any later version.\n"
+                           << "\n"
+                           << "You should have received a copy of the GNU General Public License\n"
+                           << "along with this program.  If not, see "
+                              "<http://www.gnu.org/licenses/>.\n";
 }
 
 void print_warranty() {
-  firestarter::log::info()
-      << "This program is distributed in the hope that it will be useful,\n"
-      << "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
-      << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
-      << "GNU General Public License for more details.\n"
-      << "\n"
-      << "You should have received a copy of the GNU General Public License\n"
-      << "along with this program.  If not, see "
-         "<http://www.gnu.org/licenses/>.\n";
+  firestarter::log::info() << "This program is distributed in the hope that it will be useful,\n"
+                           << "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+                           << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
+                           << "GNU General Public License for more details.\n"
+                           << "\n"
+                           << "You should have received a copy of the GNU General Public License\n"
+                           << "along with this program.  If not, see "
+                              "<http://www.gnu.org/licenses/>.\n";
 }
 
-void print_help(cxxopts::Options const &parser, std::string const &section) {
-  std::vector<std::pair<std::string, std::string>> options(
-      Config::optionsMap.size());
+void print_help(cxxopts::Options const& parser, std::string const& section) {
+  std::vector<std::pair<std::string, std::string>> options(Config::optionsMap.size());
 
   if (section.size() == 0) {
-    std::copy(Config::optionsMap.begin(), Config::optionsMap.end(),
-              options.begin());
+    std::copy(Config::optionsMap.begin(), Config::optionsMap.end(), options.begin());
   } else {
-    auto findSection = [&](std::pair<std::string, std::string> const &pair) {
-      return pair.first == section;
-    };
-    auto it = std::copy_if(Config::optionsMap.begin(), Config::optionsMap.end(),
-                           options.begin(), findSection);
+    auto findSection = [&](std::pair<std::string, std::string> const& pair) { return pair.first == section; };
+    auto it = std::copy_if(Config::optionsMap.begin(), Config::optionsMap.end(), options.begin(), findSection);
     options.resize(std::distance(options.begin(), it));
   }
 
@@ -162,7 +155,7 @@ void print_help(cxxopts::Options const &parser, std::string const &section) {
   // clang-format on
 }
 
-Config::Config(int argc, const char **argv) {
+Config::Config(int argc, const char** argv) {
 
   cxxopts::Options parser(argv[0]);
 
@@ -270,17 +263,13 @@ Config::Config(int argc, const char **argv) {
     auto options = parser.parse(argc, argv);
 
     if (options.count("quiet")) {
-      firestarter::logging::filter<firestarter::logging::record>::set_severity(
-          nitro::log::severity_level::warn);
+      firestarter::logging::filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::warn);
     } else if (options.count("report")) {
-      firestarter::logging::filter<firestarter::logging::record>::set_severity(
-          nitro::log::severity_level::debug);
+      firestarter::logging::filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::debug);
     } else if (options.count("debug")) {
-      firestarter::logging::filter<firestarter::logging::record>::set_severity(
-          nitro::log::severity_level::trace);
+      firestarter::logging::filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::trace);
     } else {
-      firestarter::logging::filter<firestarter::logging::record>::set_severity(
-          nitro::log::severity_level::info);
+      firestarter::logging::filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::info);
     }
 
     if (options.count("version")) {
@@ -297,25 +286,18 @@ Config::Config(int argc, const char **argv) {
       std::exit(EXIT_SUCCESS);
     }
 
-    firestarter::log::info()
-        << "This program comes with ABSOLUTELY NO WARRANTY; for details run `"
-        << argv[0] << " -w`.\n"
-        << "This is free software, and you are welcome to redistribute it\n"
-        << "under certain conditions; run `" << argv[0]
-        << " -c` for details.\n";
+    firestarter::log::info() << "This program comes with ABSOLUTELY NO WARRANTY; for details run `" << argv[0]
+                             << " -w`.\n"
+                             << "This is free software, and you are welcome to redistribute it\n"
+                             << "under certain conditions; run `" << argv[0] << " -c` for details.\n";
 
     if (options.count("help")) {
       auto section = options["help"].as<std::string>();
 
       // section not found
-      auto findSection = [&](std::pair<std::string, std::string> const &pair) {
-        return pair.first == section;
-      };
-      if (std::find_if(optionsMap.begin(), optionsMap.end(), findSection) ==
-              optionsMap.end() &&
-          section.size() != 0) {
-        throw std::invalid_argument("Section \"" + section +
-                                    "\" not found in help.");
+      auto findSection = [&](std::pair<std::string, std::string> const& pair) { return pair.first == section; };
+      if (std::find_if(optionsMap.begin(), optionsMap.end(), findSection) == optionsMap.end() && section.size() != 0) {
+        throw std::invalid_argument("Section \"" + section + "\" not found in help.");
       }
 
       print_help(parser, section);
@@ -340,29 +322,25 @@ Config::Config(int argc, const char **argv) {
     allowUnavailablePayload = options.count("allow-unavailable-payload");
     dumpRegisters = options.count("dump-registers");
     if (dumpRegisters) {
-      dumpRegistersTimeDelta =
-          std::chrono::seconds(options["dump-registers"].as<unsigned>());
+      dumpRegistersTimeDelta = std::chrono::seconds(options["dump-registers"].as<unsigned>());
       if (timeout != std::chrono::microseconds::zero() && loadPercent != 100) {
         throw std::invalid_argument("Option --dump-registers may only be used "
                                     "without a timeout and full load.");
       }
       if (errorDetection) {
-        throw std::invalid_argument(
-            "Options --dump-registers and --error-detection cannot be used "
-            "together.");
+        throw std::invalid_argument("Options --dump-registers and --error-detection cannot be used "
+                                    "together.");
       }
     }
 #endif
 
     requestedNumThreads = options["threads"].as<unsigned>();
 
-#if (defined(linux) || defined(__linux__)) &&                                  \
-    defined(FIRESTARTER_THREAD_AFFINITY)
+#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
     cpuBind = options["bind"].as<std::string>();
     if (!cpuBind.empty()) {
       if (requestedNumThreads != 0) {
-        throw std::invalid_argument(
-            "Options -b/--bind and -n/--threads cannot be used together.");
+        throw std::invalid_argument("Options -b/--bind and -n/--threads cannot be used together.");
       }
     }
 #endif
@@ -378,8 +356,7 @@ Config::Config(int argc, const char **argv) {
 
     gpuMatrixSize = options["matrixsize"].as<unsigned>();
     if (gpuMatrixSize > 0 && gpuMatrixSize < 64) {
-      throw std::invalid_argument(
-          "Option -m/--matrixsize may not be below 64.");
+      throw std::invalid_argument("Option -m/--matrixsize may not be below 64.");
     }
 
     gpus = options["gpus"].as<int>();
@@ -396,17 +373,14 @@ Config::Config(int argc, const char **argv) {
     }
 
 #if defined(linux) || defined(__linux__)
-    startDelta =
-        std::chrono::milliseconds(options["start-delta"].as<unsigned>());
+    startDelta = std::chrono::milliseconds(options["start-delta"].as<unsigned>());
     stopDelta = std::chrono::milliseconds(options["stop-delta"].as<unsigned>());
-    measurementInterval = std::chrono::milliseconds(
-        options["measurement-interval"].as<unsigned>());
+    measurementInterval = std::chrono::milliseconds(options["measurement-interval"].as<unsigned>());
 #ifndef FIRESTARTER_LINK_STATIC
     metricPaths = options["metric-path"].as<std::vector<std::string>>();
 #endif
     if (options.count("metric-from-stdin")) {
-      stdinMetrics =
-          options["metric-from-stdin"].as<std::vector<std::string>>();
+      stdinMetrics = options["metric-from-stdin"].as<std::vector<std::string>>();
     }
     measurement = options.count("measurement");
     listMetrics = options.count("list-metrics");
@@ -417,22 +391,19 @@ Config::Config(int argc, const char **argv) {
                                     "cannot be used together.");
       }
       if (measurement) {
-        throw std::invalid_argument(
-            "Options --measurement and --optimize cannot be used together.");
+        throw std::invalid_argument("Options --measurement and --optimize cannot be used together.");
       }
       preheat = std::chrono::seconds(options["preheat"].as<unsigned>());
       optimizationAlgorithm = options["optimize"].as<std::string>();
       if (options.count("optimization-metric")) {
-        optimizationMetrics =
-            options["optimization-metric"].as<std::vector<std::string>>();
+        optimizationMetrics = options["optimization-metric"].as<std::vector<std::string>>();
       }
       if (loadPercent != 100) {
         throw std::invalid_argument("Options -p | --period and -l | --load are "
                                     "not compatible with --optimize.");
       }
       if (timeout == std::chrono::seconds::zero()) {
-        throw std::invalid_argument(
-            "Option -t | --timeout must be specified for optimization.");
+        throw std::invalid_argument("Option -t | --timeout must be specified for optimization.");
       }
       evaluationDuration = timeout;
       // this will deactivate the watchdog worker
@@ -451,42 +422,37 @@ Config::Config(int argc, const char **argv) {
     }
 #endif
 
-  } catch (std::exception &e) {
+  } catch (std::exception& e) {
     firestarter::log::error() << e.what() << "\n";
     print_help(parser, "");
     std::exit(EXIT_FAILURE);
   }
 }
 
-int main(int argc, const char **argv) {
+int main(int argc, const char** argv) {
 
-  firestarter::log::info()
-      << "FIRESTARTER - A Processor Stress Test Utility, Version "
-      << _FIRESTARTER_VERSION_STRING << "\n"
-      << "Copyright (C) " << _FIRESTARTER_BUILD_YEAR
-      << " TU Dresden, Center for Information Services and High Performance "
-         "Computing"
-      << "\n";
+  firestarter::log::info() << "FIRESTARTER - A Processor Stress Test Utility, Version " << _FIRESTARTER_VERSION_STRING
+                           << "\n"
+                           << "Copyright (C) " << _FIRESTARTER_BUILD_YEAR
+                           << " TU Dresden, Center for Information Services and High Performance "
+                              "Computing"
+                           << "\n";
 
   Config cfg{argc, argv};
 
   try {
     firestarter::Firestarter firestarter(
-        argc, argv, cfg.timeout, cfg.loadPercent, cfg.period,
-        cfg.requestedNumThreads, cfg.cpuBind, cfg.printFunctionSummary,
-        cfg.functionId, cfg.listInstructionGroups, cfg.instructionGroups,
-        cfg.lineCount, cfg.allowUnavailablePayload, cfg.dumpRegisters,
-        cfg.dumpRegistersTimeDelta, cfg.dumpRegistersOutpath,
-        cfg.errorDetection, cfg.gpus, cfg.gpuMatrixSize, cfg.gpuUseFloat,
-        cfg.gpuUseDouble, cfg.listMetrics, cfg.measurement, cfg.startDelta,
-        cfg.stopDelta, cfg.measurementInterval, cfg.metricPaths,
-        cfg.stdinMetrics, cfg.optimize, cfg.preheat, cfg.optimizationAlgorithm,
-        cfg.optimizationMetrics, cfg.evaluationDuration, cfg.individuals,
-        cfg.optimizeOutfile, cfg.generations, cfg.nsga2_cr, cfg.nsga2_m);
+        argc, argv, cfg.timeout, cfg.loadPercent, cfg.period, cfg.requestedNumThreads, cfg.cpuBind,
+        cfg.printFunctionSummary, cfg.functionId, cfg.listInstructionGroups, cfg.instructionGroups, cfg.lineCount,
+        cfg.allowUnavailablePayload, cfg.dumpRegisters, cfg.dumpRegistersTimeDelta, cfg.dumpRegistersOutpath,
+        cfg.errorDetection, cfg.gpus, cfg.gpuMatrixSize, cfg.gpuUseFloat, cfg.gpuUseDouble, cfg.listMetrics,
+        cfg.measurement, cfg.startDelta, cfg.stopDelta, cfg.measurementInterval, cfg.metricPaths, cfg.stdinMetrics,
+        cfg.optimize, cfg.preheat, cfg.optimizationAlgorithm, cfg.optimizationMetrics, cfg.evaluationDuration,
+        cfg.individuals, cfg.optimizeOutfile, cfg.generations, cfg.nsga2_cr, cfg.nsga2_m);
 
     firestarter.mainThread();
 
-  } catch (std::exception const &e) {
+  } catch (std::exception const& e) {
     firestarter::log::error() << e.what();
     return EXIT_FAILURE;
   }
diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp
index 498330ab..efd7a4bc 100644
--- a/src/firestarter/Measurement/MeasurementWorker.cpp
+++ b/src/firestarter/Measurement/MeasurementWorker.cpp
@@ -30,27 +30,25 @@ extern "C" {
 }
 #endif
 
-void insertCallback(void *cls, const char *metricName, int64_t timeSinceEpoch,
-                    double value) {
-  static_cast<firestarter::measurement::MeasurementWorker *>(cls)
-      ->insertCallback(metricName, timeSinceEpoch, value);
+void insertCallback(void* cls, const char* metricName, int64_t timeSinceEpoch, double value) {
+  static_cast<firestarter::measurement::MeasurementWorker*>(cls)->insertCallback(metricName, timeSinceEpoch, value);
 }
 
 using namespace firestarter::measurement;
 
-MeasurementWorker::MeasurementWorker(
-    std::chrono::milliseconds updateInterval, unsigned long long numThreads,
-    std::vector<std::string> const &metricDylibs,
-    std::vector<std::string> const &stdinMetrics)
-    : updateInterval(updateInterval), numThreads(numThreads) {
+MeasurementWorker::MeasurementWorker(std::chrono::milliseconds updateInterval, unsigned long long numThreads,
+                                     std::vector<std::string> const& metricDylibs,
+                                     std::vector<std::string> const& stdinMetrics)
+    : updateInterval(updateInterval)
+    , numThreads(numThreads) {
 
 #ifndef FIRESTARTER_LINK_STATIC
   // open dylibs and find metric symbol.
   // create an entry in _metricDylibs with handle from dlopen and
   // metric_interface_t structure. add this structe as a pointer to metrics.
-  for (auto const &dylib : metricDylibs) {
-    void *handle;
-    const char *filename = dylib.c_str();
+  for (auto const& dylib : metricDylibs) {
+    void* handle;
+    const char* filename = dylib.c_str();
 
     handle = dlopen(dylib.c_str(), RTLD_NOW | RTLD_LOCAL);
 
@@ -62,11 +60,11 @@ MeasurementWorker::MeasurementWorker(
     // clear existing error
     dlerror();
 
-    metric_interface_t *metric = nullptr;
+    metric_interface_t* metric = nullptr;
 
-    metric = (metric_interface_t *)dlsym(handle, "metric");
+    metric = (metric_interface_t*)dlsym(handle, "metric");
 
-    char *error;
+    char* error;
     if ((error = dlerror()) != NULL) {
       firestarter::log::error() << filename << ": " << error;
       dlclose(handle);
@@ -74,8 +72,7 @@ MeasurementWorker::MeasurementWorker(
     }
 
     if (this->findMetricByName(metric->name) != nullptr) {
-      firestarter::log::error()
-          << "A metric named \"" << metric->name << "\" is already loaded.";
+      firestarter::log::error() << "A metric named \"" << metric->name << "\" is already loaded.";
       dlclose(handle);
       continue;
     }
@@ -89,10 +86,9 @@ MeasurementWorker::MeasurementWorker(
 #endif
 
   // setup metric objects for metric names passed from stdin.
-  for (auto const &name : stdinMetrics) {
+  for (auto const& name : stdinMetrics) {
     if (this->findMetricByName(name) != nullptr) {
-      firestarter::log::error()
-          << "A metric named \"" << name << "\" is already loaded.";
+      firestarter::log::error() << "A metric named \"" << name << "\" is already loaded.";
       continue;
     }
 
@@ -103,7 +99,7 @@ MeasurementWorker::MeasurementWorker(
   unsigned maxLength = 0;
   std::map<std::string, bool> available;
 
-  for (auto const &metric : this->metrics) {
+  for (auto const& metric : this->metrics) {
     std::string name(metric->name);
     maxLength = maxLength < name.size() ? name.size() : maxLength;
     int returnCode = metric->init();
@@ -114,7 +110,7 @@ MeasurementWorker::MeasurementWorker(
   unsigned padding = maxLength > 6 ? maxLength - 6 : 0;
   ss << "  METRIC" << std::string(padding + 1, ' ') << "| available\n";
   ss << "  " << std::string(padding + 7, '-') << "-----------\n";
-  for (auto const &[key, value] : available) {
+  for (auto const& [key, value] : available) {
     ss << "  " << key << std::string(padding + 7 - key.size(), ' ') << "| ";
     ss << (value ? "yes" : "no") << "\n";
   }
@@ -122,16 +118,12 @@ MeasurementWorker::MeasurementWorker(
   this->availableMetricsString = ss.str();
 
   pthread_create(&this->workerThread, NULL,
-                 reinterpret_cast<void *(*)(void *)>(
-                     MeasurementWorker::dataAcquisitionWorker),
-                 this);
+                 reinterpret_cast<void* (*)(void*)>(MeasurementWorker::dataAcquisitionWorker), this);
 
   // create a worker for getting metric values from stdin
   if (this->_stdinMetrics.size() > 0) {
     pthread_create(&this->stdinThread, NULL,
-                   reinterpret_cast<void *(*)(void *)>(
-                       MeasurementWorker::stdinDataAcquisitionWorker),
-                   this);
+                   reinterpret_cast<void* (*)(void*)>(MeasurementWorker::stdinDataAcquisitionWorker), this);
   }
 }
 
@@ -146,7 +138,7 @@ MeasurementWorker::~MeasurementWorker() {
     pthread_join(this->stdinThread, NULL);
   }
 
-  for (auto const &[key, value] : this->values) {
+  for (auto const& [key, value] : this->values) {
     auto metric = this->findMetricByName(key);
     if (metric == nullptr) {
       continue;
@@ -164,48 +156,39 @@ MeasurementWorker::~MeasurementWorker() {
 
 std::vector<std::string> MeasurementWorker::metricNames() {
   std::vector<std::string> metrics;
-  std::transform(
-      this->metrics.begin(), this->metrics.end(), std::back_inserter(metrics),
-      [](auto &metric) -> std::string { return std::string(metric->name); });
-  for (auto const &name : this->_stdinMetrics) {
+  std::transform(this->metrics.begin(), this->metrics.end(), std::back_inserter(metrics),
+                 [](auto& metric) -> std::string { return std::string(metric->name); });
+  for (auto const& name : this->_stdinMetrics) {
     metrics.push_back(name);
   }
 
   return metrics;
 }
 
-const metric_interface_t *
-MeasurementWorker::findMetricByName(std::string metricName) {
-  auto name_equal = [metricName](auto &metricInterface) {
-    return metricName.compare(metricInterface->name) == 0;
-  };
-  auto metric =
-      std::find_if(this->metrics.begin(), this->metrics.end(), name_equal);
+const metric_interface_t* MeasurementWorker::findMetricByName(std::string metricName) {
+  auto name_equal = [metricName](auto& metricInterface) { return metricName.compare(metricInterface->name) == 0; };
+  auto metric = std::find_if(this->metrics.begin(), this->metrics.end(), name_equal);
 
   // metric not found
   if (metric == this->metrics.end()) {
     return nullptr;
   }
   // metric found
-  return const_cast<const metric_interface_t *>(*metric);
+  return const_cast<const metric_interface_t*>(*metric);
 }
 
 // this must be called by the main thread.
 // if not done so things like perf_event_attr.inherit might not work as expected
-std::vector<std::string>
-MeasurementWorker::initMetrics(std::vector<std::string> const &metricNames) {
+std::vector<std::string> MeasurementWorker::initMetrics(std::vector<std::string> const& metricNames) {
   this->values_mutex.lock();
 
   std::vector<std::string> initialized = {};
 
   // try to find each metric and initialize it
-  for (auto const &metricName : metricNames) {
+  for (auto const& metricName : metricNames) {
     // init values map with empty vector
-    auto name_equal = [metricName](auto const &pair) {
-      return metricName.compare(pair.first) == 0;
-    };
-    auto pair =
-        std::find_if(this->values.begin(), this->values.end(), name_equal);
+    auto name_equal = [metricName](auto const& pair) { return metricName.compare(pair.first) == 0; };
+    auto pair = std::find_if(this->values.begin(), this->values.end(), name_equal);
     if (pair != this->values.end()) {
       pair->second.clear();
     } else {
@@ -213,8 +196,7 @@ MeasurementWorker::initMetrics(std::vector<std::string> const &metricNames) {
       if (metric != nullptr) {
         int returnValue = metric->init();
         if (returnValue != EXIT_SUCCESS) {
-          log::error() << "Metric " << metric->name << ": "
-                       << metric->get_error();
+          log::error() << "Metric " << metric->name << ": " << metric->get_error();
           continue;
         }
       }
@@ -233,19 +215,13 @@ MeasurementWorker::initMetrics(std::vector<std::string> const &metricNames) {
   return initialized;
 }
 
-void MeasurementWorker::insertCallback(const char *metricName,
-                                       int64_t timeSinceEpoch, double value) {
+void MeasurementWorker::insertCallback(const char* metricName, int64_t timeSinceEpoch, double value) {
   this->values_mutex.lock();
 
   using Duration = std::chrono::duration<int64_t, std::nano>;
-  auto time =
-      std::chrono::time_point<std::chrono::high_resolution_clock, Duration>(
-          Duration(timeSinceEpoch));
-  auto name_equal = [metricName](auto const &pair) {
-    return std::string(metricName).compare(pair.first) == 0;
-  };
-  auto pair =
-      std::find_if(this->values.begin(), this->values.end(), name_equal);
+  auto time = std::chrono::time_point<std::chrono::high_resolution_clock, Duration>(Duration(timeSinceEpoch));
+  auto name_equal = [metricName](auto const& pair) { return std::string(metricName).compare(pair.first) == 0; };
+  auto pair = std::find_if(this->values.begin(), this->values.end(), name_equal);
 
   if (pair != this->values.end()) {
     pair->second.push_back(TimeValue(time, value));
@@ -254,18 +230,15 @@ void MeasurementWorker::insertCallback(const char *metricName,
   this->values_mutex.unlock();
 }
 
-void MeasurementWorker::startMeasurement() {
-  this->startTime = std::chrono::high_resolution_clock::now();
-}
+void MeasurementWorker::startMeasurement() { this->startTime = std::chrono::high_resolution_clock::now(); }
 
-std::map<std::string, Summary>
-MeasurementWorker::getValues(std::chrono::milliseconds startDelta,
-                             std::chrono::milliseconds stopDelta) {
+std::map<std::string, Summary> MeasurementWorker::getValues(std::chrono::milliseconds startDelta,
+                                                            std::chrono::milliseconds stopDelta) {
   std::map<std::string, Summary> measurment = {};
 
   this->values_mutex.lock();
 
-  for (auto &[key, values] : this->values) {
+  for (auto& [key, values] : this->values) {
     auto startTime = this->startTime;
     auto endTime = std::chrono::high_resolution_clock::now();
     auto metric = this->findMetricByName(key);
@@ -288,15 +261,11 @@ MeasurementWorker::getValues(std::chrono::milliseconds startDelta,
 
     decltype(values) croppedValues(values.size());
 
-    auto findAll = [startTime, endTime](auto const &tv) {
-      return startTime <= tv.time && tv.time <= endTime;
-    };
-    auto it = std::copy_if(values.begin(), values.end(), croppedValues.begin(),
-                           findAll);
+    auto findAll = [startTime, endTime](auto const& tv) { return startTime <= tv.time && tv.time <= endTime; };
+    auto it = std::copy_if(values.begin(), values.end(), croppedValues.begin(), findAll);
     croppedValues.resize(std::distance(croppedValues.begin(), it));
 
-    Summary sum = Summary::calculate(croppedValues.begin(), croppedValues.end(),
-                                     type, this->numThreads);
+    Summary sum = Summary::calculate(croppedValues.begin(), croppedValues.end(), type, this->numThreads);
 
     measurment[key] = sum;
   }
@@ -306,11 +275,11 @@ MeasurementWorker::getValues(std::chrono::milliseconds startDelta,
   return measurment;
 }
 
-int *MeasurementWorker::dataAcquisitionWorker(void *measurementWorker) {
+int* MeasurementWorker::dataAcquisitionWorker(void* measurementWorker) {
 
   pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
 
-  auto _this = reinterpret_cast<MeasurementWorker *>(measurementWorker);
+  auto _this = reinterpret_cast<MeasurementWorker*>(measurementWorker);
 
 #ifndef __APPLE__
   pthread_setname_np(pthread_self(), "DataAcquisition");
@@ -319,8 +288,7 @@ int *MeasurementWorker::dataAcquisitionWorker(void *measurementWorker) {
   using clock = std::chrono::high_resolution_clock;
 
   using callbackTuple =
-      std::tuple<void (*)(void), std::chrono::microseconds,
-                 std::chrono::high_resolution_clock::time_point>;
+      std::tuple<void (*)(void), std::chrono::microseconds, std::chrono::high_resolution_clock::time_point>;
   auto callbackTupleComparator = [](callbackTuple left, callbackTuple right) {
     return std::get<2>(left) > std::get<2>(right);
   };
@@ -328,29 +296,26 @@ int *MeasurementWorker::dataAcquisitionWorker(void *measurementWorker) {
   // this datastructure holds a tuple of our callback, the callback frequency
   // and the next timepoint. it will be sorted, so the pop function will give
   // back the next callback
-  std::priority_queue<callbackTuple, std::vector<callbackTuple>,
-                      decltype(callbackTupleComparator)>
-      callbackQueue(callbackTupleComparator);
+  std::priority_queue<callbackTuple, std::vector<callbackTuple>, decltype(callbackTupleComparator)> callbackQueue(
+      callbackTupleComparator);
 
   _this->values_mutex.lock();
 
-  for (auto const &[key, value] : _this->values) {
+  for (auto const& [key, value] : _this->values) {
     auto metric_interface = _this->findMetricByName(key);
 
     if (metric_interface == nullptr) {
       continue;
     }
 
-    auto callbackTime =
-        std::chrono::microseconds(metric_interface->callback_time);
+    auto callbackTime = std::chrono::microseconds(metric_interface->callback_time);
     if (callbackTime.count() == 0) {
       continue;
     }
 
     auto currentTime = clock::now();
 
-    callbackQueue.push(
-        std::make_tuple(metric_interface->callback, callbackTime, currentTime));
+    callbackQueue.push(std::make_tuple(metric_interface->callback, callbackTime, currentTime));
   }
 
   _this->values_mutex.unlock();
@@ -363,7 +328,7 @@ int *MeasurementWorker::dataAcquisitionWorker(void *measurementWorker) {
     if (nextFetch <= now) {
       _this->values_mutex.lock();
 
-      for (auto &[metricName, values] : _this->values) {
+      for (auto& [metricName, values] : _this->values) {
         auto metric_interface = _this->findMetricByName(metricName);
 
         if (metric_interface == nullptr) {
@@ -372,11 +337,9 @@ int *MeasurementWorker::dataAcquisitionWorker(void *measurementWorker) {
 
         double value;
 
-        if (!metric_interface->type.insert_callback &&
-            metric_interface->get_reading != nullptr) {
+        if (!metric_interface->type.insert_callback && metric_interface->get_reading != nullptr) {
           if (EXIT_SUCCESS == metric_interface->get_reading(&value)) {
-            auto tv =
-                TimeValue(std::chrono::high_resolution_clock::now(), value);
+            auto tv = TimeValue(std::chrono::high_resolution_clock::now(), value);
             values.push_back(tv);
           }
         }
@@ -401,8 +364,7 @@ int *MeasurementWorker::dataAcquisitionWorker(void *measurementWorker) {
 
         // add it with the updated callback time to the queue again
         nextCallback = now + callbackTime;
-        callbackQueue.push(
-            std::make_tuple(callbackFunction, callbackTime, nextCallback));
+        callbackQueue.push(std::make_tuple(callbackFunction, callbackTime, nextCallback));
       }
 
       nextWake = nextCallback < nextWake ? nextCallback : nextWake;
@@ -412,11 +374,11 @@ int *MeasurementWorker::dataAcquisitionWorker(void *measurementWorker) {
   }
 }
 
-int *MeasurementWorker::stdinDataAcquisitionWorker(void *measurementWorker) {
+int* MeasurementWorker::stdinDataAcquisitionWorker(void* measurementWorker) {
 
   pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
 
-  auto _this = reinterpret_cast<MeasurementWorker *>(measurementWorker);
+  auto _this = reinterpret_cast<MeasurementWorker*>(measurementWorker);
 
 #ifndef __APPLE__
   pthread_setname_np(pthread_self(), "StdinDataAcquis");
@@ -427,11 +389,8 @@ int *MeasurementWorker::stdinDataAcquisitionWorker(void *measurementWorker) {
     double value;
     char name[128];
     if (std::sscanf(line.c_str(), "%127s %ld %lf", name, &time, &value) == 3) {
-      auto name_equal = [name](auto const &allowedName) {
-        return allowedName.compare(std::string(name)) == 0;
-      };
-      auto item = std::find_if(_this->stdinMetrics().begin(),
-                               _this->stdinMetrics().end(), name_equal);
+      auto name_equal = [name](auto const& allowedName) { return allowedName.compare(std::string(name)) == 0; };
+      auto item = std::find_if(_this->stdinMetrics().begin(), _this->stdinMetrics().end(), name_equal);
       // metric name is allowed
       if (item != _this->stdinMetrics().end()) {
         _this->insertCallback(name, time, value);
diff --git a/src/firestarter/Measurement/Metric/IPCEstimate.cpp b/src/firestarter/Measurement/Metric/IPCEstimate.cpp
index a58f91bb..145f02ae 100644
--- a/src/firestarter/Measurement/Metric/IPCEstimate.cpp
+++ b/src/firestarter/Measurement/Metric/IPCEstimate.cpp
@@ -30,8 +30,8 @@ extern "C" {
 
 static std::string errorString = "";
 
-static void (*callback)(void *, const char *, int64_t, double) = nullptr;
-static void *callback_arg = nullptr;
+static void (*callback)(void*, const char*, int64_t, double) = nullptr;
+static void* callback_arg = nullptr;
 
 static int32_t fini(void) {
   callback = nullptr;
@@ -46,14 +46,12 @@ static int32_t init(void) {
   return EXIT_SUCCESS;
 }
 
-static const char *get_error(void) {
-  const char *errorCString = errorString.c_str();
+static const char* get_error(void) {
+  const char* errorCString = errorString.c_str();
   return errorCString;
 }
 
-static int32_t register_insert_callback(void (*c)(void *, const char *, int64_t,
-                                                  double),
-                                        void *arg) {
+static int32_t register_insert_callback(void (*c)(void*, const char*, int64_t, double), void* arg) {
   callback = c;
   callback_arg = arg;
   return EXIT_SUCCESS;
@@ -64,9 +62,9 @@ void ipc_estimate_metric_insert(double value) {
     return;
   }
 
-  int64_t t = std::chrono::duration_cast<std::chrono::nanoseconds>(
-                  std::chrono::high_resolution_clock::now().time_since_epoch())
-                  .count();
+  int64_t t =
+      std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch())
+          .count();
 
   callback(callback_arg, "ipc-estimate", t, value);
 }
diff --git a/src/firestarter/Measurement/Metric/Perf.cpp b/src/firestarter/Measurement/Metric/Perf.cpp
index 48f3120b..a7266db2 100644
--- a/src/firestarter/Measurement/Metric/Perf.cpp
+++ b/src/firestarter/Measurement/Metric/Perf.cpp
@@ -52,8 +52,7 @@ static int32_t init_value;
 
 static struct read_format last;
 
-static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
-                            int cpu, int group_fd, unsigned long flags) {
+static long perf_event_open(struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) {
   return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
 }
 
@@ -80,9 +79,7 @@ static int32_t init(void) {
     // The official way of knowing if perf_event_open() support is enabled
     // is checking for the existence of the file
     // /proc/sys/kernel/perf_event_paranoid.
-    errorString =
-        "syscall perf_event_open not supported or file " PERF_EVENT_PARANOID
-        " does not exist";
+    errorString = "syscall perf_event_open not supported or file " PERF_EVENT_PARANOID " does not exist";
     init_value = EXIT_FAILURE;
     init_done = true;
     return EXIT_FAILURE;
@@ -117,17 +114,16 @@ static int32_t init(void) {
   cpu_cycles_attr.exclude_kernel = 1;
   cpu_cycles_attr.exclude_hv = 1;
 
-  if ((cpu_cycles_fd = perf_event_open(
-           &cpu_cycles_attr,
-           // pid == 0 and cpu == -1
-           // This measures the calling process/thread on any CPU.
-           0, -1,
-           // The group_fd argument allows event groups to be created.  An event
-           // group has one event which is the group leader.  The leader is
-           // created first, with group_fd = -1.  The rest of the group members
-           // are created with subsequent perf_event_open() calls with group_fd
-           // being set to the file descriptor of the group leader.
-           -1, 0)) < 0) {
+  if ((cpu_cycles_fd = perf_event_open(&cpu_cycles_attr,
+                                       // pid == 0 and cpu == -1
+                                       // This measures the calling process/thread on any CPU.
+                                       0, -1,
+                                       // The group_fd argument allows event groups to be created.  An event
+                                       // group has one event which is the group leader.  The leader is
+                                       // created first, with group_fd = -1.  The rest of the group members
+                                       // are created with subsequent perf_event_open() calls with group_fd
+                                       // being set to the file descriptor of the group leader.
+                                       -1, 0)) < 0) {
     fini();
     errorString = "perf_event_open failed for PERF_COUNT_HW_CPU_CYCLES";
     init_value = EXIT_FAILURE;
@@ -147,17 +143,16 @@ static int32_t init(void) {
   instructions_attr.exclude_kernel = 1;
   instructions_attr.exclude_hv = 1;
 
-  if ((instructions_fd = perf_event_open(
-           &instructions_attr,
-           // pid == 0 and cpu == -1
-           // This measures the calling process/thread on any CPU.
-           0, -1,
-           // The group_fd argument allows event groups to be created.  An event
-           // group has one event which is the group leader.  The leader is
-           // created first, with group_fd = -1.  The rest of the group members
-           // are created with subsequent perf_event_open() calls with group_fd
-           // being set to the file descriptor of the group leader.
-           cpu_cycles_fd, 0)) < 0) {
+  if ((instructions_fd = perf_event_open(&instructions_attr,
+                                         // pid == 0 and cpu == -1
+                                         // This measures the calling process/thread on any CPU.
+                                         0, -1,
+                                         // The group_fd argument allows event groups to be created.  An event
+                                         // group has one event which is the group leader.  The leader is
+                                         // created first, with group_fd = -1.  The rest of the group members
+                                         // are created with subsequent perf_event_open() calls with group_fd
+                                         // being set to the file descriptor of the group leader.
+                                         cpu_cycles_fd, 0)) < 0) {
     fini();
     errorString = "perf_event_open failed for PERF_COUNT_HW_INSTRUCTIONS";
     init_value = EXIT_FAILURE;
@@ -183,7 +178,7 @@ static int32_t init(void) {
   return EXIT_SUCCESS;
 }
 
-static uint64_t value_from_id(struct read_format *values, uint64_t id) {
+static uint64_t value_from_id(struct read_format* values, uint64_t id) {
   for (decltype(values->nr) i = 0; i < values->nr; ++i) {
     if (id == values->values[i].id) {
       return values->values[i].value;
@@ -193,7 +188,7 @@ static uint64_t value_from_id(struct read_format *values, uint64_t id) {
   return 0;
 }
 
-static int32_t get_reading(double *ipc_value, double *freq_value) {
+static int32_t get_reading(double* ipc_value, double* freq_value) {
 
   if (cpu_cycles_fd < 0 || instructions_fd < 0) {
     fini();
@@ -210,10 +205,8 @@ static int32_t get_reading(double *ipc_value, double *freq_value) {
 
   if (ipc_value != nullptr) {
     uint64_t diff[2];
-    diff[0] = value_from_id(&read_values, instructions_id) -
-              value_from_id(&last, instructions_id);
-    diff[1] = value_from_id(&read_values, cpu_cycles_id) -
-              value_from_id(&last, cpu_cycles_id);
+    diff[0] = value_from_id(&read_values, instructions_id) - value_from_id(&last, instructions_id);
+    diff[1] = value_from_id(&read_values, cpu_cycles_id) - value_from_id(&last, cpu_cycles_id);
 
     std::memcpy(&last, &read_values, sizeof(last));
 
@@ -227,16 +220,12 @@ static int32_t get_reading(double *ipc_value, double *freq_value) {
   return EXIT_SUCCESS;
 }
 
-static int32_t get_reading_ipc(double *value) {
-  return get_reading(value, nullptr);
-}
+static int32_t get_reading_ipc(double* value) { return get_reading(value, nullptr); }
 
-static int32_t get_reading_freq(double *value) {
-  return get_reading(nullptr, value);
-}
+static int32_t get_reading_freq(double* value) { return get_reading(nullptr, value); }
 
-static const char *get_error(void) {
-  const char *errorCString = errorString.c_str();
+static const char* get_error(void) {
+  const char* errorCString = errorString.c_str();
   return errorCString;
 }
 }
diff --git a/src/firestarter/Measurement/Metric/RAPL.cpp b/src/firestarter/Measurement/Metric/RAPL.cpp
index 5f6b4bd7..e6d28f1d 100644
--- a/src/firestarter/Measurement/Metric/RAPL.cpp
+++ b/src/firestarter/Measurement/Metric/RAPL.cpp
@@ -37,19 +37,19 @@ extern "C" {
 static std::string errorString = "";
 
 struct reader_def {
-  char *path;
+  char* path;
   long long int last_reading;
   long long int overflow;
   long long int max;
 };
 
 struct reader_def_free {
-  void operator()(struct reader_def *def) {
+  void operator()(struct reader_def* def) {
     if (def != nullptr) {
-      if (((void *)def->path) != nullptr) {
-        free((void *)def->path);
+      if (((void*)def->path) != nullptr) {
+        free((void*)def->path);
       }
-      free((void *)def);
+      free((void*)def);
     }
   }
 };
@@ -65,7 +65,7 @@ static int32_t fini(void) {
 static int32_t init(void) {
   errorString = "";
 
-  DIR *raplDir = opendir(RAPL_PATH);
+  DIR* raplDir = opendir(RAPL_PATH);
   if (raplDir == NULL) {
     errorString = "Could not open " RAPL_PATH;
     return EXIT_FAILURE;
@@ -81,7 +81,7 @@ static int32_t init(void) {
   // a vector of all paths to package and dram
   std::vector<std::string> paths = {};
 
-  struct dirent *dir;
+  struct dirent* dir;
   while ((dir = readdir(raplDir)) != NULL) {
     std::stringstream path;
     std::stringstream namePath;
@@ -120,7 +120,7 @@ static int32_t init(void) {
     return EXIT_FAILURE;
   }
 
-  for (auto const &path : paths) {
+  for (auto const& path : paths) {
     std::stringstream energyUjPath;
     energyUjPath << path << "/energy_uj";
     std::ifstream energyReadingStream(energyUjPath.str());
@@ -147,8 +147,7 @@ static int32_t init(void) {
 
     if (read == 0) {
       std::stringstream ss;
-      ss << "Contents in file " << energyUjPath.str()
-         << " do not conform to mask (unsigned long long)";
+      ss << "Contents in file " << energyUjPath.str() << " do not conform to mask (unsigned long long)";
       errorString = ss.str();
       break;
     }
@@ -158,21 +157,18 @@ static int32_t init(void) {
 
     if (read == 0) {
       std::stringstream ss;
-      ss << "Contents in file " << maxEnergyUjRangePath.str()
-         << " do not conform to mask (unsigned long long)";
+      ss << "Contents in file " << maxEnergyUjRangePath.str() << " do not conform to mask (unsigned long long)";
       errorString = ss.str();
       break;
     }
 
-    std::shared_ptr<struct reader_def> def(
-        reinterpret_cast<struct reader_def *>(
-            malloc(sizeof(struct reader_def))),
-        reader_def_free());
+    std::shared_ptr<struct reader_def> def(reinterpret_cast<struct reader_def*>(malloc(sizeof(struct reader_def))),
+                                           reader_def_free());
     auto pathName = path.c_str();
     size_t size = (strlen(pathName) + 1) * sizeof(char);
-    void *name = malloc(size);
+    void* name = malloc(size);
     memcpy(name, pathName, size);
-    def->path = (char *)name;
+    def->path = (char*)name;
     def->max = max;
     def->last_reading = reading;
     def->overflow = 0;
@@ -188,10 +184,10 @@ static int32_t init(void) {
   return EXIT_SUCCESS;
 }
 
-static int32_t get_reading(double *value) {
+static int32_t get_reading(double* value) {
   double finalReading = 0.0;
 
-  for (auto &def : readers) {
+  for (auto& def : readers) {
     long long int reading;
     std::string buffer;
 
@@ -207,8 +203,7 @@ static int32_t get_reading(double *value) {
 
     def->last_reading = reading;
 
-    finalReading +=
-        1.0E-6 * (double)(def->overflow * def->max + def->last_reading);
+    finalReading += 1.0E-6 * (double)(def->overflow * def->max + def->last_reading);
   }
 
   if (value != nullptr) {
@@ -218,8 +213,8 @@ static int32_t get_reading(double *value) {
   return EXIT_SUCCESS;
 }
 
-static const char *get_error(void) {
-  const char *errorCString = errorString.c_str();
+static const char* get_error(void) {
+  const char* errorCString = errorString.c_str();
   return errorCString;
 }
 
diff --git a/src/firestarter/Measurement/Summary.cpp b/src/firestarter/Measurement/Summary.cpp
index 590c4e01..2d1bd8f4 100644
--- a/src/firestarter/Measurement/Summary.cpp
+++ b/src/firestarter/Measurement/Summary.cpp
@@ -28,10 +28,8 @@ using namespace firestarter::measurement;
 
 // this functions borows a lot of code from
 // https://github.com/metricq/metricq-cpp/blob/master/tools/metricq-summary/src/summary.cpp
-Summary Summary::calculate(std::vector<TimeValue>::iterator begin,
-                           std::vector<TimeValue>::iterator end,
-                           metric_type_t metricType,
-                           unsigned long long numThreads) {
+Summary Summary::calculate(std::vector<TimeValue>::iterator begin, std::vector<TimeValue>::iterator end,
+                           metric_type_t metricType, unsigned long long numThreads) {
   std::vector<TimeValue> values = {};
 
   // TODO: i would really like to make this code a bit more readable, but i
@@ -43,10 +41,7 @@ Summary Summary::calculate(std::vector<TimeValue>::iterator begin,
       prev = *begin++;
       for (auto it = begin; it != end; ++it) {
         auto time_diff =
-            1e-6 *
-            (double)std::chrono::duration_cast<std::chrono::microseconds>(
-                it->time - prev.time)
-                .count();
+            1e-6 * (double)std::chrono::duration_cast<std::chrono::microseconds>(it->time - prev.time).count();
         auto value_diff = it->value - prev.value;
 
         double value = value_diff / time_diff;
@@ -84,8 +79,7 @@ Summary Summary::calculate(std::vector<TimeValue>::iterator begin,
 
     auto last = begin;
     std::advance(last, summary.num_timepoints - 1);
-    summary.duration = std::chrono::duration_cast<std::chrono::milliseconds>(
-        last->time - begin->time);
+    summary.duration = std::chrono::duration_cast<std::chrono::milliseconds>(last->time - begin->time);
 
     auto sum_over_nths = [&begin, end, summary](auto fn) {
       double acc = 0.0;
diff --git a/src/firestarter/OneAPI/OneAPI.cpp b/src/firestarter/OneAPI/OneAPI.cpp
index f09f79b0..c31ae6cd 100644
--- a/src/firestarter/OneAPI/OneAPI.cpp
+++ b/src/firestarter/OneAPI/OneAPI.cpp
@@ -22,14 +22,12 @@
 /* OneAPI for GPUs, based on CUDA component
  *****************************************************************************/
 
-#include <firestarter/OneAPI/OneAPI.hpp>
 #include <firestarter/LoadWorkerData.hpp>
 #include <firestarter/Logging/Log.hpp>
+#include <firestarter/OneAPI/OneAPI.hpp>
 
-
-#include <sycl/sycl.hpp>
 #include <oneapi/mkl.hpp>
-
+#include <sycl/sycl.hpp>
 
 #include <algorithm>
 #include <atomic>
@@ -37,34 +35,28 @@
 
 using namespace firestarter::oneapi;
 
-
 /* Random number generation helpers */
-template <typename T>
-void generate_random_data(size_t elems, T *v)
-{
-    for (size_t i = 0; i < elems; i++)
-        v[i] = double(std::rand()) / RAND_MAX;
+template <typename T> void generate_random_data(size_t elems, T* v) {
+  for (size_t i = 0; i < elems; i++)
+    v[i] = double(std::rand()) / RAND_MAX;
 }
 
-template <typename T>
-void replicate_data(sycl::queue &Q, T *dst, size_t dst_elems, const T *src, size_t src_elems)
-{
-    firestarter::log::trace() << "replicate_data " << dst_elems << " elements from " <<
-                                 src << " to " << dst ;
-    while (dst_elems > 0) {
-        auto copy_elems = std::min(dst_elems, src_elems);
-        Q.copy(src,  dst, copy_elems);
-        dst += copy_elems;
-        dst_elems -= copy_elems;
-    }
-    Q.wait();
+template <typename T> void replicate_data(sycl::queue& Q, T* dst, size_t dst_elems, const T* src, size_t src_elems) {
+  firestarter::log::trace() << "replicate_data " << dst_elems << " elements from " << src << " to " << dst;
+  while (dst_elems > 0) {
+    auto copy_elems = std::min(dst_elems, src_elems);
+    Q.copy(src, dst, copy_elems);
+    dst += copy_elems;
+    dst_elems -= copy_elems;
+  }
+  Q.wait();
 }
 
 static int get_precision(int device_index, int useDouble) {
 
   firestarter::log::trace() << "Checking useDouble " << useDouble;
 
-  if (!useDouble){
+  if (!useDouble) {
     return 0;
   }
 
@@ -79,11 +71,11 @@ static int get_precision(int device_index, int useDouble) {
   // Choose a platform based on specific criteria (e.g., device type)
   sycl::platform chosenPlatform;
   auto nr_gpus = 0;
-  for (const auto &platform : platforms) {
+  for (const auto& platform : platforms) {
     firestarter::log::trace() << "Checking SYCL platform " << platform.get_info<sycl::info::platform::name>();
     auto devices = platform.get_devices();
     nr_gpus = 0;
-    for (const auto &device : devices) {
+    for (const auto& device : devices) {
       firestarter::log::trace() << "Checking SYCL device " << device.get_info<sycl::info::device::name>();
       if (device.is_gpu()) { // Choose GPU, you can use other criteria
         firestarter::log::trace() << " ... is GPU";
@@ -99,13 +91,11 @@ static int get_precision(int device_index, int useDouble) {
   }
   // Get a list of devices for the chosen platform
 
-
   firestarter::log::trace() << "Get support for double"
-                     << " on device nr. "
-                     << device_index;
+                            << " on device nr. " << device_index;
   auto devices = chosenPlatform.get_devices();
   if (devices[device_index].has(sycl::aspect::fp64))
-    supports_double=1;
+    supports_double = 1;
 
   return supports_double;
 }
@@ -123,19 +113,14 @@ static int round_up(int num_to_round, int multiple) {
   return num_to_round + multiple - remainder;
 }
 
-
 // GPU index. Used to pin this thread to the GPU.
 template <typename T>
-static void create_load(std::condition_variable &waitForInitCv,
-                        std::mutex &waitForInitCvMutex, int device_index,
-                        std::atomic<int> &initCount,
-                        volatile unsigned long long *loadVar, int matrixSize) {
-  static_assert(
-      std::is_same<T, float>::value || std::is_same<T, double>::value,
-      "create_load<T>: Template argument T must be either float or double");
+static void create_load(std::condition_variable& waitForInitCv, std::mutex& waitForInitCvMutex, int device_index,
+                        std::atomic<int>& initCount, volatile unsigned long long* loadVar, int matrixSize) {
+  static_assert(std::is_same<T, float>::value || std::is_same<T, double>::value,
+                "create_load<T>: Template argument T must be either float or double");
 
-  firestarter::log::trace() << "Starting OneAPI with given matrix size "
-                            << matrixSize;
+  firestarter::log::trace() << "Starting OneAPI with given matrix size " << matrixSize;
 
   size_t size_use = 0;
   if (matrixSize > 0) {
@@ -158,14 +143,14 @@ static void create_load(std::condition_variable &waitForInitCv,
   // Choose a platform based on specific criteria (e.g., device type)
   sycl::platform chosenPlatform;
   auto nr_gpus = 0;
-  for (const auto &platform : platforms) {
+  for (const auto& platform : platforms) {
     auto devices = platform.get_devices();
     nr_gpus = 0;
-    for (const auto &device : devices) {
-        if (device.is_gpu()) { // Choose GPU, you can use other criteria
-            chosenPlatform = platform;
-            nr_gpus++;
-        }
+    for (const auto& device : devices) {
+      if (device.is_gpu()) { // Choose GPU, you can use other criteria
+        chosenPlatform = platform;
+        nr_gpus++;
+      }
     }
   }
 
@@ -174,24 +159,20 @@ static void create_load(std::condition_variable &waitForInitCv,
     return;
   }
 
-    // Get a list of devices for the chosen platform
+  // Get a list of devices for the chosen platform
   auto devices = chosenPlatform.get_devices();
-  
 
-  firestarter::log::trace() << "Creating SYCL queue for computation on device nr. "
-                     << device_index;
+  firestarter::log::trace() << "Creating SYCL queue for computation on device nr. " << device_index;
   auto chosenDevice = devices[device_index];
   sycl::queue device_queue(chosenDevice);
 
   firestarter::log::trace() << "Get memory size on device nr. " << device_index;
-  
 
   // getting information about the GPU memory
   size_t memory_total = devices[device_index].get_info<sycl::info::device::global_mem_size>();
 
-  firestarter::log::trace() << "Get Memory info on device nr. "
-                     << device_index
-                     <<": has " << memory_total << " B global memory";
+  firestarter::log::trace() << "Get Memory info on device nr. " << device_index << ": has " << memory_total
+                            << " B global memory";
 
   // check if the user has not set a matrix OR has set a too big matrixsite and
   // if this is true: set a good matrixsize
@@ -201,27 +182,21 @@ static void create_load(std::condition_variable &waitForInitCv,
   }
 
   firestarter::log::trace() << "Set OneAPI matrix size in B: " << size_use;
-  use_bytes =sizeof(T) * size_use * size_use * 3;
-
-
+  use_bytes = sizeof(T) * size_use * size_use * 3;
 
   /* Allocate A/B/C matrices */
 
-  firestarter::log::trace()
-      << "Allocating memory on device nr. "
-      << device_index;
+  firestarter::log::trace() << "Allocating memory on device nr. " << device_index;
   auto A = malloc_device<T>(size_use * size_use, device_queue);
   auto B = malloc_device<T>(size_use * size_use, device_queue);
   auto C = malloc_device<T>(size_use * size_use, device_queue);
 
   /* Create 64 MB random data on Host */
-  constexpr int rd_size = 1024*1024*64;
+  constexpr int rd_size = 1024 * 1024 * 64;
   auto random_data = malloc_host<T>(rd_size, device_queue);
   generate_random_data(rd_size, random_data);
 
-  firestarter::log::trace()
-      << "Copy memory to device nr. "
-      << device_index;
+  firestarter::log::trace() << "Copy memory to device nr. " << device_index;
   /* fill A and B with random data */
   replicate_data(device_queue, A, size_use * size_use, random_data, rd_size);
   replicate_data(device_queue, B, size_use * size_use, random_data, rd_size);
@@ -230,14 +205,13 @@ static void create_load(std::condition_variable &waitForInitCv,
     std::lock_guard<std::mutex> lk(waitForInitCvMutex);
 
 #define TO_MB(x) (unsigned long)(x / 1024 / 1024)
-  firestarter::log::info()
-      << "   GPU " << device_index << "\n"
-      << "    name:           " << devices[device_index].get_info<sycl::info::device::name>() << "\n"
-      << "    memory:         " << TO_MB(memory_total) << " MiB total (using " << TO_MB(use_bytes)
-      << " MiB)\n"
-      << "    matrix size:    " << size_use << "\n"
-      << "    used precision: "
-      << ((sizeof(T) == sizeof(double)) ? "double" : "single");
+    firestarter::log::info() << "   GPU " << device_index << "\n"
+                             << "    name:           " << devices[device_index].get_info<sycl::info::device::name>()
+                             << "\n"
+                             << "    memory:         " << TO_MB(memory_total) << " MiB total (using "
+                             << TO_MB(use_bytes) << " MiB)\n"
+                             << "    matrix size:    " << size_use << "\n"
+                             << "    used precision: " << ((sizeof(T) == sizeof(double)) ? "double" : "single");
 #undef TO_MB
 
     initCount++;
@@ -246,26 +220,24 @@ static void create_load(std::condition_variable &waitForInitCv,
 
   firestarter::log::trace() << "Run gemm on device nr. " << device_index;
   /* With this, we could run multiple gemms ...*/
-/*  auto run_gemms = [=, &device_queue](int runs) -> double {
-      using namespace oneapi::mkl;
-      for (int i = 0; i < runs; i++)
-          
-      return runs;
-  };
-*/
+  /*  auto run_gemms = [=, &device_queue](int runs) -> double {
+        using namespace oneapi::mkl;
+        for (int i = 0; i < runs; i++)
+
+        return runs;
+    };
+  */
   while (*loadVar != LOAD_STOP) {
-  firestarter::log::trace() << "Run gemm on device nr. " << device_index;
-    oneapi::mkl::blas::gemm(device_queue, oneapi::mkl::transpose::N, oneapi::mkl::transpose::N, size_use, size_use, size_use, 1, A, size_use, B, size_use, 0, C, size_use);
-  firestarter::log::trace() << "wait gemm on device nr. " << device_index;
+    firestarter::log::trace() << "Run gemm on device nr. " << device_index;
+    oneapi::mkl::blas::gemm(device_queue, oneapi::mkl::transpose::N, oneapi::mkl::transpose::N, size_use, size_use,
+                            size_use, 1, A, size_use, B, size_use, 0, C, size_use);
+    firestarter::log::trace() << "wait gemm on device nr. " << device_index;
     device_queue.wait_and_throw();
   }
-
 }
 
-OneAPI::OneAPI(volatile unsigned long long *loadVar, bool useFloat, bool useDouble,
-           unsigned matrixSize, int gpus) {
-  std::thread t(OneAPI::initGpus, std::ref(_waitForInitCv), loadVar, useFloat,
-                useDouble, matrixSize, gpus);
+OneAPI::OneAPI(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) {
+  std::thread t(OneAPI::initGpus, std::ref(_waitForInitCv), loadVar, useFloat, useDouble, matrixSize, gpus);
   _initThread = std::move(t);
 
   std::unique_lock<std::mutex> lk(_waitForInitCvMutex);
@@ -273,9 +245,8 @@ OneAPI::OneAPI(volatile unsigned long long *loadVar, bool useFloat, bool useDoub
   _waitForInitCv.wait(lk);
 }
 
-void OneAPI::initGpus(std::condition_variable &cv,
-                    volatile unsigned long long *loadVar, bool useFloat,
-                    bool useDouble, unsigned matrixSize, int gpus) {
+void OneAPI::initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble,
+                      unsigned matrixSize, int gpus) {
   std::condition_variable waitForInitCv;
   std::mutex waitForInitCvMutex;
 
@@ -291,14 +262,14 @@ void OneAPI::initGpus(std::condition_variable &cv,
     // Choose a platform based on specific criteria (e.g., device type)
     sycl::platform chosenPlatform;
     auto devCount = 0;
-    for (const auto &platform : platforms) {
+    for (const auto& platform : platforms) {
       auto devices = platform.get_devices();
       devCount = 0;
-      for (const auto &device : devices) {
-          if (device.is_gpu()) { // Choose GPU, you can use other criteria
-              chosenPlatform = platform;
-              devCount++;
-          }
+      for (const auto& device : devices) {
+        if (device.is_gpu()) { // Choose GPU, you can use other criteria
+          chosenPlatform = platform;
+          devCount++;
+        }
       }
     }
 
@@ -327,9 +298,8 @@ void OneAPI::initGpus(std::condition_variable &cv,
       }
       if (gpus > devCount) {
         firestarter::log::warn() << "You requested more OneAPI devices than available.";
-        firestarter::log::warn()
-            << "FIRESTARTER will use " << devCount << " of the requested "
-            << gpus << " OneAPI device(s)";
+        firestarter::log::warn() << "FIRESTARTER will use " << devCount << " of the requested " << gpus
+                                 << " OneAPI device(s)";
         gpus = devCount;
       }
 
@@ -340,21 +310,19 @@ void OneAPI::initGpus(std::condition_variable &cv,
           // if there's a GPU in the system without Double Precision support, we
           // have to correct this.
           int precision = get_precision(i, use_double);
-          if (precision == -1){
+          if (precision == -1) {
             firestarter::log::warn() << "This should not have happened. Could not get precision via SYCL.";
           }
 
           if (precision) {
             firestarter::log::trace() << "Starting OneAPI GPU double workload.";
-            std::thread t(create_load<double>, std::ref(waitForInitCv),
-                          std::ref(waitForInitCvMutex), i, std::ref(initCount),
-                          loadVar, (int)matrixSize);
+            std::thread t(create_load<double>, std::ref(waitForInitCv), std::ref(waitForInitCvMutex), i,
+                          std::ref(initCount), loadVar, (int)matrixSize);
             gpuThreads.push_back(std::move(t));
           } else {
             firestarter::log::trace() << "Starting OneAPI GPU float workload.";
-            std::thread t(create_load<float>, std::ref(waitForInitCv),
-                          std::ref(waitForInitCvMutex), i, std::ref(initCount),
-                          loadVar, (int)matrixSize);
+            std::thread t(create_load<float>, std::ref(waitForInitCv), std::ref(waitForInitCvMutex), i,
+                          std::ref(initCount), loadVar, (int)matrixSize);
             gpuThreads.push_back(std::move(t));
           }
         }
@@ -370,19 +338,17 @@ void OneAPI::initGpus(std::condition_variable &cv,
       cv.notify_all();
 
       /* join computation threads */
-      for (auto &t : gpuThreads) {
+      for (auto& t : gpuThreads) {
         t.join();
       }
     } else {
-      firestarter::log::info()
-          << "    - No OneAPI devices. Just stressing CPU(s). Maybe use "
-             "FIRESTARTER instead of FIRESTARTER_OneAPI?";
+      firestarter::log::info() << "    - No OneAPI devices. Just stressing CPU(s). Maybe use "
+                                  "FIRESTARTER instead of FIRESTARTER_OneAPI?";
       cv.notify_all();
     }
   } else {
-    firestarter::log::info()
-        << "    --gpus 0 is set. Just stressing CPU(s). Maybe use "
-           "FIRESTARTER instead of FIRESTARTER_OneAPI?";
+    firestarter::log::info() << "    --gpus 0 is set. Just stressing CPU(s). Maybe use "
+                                "FIRESTARTER instead of FIRESTARTER_OneAPI?";
     cv.notify_all();
   }
 }
diff --git a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
index 7c8a8146..972c0d0a 100644
--- a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
+++ b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
@@ -31,7 +31,10 @@
 using namespace firestarter::optimizer::algorithm;
 
 NSGA2::NSGA2(unsigned gen, double cr, double m)
-    : Algorithm(), _gen(gen), _cr(cr), _m(m) {
+    : Algorithm()
+    , _gen(gen)
+    , _cr(cr)
+    , _m(m) {
   if (cr >= 1. || cr < 0.) {
     throw std::invalid_argument("The crossover probability must be in the "
                                 "[0,1[ range, while a value of " +
@@ -44,14 +47,12 @@ NSGA2::NSGA2(unsigned gen, double cr, double m)
   }
 }
 
-void NSGA2::checkPopulation(firestarter::optimizer::Population const &pop,
-                            std::size_t populationSize) {
-  const auto &prob = pop.problem();
+void NSGA2::checkPopulation(firestarter::optimizer::Population const& pop, std::size_t populationSize) {
+  const auto& prob = pop.problem();
 
   if (!prob.isMO()) {
-    throw std::invalid_argument(
-        "NSGA2 is a multiobjective algorithms, while number of objectives is " +
-        std::to_string(prob.getNobjs()));
+    throw std::invalid_argument("NSGA2 is a multiobjective algorithms, while number of objectives is " +
+                                std::to_string(prob.getNobjs()));
   }
 
   if (populationSize < 5u || (populationSize % 4 != 0u)) {
@@ -63,15 +64,13 @@ void NSGA2::checkPopulation(firestarter::optimizer::Population const &pop,
   }
 }
 
-firestarter::optimizer::Population
-NSGA2::evolve(firestarter::optimizer::Population &pop) {
-  const auto &prob = pop.problem();
+firestarter::optimizer::Population NSGA2::evolve(firestarter::optimizer::Population& pop) {
+  const auto& prob = pop.problem();
   const auto bounds = prob.getBounds();
   auto NP = pop.size();
   auto fevals0 = prob.getFevals();
 
-  this->checkPopulation(
-      const_cast<firestarter::optimizer::Population const &>(pop), NP);
+  this->checkPopulation(const_cast<firestarter::optimizer::Population const&>(pop), NP);
 
   std::random_device rd;
   std::mt19937 rng(rd());
@@ -117,15 +116,11 @@ NSGA2::evolve(firestarter::optimizer::Population &pop) {
     // We compute crowding distance and non dominated rank for the current
     // population
     auto fnds_res = util::fast_non_dominated_sorting(pop.f());
-    auto ndf =
-        std::get<0>(fnds_res); // non dominated fronts [[0,3,2],[1,5,6],[4],...]
-    std::vector<double> pop_cd(
-        NP); // crowding distances of the whole population
-    auto ndr =
-        std::get<3>(fnds_res); // non domination rank [0,1,0,0,2,1,1, ... ]
-    for (const auto &front_idxs : ndf) {
-      if (front_idxs.size() ==
-          1u) { // handles the case where the front has collapsed to one point
+    auto ndf = std::get<0>(fnds_res); // non dominated fronts [[0,3,2],[1,5,6],[4],...]
+    std::vector<double> pop_cd(NP);   // crowding distances of the whole population
+    auto ndr = std::get<3>(fnds_res); // non domination rank [0,1,0,0,2,1,1, ... ]
+    for (const auto& front_idxs : ndf) {
+      if (front_idxs.size() == 1u) { // handles the case where the front has collapsed to one point
         pop_cd[front_idxs[0]] = std::numeric_limits<double>::infinity();
       } else if (front_idxs.size() == 2u) { // handles the case where the front
         // has collapsed to one point
@@ -147,12 +142,9 @@ NSGA2::evolve(firestarter::optimizer::Population &pop) {
     // of parents that will each create 2 new offspring
     for (decltype(NP) i = 0u; i < NP; i += 4) {
       // We create two offsprings using the shuffled list 1
-      parent1_idx = util::mo_tournament_selection(shuffle1[i], shuffle1[i + 1],
-                                                  ndr, pop_cd, rng);
-      parent2_idx = util::mo_tournament_selection(
-          shuffle1[i + 2], shuffle1[i + 3], ndr, pop_cd, rng);
-      children = util::sbx_crossover(pop.x()[parent1_idx], pop.x()[parent2_idx],
-                                     _cr, rng);
+      parent1_idx = util::mo_tournament_selection(shuffle1[i], shuffle1[i + 1], ndr, pop_cd, rng);
+      parent2_idx = util::mo_tournament_selection(shuffle1[i + 2], shuffle1[i + 3], ndr, pop_cd, rng);
+      children = util::sbx_crossover(pop.x()[parent1_idx], pop.x()[parent2_idx], _cr, rng);
       util::polynomial_mutation(children.first, bounds, _m, rng);
       util::polynomial_mutation(children.second, bounds, _m, rng);
 
@@ -160,12 +152,9 @@ NSGA2::evolve(firestarter::optimizer::Population &pop) {
       popnew.append(children.second);
 
       // We repeat with the shuffled list 2
-      parent1_idx = util::mo_tournament_selection(shuffle2[i], shuffle2[i + 1],
-                                                  ndr, pop_cd, rng);
-      parent2_idx = util::mo_tournament_selection(
-          shuffle2[i + 2], shuffle2[i + 3], ndr, pop_cd, rng);
-      children = util::sbx_crossover(pop.x()[parent1_idx], pop.x()[parent2_idx],
-                                     _cr, rng);
+      parent1_idx = util::mo_tournament_selection(shuffle2[i], shuffle2[i + 1], ndr, pop_cd, rng);
+      parent2_idx = util::mo_tournament_selection(shuffle2[i + 2], shuffle2[i + 3], ndr, pop_cd, rng);
+      children = util::sbx_crossover(pop.x()[parent1_idx], pop.x()[parent2_idx], _cr, rng);
       util::polynomial_mutation(children.first, bounds, _m, rng);
       util::polynomial_mutation(children.second, bounds, _m, rng);
 
diff --git a/src/firestarter/Optimizer/OptimizerWorker.cpp b/src/firestarter/Optimizer/OptimizerWorker.cpp
index 48819fd5..7cb98cce 100644
--- a/src/firestarter/Optimizer/OptimizerWorker.cpp
+++ b/src/firestarter/Optimizer/OptimizerWorker.cpp
@@ -25,18 +25,16 @@
 
 using namespace firestarter::optimizer;
 
-OptimizerWorker::OptimizerWorker(
-    std::unique_ptr<firestarter::optimizer::Algorithm> &&algorithm,
-    firestarter::optimizer::Population &population,
-    std::string const &optimizationAlgorithm, unsigned individuals,
-    std::chrono::seconds const &preheat)
-    : _algorithm(std::move(algorithm)), _population(population),
-      _optimizationAlgorithm(optimizationAlgorithm), _individuals(individuals),
-      _preheat(preheat) {
-  pthread_create(
-      &this->workerThread, NULL,
-      reinterpret_cast<void *(*)(void *)>(OptimizerWorker::optimizerThread),
-      this);
+OptimizerWorker::OptimizerWorker(std::unique_ptr<firestarter::optimizer::Algorithm>&& algorithm,
+                                 firestarter::optimizer::Population& population,
+                                 std::string const& optimizationAlgorithm, unsigned individuals,
+                                 std::chrono::seconds const& preheat)
+    : _algorithm(std::move(algorithm))
+    , _population(population)
+    , _optimizationAlgorithm(optimizationAlgorithm)
+    , _individuals(individuals)
+    , _preheat(preheat) {
+  pthread_create(&this->workerThread, NULL, reinterpret_cast<void* (*)(void*)>(OptimizerWorker::optimizerThread), this);
 }
 
 void OptimizerWorker::kill() {
@@ -49,10 +47,10 @@ void OptimizerWorker::join() {
   pthread_join(this->workerThread, NULL);
 }
 
-void *OptimizerWorker::optimizerThread(void *optimizerWorker) {
+void* OptimizerWorker::optimizerThread(void* optimizerWorker) {
   pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
 
-  auto _this = reinterpret_cast<OptimizerWorker *>(optimizerWorker);
+  auto _this = reinterpret_cast<OptimizerWorker*>(optimizerWorker);
 
 #ifndef __APPLE__
   pthread_setname_np(pthread_self(), "Optimizer");
diff --git a/src/firestarter/Optimizer/Population.cpp b/src/firestarter/Optimizer/Population.cpp
index 7d3a7e1a..d7915bd7 100644
--- a/src/firestarter/Optimizer/Population.cpp
+++ b/src/firestarter/Optimizer/Population.cpp
@@ -29,8 +29,7 @@
 using namespace firestarter::optimizer;
 
 void Population::generateInitialPopulation(std::size_t populationSize) {
-  firestarter::log::trace() << "Generating " << populationSize
-                            << " random individuals for initial population.";
+  firestarter::log::trace() << "Generating " << populationSize << " random individuals for initial population.";
 
   auto dims = this->problem().getDims();
   auto remaining = populationSize;
@@ -44,10 +43,8 @@ void Population::generateInitialPopulation(std::size_t populationSize) {
 
     remaining -= dims;
   } else {
-    firestarter::log::trace()
-        << "Population size (" << std::to_string(populationSize)
-        << ") is less than size of problem dimension (" << std::to_string(dims)
-        << ")";
+    firestarter::log::trace() << "Population size (" << std::to_string(populationSize)
+                              << ") is less than size of problem dimension (" << std::to_string(dims) << ")";
   }
 
   for (decltype(remaining) i = 0; i < remaining; i++) {
@@ -57,7 +54,7 @@ void Population::generateInitialPopulation(std::size_t populationSize) {
 
 std::size_t Population::size() const { return _x.size(); }
 
-void Population::append(Individual const &ind) {
+void Population::append(Individual const& ind) {
   assert(this->problem().getDims() == ind.size());
 
   std::map<std::string, firestarter::measurement::Summary> metrics;
@@ -79,10 +76,10 @@ void Population::append(Individual const &ind) {
   }
 }
 
-void Population::append(Individual const &ind, std::vector<double> const &fit) {
+void Population::append(Individual const& ind, std::vector<double> const& fit) {
   std::stringstream ss;
   ss << "  - Fitness: ";
-  for (auto const &v : fit) {
+  for (auto const& v : fit) {
     ss << v << " ";
   }
   firestarter::log::trace() << ss.str();
@@ -94,8 +91,7 @@ void Population::append(Individual const &ind, std::vector<double> const &fit) {
   this->_f.push_back(fit);
 }
 
-void Population::insert(std::size_t idx, Individual const &ind,
-                        std::vector<double> const &fit) {
+void Population::insert(std::size_t idx, Individual const& ind, std::vector<double> const& fit) {
   // assert that population is big enough
   assert(_x.size() > idx);
 
@@ -117,8 +113,7 @@ Individual Population::getRandomIndividual() {
 
     out[i] = std::uniform_int_distribution<unsigned>(lb, ub)(this->gen);
 
-    firestarter::log::trace()
-        << "  - " << i << ": [" << lb << "," << ub << "]: " << out[i];
+    firestarter::log::trace() << "  - " << i << ": [" << lb << "," << ub << "]: " << out[i];
   }
 
   return out;
@@ -134,8 +129,7 @@ std::optional<Individual> Population::bestIndividual() const {
   // assert that we have individuals
   assert(this->_x.size() > 0);
 
-  auto best = std::max_element(this->_x.begin(), this->_x.end(),
-                               [](auto a, auto b) { return a < b; });
+  auto best = std::max_element(this->_x.begin(), this->_x.end(), [](auto a, auto b) { return a < b; });
 
   assert(best != this->_x.end());
 
diff --git a/src/firestarter/Optimizer/Util/MultiObjective.cpp b/src/firestarter/Optimizer/Util/MultiObjective.cpp
index 2c87ba2f..df24effa 100644
--- a/src/firestarter/Optimizer/Util/MultiObjective.cpp
+++ b/src/firestarter/Optimizer/Util/MultiObjective.cpp
@@ -81,13 +81,11 @@ bool greater_than_f(double a, double b) {
  * @throws std::invalid_argument if the dimensions of the two objectives are
  * different
  */
-bool pareto_dominance(const std::vector<double> &obj1,
-                      const std::vector<double> &obj2) {
+bool pareto_dominance(const std::vector<double>& obj1, const std::vector<double>& obj2) {
   if (obj1.size() != obj2.size()) {
     throw std::invalid_argument(
-        "Different number of objectives found in input fitnesses: " +
-        std::to_string(obj1.size()) + " and " + std::to_string(obj2.size()) +
-        ". I cannot define dominance");
+        "Different number of objectives found in input fitnesses: " + std::to_string(obj1.size()) + " and " +
+        std::to_string(obj2.size()) + ". I cannot define dominance");
   }
   bool found_strictly_dominating_dimension = false;
   for (decltype(obj1.size()) i = 0u; i < obj1.size(); ++i) {
@@ -130,16 +128,14 @@ bool pareto_dominance(const std::vector<double> &obj1,
  *
  * @throws std::invalid_argument If the size of \p points is not at least 2
  */
-std::tuple<std::vector<std::vector<std::size_t>>,
-           std::vector<std::vector<std::size_t>>, std::vector<std::size_t>,
+std::tuple<std::vector<std::vector<std::size_t>>, std::vector<std::vector<std::size_t>>, std::vector<std::size_t>,
            std::vector<std::size_t>>
-fast_non_dominated_sorting(const std::vector<std::vector<double>> &points) {
+fast_non_dominated_sorting(const std::vector<std::vector<double>>& points) {
   auto N = points.size();
   // We make sure to have two points at least (one could also be allowed)
   if (N < 2u) {
-    throw std::invalid_argument(
-        "At least two points are needed for fast_non_dominated_sorting: " +
-        std::to_string(N) + " detected.");
+    throw std::invalid_argument("At least two points are needed for fast_non_dominated_sorting: " + std::to_string(N) +
+                                " detected.");
   }
   // Initialize the return values
   std::vector<std::vector<std::size_t>> non_dom_fronts(1u);
@@ -174,8 +170,7 @@ fast_non_dominated_sorting(const std::vector<std::vector<double>> &points) {
   while (current_front.size() != 0u) {
     std::vector<std::size_t> next_front;
     for (decltype(current_front.size()) p = 0u; p < current_front.size(); ++p) {
-      for (decltype(dom_list[current_front[p]].size()) q = 0u;
-           q < dom_list[current_front[p]].size(); ++q) {
+      for (decltype(dom_list[current_front[p]].size()) q = 0u; q < dom_list[current_front[p]].size(); ++q) {
         --dom_count_copy[dom_list[current_front[p]][q]];
         if (dom_count_copy[dom_list[current_front[p]][q]] == 0u) {
           non_dom_rank[dom_list[current_front[p]][q]] = front_counter + 1u;
@@ -189,8 +184,7 @@ fast_non_dominated_sorting(const std::vector<std::vector<double>> &points) {
       non_dom_fronts.push_back(current_front);
     }
   }
-  return std::make_tuple(std::move(non_dom_fronts), std::move(dom_list),
-                         std::move(dom_count), std::move(non_dom_rank));
+  return std::make_tuple(std::move(non_dom_fronts), std::move(dom_list), std::move(dom_count), std::move(non_dom_rank));
 }
 
 /// Crowding distance
@@ -218,14 +212,12 @@ fast_non_dominated_sorting(const std::vector<std::vector<double>> &points) {
  * @throws std::invalid_argument If points in \p non_dom_front do not all have
  * the same dimensionality
  */
-std::vector<double>
-crowding_distance(const std::vector<std::vector<double>> &non_dom_front) {
+std::vector<double> crowding_distance(const std::vector<std::vector<double>>& non_dom_front) {
   auto N = non_dom_front.size();
   // We make sure to have two points at least
   if (N < 2u) {
-    throw std::invalid_argument(
-        "A non dominated front must contain at least two points: " +
-        std::to_string(N) + " detected.");
+    throw std::invalid_argument("A non dominated front must contain at least two points: " + std::to_string(N) +
+                                " detected.");
   }
   auto M = non_dom_front[0].size();
   // We make sure the first point of the input non dominated front contains at
@@ -236,9 +228,8 @@ crowding_distance(const std::vector<std::vector<double>> &non_dom_front) {
                                 std::to_string(M) + " detected.");
   }
   // We make sure all points contain the same number of objectives
-  if (!std::all_of(
-          non_dom_front.begin(), non_dom_front.end(),
-          [M](const std::vector<double> &item) { return item.size() == M; })) {
+  if (!std::all_of(non_dom_front.begin(), non_dom_front.end(),
+                   [M](const std::vector<double>& item) { return item.size() == M; })) {
     throw std::invalid_argument("A non dominated front must contain points of "
                                 "uniform dimensionality. Some "
                                 "different sizes were instead detected.");
@@ -247,19 +238,14 @@ crowding_distance(const std::vector<std::vector<double>> &non_dom_front) {
   std::iota(indexes.begin(), indexes.end(), std::size_t(0u));
   std::vector<double> retval(N, 0.);
   for (decltype(M) i = 0u; i < M; ++i) {
-    std::sort(indexes.begin(), indexes.end(),
-              [i, &non_dom_front](std::size_t idx1, std::size_t idx2) {
-                return less_than_f(non_dom_front[idx1][i],
-                                   non_dom_front[idx2][i]);
-              });
+    std::sort(indexes.begin(), indexes.end(), [i, &non_dom_front](std::size_t idx1, std::size_t idx2) {
+      return less_than_f(non_dom_front[idx1][i], non_dom_front[idx2][i]);
+    });
     retval[indexes[0]] = std::numeric_limits<double>::infinity();
     retval[indexes[N - 1u]] = std::numeric_limits<double>::infinity();
-    double df =
-        non_dom_front[indexes[N - 1u]][i] - non_dom_front[indexes[0]][i];
+    double df = non_dom_front[indexes[N - 1u]][i] - non_dom_front[indexes[0]][i];
     for (decltype(N - 2u) j = 1u; j < N - 1u; ++j) {
-      retval[indexes[j]] += (non_dom_front[indexes[j + 1u]][i] -
-                             non_dom_front[indexes[j - 1u]][i]) /
-                            df;
+      retval[indexes[j]] += (non_dom_front[indexes[j + 1u]][i] - non_dom_front[indexes[j - 1u]][i]) / df;
     }
   }
   return retval;
@@ -267,10 +253,10 @@ crowding_distance(const std::vector<std::vector<double>> &non_dom_front) {
 
 // Multi-objective tournament selection. Requires all sizes to be consistent.
 // Does not check if input is well formed.
-std::vector<double>::size_type mo_tournament_selection(
-    std::vector<double>::size_type idx1, std::vector<double>::size_type idx2,
-    const std::vector<std::vector<double>::size_type> &non_domination_rank,
-    const std::vector<double> &crowding_d, std::mt19937 &mt) {
+std::vector<double>::size_type
+mo_tournament_selection(std::vector<double>::size_type idx1, std::vector<double>::size_type idx2,
+                        const std::vector<std::vector<double>::size_type>& non_domination_rank,
+                        const std::vector<double>& crowding_d, std::mt19937& mt) {
   if (non_domination_rank[idx1] < non_domination_rank[idx2])
     return idx1;
   if (non_domination_rank[idx1] > non_domination_rank[idx2])
@@ -288,11 +274,9 @@ std::vector<double>::size_type mo_tournament_selection(
 // otherwise Requires dimensions of the parent and bounds to be equal -> out of
 // bound reads. nix is the integer dimension (integer alleles assumed at the end
 // of the chromosome)
-std::pair<firestarter::optimizer::Individual,
-          firestarter::optimizer::Individual>
-sbx_crossover(const firestarter::optimizer::Individual &parent1,
-              const firestarter::optimizer::Individual &parent2,
-              const double p_cr, std::mt19937 &mt) {
+std::pair<firestarter::optimizer::Individual, firestarter::optimizer::Individual>
+sbx_crossover(const firestarter::optimizer::Individual& parent1, const firestarter::optimizer::Individual& parent2,
+              const double p_cr, std::mt19937& mt) {
   // Decision vector dimensions
   auto nix = parent1.size();
   firestarter::optimizer::Individual::size_type site1, site2;
@@ -304,14 +288,11 @@ sbx_crossover(const firestarter::optimizer::Individual &parent1,
                                         1.); // to generate a number in [0, 1)
 
   // This implements a Simulated Binary Crossover SBX
-  if (drng(mt) <
-      p_cr) { // No crossever at all will happen with probability p_cr
+  if (drng(mt) < p_cr) { // No crossever at all will happen with probability p_cr
     // This implements two-points crossover and applies it to the integer part
     // of the chromosome.
     if (nix > 0u) {
-      std::uniform_int_distribution<
-          firestarter::optimizer::Individual::size_type>
-          ra_num(0, nix - 1u);
+      std::uniform_int_distribution<firestarter::optimizer::Individual::size_type> ra_num(0, nix - 1u);
       site1 = ra_num(mt);
       site2 = ra_num(mt);
       if (site1 > site2) {
@@ -328,10 +309,9 @@ sbx_crossover(const firestarter::optimizer::Individual &parent1,
 
 // Performs polynomial mutation. Requires all sizes to be consistent. Does not
 // check if input is well formed. p_m is the mutation probability
-void polynomial_mutation(
-    firestarter::optimizer::Individual &child,
-    const std::vector<std::tuple<unsigned, unsigned>> &bounds, const double p_m,
-    std::mt19937 &mt) {
+void polynomial_mutation(firestarter::optimizer::Individual& child,
+                         const std::vector<std::tuple<unsigned, unsigned>>& bounds, const double p_m,
+                         std::mt19937& mt) {
   // Decision vector dimensions
   auto nix = child.size();
   // Random distributions
@@ -343,9 +323,7 @@ void polynomial_mutation(
       // We need to draw a random integer in [lb, ub].
       auto lb = std::get<0>(bounds[j]);
       auto ub = std::get<1>(bounds[j]);
-      std::uniform_int_distribution<
-          firestarter::optimizer::Individual::size_type>
-          dist(lb, ub);
+      std::uniform_int_distribution<firestarter::optimizer::Individual::size_type> dist(lb, ub);
       auto mutated = dist(mt);
       child[j] = mutated;
     }
@@ -384,9 +362,7 @@ void polynomial_mutation(
  * @throws unspecified all exceptions thrown by
  * pagmo::fast_non_dominated_sorting and pagmo::crowding_distance
  */
-std::vector<std::size_t>
-select_best_N_mo(const std::vector<std::vector<double>> &input_f,
-                 std::size_t N) {
+std::vector<std::size_t> select_best_N_mo(const std::vector<std::vector<double>>& input_f, std::size_t N) {
   if (N == 0u) { // corner case
     return {};
   }
@@ -406,7 +382,7 @@ select_best_N_mo(const std::vector<std::vector<double>> &input_f,
   // Run fast-non-dominated sorting
   auto tuple = fast_non_dominated_sorting(input_f);
   // Insert all non dominated fronts if not more than N
-  for (const auto &front : std::get<0>(tuple)) {
+  for (const auto& front : std::get<0>(tuple)) {
     if (retval.size() + front.size() <= N) {
       for (auto i : front) {
         retval.push_back(i);
@@ -430,10 +406,9 @@ select_best_N_mo(const std::vector<std::vector<double>> &input_f,
   // crowding
   std::vector<std::size_t> idxs(front.size());
   std::iota(idxs.begin(), idxs.end(), std::size_t(0u));
-  std::sort(idxs.begin(), idxs.end(),
-            [&cds](std::size_t idx1, std::size_t idx2) {
-              return greater_than_f(cds[idx1], cds[idx2]);
-            }); // Descending order1
+  std::sort(idxs.begin(), idxs.end(), [&cds](std::size_t idx1, std::size_t idx2) {
+    return greater_than_f(cds[idx1], cds[idx2]);
+  }); // Descending order1
   auto remaining = N - retval.size();
   for (decltype(remaining) i = 0u; i < remaining; ++i) {
     retval.push_back(front[idxs[i]]);
@@ -458,7 +433,7 @@ select_best_N_mo(const std::vector<std::vector<double>> &input_f,
  * @throws std::invalid_argument if the input objective vectors are not all of
  * the same size
  */
-std::vector<double> ideal(const std::vector<std::vector<double>> &points) {
+std::vector<double> ideal(const std::vector<std::vector<double>>& points) {
   // Corner case
   if (points.size() == 0u) {
     return {};
@@ -466,7 +441,7 @@ std::vector<double> ideal(const std::vector<std::vector<double>> &points) {
 
   // Sanity checks
   auto M = points[0].size();
-  for (const auto &f : points) {
+  for (const auto& f : points) {
     if (f.size() != M) {
       throw std::invalid_argument("Input vector of objectives must contain "
                                   "fitness vector of equal dimension " +
@@ -476,11 +451,10 @@ std::vector<double> ideal(const std::vector<std::vector<double>> &points) {
   // Actual algorithm
   std::vector<double> retval(M);
   for (decltype(M) i = 0u; i < M; ++i) {
-    retval[i] = (*std::min_element(
-        points.begin(), points.end(),
-        [i](const std::vector<double> &f1, const std::vector<double> &f2) {
-          return util::greater_than_f(f1[i], f2[i]);
-        }))[i];
+    retval[i] = (*std::min_element(points.begin(), points.end(),
+                                   [i](const std::vector<double>& f1, const std::vector<double>& f2) {
+                                     return util::greater_than_f(f1[i], f2[i]);
+                                   }))[i];
   }
   return retval;
 }
diff --git a/src/firestarter/WatchdogWorker.cpp b/src/firestarter/WatchdogWorker.cpp
index 6a3f6b95..f5091fa2 100644
--- a/src/firestarter/WatchdogWorker.cpp
+++ b/src/firestarter/WatchdogWorker.cpp
@@ -30,8 +30,7 @@
 
 using namespace firestarter;
 
-int Firestarter::watchdogWorker(std::chrono::microseconds period,
-                                std::chrono::microseconds load,
+int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::microseconds load,
                                 std::chrono::seconds timeout) {
 
   using clock = std::chrono::high_resolution_clock;
@@ -58,14 +57,13 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period,
 
       // get the time already advanced in the current timeslice
       // this can happen if a load function does not terminates just on time
-      nsec advance = std::chrono::duration_cast<nsec>(currentTime - startTime) %
-                     std::chrono::duration_cast<nsec>(period);
+      nsec advance =
+          std::chrono::duration_cast<nsec>(currentTime - startTime) % std::chrono::duration_cast<nsec>(period);
 
       // subtract the advaned time from our timeslice by spilting it based on
       // the load level
       nsec load_reduction =
-          (std::chrono::duration_cast<nsec>(load).count() * advance) /
-          std::chrono::duration_cast<nsec>(period).count();
+          (std::chrono::duration_cast<nsec>(load).count() * advance) / std::chrono::duration_cast<nsec>(period).count();
       nsec idle_reduction = advance - load_reduction;
 
       // signal high load level
@@ -79,14 +77,12 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period,
       VT_USER_START("WD_HIGH");
 #endif
 #ifdef ENABLE_SCOREP
-      SCOREP_USER_REGION_BY_NAME_BEGIN("WD_HIGH",
-                                       SCOREP_USER_REGION_TYPE_COMMON);
+      SCOREP_USER_REGION_BY_NAME_BEGIN("WD_HIGH", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
       {
         std::unique_lock<std::mutex> lk(this->_watchdogTerminateMutex);
         // abort waiting if we get the interrupt signal
-        this->_watchdogTerminateAlert.wait_for(
-            lk, load_nsec, [this]() { return this->_watchdog_terminate; });
+        this->_watchdogTerminateAlert.wait_for(lk, load_nsec, [this]() { return this->_watchdog_terminate; });
         // terminate on interrupt
         if (this->_watchdog_terminate) {
           return EXIT_SUCCESS;
@@ -110,14 +106,12 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period,
       VT_USER_START("WD_LOW");
 #endif
 #ifdef ENABLE_SCOREP
-      SCOREP_USER_REGION_BY_NAME_BEGIN("WD_LOW",
-                                       SCOREP_USER_REGION_TYPE_COMMON);
+      SCOREP_USER_REGION_BY_NAME_BEGIN("WD_LOW", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
       {
         std::unique_lock<std::mutex> lk(this->_watchdogTerminateMutex);
         // abort waiting if we get the interrupt signal
-        this->_watchdogTerminateAlert.wait_for(
-            lk, idle_nsec, [this]() { return this->_watchdog_terminate; });
+        this->_watchdogTerminateAlert.wait_for(lk, idle_nsec, [this]() { return this->_watchdog_terminate; });
         // terminate on interrupt
         if (this->_watchdog_terminate) {
           return EXIT_SUCCESS;
@@ -136,8 +130,7 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period,
       // exit when termination signal is received or timeout is reached
       {
         std::lock_guard<std::mutex> lk(this->_watchdogTerminateMutex);
-        if (this->_watchdog_terminate ||
-            (timeout > sec::zero() && (time > timeout))) {
+        if (this->_watchdog_terminate || (timeout > sec::zero() && (time > timeout))) {
           this->setLoad(LOAD_STOP);
 
           return EXIT_SUCCESS;
@@ -152,8 +145,7 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period,
     {
       std::unique_lock<std::mutex> lk(Firestarter::_watchdogTerminateMutex);
       // abort waiting if we get the interrupt signal
-      Firestarter::_watchdogTerminateAlert.wait_for(
-          lk, timeout, []() { return Firestarter::_watchdog_terminate; });
+      Firestarter::_watchdogTerminateAlert.wait_for(lk, timeout, []() { return Firestarter::_watchdog_terminate; });
     }
 
     this->setLoad(LOAD_STOP);

From f4fd0eca8351e7dd5d493723e2b226b765ec3294 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 19 Sep 2024 18:34:33 +0200
Subject: [PATCH 004/167] ignore clang-format in blame

---
 .git-blame-ignore-revs | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .git-blame-ignore-revs

diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 00000000..d1806bac
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1 @@
+9732bdb59717274f666e9c1497289d1f9a0d7858

From de5ffd6b42e7c8fc28aecee204161568ab65dc49 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 19 Sep 2024 18:37:50 +0200
Subject: [PATCH 005/167] add clang-format ci job

---
 .github/workflows/clang-format.yml | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 .github/workflows/clang-format.yml

diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml
new file mode 100644
index 00000000..ef004c50
--- /dev/null
+++ b/.github/workflows/clang-format.yml
@@ -0,0 +1,19 @@
+name: clang-format-review
+
+# Runs on every push and on every pull request.
+on: [push, pull_request]
+
+jobs:
+  clang-format:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install clang-format
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y clang-format
+      - name: Analyze
+        run: |
+          clang-format --dry-run --Werror -style=file $(find ./src/ -name '*.cpp' -print)
+          clang-format --dry-run --Werror -style=file $(find ./include/ -name '*.hpp' -print)
+          clang-format --dry-run --Werror -style=file $(find ./include/ -name '*.h' -print)

From 7473e5fdaad0fd30538246f7cc0ce94e99d57a0a Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 19 Sep 2024 18:38:25 +0200
Subject: [PATCH 006/167] add .cache folder to gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index c4fde123..e157a461 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ result*
 *.swp
 *.swo
 build*/
+.cache/

From c513d311a45c0464c871677e143bdf3026c82122 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 19 Sep 2024 19:23:40 +0200
Subject: [PATCH 007/167] add .clang-tidy

---
 .clang-tidy | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 .clang-tidy

diff --git a/.clang-tidy b/.clang-tidy
new file mode 100644
index 00000000..2c3f3334
--- /dev/null
+++ b/.clang-tidy
@@ -0,0 +1,45 @@
+---
+# Configure clang-tidy for this project.
+
+#  -bugprone-narrowing-conversions: too many false positives around
+#      `std::size_t`  vs. `*::difference_type`.
+
+Checks: >
+  -*,
+  boost-*,
+  bugprone-*,
+  cert-*,
+  clang-analyzer-*,
+  concurrency-*,
+  cppcoreguidelines-*,
+  google-*,
+  misc-*,
+  modernize-*,
+  performance-*,
+  portability-*,
+  readability-*,
+  -bugprone-narrowing-conversions,
+  -cppcoreguidelines-special-member-functions
+  
+# Turn all the warnings from the checks above into errors.
+WarningsAsErrors: "*"
+
+HeaderFilterRegex: "(include/).*\\.(h|hpp)$"
+
+CheckOptions:
+  - { key: readability-identifier-naming.NamespaceCase,          value: lower_case }
+  - { key: readability-identifier-naming.ClassCase,              value: CamelCase  }
+  - { key: readability-identifier-naming.StructCase,             value: CamelCase  }
+  - { key: readability-identifier-naming.FunctionCase,           value: camelBack  }
+  - { key: readability-identifier-naming.MemberCase,        value: CamelCase }
+  - { key: readability-identifier-naming.VariableCase,           value: CamelCase }
+  - { key: readability-identifier-naming.EnumCase,           value: CamelCase }
+  - { key: readability-identifier-naming.ParameterCase,           value: CamelCase }
+  - { key: readability-identifier-naming.UnionCase,           value: CamelCase }
+  - { key: readability-identifier-naming.IgnoreMainLikeFunctions,  value: 1 }
+  - { key: readability-redundant-member-init.IgnoreBaseInCopyConstructors,  value: 1 }
+  - { key: modernize-use-default-member-init.UseAssignment,  value: 1 }
+  - { key: readability-implicit-bool-conversion.AllowIntegerConditions,  value: 1 }
+  - { key: readability-implicit-bool-conversion.AllowPointerConditions,  value: 1 }
+  - { key: readability-function-cognitive-complexity.IgnoreMacros,  value: 1 }
+  - { key: misc-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic, value: "true" }
\ No newline at end of file

From 29df30a2e2645f2a02b81d7a9f24e4019d04ca55 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 24 Sep 2024 14:03:08 +0200
Subject: [PATCH 008/167] disable clang-tidy extension that causes crash

---
 .clang-tidy | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.clang-tidy b/.clang-tidy
index 2c3f3334..c3a4c174 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -4,6 +4,8 @@
 #  -bugprone-narrowing-conversions: too many false positives around
 #      `std::size_t`  vs. `*::difference_type`.
 
+#  -boost-use-ranges: crash of clangd https://github.com/llvm/llvm-project/issues/109037
+
 Checks: >
   -*,
   boost-*,
@@ -19,7 +21,8 @@ Checks: >
   portability-*,
   readability-*,
   -bugprone-narrowing-conversions,
-  -cppcoreguidelines-special-member-functions
+  -cppcoreguidelines-special-member-functions,
+  -boost-use-ranges
   
 # Turn all the warnings from the checks above into errors.
 WarningsAsErrors: "*"

From f3960af25f5a4566b2f87282ab2ba1fc6ca81697 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 24 Sep 2024 14:12:05 +0200
Subject: [PATCH 009/167] add clang-tidy workflow

---
 .github/workflows/clang-tidy.yml | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 .github/workflows/clang-tidy.yml

diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml
new file mode 100644
index 00000000..562b5079
--- /dev/null
+++ b/.github/workflows/clang-tidy.yml
@@ -0,0 +1,21 @@
+name: clang-tidy-review
+
+on: [pull_request]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - uses: ZedThree/clang-tidy-review@v0.14.0
+      id: review
+      with:
+        split_workflow: true
+
+    - uses: ZedThree/clang-tidy-review/upload@v0.14.0
+      id: upload-review
+
+    - if: steps.review.outputs.total_comments > 0
+      run: exit 1

From b79754ef0abc37ff92d04dafbea957e5e3933e2f Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 24 Sep 2024 22:08:24 +0200
Subject: [PATCH 010/167] first pass of clang-tidy for the header files

---
 include/firestarter/Cuda/Cuda.hpp             |  17 +-
 include/firestarter/DumpRegisterStruct.hpp    |   9 +-
 .../firestarter/DumpRegisterWorkerData.hpp    |  29 +-
 .../firestarter/Environment/CPUTopology.hpp   |   6 +-
 .../firestarter/Environment/Environment.hpp   |  51 +-
 .../Environment/Payload/Payload.hpp           | 108 ++--
 .../Environment/Platform/PlatformConfig.hpp   |  92 ++--
 .../Environment/Platform/RuntimeConfig.hpp    | 105 ++--
 .../Environment/X86/Payload/AVX512Payload.hpp |  22 +-
 .../Environment/X86/Payload/AVXPayload.hpp    |  22 +-
 .../Environment/X86/Payload/FMA4Payload.hpp   |  24 +-
 .../Environment/X86/Payload/FMAPayload.hpp    |  22 +-
 .../Environment/X86/Payload/SSE2Payload.hpp   |  23 +-
 .../Environment/X86/Payload/X86Payload.hpp    |  53 +-
 .../Environment/X86/Payload/ZENFMAPayload.hpp |  23 +-
 .../Environment/X86/X86CPUTopology.hpp        |  44 +-
 .../Environment/X86/X86Environment.hpp        |  24 +-
 include/firestarter/ErrorDetectionStruct.hpp  |  17 +-
 include/firestarter/Firestarter.hpp           | 126 ++---
 include/firestarter/Json/Summary.hpp          |   8 +-
 include/firestarter/LoadWorkerData.hpp        |  91 ++--
 .../Logging/FirstWorkerThreadFilter.hpp       |  18 +-
 .../Measurement/MeasurementWorker.hpp         |  55 +-
 .../Measurement/Metric/IPCEstimate.h          |   7 +-
 include/firestarter/Measurement/Metric/Perf.h |   7 +-
 include/firestarter/Measurement/Metric/RAPL.h |   5 +-
 .../firestarter/Measurement/MetricInterface.h |  62 ++-
 include/firestarter/Measurement/Summary.hpp   |  12 +-
 include/firestarter/Measurement/TimeValue.hpp |  10 +-
 include/firestarter/OneAPI/OneAPI.hpp         |  17 +-
 include/firestarter/Optimizer/Algorithm.hpp   |   8 +-
 .../firestarter/Optimizer/Algorithm/NSGA2.hpp |  14 +-
 include/firestarter/Optimizer/History.hpp     | 182 ++++---
 .../firestarter/Optimizer/OptimizerWorker.hpp |  22 +-
 include/firestarter/Optimizer/Population.hpp  |  59 ++-
 include/firestarter/Optimizer/Problem.hpp     |  22 +-
 .../Optimizer/Problem/CLIArgumentProblem.hpp  | 106 ++--
 .../Optimizer/Util/MultiObjective.hpp         |  35 +-
 src/firestarter/Cuda/Cuda.cpp                 |   6 +-
 src/firestarter/DumpRegisterWorker.cpp        |  38 +-
 src/firestarter/Environment/CPUTopology.cpp   |  54 +-
 src/firestarter/Environment/Environment.cpp   | 146 +++---
 .../Environment/Payload/Payload.cpp           |  83 +--
 .../Environment/X86/Payload/AVX512Payload.cpp |  85 ++--
 .../Environment/X86/Payload/AVXPayload.cpp    |  60 +--
 .../Environment/X86/Payload/FMA4Payload.cpp   |  38 +-
 .../Environment/X86/Payload/FMAPayload.cpp    |  37 +-
 .../Environment/X86/Payload/SSE2Payload.cpp   |  58 ++-
 .../Environment/X86/Payload/X86Payload.cpp    | 473 +++++++++---------
 .../Environment/X86/Payload/ZENFMAPayload.cpp |  37 +-
 .../Environment/X86/X86CPUTopology.cpp        |  52 +-
 .../Environment/X86/X86Environment.cpp        |  20 +-
 src/firestarter/Firestarter.cpp               | 258 +++++-----
 src/firestarter/LoadWorker.cpp                | 171 ++++---
 .../Measurement/MeasurementWorker.cpp         | 130 ++---
 .../Measurement/Metric/IPCEstimate.cpp        |  36 +-
 src/firestarter/Measurement/Metric/Perf.cpp   |  64 +--
 src/firestarter/Measurement/Metric/RAPL.cpp   |  46 +-
 src/firestarter/Measurement/Summary.cpp       |  68 +--
 src/firestarter/OneAPI/OneAPI.cpp             |   6 +-
 src/firestarter/Optimizer/Algorithm/NSGA2.cpp |  75 ++-
 src/firestarter/Optimizer/OptimizerWorker.cpp |  24 +-
 src/firestarter/Optimizer/Population.cpp      |  24 +-
 .../Optimizer/Util/MultiObjective.cpp         |  45 +-
 src/firestarter/WatchdogWorker.cpp            |  20 +-
 65 files changed, 1873 insertions(+), 1838 deletions(-)

diff --git a/include/firestarter/Cuda/Cuda.hpp b/include/firestarter/Cuda/Cuda.hpp
index d7911eb4..1c6f234e 100644
--- a/include/firestarter/Cuda/Cuda.hpp
+++ b/include/firestarter/Cuda/Cuda.hpp
@@ -24,25 +24,24 @@
 #include <condition_variable>
 #include <mutex>
 #include <thread>
-#include <vector>
 
 namespace firestarter::cuda {
 
 class Cuda {
 private:
-  std::thread _initThread;
-  std::condition_variable _waitForInitCv;
-  std::mutex _waitForInitCvMutex;
+  std::thread InitThread;
+  std::condition_variable WaitForInitCv;
+  std::mutex WaitForInitCvMutex;
 
-  static void initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble,
-                       unsigned matrixSize, int gpus);
+  static void initGpus(std::condition_variable& Cv, volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble,
+                       unsigned MatrixSize, int Gpus);
 
 public:
-  Cuda(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus);
+  Cuda(volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus);
 
   ~Cuda() {
-    if (_initThread.joinable()) {
-      _initThread.join();
+    if (InitThread.joinable()) {
+      InitThread.join();
     }
   }
 };
diff --git a/include/firestarter/DumpRegisterStruct.hpp b/include/firestarter/DumpRegisterStruct.hpp
index 7e80c111..d5f162d3 100644
--- a/include/firestarter/DumpRegisterStruct.hpp
+++ b/include/firestarter/DumpRegisterStruct.hpp
@@ -21,20 +21,21 @@
 
 #pragma once
 
+#include <cstdint>
 namespace firestarter {
 
 /* DO NOT CHANGE! the asm load-loop tests if it should dump the current register
  * content */
-enum DumpVariable : unsigned long long { Start = 0, Wait = 1 };
+enum DumpVariable : uint64_t { Start = 0, Wait = 1 };
 
 #define REGISTER_MAX_NUM 32
 
 struct DumpRegisterStruct {
   // REGISTER_MAX_NUM cachelines
-  volatile double registerValues[REGISTER_MAX_NUM * 8];
+  volatile double RegisterValues[REGISTER_MAX_NUM * 8];
   // pad to use a whole cacheline
-  volatile unsigned long long padding[7];
-  volatile DumpVariable dumpVar;
+  volatile uint64_t Padding[7];
+  volatile DumpVariable DumpVar;
 };
 
 #undef REGISTER_MAX_NUM
diff --git a/include/firestarter/DumpRegisterWorkerData.hpp b/include/firestarter/DumpRegisterWorkerData.hpp
index 14ccc95f..a05863d5 100644
--- a/include/firestarter/DumpRegisterWorkerData.hpp
+++ b/include/firestarter/DumpRegisterWorkerData.hpp
@@ -24,6 +24,7 @@
 #include <chrono>
 #include <firestarter/DumpRegisterStruct.hpp>
 #include <firestarter/LoadWorkerData.hpp>
+#include <utility>
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
 
@@ -31,28 +32,28 @@ namespace firestarter {
 
 class DumpRegisterWorkerData {
 public:
-  DumpRegisterWorkerData(std::shared_ptr<LoadWorkerData> loadWorkerData, std::chrono::seconds dumpTimeDelta,
-                         std::string dumpFilePath)
-      : loadWorkerData(loadWorkerData)
-      , dumpTimeDelta(dumpTimeDelta) {
-    if (dumpFilePath.empty()) {
-      char cwd[PATH_MAX];
-      if (getcwd(cwd, sizeof(cwd)) != NULL) {
-        this->dumpFilePath = cwd;
+  DumpRegisterWorkerData(std::shared_ptr<LoadWorkerData> LoadWorkerDataPtr, std::chrono::seconds DumpTimeDelta,
+                         const std::string& DumpFilePath)
+      : LoadWorkerDataPtr(std::move(LoadWorkerDataPtr))
+      , DumpTimeDelta(DumpTimeDelta) {
+    if (DumpFilePath.empty()) {
+      char Cwd[PATH_MAX];
+      if (getcwd(Cwd, sizeof(Cwd)) != nullptr) {
+        this->DumpFilePath = Cwd;
       } else {
         log::error() << "getcwd() failed. Set --dump-registers-outpath to /tmp";
-        this->dumpFilePath = "/tmp";
+        this->DumpFilePath = "/tmp";
       }
     } else {
-      this->dumpFilePath = dumpFilePath;
+      this->DumpFilePath = DumpFilePath;
     }
   }
 
-  ~DumpRegisterWorkerData() {}
+  ~DumpRegisterWorkerData() = default;
 
-  std::shared_ptr<LoadWorkerData> loadWorkerData;
-  const std::chrono::seconds dumpTimeDelta;
-  std::string dumpFilePath;
+  std::shared_ptr<LoadWorkerData> LoadWorkerDataPtr;
+  const std::chrono::seconds DumpTimeDelta;
+  std::string DumpFilePath;
 };
 
 } // namespace firestarter
diff --git a/include/firestarter/Environment/CPUTopology.hpp b/include/firestarter/Environment/CPUTopology.hpp
index af749d78..ba10df3c 100644
--- a/include/firestarter/Environment/CPUTopology.hpp
+++ b/include/firestarter/Environment/CPUTopology.hpp
@@ -52,12 +52,12 @@ class CPUTopology {
   unsigned instructionCacheSize() const { return _instructionCacheSize; }
 
   // return the cpu clockrate in Hz
-  virtual unsigned long long clockrate() const { return _clockrate; }
+  virtual uint64_t clockrate() const { return _clockrate; }
   // return the cpu features
   virtual std::list<std::string> const& features() const = 0;
 
   // get a timestamp
-  virtual unsigned long long timestamp() const = 0;
+  virtual uint64_t timestamp() const = 0;
 
   int getPkgIdFromPU(unsigned pu) const;
   int getCoreIdFromPU(unsigned pu) const;
@@ -76,7 +76,7 @@ class CPUTopology {
   std::string _vendor = "";
   std::string _processorName = "";
   unsigned _instructionCacheSize = 0;
-  unsigned long long _clockrate = 0;
+  uint64_t _clockrate = 0;
   hwloc_topology_t topology;
 };
 
diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index 24722dc3..5f204ccc 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include <cassert>
+#include <cstdint>
 #include <firestarter/Environment/CPUTopology.hpp>
 #include <firestarter/Environment/Platform/PlatformConfig.hpp>
 #include <firestarter/Environment/Platform/RuntimeConfig.hpp>
@@ -31,61 +32,61 @@ namespace firestarter::environment {
 
 class Environment {
 public:
-  Environment(CPUTopology* topology)
-      : _topology(topology) {}
-  ~Environment() {
-    delete this->_topology;
-    if (_selectedConfig != nullptr) {
-      delete _selectedConfig;
-    }
+  Environment() = delete;
+  explicit Environment(CPUTopology* Topology)
+      : Topology(Topology) {}
+  virtual ~Environment() {
+    delete this->Topology;
+
+    delete SelectedConfig;
   }
 
-  int evaluateCpuAffinity(unsigned requestedNumThreads, std::string cpuBind);
-  int setCpuAffinity(unsigned thread);
+  auto evaluateCpuAffinity(unsigned RequestedNumThreads, std::string CpuBind) -> int;
+  auto setCpuAffinity(unsigned Thread) -> int;
   void printThreadSummary();
 
   virtual void evaluateFunctions() = 0;
-  virtual int selectFunction(unsigned functionId, bool allowUnavailablePayload) = 0;
-  virtual int selectInstructionGroups(std::string groups) = 0;
+  virtual auto selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) -> int = 0;
+  virtual auto selectInstructionGroups(std::string Groups) -> int = 0;
   virtual void printAvailableInstructionGroups() = 0;
-  virtual void setLineCount(unsigned lineCount) = 0;
+  virtual void setLineCount(unsigned LineCount) = 0;
   virtual void printSelectedCodePathSummary() = 0;
   virtual void printFunctionSummary() = 0;
 
-  platform::RuntimeConfig& selectedConfig() const {
+  [[nodiscard]] auto selectedConfig() const -> platform::RuntimeConfig& {
 #if defined(__clang__)
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wunused-value"
 #endif
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-value"
-    assert(("No RuntimeConfig selected", _selectedConfig != nullptr));
+    assert(("No RuntimeConfig selected", SelectedConfig != nullptr));
 #pragma GCC diagnostic pop
 #if defined(__clang__)
 #pragma clang diagnostic pop
 #endif
-    return *_selectedConfig;
+    return *SelectedConfig;
   }
 
-  unsigned long long requestedNumThreads() const { return _requestedNumThreads; }
+  [[nodiscard]] auto requestedNumThreads() const -> uint64_t { return RequestedNumThreads; }
 
-  CPUTopology const& topology() const {
-    assert(_topology != nullptr);
-    return *_topology;
+  [[nodiscard]] auto topology() const -> CPUTopology const& {
+    assert(Topology != nullptr);
+    return *Topology;
   }
 
 protected:
-  platform::RuntimeConfig* _selectedConfig = nullptr;
-  CPUTopology* _topology = nullptr;
+  platform::RuntimeConfig* SelectedConfig = nullptr;
+  CPUTopology* Topology = nullptr;
 
 private:
-  unsigned long long _requestedNumThreads;
+  uint64_t RequestedNumThreads = 0;
 
   // TODO: replace these functions with the builtins one from hwloc
-  int cpuAllowed(unsigned id);
-  int cpuSet(unsigned id);
+  auto cpuAllowed(unsigned Id) -> int;
+  auto cpuSet(unsigned Id) -> int;
 
-  std::vector<unsigned> cpuBind;
+  std::vector<unsigned> CpuBind;
 };
 
 } // namespace firestarter::environment
diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index f16d6879..3871400f 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -21,81 +21,87 @@
 
 #pragma once
 
-#include <initializer_list>
 #include <list>
 #include <string>
+#include <utility>
 #include <vector>
 
 namespace firestarter::environment::payload {
 
 class Payload {
 private:
-  std::string _name;
-  unsigned getSequenceStartCount(const std::vector<std::string>& sequence, const std::string start);
+  std::string Name;
+  [[nodiscard]] static auto getSequenceStartCount(const std::vector<std::string>& Sequence, const std::string& Start)
+      -> unsigned;
 
 protected:
-  unsigned _flops;
-  unsigned _bytes;
+  unsigned Flops = 0;
+  unsigned Bytes = 0;
   // number of instructions in load loop
-  unsigned _instructions;
+  unsigned Instructions = 0;
   // size of used simd registers in bytes
-  unsigned _registerSize;
+  unsigned RegisterSize = 0;
   // number of used simd registers
-  unsigned _registerCount;
+  unsigned RegisterCount = 0;
 
-  std::vector<std::string> generateSequence(const std::vector<std::pair<std::string, unsigned>>& proportion);
-  unsigned getL2SequenceCount(const std::vector<std::string>& sequence) {
-    return getSequenceStartCount(sequence, "L2");
+  [[nodiscard]] static auto generateSequence(const std::vector<std::pair<std::string, unsigned>>& Proportion)
+      -> std::vector<std::string>;
+  [[nodiscard]] static auto getL2SequenceCount(const std::vector<std::string>& Sequence) -> unsigned {
+    return getSequenceStartCount(Sequence, "L2");
   };
-  unsigned getL3SequenceCount(const std::vector<std::string>& sequence) {
-    return getSequenceStartCount(sequence, "L3");
+  [[nodiscard]] static auto getL3SequenceCount(const std::vector<std::string>& Sequence) -> unsigned {
+    return getSequenceStartCount(Sequence, "L3");
   };
-  unsigned getRAMSequenceCount(const std::vector<std::string>& sequence) {
-    return getSequenceStartCount(sequence, "RAM");
+  [[nodiscard]] static auto getRAMSequenceCount(const std::vector<std::string>& Sequence) -> unsigned {
+    return getSequenceStartCount(Sequence, "RAM");
   };
 
-  unsigned getNumberOfSequenceRepetitions(const std::vector<std::string>& sequence, const unsigned numberOfLines) {
-    if (sequence.size() == 0) {
+  [[nodiscard]] static auto getNumberOfSequenceRepetitions(const std::vector<std::string>& Sequence,
+                                                           const unsigned NumberOfLines) -> unsigned {
+    if (Sequence.size() == 0) {
       return 0;
     }
-    return numberOfLines / sequence.size();
+    return NumberOfLines / Sequence.size();
   };
 
-  unsigned getL2LoopCount(const std::vector<std::string>& sequence, const unsigned numberOfLines, const unsigned size,
-                          const unsigned threads);
-  unsigned getL3LoopCount(const std::vector<std::string>& sequence, const unsigned numberOfLines, const unsigned size,
-                          const unsigned threads);
-  unsigned getRAMLoopCount(const std::vector<std::string>& sequence, const unsigned numberOfLines, const unsigned size,
-                           const unsigned threads);
+  [[nodiscard]] static auto getL2LoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
+                                           unsigned Size, unsigned Threads) -> unsigned;
+  [[nodiscard]] static auto getL3LoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
+                                           unsigned Size, unsigned Threads) -> unsigned;
+  [[nodiscard]] static auto getRAMLoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
+                                            unsigned Size, unsigned Threads) -> unsigned;
 
 public:
-  Payload(std::string name, unsigned registerSize, unsigned registerCount)
-      : _name(name)
-      , _registerSize(registerSize)
-      , _registerCount(registerCount) {}
-  virtual ~Payload() {}
-
-  const std::string& name() const { return _name; }
-  unsigned flops() const { return _flops; }
-  unsigned bytes() const { return _bytes; }
-  unsigned instructions() const { return _instructions; }
-  unsigned registerSize() const { return _registerSize; }
-  unsigned registerCount() const { return _registerCount; }
-
-  virtual bool isAvailable() const = 0;
-
-  virtual void lowLoadFunction(volatile unsigned long long* addrHigh, unsigned long long period) = 0;
-
-  virtual int compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion,
-                             unsigned instructionCacheSize, std::list<unsigned> const& dataCacheBufferSize,
-                             unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-                             bool errorDetection) = 0;
-  virtual std::list<std::string> getAvailableInstructions() const = 0;
-  virtual void init(unsigned long long* memoryAddr, unsigned long long bufferSize) = 0;
-  virtual unsigned long long highLoadFunction(unsigned long long* addrMem, volatile unsigned long long* addrHigh,
-                                              unsigned long long iterations) = 0;
-
-  virtual Payload* clone() const = 0;
+  Payload() = delete;
+
+  Payload(std::string Name, unsigned RegisterSize, unsigned RegisterCount)
+      : Name(std::move(Name))
+      , RegisterSize(RegisterSize)
+      , RegisterCount(RegisterCount) {}
+  virtual ~Payload() = default;
+
+  [[nodiscard]] auto name() const -> const std::string& { return Name; }
+  [[nodiscard]] auto flops() const -> unsigned { return Flops; }
+  [[nodiscard]] auto bytes() const -> unsigned { return Bytes; }
+  [[nodiscard]] auto instructions() const -> unsigned { return Instructions; }
+  [[nodiscard]] auto registerSize() const -> unsigned { return RegisterSize; }
+  [[nodiscard]] auto registerCount() const -> unsigned { return RegisterCount; }
+
+  [[nodiscard]] virtual auto isAvailable() const -> bool = 0;
+
+  virtual void lowLoadFunction(volatile uint64_t* AddrHigh, uint64_t Period) = 0;
+
+  [[nodiscard]] virtual auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
+                                            unsigned InstructionCacheSize,
+                                            std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize,
+                                            unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+                                            bool ErrorDetection) -> int = 0;
+  [[nodiscard]] virtual auto getAvailableInstructions() const -> std::list<std::string> = 0;
+  virtual void init(uint64_t* MemoryAddr, uint64_t BufferSize) = 0;
+  [[nodiscard]] virtual auto highLoadFunction(uint64_t* AddrMem, volatile uint64_t* AddrHigh, uint64_t Iterations)
+      -> uint64_t = 0;
+
+  [[nodiscard]] virtual auto clone() const -> Payload* = 0;
 };
 
 } // namespace firestarter::environment::payload
diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp
index b396d134..5588cb8a 100644
--- a/include/firestarter/Environment/Platform/PlatformConfig.hpp
+++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp
@@ -21,79 +21,81 @@
 
 #pragma once
 
-#include <algorithm>
 #include <firestarter/Environment/Payload/Payload.hpp>
 #include <firestarter/Logging/Log.hpp>
 #include <initializer_list>
 #include <map>
 #include <sstream>
 #include <string>
+#include <utility>
 
 namespace firestarter::environment::platform {
 
 class PlatformConfig {
 private:
-  std::string _name;
-  std::list<unsigned> _threads;
-  payload::Payload* _payload;
+  std::string Name;
+  std::list<unsigned> Threads;
+  payload::Payload* Payload;
 
 protected:
-  unsigned _instructionCacheSize;
-  std::list<unsigned> _dataCacheBufferSize;
-  unsigned _ramBufferSize;
-  unsigned _lines;
+  unsigned InstructionCacheSize;
+  std::list<unsigned> DataCacheBufferSize;
+  unsigned RamBufferSize;
+  unsigned Lines;
 
 public:
-  PlatformConfig(std::string name, std::list<unsigned> threads, unsigned instructionCacheSize,
-                 std::initializer_list<unsigned> dataCacheBufferSize, unsigned ramBufferSize, unsigned lines,
-                 payload::Payload* payload)
-      : _name(name)
-      , _threads(threads)
-      , _payload(payload)
-      , _instructionCacheSize(instructionCacheSize)
-      , _dataCacheBufferSize(dataCacheBufferSize)
-      , _ramBufferSize(ramBufferSize)
-      , _lines(lines) {}
-  virtual ~PlatformConfig() { delete _payload; }
-
-  const std::string& name() const { return _name; }
-  unsigned instructionCacheSize() const { return _instructionCacheSize; }
-  const std::list<unsigned>& dataCacheBufferSize() const { return _dataCacheBufferSize; }
-  unsigned ramBufferSize() const { return _ramBufferSize; }
-  unsigned lines() const { return _lines; }
-  payload::Payload const& payload() const { return *_payload; }
-
-  std::map<unsigned, std::string> getThreadMap() const {
-    std::map<unsigned, std::string> threadMap;
-
-    for (auto const& thread : _threads) {
-      std::stringstream functionName;
-      functionName << "FUNC_" << name() << "_" << payload().name() << "_" << thread << "T";
-      threadMap[thread] = functionName.str();
+  PlatformConfig() = delete;
+
+  PlatformConfig(std::string Name, std::list<unsigned> Threads, unsigned InstructionCacheSize,
+                 std::initializer_list<unsigned> DataCacheBufferSize, unsigned RamBufferSize, unsigned Lines,
+                 payload::Payload* Payload)
+      : Name(std::move(Name))
+      , Threads(std::move(Threads))
+      , Payload(Payload)
+      , InstructionCacheSize(InstructionCacheSize)
+      , DataCacheBufferSize(DataCacheBufferSize)
+      , RamBufferSize(RamBufferSize)
+      , Lines(Lines) {}
+  virtual ~PlatformConfig() { delete Payload; }
+
+  [[nodiscard]] auto name() const -> const std::string& { return Name; }
+  [[nodiscard]] auto instructionCacheSize() const -> unsigned { return InstructionCacheSize; }
+  [[nodiscard]] auto dataCacheBufferSize() const -> const std::list<unsigned>& { return DataCacheBufferSize; }
+  [[nodiscard]] auto ramBufferSize() const -> unsigned { return RamBufferSize; }
+  [[nodiscard]] auto lines() const -> unsigned { return Lines; }
+  [[nodiscard]] auto payload() const -> payload::Payload const& { return *Payload; }
+
+  [[nodiscard]] auto getThreadMap() const -> std::map<unsigned, std::string> {
+    std::map<unsigned, std::string> ThreadMap;
+
+    for (auto const& Thread : Threads) {
+      std::stringstream FunctionName;
+      FunctionName << "FUNC_" << name() << "_" << payload().name() << "_" << Thread << "T";
+      ThreadMap[Thread] = FunctionName.str();
     }
 
-    return threadMap;
+    return ThreadMap;
   }
 
-  bool isAvailable() const { return payload().isAvailable(); }
+  [[nodiscard]] auto isAvailable() const -> bool { return payload().isAvailable(); }
 
-  virtual bool isDefault() const = 0;
+  [[nodiscard]] virtual auto isDefault() const -> bool = 0;
 
-  virtual std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const = 0;
+  [[nodiscard]] virtual auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> = 0;
 
-  std::string getDefaultPayloadSettingsString() const {
-    std::stringstream ss;
+  [[nodiscard]] auto getDefaultPayloadSettingsString() const -> std::string {
+    std::stringstream Ss;
 
     for (auto const& [name, value] : this->getDefaultPayloadSettings()) {
-      ss << name << ":" << value << ",";
+      Ss << name << ":" << value << ",";
     }
 
-    auto str = ss.str();
-    if (str.size() > 0) {
-      str.pop_back();
+    auto Str = Ss.str();
+    if (Str.size() > 0) {
+      Str.pop_back();
     }
 
-    return str;
+    return Str;
   }
 };
 
diff --git a/include/firestarter/Environment/Platform/RuntimeConfig.hpp b/include/firestarter/Environment/Platform/RuntimeConfig.hpp
index 86946877..3d1d1786 100644
--- a/include/firestarter/Environment/Platform/RuntimeConfig.hpp
+++ b/include/firestarter/Environment/Platform/RuntimeConfig.hpp
@@ -28,44 +28,44 @@ namespace firestarter::environment::platform {
 
 class RuntimeConfig {
 private:
-  PlatformConfig const& _platformConfig;
-  std::unique_ptr<payload::Payload> _payload;
-  unsigned _thread;
-  std::vector<std::pair<std::string, unsigned>> _payloadSettings;
-  unsigned _instructionCacheSize;
-  std::list<unsigned> _dataCacheBufferSize;
-  unsigned _ramBufferSize;
-  unsigned _lines;
+  PlatformConfig const& PlatformConfigValue;
+  std::unique_ptr<payload::Payload> Payload;
+  unsigned Thread;
+  std::vector<std::pair<std::string, unsigned>> PayloadSettings;
+  unsigned InstructionCacheSize;
+  std::list<unsigned> DataCacheBufferSize;
+  unsigned RamBufferSize;
+  unsigned Lines;
 
 public:
-  RuntimeConfig(PlatformConfig const& platformConfig, unsigned thread, unsigned detectedInstructionCacheSize)
-      : _platformConfig(platformConfig)
-      , _payload(nullptr)
-      , _thread(thread)
-      , _payloadSettings(platformConfig.getDefaultPayloadSettings())
-      , _instructionCacheSize(platformConfig.instructionCacheSize())
-      , _dataCacheBufferSize(platformConfig.dataCacheBufferSize())
-      , _ramBufferSize(platformConfig.ramBufferSize())
-      , _lines(platformConfig.lines()) {
-    if (detectedInstructionCacheSize != 0) {
-      this->_instructionCacheSize = detectedInstructionCacheSize;
+  RuntimeConfig(PlatformConfig const& PlatformConfigValue, unsigned Thread, unsigned DetectedInstructionCacheSize)
+      : PlatformConfigValue(PlatformConfigValue)
+      , Payload(nullptr)
+      , Thread(Thread)
+      , PayloadSettings(PlatformConfigValue.getDefaultPayloadSettings())
+      , InstructionCacheSize(PlatformConfigValue.instructionCacheSize())
+      , DataCacheBufferSize(PlatformConfigValue.dataCacheBufferSize())
+      , RamBufferSize(PlatformConfigValue.ramBufferSize())
+      , Lines(PlatformConfigValue.lines()) {
+    if (DetectedInstructionCacheSize != 0) {
+      this->InstructionCacheSize = DetectedInstructionCacheSize;
     }
   };
 
-  RuntimeConfig(const RuntimeConfig& c)
-      : _platformConfig(c.platformConfig())
-      , _payload(c.platformConfig().payload().clone())
-      , _thread(c.thread())
-      , _payloadSettings(c.payloadSettings())
-      , _instructionCacheSize(c.instructionCacheSize())
-      , _dataCacheBufferSize(c.dataCacheBufferSize())
-      , _ramBufferSize(c.ramBufferSize())
-      , _lines(c.lines()) {}
+  RuntimeConfig(const RuntimeConfig& Other)
+      : PlatformConfigValue(Other.platformConfig())
+      , Payload(Other.platformConfig().payload().clone())
+      , Thread(Other.thread())
+      , PayloadSettings(Other.payloadSettings())
+      , InstructionCacheSize(Other.instructionCacheSize())
+      , DataCacheBufferSize(Other.dataCacheBufferSize())
+      , RamBufferSize(Other.ramBufferSize())
+      , Lines(Other.lines()) {}
 
-  ~RuntimeConfig() { _payload.reset(); }
+  ~RuntimeConfig() { Payload.reset(); }
 
-  PlatformConfig const& platformConfig() const { return _platformConfig; }
-  payload::Payload& payload() const {
+  [[nodiscard]] auto platformConfig() const -> PlatformConfig const& { return PlatformConfigValue; }
+  [[nodiscard]] auto payload() const -> payload::Payload& {
 #if defined(__clang__)
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wunused-value"
@@ -74,33 +74,36 @@ class RuntimeConfig {
 #pragma GCC diagnostic ignored "-Wunused-value"
     assert(("Payload pointer is null. Each thread has to use it's own "
             "RuntimeConfig",
-            _payload != nullptr));
+            Payload != nullptr));
 #pragma GCC diagnostic pop
 #if defined(__clang__)
 #pragma clang diagnostic pop
 #endif
-    return *_payload;
+    return *Payload;
   }
-  unsigned thread() const { return _thread; }
-  const std::vector<std::pair<std::string, unsigned>>& payloadSettings() const { return _payloadSettings; }
-  std::vector<std::string> payloadItems() const {
-    std::vector<std::string> items;
-    for (auto const& pair : _payloadSettings) {
-      items.push_back(pair.first);
+  [[nodiscard]] auto thread() const -> unsigned { return Thread; }
+  [[nodiscard]] auto payloadSettings() const -> const std::vector<std::pair<std::string, unsigned>>& {
+    return PayloadSettings;
+  }
+  [[nodiscard]] auto payloadItems() const -> std::vector<std::string> {
+    std::vector<std::string> Items;
+    Items.reserve(PayloadSettings.size());
+    for (auto const& Pair : PayloadSettings) {
+      Items.push_back(Pair.first);
     }
-    return items;
+    return Items;
   }
 
-  unsigned instructionCacheSize() const { return _instructionCacheSize; }
-  const std::list<unsigned>& dataCacheBufferSize() const { return _dataCacheBufferSize; }
-  unsigned ramBufferSize() const { return _ramBufferSize; }
-  unsigned lines() const { return _lines; }
+  [[nodiscard]] auto instructionCacheSize() const -> unsigned { return InstructionCacheSize; }
+  [[nodiscard]] auto dataCacheBufferSize() const -> const std::list<unsigned>& { return DataCacheBufferSize; }
+  [[nodiscard]] auto ramBufferSize() const -> unsigned { return RamBufferSize; }
+  [[nodiscard]] auto lines() const -> unsigned { return Lines; }
 
-  void setPayloadSettings(std::vector<std::pair<std::string, unsigned>> const& payloadSettings) {
-    this->_payloadSettings = payloadSettings;
+  void setPayloadSettings(std::vector<std::pair<std::string, unsigned>> const& PayloadSettings) {
+    this->PayloadSettings = PayloadSettings;
   }
 
-  void setLineCount(unsigned lineCount) { this->_lines = lineCount; }
+  void setLineCount(unsigned LineCount) { this->Lines = LineCount; }
 
   void printCodePathSummary() const {
     log::info() << "\n"
@@ -112,10 +115,10 @@ class RuntimeConfig {
       log::info() << "    - L1i-Cache: " << instructionCacheSize() / thread() << " Bytes";
     }
 
-    unsigned i = 1;
-    for (auto const& bytes : dataCacheBufferSize()) {
-      log::info() << "    - L" << i << "d-Cache: " << bytes / thread() << " Bytes";
-      i++;
+    unsigned I = 1;
+    for (auto const& Bytes : dataCacheBufferSize()) {
+      log::info() << "    - L" << I << "d-Cache: " << Bytes / thread() << " Bytes";
+      I++;
     }
 
     log::info() << "    - Memory: " << ramBufferSize() / thread() << " Bytes";
diff --git a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
index e5fa736f..317b8196 100644
--- a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
@@ -26,26 +26,28 @@
 namespace firestarter::environment::x86::payload {
 class AVX512Payload final : public X86Payload {
 public:
-  AVX512Payload(asmjit::CpuFeatures const& supportedFeatures)
-      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX512_F}, "AVX512", 8, 32) {}
+  AVX512Payload() = delete;
 
-  int compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion, unsigned instructionCacheSize,
-                     std::list<unsigned> const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread,
-                     unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override;
-  std::list<std::string> getAvailableInstructions() const override;
-  void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override;
+  explicit AVX512Payload(asmjit::CpuFeatures const& SupportedFeatures)
+      : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kAVX512_F}, "AVX512", 8, 32) {}
 
-  firestarter::environment::payload::Payload* clone() const override {
+  auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
+                      std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
+                      unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
+  [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
+  void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
+
+  [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override {
     return new AVX512Payload(this->supportedFeatures());
   };
 
 private:
-  const std::map<std::string, unsigned> instructionFlops = {
+  const std::map<std::string, unsigned> InstructionFlops = {
       {"REG", 32},   {"L1_L", 32},  {"L1_BROADCAST", 16}, {"L1_S", 16}, {"L1_LS", 16}, {"L2_L", 32},
       {"L2_S", 16},  {"L2_LS", 16}, {"L3_L", 32},         {"L3_S", 16}, {"L3_LS", 16}, {"L3_P", 16},
       {"RAM_L", 32}, {"RAM_S", 16}, {"RAM_LS", 16},       {"RAM_P", 16}};
 
-  const std::map<std::string, unsigned> instructionMemory = {
+  const std::map<std::string, unsigned> InstructionMemory = {
       {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}};
 };
 } // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
index d0e7b381..6516c0de 100644
--- a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
@@ -26,25 +26,27 @@
 namespace firestarter::environment::x86::payload {
 class AVXPayload final : public X86Payload {
 public:
-  AVXPayload(asmjit::CpuFeatures const& supportedFeatures)
-      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX}, "AVX", 4, 16) {}
+  AVXPayload() = delete;
 
-  int compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion, unsigned instructionCacheSize,
-                     std::list<unsigned> const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread,
-                     unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override;
-  std::list<std::string> getAvailableInstructions() const override;
-  void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override;
+  explicit AVXPayload(asmjit::CpuFeatures const& SupportedFeatures)
+      : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kAVX}, "AVX", 4, 16) {}
 
-  firestarter::environment::payload::Payload* clone() const override {
+  auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
+                      std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
+                      unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
+  [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
+  void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
+
+  [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override {
     return new AVXPayload(this->supportedFeatures());
   };
 
 private:
-  const std::map<std::string, unsigned> instructionFlops = {
+  const std::map<std::string, unsigned> InstructionFlops = {
       {"REG", 4},  {"L1_L", 4},  {"L1_S", 4}, {"L1_LS", 4}, {"L2_L", 4},  {"L2_S", 4},   {"L2_LS", 4}, {"L3_L", 4},
       {"L3_S", 4}, {"L3_LS", 4}, {"L3_P", 4}, {"RAM_L", 4}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}};
 
-  const std::map<std::string, unsigned> instructionMemory = {
+  const std::map<std::string, unsigned> InstructionMemory = {
       {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}};
 };
 } // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
index 6a1d3ee5..bb623e68 100644
--- a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
@@ -27,26 +27,28 @@ namespace firestarter::environment::x86::payload {
 
 class FMA4Payload final : public X86Payload {
 public:
-  FMA4Payload(asmjit::CpuFeatures const& supportedFeatures)
-      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4}, "FMA4", 4,
+  FMA4Payload() = delete;
+
+  explicit FMA4Payload(asmjit::CpuFeatures const& SupportedFeatures)
+      : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4}, "FMA4", 4,
                    16) {}
 
-  int compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion, unsigned instructionCacheSize,
-                     std::list<unsigned> const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread,
-                     unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override;
-  std::list<std::string> getAvailableInstructions() const override;
-  void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override;
+  auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
+                      std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
+                      unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
+  [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
+  void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
 
-  firestarter::environment::payload::Payload* clone() const override {
+  [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override {
     return new FMA4Payload(this->supportedFeatures());
   };
 
 private:
-  const std::map<std::string, unsigned> instructionFlops = {
+  const std::map<std::string, unsigned> InstructionFlops = {
       {"REG", 8},  {"L1_L", 12}, {"L1_S", 8}, {"L1_LS", 8}, {"L2_L", 8},  {"L2_S", 4},   {"L2_LS", 4}, {"L3_L", 8},
       {"L3_S", 4}, {"L3_LS", 4}, {"L3_P", 4}, {"RAM_L", 8}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}};
 
-  const std::map<std::string, unsigned> instructionMemory = {
+  const std::map<std::string, unsigned> InstructionMemory = {
       {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}};
 };
-} // namespace firestarter::environment::x86::payload
+} // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
index da6c2b5a..b610a838 100644
--- a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
@@ -26,27 +26,29 @@
 namespace firestarter::environment::x86::payload {
 class FMAPayload final : public X86Payload {
 public:
-  FMAPayload(asmjit::CpuFeatures const& supportedFeatures)
-      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA}, "FMA", 4, 16) {}
+  FMAPayload() = delete;
 
-  int compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion, unsigned instructionCacheSize,
-                     std::list<unsigned> const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread,
-                     unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override;
-  std::list<std::string> getAvailableInstructions() const override;
-  void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override;
+  explicit FMAPayload(asmjit::CpuFeatures const& SupportedFeatures)
+      : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA}, "FMA", 4, 16) {}
 
-  firestarter::environment::payload::Payload* clone() const override {
+  auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
+                      std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
+                      unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
+  [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
+  void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
+
+  [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override {
     return new FMAPayload(this->supportedFeatures());
   };
 
 private:
-  const std::map<std::string, unsigned> instructionFlops = {
+  const std::map<std::string, unsigned> InstructionFlops = {
       {"REG", 16},        {"L1_L", 16},  {"L1_2L", 16}, {"L1_S", 8},      {"L1_LS", 8},     {"L1_LS_256", 8},
       {"L1_2LS_256", 16}, {"L2_L", 16},  {"L2_S", 8},   {"L2_LS", 8},     {"L2_LS_256", 8}, {"L2_2LS_256", 16},
       {"L3_L", 16},       {"L3_S", 8},   {"L3_LS", 8},  {"L3_LS_256", 8}, {"L3_P", 8},      {"RAM_L", 16},
       {"RAM_S", 8},       {"RAM_LS", 8}, {"RAM_P", 8}};
 
-  const std::map<std::string, unsigned> instructionMemory = {
+  const std::map<std::string, unsigned> InstructionMemory = {
       {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}};
 };
 } // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
index d923c9b3..538837b4 100644
--- a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
@@ -21,30 +21,33 @@
 
 #pragma once
 
+#include <cstdint>
 #include <firestarter/Environment/X86/Payload/X86Payload.hpp>
 
 namespace firestarter::environment::x86::payload {
 class SSE2Payload final : public X86Payload {
 public:
-  SSE2Payload(asmjit::CpuFeatures const& supportedFeatures)
-      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kSSE2}, "SSE2", 2, 16) {}
+  SSE2Payload() = delete;
 
-  int compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion, unsigned instructionCacheSize,
-                     std::list<unsigned> const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread,
-                     unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override;
-  std::list<std::string> getAvailableInstructions() const override;
-  void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override;
+  explicit SSE2Payload(asmjit::CpuFeatures const& SupportedFeatures)
+      : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kSSE2}, "SSE2", 2, 16) {}
 
-  firestarter::environment::payload::Payload* clone() const override {
+  auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
+                      std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
+                      unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
+  [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
+  void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
+
+  [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override {
     return new SSE2Payload(this->supportedFeatures());
   };
 
 private:
-  const std::map<std::string, unsigned> instructionFlops = {
+  const std::map<std::string, unsigned> InstructionFlops = {
       {"REG", 2},  {"L1_L", 2},  {"L1_S", 2}, {"L1_LS", 2}, {"L2_L", 2},  {"L2_S", 2},   {"L2_LS", 2}, {"L3_L", 2},
       {"L3_S", 2}, {"L3_LS", 2}, {"L3_P", 2}, {"RAM_L", 2}, {"RAM_S", 2}, {"RAM_LS", 2}, {"RAM_P", 2}};
 
-  const std::map<std::string, unsigned> instructionMemory = {
+  const std::map<std::string, unsigned> InstructionMemory = {
       {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}};
 };
 } // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index 87d5e0be..9e947143 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -23,10 +23,12 @@
 
 #include <asmjit/x86.h>
 
+#include <cstdint>
 #include <firestarter/DumpRegisterWorkerData.hpp>
 #include <firestarter/Environment/Payload/Payload.hpp>
 #include <firestarter/LoadWorkerData.hpp>
 #include <firestarter/Logging/Log.hpp>
+#include <utility>
 
 #define INIT_BLOCKSIZE 1024
 
@@ -35,38 +37,38 @@ namespace firestarter::environment::x86::payload {
 class X86Payload : public environment::payload::Payload {
 private:
   // we can use this to check, if our platform support this payload
-  asmjit::CpuFeatures const& _supportedFeatures;
-  std::list<asmjit::CpuFeatures::X86::Id> featureRequests;
+  asmjit::CpuFeatures const& SupportedFeatures;
+  std::list<asmjit::CpuFeatures::X86::Id> FeatureRequests;
 
 protected:
   //  asmjit::CodeHolder code;
-  asmjit::JitRuntime rt;
+  asmjit::JitRuntime Rt;
   // typedef int (*LoadFunction)(firestarter::ThreadData *);
-  typedef unsigned long long (*LoadFunction)(unsigned long long*, volatile unsigned long long*, unsigned long long);
-  LoadFunction loadFunction = nullptr;
+  using LoadFunctionType = uint64_t (*)(uint64_t*, volatile uint64_t*, uint64_t);
+  LoadFunctionType LoadFunction = nullptr;
 
-  asmjit::CpuFeatures const& supportedFeatures() const { return this->_supportedFeatures; }
+  [[nodiscard]] auto supportedFeatures() const -> asmjit::CpuFeatures const& { return this->SupportedFeatures; }
 
-  template <class IterReg, class VectorReg>
-  void emitErrorDetectionCode(asmjit::x86::Builder& cb, IterReg iter_reg, asmjit::x86::Gpq addrHigh_reg,
-                              asmjit::x86::Gpq pointer_reg, asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2);
+  template <class IterRegT, class VectorRegT>
+  void emitErrorDetectionCode(asmjit::x86::Builder& Cb, IterRegT IterReg, asmjit::x86::Gpq AddrHighReg,
+                              asmjit::x86::Gpq PointerReg, asmjit::x86::Gpq TempReg, asmjit::x86::Gpq TempReg2);
 
 public:
-  X86Payload(asmjit::CpuFeatures const& supportedFeatures,
-             std::initializer_list<asmjit::CpuFeatures::X86::Id> featureRequests, std::string name,
-             unsigned registerSize, unsigned registerCount)
-      : Payload(name, registerSize, registerCount)
-      , _supportedFeatures(supportedFeatures)
-      , featureRequests(featureRequests) {}
-
-  bool isAvailable() const override {
-    bool available = true;
-
-    for (auto const& feature : featureRequests) {
-      available &= this->_supportedFeatures.has(feature);
+  X86Payload(asmjit::CpuFeatures const& SupportedFeatures,
+             std::initializer_list<asmjit::CpuFeatures::X86::Id> FeatureRequests, std::string Name,
+             unsigned RegisterSize, unsigned RegisterCount)
+      : Payload(std::move(Name), RegisterSize, RegisterCount)
+      , SupportedFeatures(SupportedFeatures)
+      , FeatureRequests(FeatureRequests) {}
+
+  [[nodiscard]] auto isAvailable() const -> bool override {
+    bool Available = true;
+
+    for (auto const& Feature : FeatureRequests) {
+      Available &= this->SupportedFeatures.has(Feature);
     }
 
-    return available;
+    return Available;
   };
 
     // A generic implemenation for all x86 payloads
@@ -76,16 +78,15 @@ class X86Payload : public environment::payload::Payload {
 #endif
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Woverloaded-virtual"
-  void init(unsigned long long* memoryAddr, unsigned long long bufferSize, double firstValue, double lastValue);
+  void init(uint64_t* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue);
 #pragma GCC diagnostic pop
 #if defined(__clang__)
 #pragma clang diagnostic pop
 #endif
   // use cpuid and usleep as low load
-  void lowLoadFunction(volatile unsigned long long* addrHigh, unsigned long long period) override;
+  void lowLoadFunction(volatile uint64_t* AddrHigh, uint64_t Period) override;
 
-  unsigned long long highLoadFunction(unsigned long long* addrMem, volatile unsigned long long* addrHigh,
-                                      unsigned long long iterations) override;
+  auto highLoadFunction(uint64_t* AddrMem, volatile uint64_t* AddrHigh, uint64_t Iterations) -> uint64_t override;
 };
 
 } // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
index 7254cb55..425dd600 100644
--- a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
@@ -21,29 +21,32 @@
 
 #pragma once
 
+#include <cstdint>
 #include <firestarter/Environment/X86/Payload/X86Payload.hpp>
 
 namespace firestarter::environment::x86::payload {
 class ZENFMAPayload final : public X86Payload {
 public:
-  ZENFMAPayload(asmjit::CpuFeatures const& supportedFeatures)
-      : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA},
+  ZENFMAPayload() = delete;
+
+  explicit ZENFMAPayload(asmjit::CpuFeatures const& SupportedFeatures)
+      : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA},
                    "ZENFMA", 4, 16) {}
 
-  int compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion, unsigned instructionCacheSize,
-                     std::list<unsigned> const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread,
-                     unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override;
-  std::list<std::string> getAvailableInstructions() const override;
-  void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override;
+  auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
+                      std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
+                      unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
+  [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
+  void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
 
-  firestarter::environment::payload::Payload* clone() const override {
+  [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override {
     return new ZENFMAPayload(this->supportedFeatures());
   };
 
 private:
-  const std::map<std::string, unsigned> instructionFlops = {
+  const std::map<std::string, unsigned> InstructionFlops = {
       {"REG", 8}, {"L1_LS", 8}, {"L2_L", 8}, {"L3_L", 8}, {"RAM_L", 8}};
 
-  const std::map<std::string, unsigned> instructionMemory = {{"RAM_L", 64}};
+  const std::map<std::string, unsigned> InstructionMemory = {{"RAM_L", 64}};
 };
 } // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Environment/X86/X86CPUTopology.hpp b/include/firestarter/Environment/X86/X86CPUTopology.hpp
index fa3b033f..d9ca6393 100644
--- a/include/firestarter/Environment/X86/X86CPUTopology.hpp
+++ b/include/firestarter/Environment/X86/X86CPUTopology.hpp
@@ -31,38 +31,38 @@ class X86CPUTopology final : public CPUTopology {
 public:
   X86CPUTopology();
 
-  friend std::ostream& operator<<(std::ostream& stream, X86CPUTopology const& cpuTopology);
+  friend auto operator<<(std::ostream& Stream, X86CPUTopology const& CpuTopology) -> std::ostream&;
 
-  std::list<std::string> const& features() const override { return this->featureList; }
-  const asmjit::CpuFeatures& featuresAsmjit() const { return this->cpuInfo.features(); }
+  [[nodiscard]] auto features() const -> std::list<std::string> const& override { return this->FeatureList; }
+  [[nodiscard]] auto featuresAsmjit() const -> const asmjit::CpuFeatures& { return this->CpuInfo.features(); }
 
-  std::string const& vendor() const override { return this->_vendor; }
-  std::string const& model() const override { return this->_model; }
+  [[nodiscard]] auto vendor() const -> std::string const& override { return this->Vendor; }
+  [[nodiscard]] auto model() const -> std::string const& override { return this->Model; }
 
-  unsigned long long clockrate() const override;
+  [[nodiscard]] auto clockrate() const -> uint64_t override;
 
-  unsigned long long timestamp() const override;
+  [[nodiscard]] auto timestamp() const -> uint64_t override;
 
-  unsigned familyId() const { return this->cpuInfo.familyId(); }
-  unsigned modelId() const { return this->cpuInfo.modelId(); }
-  unsigned stepping() const { return this->cpuInfo.stepping(); }
+  [[nodiscard]] auto familyId() const -> unsigned { return this->CpuInfo.familyId(); }
+  [[nodiscard]] auto modelId() const -> unsigned { return this->CpuInfo.modelId(); }
+  [[nodiscard]] auto stepping() const -> unsigned { return this->CpuInfo.stepping(); }
 
 private:
-  bool hasRdtsc() const { return this->_hasRdtsc; }
-  bool hasInvariantRdtsc() const { return this->_hasInvariantRdtsc; }
-  void cpuid(unsigned long long* a, unsigned long long* b, unsigned long long* c, unsigned long long* d) const;
+  [[nodiscard]] auto hasRdtsc() const -> bool { return this->HasRdtsc; }
+  [[nodiscard]] auto hasInvariantRdtsc() const -> bool { return this->HasInvariantRdtsc; }
+  void cpuid(uint64_t* A, uint64_t* B, uint64_t* C, uint64_t* D) const;
 
-  asmjit::CpuInfo cpuInfo;
-  std::list<std::string> featureList;
+  asmjit::CpuInfo CpuInfo;
+  std::list<std::string> FeatureList;
 
-  bool _hasRdtsc;
-  bool _hasInvariantRdtsc;
-  std::string _vendor;
-  std::string _model;
+  bool HasRdtsc;
+  bool HasInvariantRdtsc;
+  std::string Vendor;
+  std::string Model;
 };
 
-inline std::ostream& operator<<(std::ostream& stream, X86CPUTopology const& cpuTopology) {
-  return cpuTopology.print(stream);
+inline auto operator<<(std::ostream& Stream, X86CPUTopology const& CpuTopology) -> std::ostream& {
+  return CpuTopology.print(Stream);
 }
 
-} // namespace firestarter::environment::x86
+} // namespace firestarter::environment::x86
\ No newline at end of file
diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp
index b0e3aa8d..7873c9c4 100644
--- a/include/firestarter/Environment/X86/X86Environment.hpp
+++ b/include/firestarter/Environment/X86/X86Environment.hpp
@@ -54,21 +54,21 @@ class X86Environment final : public Environment {
       : Environment(new X86CPUTopology()) {}
 
   ~X86Environment() {
-    for (auto const& config : platformConfigs) {
-      delete config;
+    for (auto const& Config : PlatformConfigs) {
+      delete Config;
     }
-    for (auto const& config : fallbackPlatformConfigs) {
-      delete config;
+    for (auto const& Config : FallbackPlatformConfigs) {
+      delete Config;
     }
   }
 
-  X86CPUTopology const& topology() { return *reinterpret_cast<X86CPUTopology*>(this->_topology); }
+  auto topology() -> X86CPUTopology const& { return *reinterpret_cast<X86CPUTopology*>(this->Topology); }
 
   void evaluateFunctions() override;
-  int selectFunction(unsigned functionId, bool allowUnavailablePayload) override;
-  int selectInstructionGroups(std::string groups) override;
+  auto selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) -> int override;
+  auto selectInstructionGroups(std::string Groups) -> int override;
   void printAvailableInstructionGroups() override;
-  void setLineCount(unsigned lineCount) override;
+  void setLineCount(unsigned LineCount) override;
   void printSelectedCodePathSummary() override;
   void printFunctionSummary() override;
 
@@ -77,16 +77,16 @@ class X86Environment final : public Environment {
   // of PlatformConfig. Add new PlatformConfig at the bottom to maintain
   // stable IDs.
   const std::list<std::function<platform::X86PlatformConfig*(asmjit::CpuFeatures const&, unsigned, unsigned, unsigned)>>
-      platformConfigsCtor = {REGISTER(KnightsLandingConfig), REGISTER(SkylakeConfig),   REGISTER(SkylakeSPConfig),
+      PlatformConfigsCtor = {REGISTER(KnightsLandingConfig), REGISTER(SkylakeConfig),   REGISTER(SkylakeSPConfig),
                              REGISTER(HaswellConfig),        REGISTER(HaswellEPConfig), REGISTER(SandyBridgeConfig),
                              REGISTER(SandyBridgeEPConfig),  REGISTER(NehalemConfig),   REGISTER(NehalemEPConfig),
                              REGISTER(BulldozerConfig),      REGISTER(NaplesConfig),    REGISTER(RomeConfig)};
 
-  std::list<platform::X86PlatformConfig*> platformConfigs;
+  std::list<platform::X86PlatformConfig*> PlatformConfigs;
 
   // List of fallback PlatformConfig. Add one for each x86 extension.
   const std::list<std::function<platform::X86PlatformConfig*(asmjit::CpuFeatures const&, unsigned, unsigned, unsigned)>>
-      fallbackPlatformConfigsCtor = {
+      FallbackPlatformConfigsCtor = {
           REGISTER(SkylakeSPConfig),   // AVX512
           REGISTER(BulldozerConfig),   // FMA4
           REGISTER(HaswellConfig),     // FMA
@@ -94,7 +94,7 @@ class X86Environment final : public Environment {
           REGISTER(NehalemConfig)      // SSE2
       };
 
-  std::list<platform::X86PlatformConfig*> fallbackPlatformConfigs;
+  std::list<platform::X86PlatformConfig*> FallbackPlatformConfigs;
 
 #undef REGISTER
 };
diff --git a/include/firestarter/ErrorDetectionStruct.hpp b/include/firestarter/ErrorDetectionStruct.hpp
index 4ed2e9fa..598cc4ed 100644
--- a/include/firestarter/ErrorDetectionStruct.hpp
+++ b/include/firestarter/ErrorDetectionStruct.hpp
@@ -21,6 +21,7 @@
 
 #pragma once
 
+#include <cstdint>
 namespace firestarter {
 
 struct ErrorDetectionStruct {
@@ -28,19 +29,19 @@ struct ErrorDetectionStruct {
   // one ptr (8B)
 
   // the pointer to 16B of communication
-  volatile unsigned long long* communicationLeft;
-  volatile unsigned long long localsLeft[4];
+  volatile uint64_t* CommunicationLeft;
+  volatile uint64_t LocalsLeft[4];
   // if this variable is not 0, an error occured in the comparison with the
   // left thread.
-  volatile unsigned long long errorLeft;
-  volatile unsigned long long paddingLeft[2];
+  volatile uint64_t ErrorLeft;
+  volatile uint64_t PaddingLeft[2];
 
-  volatile unsigned long long* communicationRight;
-  volatile unsigned long long localsRight[4];
+  volatile uint64_t* CommunicationRight;
+  volatile uint64_t LocalsRight[4];
   // if this variable is not 0, an error occured in the comparison with the
   // right thread.
-  volatile unsigned long long errorRight;
-  volatile unsigned long long paddingRight[2];
+  volatile uint64_t ErrorRight;
+  volatile uint64_t PaddingRight[2];
 };
 
 } // namespace firestarter
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index cb0218f0..8009c1c9 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -47,7 +47,6 @@
 
 #include <chrono>
 #include <condition_variable>
-#include <list>
 #include <memory>
 #include <mutex>
 #include <string>
@@ -63,57 +62,57 @@ namespace firestarter {
 
 class Firestarter {
 public:
-  Firestarter(const int argc, const char** argv, std::chrono::seconds const& timeout, unsigned loadPercent,
-              std::chrono::microseconds const& period, unsigned requestedNumThreads, std::string const& cpuBind,
-              bool printFunctionSummary, unsigned functionId, bool listInstructionGroups,
-              std::string const& instructionGroups, unsigned lineCount, bool allowUnavailablePayload,
-              bool dumpRegisters, std::chrono::seconds const& dumpRegistersTimeDelta,
-              std::string const& dumpRegistersOutpath, bool errorDetection, int gpus, unsigned gpuMatrixSize,
-              bool gpuUseFloat, bool gpuUseDouble, bool listMetrics, bool measurement,
-              std::chrono::milliseconds const& startDelta, std::chrono::milliseconds const& stopDelta,
-              std::chrono::milliseconds const& measurementInterval, std::vector<std::string> const& metricPaths,
-              std::vector<std::string> const& stdinMetrics, bool optimize, std::chrono::seconds const& preheat,
-              std::string const& optimizationAlgorithm, std::vector<std::string> const& optimizationMetrics,
-              std::chrono::seconds const& evaluationDuration, unsigned individuals, std::string const& optimizeOutfile,
-              unsigned generations, double nsga2_cr, double nsga2_m);
+  Firestarter(int Argc, const char** Argv, std::chrono::seconds const& Timeout, unsigned LoadPercent,
+              std::chrono::microseconds const& Period, unsigned RequestedNumThreads, std::string const& CpuBind,
+              bool PrintFunctionSummary, unsigned FunctionId, bool ListInstructionGroups,
+              std::string const& InstructionGroups, unsigned LineCount, bool AllowUnavailablePayload,
+              bool DumpRegisters, std::chrono::seconds const& DumpRegistersTimeDelta,
+              std::string const& DumpRegistersOutpath, bool ErrorDetection, int Gpus, unsigned GpuMatrixSize,
+              bool GpuUseFloat, bool GpuUseDouble, bool ListMetrics, bool Measurement,
+              std::chrono::milliseconds const& StartDelta, std::chrono::milliseconds const& StopDelta,
+              std::chrono::milliseconds const& MeasurementInterval, std::vector<std::string> const& MetricPaths,
+              std::vector<std::string> const& StdinMetrics, bool Optimize, std::chrono::seconds const& Preheat,
+              std::string const& OptimizationAlgorithm, std::vector<std::string> const& OptimizationMetrics,
+              std::chrono::seconds const& EvaluationDuration, unsigned Individuals, std::string const& OptimizeOutfile,
+              unsigned Generations, double Nsga2Cr, double Nsga2M);
 
   ~Firestarter();
 
   void mainThread();
 
 private:
-  const int _argc;
-  const char** _argv;
-  const std::chrono::seconds _timeout;
-  const unsigned _loadPercent;
-  std::chrono::microseconds _load;
-  std::chrono::microseconds _period;
-  const bool _dumpRegisters;
-  const std::chrono::seconds _dumpRegistersTimeDelta;
-  const std::string _dumpRegistersOutpath;
-  const bool _errorDetection;
-  const int _gpus;
-  const unsigned _gpuMatrixSize;
-  const bool _gpuUseFloat;
-  const bool _gpuUseDouble;
-  const std::chrono::milliseconds _startDelta;
-  const std::chrono::milliseconds _stopDelta;
-  const bool _measurement;
-  const bool _optimize;
-  const std::chrono::seconds _preheat;
-  const std::string _optimizationAlgorithm;
-  const std::vector<std::string> _optimizationMetrics;
-  const std::chrono::seconds _evaluationDuration;
-  const unsigned _individuals;
-  const std::string _optimizeOutfile;
-  const unsigned _generations;
-  const double _nsga2_cr;
-  const double _nsga2_m;
+  const int Argc;
+  const char** Argv;
+  const std::chrono::seconds Timeout;
+  const unsigned LoadPercent;
+  std::chrono::microseconds Load;
+  std::chrono::microseconds Period;
+  const bool DumpRegisters;
+  const std::chrono::seconds DumpRegistersTimeDelta;
+  const std::string DumpRegistersOutpath;
+  const bool ErrorDetection;
+  const int Gpus;
+  const unsigned GpuMatrixSize;
+  const bool GpuUseFloat;
+  const bool GpuUseDouble;
+  const std::chrono::milliseconds StartDelta;
+  const std::chrono::milliseconds StopDelta;
+  const bool Measurement;
+  const bool Optimize;
+  const std::chrono::seconds Preheat;
+  const std::string OptimizationAlgorithm;
+  const std::vector<std::string> OptimizationMetrics;
+  const std::chrono::seconds EvaluationDuration;
+  const unsigned Individuals;
+  const std::string OptimizeOutfile;
+  const unsigned Generations;
+  const double Nsga2Cr;
+  const double Nsga2M;
 
 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
-  environment::x86::X86Environment* _environment = nullptr;
+  environment::x86::X86Environment* Environment = nullptr;
 
-  environment::x86::X86Environment& environment() const { return *_environment; }
+  [[nodiscard]] auto environment() const -> environment::x86::X86Environment& { return *Environment; }
 #else
 #error "FIRESTARTER is not implemented for this ISA"
 #endif
@@ -127,14 +126,14 @@ class Firestarter {
 #endif
 
 #if defined(linux) || defined(__linux__)
-  inline static std::unique_ptr<optimizer::OptimizerWorker> _optimizer;
-  std::shared_ptr<measurement::MeasurementWorker> _measurementWorker;
-  std::unique_ptr<firestarter::optimizer::Algorithm> _algorithm;
-  firestarter::optimizer::Population _population;
+  inline static std::unique_ptr<optimizer::OptimizerWorker> Optimizer;
+  std::shared_ptr<measurement::MeasurementWorker> MeasurementWorker;
+  std::unique_ptr<firestarter::optimizer::Algorithm> Algorithm;
+  firestarter::optimizer::Population Population;
 #endif
 
   // LoadThreadWorker.cpp
-  int initLoadWorkers(bool lowLoad, unsigned long long period);
+  auto initLoadWorkers(bool LowLoad, uint64_t Period) -> int;
   void joinLoadWorkers();
   void printThreadErrorReport();
   void printPerformanceReport();
@@ -142,42 +141,43 @@ class Firestarter {
   void signalWork() { signalLoadWorkers(THREAD_WORK); };
 
   // WatchdogWorker.cpp
-  int watchdogWorker(std::chrono::microseconds period, std::chrono::microseconds load, std::chrono::seconds timeout);
+  auto watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load, std::chrono::seconds Timeout)
+      -> int;
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
   // DumpRegisterWorker.cpp
-  int initDumpRegisterWorker(std::chrono::seconds dumpTimeDelta, std::string dumpFilePath);
+  auto initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, std::string DumpFilePath) -> int;
   void joinDumpRegisterWorker();
 #endif
 
   // LoadThreadWorker.cpp
-  void signalLoadWorkers(int comm);
-  static void loadThreadWorker(std::shared_ptr<LoadWorkerData> td);
+  void signalLoadWorkers(int Comm);
+  static void loadThreadWorker(std::shared_ptr<LoadWorkerData> Td);
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
   // DumpRegisterWorker.cpp
-  static void dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> data);
+  static void dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Data);
 #endif
 
-  static void setLoad(unsigned long long value);
+  static void setLoad(uint64_t Value);
 
-  static void sigalrmHandler(int signum);
-  static void sigtermHandler(int signum);
+  static void sigalrmHandler(int Signum);
+  static void sigtermHandler(int Signum);
 
   // variables to control the termination of the watchdog
-  inline static bool _watchdog_terminate = false;
-  inline static std::condition_variable _watchdogTerminateAlert;
-  inline static std::mutex _watchdogTerminateMutex;
+  inline static bool WatchdogTerminate = false;
+  inline static std::condition_variable WatchdogTerminateAlert;
+  inline static std::mutex WatchdogTerminateMutex;
 
   // variable to control the load of the threads
-  inline static volatile unsigned long long loadVar = LOAD_LOW;
+  inline static volatile uint64_t LoadVar = LOAD_LOW;
 
-  std::vector<std::pair<std::thread, std::shared_ptr<LoadWorkerData>>> loadThreads;
+  std::vector<std::pair<std::thread, std::shared_ptr<LoadWorkerData>>> LoadThreads;
 
-  std::vector<std::shared_ptr<unsigned long long>> errorCommunication;
+  std::vector<std::shared_ptr<uint64_t>> ErrorCommunication;
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
-  std::thread dumpRegisterWorkerThread;
+  std::thread DumpRegisterWorkerThread;
 #endif
 };
 
diff --git a/include/firestarter/Json/Summary.hpp b/include/firestarter/Json/Summary.hpp
index d9a923cc..a2e8e03a 100644
--- a/include/firestarter/Json/Summary.hpp
+++ b/include/firestarter/Json/Summary.hpp
@@ -34,10 +34,10 @@ template <> struct adl_serializer<firestarter::measurement::Summary> {
   static void to_json(json& j, firestarter::measurement::Summary s) {
     j = json::object();
 
-    j["num_timepoints"] = s.num_timepoints;
-    j["duration"] = s.duration.count();
-    j["average"] = s.average;
-    j["stddev"] = s.stddev;
+    j["num_timepoints"] = s.NumTimepoints;
+    j["duration"] = s.Duration.count();
+    j["average"] = s.Average;
+    j["stddev"] = s.Stddev;
   }
 };
 } // namespace nlohmann
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index 78b11b80..eb7e0c3c 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -28,6 +28,7 @@
 #include <firestarter/ErrorDetectionStruct.hpp>
 #include <memory>
 #include <mutex>
+#include <utility>
 
 #define PAD_SIZE(size, align) align*(int)std::ceil((double)size / (double)align)
 
@@ -49,72 +50,72 @@ namespace firestarter {
 
 class LoadWorkerData {
 public:
-  LoadWorkerData(int id, environment::Environment& environment, volatile unsigned long long* loadVar,
-                 unsigned long long period, bool dumpRegisters, bool errorDetection)
-      : addrHigh(loadVar)
-      , period(period)
-      , dumpRegisters(dumpRegisters)
-      , errorDetection(errorDetection)
-      , _id(id)
-      , _environment(environment)
-      , _config(new environment::platform::RuntimeConfig(environment.selectedConfig())) {
+  LoadWorkerData(int Id, environment::Environment& Environment, volatile uint64_t* LoadVar, uint64_t Period,
+                 bool DumpRegisters, bool ErrorDetection)
+      : AddrHigh(LoadVar)
+      , Period(Period)
+      , DumpRegisters(DumpRegisters)
+      , ErrorDetection(ErrorDetection)
+      , Id(Id)
+      , Environment(Environment)
+      , Config(new environment::platform::RuntimeConfig(Environment.selectedConfig())) {
     // use REGISTER_MAX_NUM cache lines for the dumped registers
     // and another cache line for the control variable.
     // as we are doing aligned moves we only have the option to waste a
     // whole cacheline
-    addrOffset = dumpRegisters ? sizeof(DumpRegisterStruct) / sizeof(unsigned long long) : 0;
+    AddrOffset += DumpRegisters ? sizeof(DumpRegisterStruct) / sizeof(uint64_t) : 0;
 
-    addrOffset += errorDetection ? sizeof(ErrorDetectionStruct) / sizeof(unsigned long long) : 0;
+    AddrOffset += ErrorDetection ? sizeof(ErrorDetectionStruct) / sizeof(uint64_t) : 0;
   }
 
   ~LoadWorkerData() {
-    delete _config;
-    if (addrMem - addrOffset != nullptr) {
-      ALIGNED_FREE(addrMem - addrOffset);
+    delete Config;
+    if (AddrMem != nullptr) { // AddrMem starts as nullptr; subtracting AddrOffset from it would be UB
+      ALIGNED_FREE(AddrMem - AddrOffset);
     }
   }
 
-  void setErrorCommunication(std::shared_ptr<unsigned long long> communicationLeft,
-                             std::shared_ptr<unsigned long long> communicationRight) {
-    this->communicationLeft = communicationLeft;
-    this->communicationRight = communicationRight;
+  void setErrorCommunication(std::shared_ptr<uint64_t> CommunicationLeft,
+                             std::shared_ptr<uint64_t> CommunicationRight) {
+    this->CommunicationLeft = std::move(CommunicationLeft);
+    this->CommunicationRight = std::move(CommunicationRight);
   }
 
-  int id() const { return _id; }
-  environment::Environment& environment() const { return _environment; }
-  environment::platform::RuntimeConfig& config() const { return *_config; }
+  [[nodiscard]] auto id() const -> int { return Id; }
+  [[nodiscard]] auto environment() const -> environment::Environment& { return Environment; }
+  [[nodiscard]] auto config() const -> environment::platform::RuntimeConfig& { return *Config; }
 
-  const ErrorDetectionStruct* errorDetectionStruct() const {
-    return reinterpret_cast<ErrorDetectionStruct*>(addrMem - addrOffset);
+  [[nodiscard]] auto errorDetectionStruct() const -> const ErrorDetectionStruct* {
+    return reinterpret_cast<ErrorDetectionStruct*>(AddrMem - AddrOffset);
   }
 
-  int comm = THREAD_WAIT;
-  bool ack = false;
-  std::mutex mutex;
-  unsigned long long* addrMem = nullptr;
-  unsigned long long addrOffset;
-  volatile unsigned long long* addrHigh;
-  unsigned long long buffersizeMem;
-  unsigned long long iterations = 0;
+  int Comm = THREAD_WAIT;
+  bool Ack = false;
+  std::mutex Mutex;
+  uint64_t* AddrMem = nullptr;
+  uint64_t AddrOffset = 0;
+  volatile uint64_t* AddrHigh;
+  uint64_t BuffersizeMem{};
+  uint64_t Iterations = 0;
   // save the last iteration count when switching payloads
-  std::atomic<unsigned long long> lastIterations;
-  unsigned long long flops;
-  unsigned long long startTsc;
-  unsigned long long stopTsc;
-  std::atomic<unsigned long long> lastStartTsc;
-  std::atomic<unsigned long long> lastStopTsc;
+  std::atomic<uint64_t> LastIterations{};
+  uint64_t Flops{};
+  uint64_t StartTsc{};
+  uint64_t StopTsc{};
+  std::atomic<uint64_t> LastStartTsc{};
+  std::atomic<uint64_t> LastStopTsc{};
   // period in usecs
   // used in low load routine to sleep 1/100th of this time
-  unsigned long long period;
-  bool dumpRegisters;
-  bool errorDetection;
-  std::shared_ptr<unsigned long long> communicationLeft;
-  std::shared_ptr<unsigned long long> communicationRight;
+  uint64_t Period;
+  bool DumpRegisters;
+  bool ErrorDetection;
+  std::shared_ptr<uint64_t> CommunicationLeft;
+  std::shared_ptr<uint64_t> CommunicationRight;
 
 private:
-  int _id;
-  environment::Environment& _environment;
-  environment::platform::RuntimeConfig* _config;
+  int Id;
+  environment::Environment& Environment;
+  environment::platform::RuntimeConfig* Config;
 };
 
 } // namespace firestarter
diff --git a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
index a91e1228..3a0e68fc 100644
--- a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
+++ b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
@@ -25,23 +25,19 @@
 #include <nitro/log/severity.hpp>
 #include <thread>
 
-namespace firestarter {
-
-namespace logging {
+namespace firestarter::logging {
 
 template <typename Record> class FirstWorkerThreadFilter {
 public:
-  typedef Record record_type;
+  using record_type = Record;
 
-  static void setFirstThread(std::thread::id newFirstThread) { firstThread = newFirstThread; }
+  static void setFirstThread(std::thread::id NewFirstThread) { FirstThread = NewFirstThread; }
 
-  bool filter(Record& r) const {
-    return r.std_thread_id() == firstThread || r.severity() >= nitro::log::severity_level::error;
+  auto filter(Record& r) const -> bool {
+    return r.std_thread_id() == FirstThread || r.severity() >= nitro::log::severity_level::error;
   }
 
 private:
-  inline static std::thread::id firstThread{};
+  inline static std::thread::id FirstThread{};
 };
-} // namespace logging
-
-} // namespace firestarter
+} // namespace firestarter::logging
diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp
index c115a476..2045bd43 100644
--- a/include/firestarter/Measurement/MeasurementWorker.hpp
+++ b/include/firestarter/Measurement/MeasurementWorker.hpp
@@ -23,82 +23,83 @@
 
 #include <chrono>
 #include <firestarter/Logging/Log.hpp>
+#include <firestarter/Measurement/Metric/IPCEstimate.h>
+#include <firestarter/Measurement/Metric/Perf.h>
+#include <firestarter/Measurement/Metric/RAPL.h>
+#include <firestarter/Measurement/MetricInterface.h>
 #include <firestarter/Measurement/Summary.hpp>
 #include <firestarter/Measurement/TimeValue.hpp>
 #include <map>
 #include <mutex>
 
 extern "C" {
-#include <firestarter/Measurement/Metric/IPCEstimate.h>
-#include <firestarter/Measurement/Metric/Perf.h>
-#include <firestarter/Measurement/Metric/RAPL.h>
-#include <firestarter/Measurement/MetricInterface.h>
 #include <pthread.h>
 }
 
-void insertCallback(void* cls, const char* metricName, int64_t timeSinceEpoch, double value);
+void insertCallback(void* Cls, const char* MetricName, int64_t TimeSinceEpoch, double Value);
 
 namespace firestarter::measurement {
 
 class MeasurementWorker {
 private:
-  pthread_t workerThread;
-  pthread_t stdinThread;
+  pthread_t WorkerThread;
+  pthread_t StdinThread;
 
-  std::vector<metric_interface_t*> metrics = {&rapl_metric, &perf_ipc_metric, &perf_freq_metric, &ipc_estimate_metric};
+  std::vector<MetricInterface*> Metrics = {&RaplMetric, &PerfIpcMetric, &PerfFreqMetric, &IpcEstimateMetric};
 
-  std::mutex values_mutex;
-  std::map<std::string, std::vector<TimeValue>> values = {};
+  std::mutex ValuesMutex;
+  std::map<std::string, std::vector<TimeValue>> Values;
 
-  static int* dataAcquisitionWorker(void* measurementWorker);
+  static auto dataAcquisitionWorker(void* MeasurementWorker) -> int*;
 
-  static int* stdinDataAcquisitionWorker(void* measurementWorker);
+  static auto stdinDataAcquisitionWorker(void* MeasurementWorker) -> int*;
 
-  const metric_interface_t* findMetricByName(std::string metricName);
+  auto findMetricByName(std::string MetricName) -> const MetricInterface*;
 
-  std::chrono::milliseconds updateInterval;
+  std::chrono::milliseconds UpdateInterval;
 
-  std::chrono::high_resolution_clock::time_point startTime;
+  std::chrono::high_resolution_clock::time_point StartTime;
 
   // some metric values have to be devided by this
-  const unsigned long long numThreads;
+  const uint64_t NumThreads;
 
-  std::string availableMetricsString;
+  std::string AvailableMetricsString;
 
 #ifndef FIRESTARTER_LINK_STATIC
   std::vector<void*> _metricDylibs = {};
 #endif
 
-  std::vector<std::string> _stdinMetrics = {};
+  std::vector<std::string> StdinMetrics;
 
 public:
   // creates the worker thread
-  MeasurementWorker(std::chrono::milliseconds updateInterval, unsigned long long numThreads,
-                    std::vector<std::string> const& metricDylibs, std::vector<std::string> const& stdinMetrics);
+  MeasurementWorker(std::chrono::milliseconds UpdateInterval, uint64_t NumThreads,
+                    std::vector<std::string> const& MetricDylibs, std::vector<std::string> const& StdinMetrics);
 
   // stops the worker threads
   ~MeasurementWorker();
 
-  std::string const& availableMetrics() const { return this->availableMetricsString; }
+  [[nodiscard]] auto availableMetrics() const -> std::string const& { return this->AvailableMetricsString; }
 
-  std::vector<std::string> const& stdinMetrics() { return _stdinMetrics; }
+  auto stdinMetrics() -> std::vector<std::string> const& { return StdinMetrics; }
 
   // returns a list of metrics
-  std::vector<std::string> metricNames();
+  auto metricNames() -> std::vector<std::string>;
 
   // setup the selected metrics
   // returns a vector with the names of inialized metrics
-  std::vector<std::string> initMetrics(std::vector<std::string> const& metricNames);
+  auto initMetrics(std::vector<std::string> const& MetricNames) -> std::vector<std::string>;
 
   // callback function for metrics
-  void insertCallback(const char* metricName, int64_t timeSinceEpoch, double value);
+  void insertCallback(const char* MetricName, int64_t TimeSinceEpoch, double Value);
 
   // start the measurement
   void startMeasurement();
 
   // get the measurement values begining from measurement start until now.
-  std::map<std::string, Summary> getValues(std::chrono::milliseconds startDelta = std::chrono::milliseconds::zero(),
-                                           std::chrono::milliseconds stopDelta = std::chrono::milliseconds::zero());
+  auto getValues(std::chrono::milliseconds StartDelta = std::chrono::milliseconds::zero(),
+                 std::chrono::milliseconds StopDelta = std::chrono::milliseconds::zero())
+      -> std::map<std::string, Summary>;
 };
 
 } // namespace firestarter::measurement
diff --git a/include/firestarter/Measurement/Metric/IPCEstimate.h b/include/firestarter/Measurement/Metric/IPCEstimate.h
index 2c14bb0d..360c1d91 100644
--- a/include/firestarter/Measurement/Metric/IPCEstimate.h
+++ b/include/firestarter/Measurement/Metric/IPCEstimate.h
@@ -23,6 +23,9 @@
 
 #include <firestarter/Measurement/MetricInterface.h>
 
-extern metric_interface_t ipc_estimate_metric;
+extern "C" {
 
-extern void ipc_estimate_metric_insert(double value);
+extern MetricInterface IpcEstimateMetric;
+
+extern void ipcEstimateMetricInsert(double Value);
+};
\ No newline at end of file
diff --git a/include/firestarter/Measurement/Metric/Perf.h b/include/firestarter/Measurement/Metric/Perf.h
index 72221cca..39a070f6 100644
--- a/include/firestarter/Measurement/Metric/Perf.h
+++ b/include/firestarter/Measurement/Metric/Perf.h
@@ -23,6 +23,9 @@
 
 #include <firestarter/Measurement/MetricInterface.h>
 
-extern metric_interface_t perf_ipc_metric;
+extern "C" {
 
-extern metric_interface_t perf_freq_metric;
+extern MetricInterface PerfIpcMetric;
+
+extern MetricInterface PerfFreqMetric;
+};
\ No newline at end of file
diff --git a/include/firestarter/Measurement/Metric/RAPL.h b/include/firestarter/Measurement/Metric/RAPL.h
index d88e3d91..726ff61a 100644
--- a/include/firestarter/Measurement/Metric/RAPL.h
+++ b/include/firestarter/Measurement/Metric/RAPL.h
@@ -23,4 +23,7 @@
 
 #include <firestarter/Measurement/MetricInterface.h>
 
-extern metric_interface_t rapl_metric;
+extern "C" {
+
+extern MetricInterface RaplMetric;
+};
\ No newline at end of file
diff --git a/include/firestarter/Measurement/MetricInterface.h b/include/firestarter/Measurement/MetricInterface.h
index c0c1c58b..87352868 100644
--- a/include/firestarter/Measurement/MetricInterface.h
+++ b/include/firestarter/Measurement/MetricInterface.h
@@ -21,63 +21,73 @@
 
 #pragma once
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #include <stdint.h>
 
-// clang-format off
+// NOLINTBEGIN(modernize-use-using)
 typedef struct {
-  // Either set absolute or accumalative to specify the type of values from the
-  // metric.
-  uint32_t absolute : 1,
-           accumalative : 1,
-           // Set to divide metric values by thread count.
-           divide_by_thread_count : 1,
-           // Set to insert time-value pairs via callback function passed by
-           // register_insert_callback.
-           insert_callback : 1,
-					 // ignore the start and stop delta set by the user
-					 ignore_start_stop_delta : 1,
-           __reserved : 27;
-} metric_type_t;
-// clang-format on
+  uint32_t
+      // metric value is absolute
+      Absolute : 1,
+      // metric value accumulates
+      Accumalative : 1,
+      // Set to divide metric values by thread count.
+      DivideByThreadCount : 1,
+      // Set to insert time-value pairs via callback function passed by
+      // register_insert_callback.
+      InsertCallback : 1,
+      // ignore the start and stop delta set by the user
+      IgnoreStartStopDelta : 1,
+      // Reserved space to round up to 32 bits
+      Reserved : 27;
+} MetricType;
 
 // Define `metric_interface_t metric` inside your shared library to be able to
 // load it during runtime.
 typedef struct {
   // the name of the metric
-  const char* name;
+  const char* Name;
 
   // metric type with bitfield from metric_type_t
-  metric_type_t type;
+  MetricType Type;
 
   // the unit of the metric
-  const char* unit;
+  const char* Unit;
 
-  uint64_t callback_time;
+  uint64_t CallbackTime;
 
   // This function will be called every `callback_time` usecs. Disable by
   // setting `callback_time` to 0.
-  void (*callback)(void);
+  void (*Callback)(void); // NOLINT(modernize-redundant-void-arg) keep a real prototype for C consumers
 
   // init the metric.
   // returns EXIT_SUCCESS on success.
-  int32_t (*init)(void);
+  int32_t (*Init)(void); // NOLINT(modernize-redundant-void-arg) keep a real prototype for C consumers
 
   // deinit the metric.
   // returns EXIT_SUCCESS on success.
-  int32_t (*fini)(void);
+  int32_t (*Fini)(void); // NOLINT(modernize-redundant-void-arg) keep a real prototype for C consumers
 
   // Get a reading of the metric
   // Return EXIT_SUCCESS if we got a new value.
   // Set this function pointer to NULL if METRIC_INSERT_CALLBACK is specified.
-  int32_t (*get_reading)(double* value);
+  int32_t (*GetReading)(double* Value);
 
   // Get error in case return code not being EXIT_SUCCESS
-  const char* (*get_error)(void);
+  const char* (*GetError)(void); // NOLINT(modernize-redundant-void-arg) keep a real prototype for C consumers
 
   // If METRIC_INSERT_CALLBACK is set in the type, this function will be passed
   // a callback and the first argument for the callback.
   // Further arguments of callback are the metric name, an unix timestamp (time
   // since epoch) and a metric value.
-  int32_t (*register_insert_callback)(void (*)(void*, const char*, int64_t, double), void*);
+  int32_t (*RegisterInsertCallback)(void (*)(void*, const char*, int64_t, double), void*);
+
+} MetricInterface;
+// NOLINTEND(modernize-use-using)
 
-} metric_interface_t;
+#ifdef __cplusplus
+};
+#endif
\ No newline at end of file
diff --git a/include/firestarter/Measurement/Summary.hpp b/include/firestarter/Measurement/Summary.hpp
index 7f0d7899..09c91016 100644
--- a/include/firestarter/Measurement/Summary.hpp
+++ b/include/firestarter/Measurement/Summary.hpp
@@ -33,14 +33,14 @@ extern "C" {
 namespace firestarter::measurement {
 
 struct Summary {
-  size_t num_timepoints;
-  std::chrono::milliseconds duration;
+  size_t NumTimepoints;
+  std::chrono::milliseconds Duration;
 
-  double average;
-  double stddev;
+  double Average;
+  double Stddev;
 
-  static Summary calculate(std::vector<TimeValue>::iterator begin, std::vector<TimeValue>::iterator end,
-                           metric_type_t metricType, unsigned long long numThreads);
+  static auto calculate(std::vector<TimeValue>::iterator Begin, std::vector<TimeValue>::iterator End,
+                        MetricType MetricType, uint64_t NumThreads) -> Summary;
 };
 
 } // namespace firestarter::measurement
diff --git a/include/firestarter/Measurement/TimeValue.hpp b/include/firestarter/Measurement/TimeValue.hpp
index bf9377c9..cc168ad2 100644
--- a/include/firestarter/Measurement/TimeValue.hpp
+++ b/include/firestarter/Measurement/TimeValue.hpp
@@ -28,12 +28,12 @@ namespace firestarter::measurement {
 struct TimeValue {
   TimeValue() = default;
 
-  constexpr TimeValue(std::chrono::high_resolution_clock::time_point t, double v)
-      : time(t)
-      , value(v){};
+  constexpr TimeValue(std::chrono::high_resolution_clock::time_point Time, double Value)
+      : Time(Time)
+      , Value(Value){};
 
-  std::chrono::high_resolution_clock::time_point time;
-  double value;
+  std::chrono::high_resolution_clock::time_point Time;
+  double Value;
 };
 
 } // namespace firestarter::measurement
diff --git a/include/firestarter/OneAPI/OneAPI.hpp b/include/firestarter/OneAPI/OneAPI.hpp
index 0ed1844c..f6931e4d 100644
--- a/include/firestarter/OneAPI/OneAPI.hpp
+++ b/include/firestarter/OneAPI/OneAPI.hpp
@@ -24,25 +24,24 @@
 #include <condition_variable>
 #include <mutex>
 #include <thread>
-#include <vector>
 
 namespace firestarter::oneapi {
 
 class OneAPI {
 private:
-  std::thread _initThread;
-  std::condition_variable _waitForInitCv;
-  std::mutex _waitForInitCvMutex;
+  std::thread InitThread;
+  std::condition_variable WaitForInitCv;
+  std::mutex WaitForInitCvMutex;
 
-  static void initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble,
-                       unsigned matrixSize, int gpus);
+  static void initGpus(std::condition_variable& Cv, volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble,
+                       unsigned MatrixSize, int Gpus);
 
 public:
-  OneAPI(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus);
+  OneAPI(volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus);
 
   ~OneAPI() {
-    if (_initThread.joinable()) {
-      _initThread.join();
+    if (InitThread.joinable()) {
+      InitThread.join();
     }
   }
 };
diff --git a/include/firestarter/Optimizer/Algorithm.hpp b/include/firestarter/Optimizer/Algorithm.hpp
index d9186322..4cdae1ec 100644
--- a/include/firestarter/Optimizer/Algorithm.hpp
+++ b/include/firestarter/Optimizer/Algorithm.hpp
@@ -27,12 +27,12 @@ namespace firestarter::optimizer {
 
 class Algorithm {
 public:
-  Algorithm() {}
-  virtual ~Algorithm() {}
+  Algorithm() = default;
+  virtual ~Algorithm() = default;
 
-  virtual void checkPopulation(Population const& pop, std::size_t populationSize) = 0;
+  virtual void checkPopulation(Population const& Pop, std::size_t PopulationSize) = 0;
 
-  virtual Population evolve(Population& pop) = 0;
+  virtual auto evolve(Population& Pop) -> Population = 0;
 };
 
 } // namespace firestarter::optimizer
diff --git a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
index a144bb05..e02e7e14 100644
--- a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
+++ b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
@@ -27,17 +27,17 @@ namespace firestarter::optimizer::algorithm {
 
 class NSGA2 : public Algorithm {
 public:
-  NSGA2(unsigned gen, double cr, double m);
-  ~NSGA2() {}
+  NSGA2(unsigned Gen, double Cr, double M);
+  ~NSGA2() override = default;
 
-  void checkPopulation(firestarter::optimizer::Population const& pop, std::size_t populationSize) override;
+  void checkPopulation(firestarter::optimizer::Population const& Pop, std::size_t PopulationSize) override;
 
-  firestarter::optimizer::Population evolve(firestarter::optimizer::Population& pop) override;
+  auto evolve(firestarter::optimizer::Population& Pop) -> firestarter::optimizer::Population override;
 
 private:
-  unsigned _gen;
-  double _cr;
-  double _m;
+  unsigned Gen;
+  double Cr;
+  double M;
 };
 
 } // namespace firestarter::optimizer::algorithm
diff --git a/include/firestarter/Optimizer/History.hpp b/include/firestarter/Optimizer/History.hpp
index 2922301f..332b49c5 100644
--- a/include/firestarter/Optimizer/History.hpp
+++ b/include/firestarter/Optimizer/History.hpp
@@ -31,10 +31,8 @@
 #include <firestarter/Optimizer/Individual.hpp>
 #include <fstream>
 #include <iomanip>
-#include <iostream>
 #include <nlohmann/json.hpp>
 #include <optional>
-#include <tuple>
 #include <vector>
 
 extern "C" {
@@ -46,58 +44,58 @@ namespace firestarter::optimizer {
 struct History {
 private:
   // https://stackoverflow.com/questions/17074324/how-can-i-sort-two-vectors-in-the-same-way-with-criteria-that-uses-only-one-of/17074810#17074810
-  template <typename T, typename Compare>
-  inline static std::vector<std::size_t> sortPermutation(const std::vector<T>& vec, Compare& compare) {
-    std::vector<std::size_t> p(vec.size());
-    std::iota(p.begin(), p.end(), 0);
-    std::sort(p.begin(), p.end(), [&](std::size_t i, std::size_t j) { return compare(vec[i], vec[j]); });
-    return p;
+  template <typename T, typename CompareT>
+  static auto sortPermutation(const std::vector<T>& Vec, CompareT& Compare) -> std::vector<std::size_t> {
+    std::vector<std::size_t> P(Vec.size());
+    std::iota(P.begin(), P.end(), 0);
+    std::sort(P.begin(), P.end(), [&](std::size_t I, std::size_t J) { return Compare(Vec[I], Vec[J]); });
+    return P;
   }
 
-  inline static void padding(std::stringstream& ss, std::size_t width, std::size_t taken, char c) {
-    for (std::size_t i = 0; i < (std::max)(width, taken) - taken; ++i) {
-      ss << c;
+  static void padding(std::stringstream& Ss, std::size_t Width, std::size_t Taken, char C) {
+    for (std::size_t I = 0; I < (std::max)(Width, Taken) - Taken; ++I) {
+      Ss << C;
     }
   }
 
-  inline static int MAX_ELEMENT_PRINT_COUNT = 20;
-  inline static std::size_t MIN_COLUMN_WIDTH = 10;
+  inline static int MaxElementPrintCount = 20;
+  inline static std::size_t MinColumnWidth = 10;
 
-  inline static std::vector<Individual> _x = {};
-  inline static std::vector<std::map<std::string, firestarter::measurement::Summary>> _f = {};
+  inline static std::vector<Individual> X = {};
+  inline static std::vector<std::map<std::string, firestarter::measurement::Summary>> F = {};
 
 public:
-  inline static void append(std::vector<unsigned> const& ind,
-                            std::map<std::string, firestarter::measurement::Summary> const& metric) {
-    _x.push_back(ind);
-    _f.push_back(metric);
+  static void append(std::vector<unsigned> const& Ind,
+                     std::map<std::string, firestarter::measurement::Summary> const& Metric) {
+    X.push_back(Ind);
+    F.push_back(Metric);
   }
 
-  inline static std::optional<std::map<std::string, firestarter::measurement::Summary>>
-  find(std::vector<unsigned> const& individual) {
-    auto findEqual = [individual](auto const& ind) { return ind == individual; };
-    auto ind = std::find_if(_x.begin(), _x.end(), findEqual);
-    if (ind == _x.end()) {
+  static auto find(std::vector<unsigned> const& Individual)
+      -> std::optional<std::map<std::string, firestarter::measurement::Summary>> {
+    auto FindEqual = [Individual](auto const& ind) { return ind == Individual; };
+    auto Ind = std::find_if(X.begin(), X.end(), FindEqual);
+    if (Ind == X.end()) {
       return {};
     }
-    auto dist = std::distance(_x.begin(), ind);
-    return _f[dist];
+    auto Dist = std::distance(X.begin(), Ind);
+    return F[Dist];
   }
 
-  inline static void printBest(std::vector<std::string> const& optimizationMetrics,
-                               std::vector<std::string> const& payloadItems) {
+  static void printBest(std::vector<std::string> const& OptimizationMetrics,
+                        std::vector<std::string> const& PayloadItems) {
     // TODO: print paretto front
 
     // print the best 20 individuals for each metric in a format
     // where the user can give it to --run-instruction-groups directly
     std::map<std::string, std::size_t> columnWidth;
 
-    for (auto const& metric : optimizationMetrics) {
-      columnWidth[metric] = (std::max)(metric.size(), MIN_COLUMN_WIDTH);
+    for (auto const& metric : OptimizationMetrics) {
+      columnWidth[metric] = (std::max)(metric.size(), MinColumnWidth);
       firestarter::log::trace() << metric << ": " << columnWidth[metric];
     }
 
-    for (auto const& metric : optimizationMetrics) {
+    for (auto const& metric : OptimizationMetrics) {
       using SummaryMap = std::map<std::string, firestarter::measurement::Summary>;
       auto compareIndividual = [&metric](SummaryMap const& mapA, SummaryMap const& mapB) {
         auto summaryA = mapA.find(metric);
@@ -108,19 +106,19 @@ struct History {
           summaryB = mapB.find(metric.substr(1));
           assert(summaryA != mapA.end());
           assert(summaryB != mapB.end());
-          return summaryA->second.average < summaryB->second.average;
+          return summaryA->second.Average < summaryB->second.Average;
         }
 
         assert(summaryA != mapA.end());
         assert(summaryB != mapB.end());
-        return summaryA->second.average > summaryB->second.average;
+        return summaryA->second.Average > summaryB->second.Average;
       };
 
-      auto perm = sortPermutation(_f, compareIndividual);
+      auto perm = sortPermutation(F, compareIndividual);
 
-      auto formatIndividual = [&payloadItems](std::vector<unsigned> const& individual) {
+      auto formatIndividual = [&PayloadItems](std::vector<unsigned> const& individual) {
         std::string result = "";
-        assert(payloadItems.size() == individual.size());
+        assert(PayloadItems.size() == individual.size());
 
         for (std::size_t i = 0; i < individual.size(); ++i) {
           // skip zero values
@@ -131,7 +129,7 @@ struct History {
           if (result.size() != 0) {
             result += ",";
           }
-          result += payloadItems[i] + ":" + std::to_string(individual[i]);
+          result += PayloadItems[i] + ":" + std::to_string(individual[i]);
         }
 
         return result;
@@ -140,16 +138,16 @@ struct History {
       auto begin = perm.begin();
       auto end = perm.end();
 
-      // stop printing at a max of MAX_ELEMENT_PRINT_COUNT
-      if (std::distance(begin, end) > MAX_ELEMENT_PRINT_COUNT) {
+      // stop printing at a max of MaxElementPrintCount
+      if (std::distance(begin, end) > MaxElementPrintCount) {
         end = perm.begin();
-        std::advance(end, MAX_ELEMENT_PRINT_COUNT);
+        std::advance(end, MaxElementPrintCount);
       }
 
       // print each of the best elements
       std::size_t max = 0;
       for (auto it = begin; it != end; ++it) {
-        max = (std::max)(max, formatIndividual(_x[*it]).size());
+        max = (std::max)(max, formatIndividual(X[*it]).size());
       }
 
       std::stringstream firstLine;
@@ -162,7 +160,7 @@ struct History {
       secondLine << "  ";
       padding(secondLine, (std::max)(max, ind.size()), 0, '-');
 
-      for (auto const& metric : optimizationMetrics) {
+      for (auto const& metric : OptimizationMetrics) {
         auto width = columnWidth[metric];
 
         firstLine << " | ";
@@ -182,13 +180,13 @@ struct History {
 
       // print INDIVIDUAL | metric 1 | metric 2 | ... | metric N
       for (auto it = begin; it != end; ++it) {
-        auto const fitness = _f[*it];
-        auto const ind = formatIndividual(_x[*it]);
+        auto const fitness = F[*it];
+        auto const ind = formatIndividual(X[*it]);
 
         ss << "  " << ind;
         padding(ss, max, ind.size(), ' ');
 
-        for (auto const& metric : optimizationMetrics) {
+        for (auto const& metric : OptimizationMetrics) {
           auto width = columnWidth[metric];
           std::string value;
 
@@ -197,9 +195,9 @@ struct History {
           auto fitnessOfInvertedMetric = fitness.find(invertedMetric);
 
           if (fitnessOfMetric != fitness.end()) {
-            value = std::to_string(fitnessOfMetric->second.average);
+            value = std::to_string(fitnessOfMetric->second.Average);
           } else if (fitnessOfInvertedMetric != fitness.end()) {
-            value = std::to_string(fitnessOfInvertedMetric->second.average);
+            value = std::to_string(fitnessOfInvertedMetric->second.Average);
           } else {
             assert(false);
           }
@@ -220,86 +218,86 @@ struct History {
                                 "`--run-instruction-groups=INDIVIDUAL`";
   }
 
-  inline static void save(std::string const& path, std::string const& startTime,
-                          std::vector<std::string> const& payloadItems, const int argc, const char** argv) {
+  static void save(std::string const& Path, std::string const& StartTime, std::vector<std::string> const& PayloadItems,
+                   const int Argc, const char** Argv) {
     using json = nlohmann::json;
 
-    json j = json::object();
+    json J = json::object();
 
-    j["individuals"] = json::array();
-    for (auto const& ind : _x) {
-      j["individuals"].push_back(ind);
+    J["individuals"] = json::array();
+    for (auto const& Ind : X) {
+      J["individuals"].push_back(Ind);
     }
 
-    j["metrics"] = json::array();
-    for (auto const& eval : _f) {
-      j["metrics"].push_back(eval);
+    J["metrics"] = json::array();
+    for (auto const& Eval : F) {
+      J["metrics"].push_back(Eval);
     }
 
     // get the hostname
-    char cHostname[256];
-    std::string hostname;
-    if (0 != gethostname(cHostname, sizeof(cHostname))) {
-      hostname = "unknown";
+    char CHostname[256];
+    std::string Hostname;
+    if (0 != gethostname(CHostname, sizeof(CHostname))) {
+      Hostname = "unknown";
     } else {
-      hostname = cHostname;
+      Hostname = CHostname;
     }
 
-    j["hostname"] = hostname;
+    J["hostname"] = Hostname;
 
-    j["startTime"] = startTime;
-    j["endTime"] = getTime();
+    J["startTime"] = StartTime;
+    J["endTime"] = getTime();
 
     // save the payload items
-    j["payloadItems"] = json::array();
-    for (auto const& item : payloadItems) {
-      j["payloadItems"].push_back(item);
+    J["payloadItems"] = json::array();
+    for (auto const& Item : PayloadItems) {
+      J["payloadItems"].push_back(Item);
     }
 
     // save the arguments
-    j["args"] = json::array();
-    for (int i = 0; i < argc; ++i) {
-      j["args"].push_back(argv[i]);
+    J["args"] = json::array();
+    for (int I = 0; I < Argc; ++I) {
+      J["args"].push_back(Argv[I]);
     }
 
     // dump the output
-    std::string s = j.dump();
+    std::string S = J.dump();
 
-    firestarter::log::trace() << s;
+    firestarter::log::trace() << S;
 
-    std::string outpath = path;
-    if (outpath.empty()) {
-      char* pwd = get_current_dir_name();
-      if (pwd) {
-        outpath = pwd;
-        free(pwd);
+    std::string Outpath = Path;
+    if (Outpath.empty()) {
+      char* Pwd = get_current_dir_name();
+      if (Pwd) {
+        Outpath = Pwd;
+        free(Pwd);
       } else {
         firestarter::log::warn() << "Could not find $PWD.";
-        outpath = "/tmp";
+        Outpath = "/tmp";
       }
-      outpath += "/" + hostname + "_" + startTime + ".json";
+      Outpath += "/" + Hostname + "_" + StartTime + ".json";
     }
 
-    firestarter::log::info() << "\nDumping output json in " << outpath;
+    firestarter::log::info() << "\nDumping output json in " << Outpath;
 
-    std::ofstream fp(outpath);
+    std::ofstream Fp(Outpath);
 
-    if (fp.bad()) {
-      firestarter::log::error() << "Could not open " << outpath;
+    if (Fp.bad()) {
+      firestarter::log::error() << "Could not open " << Outpath;
       return;
     }
 
-    fp << s;
+    Fp << S;
 
-    fp.close();
+    Fp.close();
   }
 
-  inline static std::string getTime() {
-    auto t = std::time(nullptr);
-    auto tm = *std::localtime(&t);
-    std::stringstream ss;
-    ss << std::put_time(&tm, "%F_%T%z");
-    return ss.str();
+  static auto getTime() -> std::string {
+    auto T = std::time(nullptr);
+    auto Tm = *std::localtime(&T);
+    std::stringstream Ss;
+    Ss << std::put_time(&Tm, "%F_%T%z");
+    return Ss.str();
   }
 };
 } // namespace firestarter::optimizer
diff --git a/include/firestarter/Optimizer/OptimizerWorker.hpp b/include/firestarter/Optimizer/OptimizerWorker.hpp
index 816f4882..e98c25b9 100644
--- a/include/firestarter/Optimizer/OptimizerWorker.hpp
+++ b/include/firestarter/Optimizer/OptimizerWorker.hpp
@@ -32,26 +32,26 @@ namespace firestarter::optimizer {
 
 class OptimizerWorker {
 public:
-  OptimizerWorker(std::unique_ptr<firestarter::optimizer::Algorithm>&& algorithm,
-                  firestarter::optimizer::Population& population, std::string const& optimizationAlgorithm,
-                  unsigned individuals, std::chrono::seconds const& preheat);
+  OptimizerWorker(std::unique_ptr<firestarter::optimizer::Algorithm>&& Algorithm,
+                  firestarter::optimizer::Population& Population, std::string const& OptimizationAlgorithm,
+                  unsigned Individuals, std::chrono::seconds const& Preheat);
 
-  ~OptimizerWorker() {}
+  ~OptimizerWorker() = default;
 
   void join();
 
   void kill();
 
 private:
-  static void* optimizerThread(void* optimizerWorker);
+  static auto optimizerThread(void* OptimizerWorker) -> void*;
 
-  std::unique_ptr<firestarter::optimizer::Algorithm> _algorithm;
-  firestarter::optimizer::Population _population;
-  std::string _optimizationAlgorithm;
-  unsigned _individuals;
-  std::chrono::seconds _preheat;
+  std::unique_ptr<firestarter::optimizer::Algorithm> Algorithm;
+  firestarter::optimizer::Population Population;
+  std::string OptimizationAlgorithm;
+  unsigned Individuals;
+  std::chrono::seconds Preheat;
 
-  pthread_t workerThread;
+  pthread_t WorkerThread;
 };
 
 } // namespace firestarter::optimizer
diff --git a/include/firestarter/Optimizer/Population.hpp b/include/firestarter/Optimizer/Population.hpp
index 3bf3ac38..757a2e46 100644
--- a/include/firestarter/Optimizer/Population.hpp
+++ b/include/firestarter/Optimizer/Population.hpp
@@ -29,7 +29,6 @@
 #include <memory>
 #include <optional>
 #include <random>
-#include <tuple>
 #include <vector>
 
 namespace firestarter::optimizer {
@@ -39,60 +38,60 @@ class Population {
   // Construct a population from a problem.
   Population() = default;
 
-  Population(std::shared_ptr<Problem>&& problem)
-      : _problem(std::move(problem))
-      , gen(rd()) {}
+  explicit Population(std::shared_ptr<Problem>&& ProblemPtr)
+      : ProblemPtr(std::move(ProblemPtr))
+      , Gen(Rd()) {}
 
-  Population(Population& pop)
-      : _problem(pop._problem)
-      , _x(pop._x)
-      , _f(pop._f)
-      , gen(rd()) {}
+  Population(Population& Pop)
+      : ProblemPtr(Pop.ProblemPtr)
+      , X(Pop.X)
+      , F(Pop.F)
+      , Gen(Rd()) {}
 
-  Population& operator=(Population const& pop) {
-    _problem = std::move(pop._problem);
-    _x = pop._x;
-    _f = pop._f;
-    gen = pop.gen;
+  auto operator=(Population const& Pop) -> Population& {
+    ProblemPtr = Pop.ProblemPtr;
+    X = Pop.X;
+    F = Pop.F;
+    Gen = Pop.Gen;
 
     return *this;
   }
 
-  ~Population() {}
+  ~Population() = default;
 
-  void generateInitialPopulation(std::size_t populationSize = 0);
+  void generateInitialPopulation(std::size_t PopulationSize = 0);
 
-  std::size_t size() const;
+  [[nodiscard]] auto size() const -> std::size_t;
 
   // add one individual to the population. fitness will be evaluated.
-  void append(Individual const& ind);
+  void append(Individual const& Ind);
 
-  void insert(std::size_t idx, Individual const& ind, std::vector<double> const& fit);
+  void insert(std::size_t Idx, Individual const& Ind, std::vector<double> const& Fit);
 
   // get a random individual inside bounds of problem
-  Individual getRandomIndividual();
+  auto getRandomIndividual() -> Individual;
 
   // returns the best individual in case of single-objective.
   // return nothing in case of mutli-objective.
-  std::optional<Individual> bestIndividual() const;
+  [[nodiscard]] auto bestIndividual() const -> std::optional<Individual>;
 
-  Problem const& problem() const { return *_problem; }
+  [[nodiscard]] auto problem() const -> Problem const& { return *ProblemPtr; }
 
-  std::vector<Individual> const& x() const { return _x; }
-  std::vector<std::vector<double>> const& f() const { return _f; }
+  [[nodiscard]] auto x() const -> std::vector<Individual> const& { return X; }
+  [[nodiscard]] auto f() const -> std::vector<std::vector<double>> const& { return F; }
 
 private:
   // add one individual to the population with a fitness.
-  void append(Individual const& ind, std::vector<double> const& fit);
+  void append(Individual const& Ind, std::vector<double> const& Fit);
 
   // our problem.
-  std::shared_ptr<Problem> _problem;
+  std::shared_ptr<Problem> ProblemPtr;
 
-  std::vector<Individual> _x;
-  std::vector<std::vector<double>> _f;
+  std::vector<Individual> X;
+  std::vector<std::vector<double>> F;
 
-  std::random_device rd;
-  std::mt19937 gen;
+  std::random_device Rd;
+  std::mt19937 Gen;
 };
 
 } // namespace firestarter::optimizer
diff --git a/include/firestarter/Optimizer/Problem.hpp b/include/firestarter/Optimizer/Problem.hpp
index 009b4d01..df31ec98 100644
--- a/include/firestarter/Optimizer/Problem.hpp
+++ b/include/firestarter/Optimizer/Problem.hpp
@@ -32,33 +32,33 @@ namespace firestarter::optimizer {
 
 class Problem {
 public:
-  Problem()
-      : _fevals(0) {}
-  virtual ~Problem() {}
+  Problem() = default;
+  virtual ~Problem() = default;
 
   // return the fitness for an individual
-  virtual std::map<std::string, firestarter::measurement::Summary> metrics(Individual const& individual) = 0;
+  virtual auto metrics(Individual const& Individual) -> std::map<std::string, firestarter::measurement::Summary> = 0;
 
-  virtual std::vector<double> fitness(std::map<std::string, firestarter::measurement::Summary> const& summaries) = 0;
+  virtual auto fitness(std::map<std::string, firestarter::measurement::Summary> const& Summaries)
+      -> std::vector<double> = 0;
 
   // get the bounds of the problem
-  virtual std::vector<std::tuple<unsigned, unsigned>> getBounds() const = 0;
+  [[nodiscard]] virtual auto getBounds() const -> std::vector<std::tuple<unsigned, unsigned>> = 0;
 
   // get the number of dimensions of the problem
-  std::size_t getDims() const { return this->getBounds().size(); };
+  [[nodiscard]] auto getDims() const -> std::size_t { return this->getBounds().size(); };
 
   // get the number of objectives.
-  virtual std::size_t getNobjs() const = 0;
+  [[nodiscard]] virtual auto getNobjs() const -> std::size_t = 0;
 
   // is the problem multiobjective
-  bool isMO() const { return this->getNobjs() > 1; };
+  [[nodiscard]] auto isMO() const -> bool { return this->getNobjs() > 1; };
 
   // get the number of fitness evaluations
-  unsigned long long getFevals() const { return _fevals; };
+  [[nodiscard]] auto getFevals() const -> uint64_t { return Fevals; };
 
 protected:
   // number of fitness evaluations
-  unsigned long long _fevals;
+  uint64_t Fevals = 0;
 };
 
 } // namespace firestarter::optimizer
diff --git a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
index f24ae2f2..74346a74 100644
--- a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
+++ b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
@@ -24,9 +24,9 @@
 #include <cassert>
 #include <chrono>
 #include <cmath>
+#include <firestarter/Measurement/MeasurementWorker.hpp>
 #include <firestarter/Optimizer/Problem.hpp>
 #include <functional>
-#include <memory>
 #include <thread>
 #include <tuple>
 #include <utility>
@@ -35,103 +35,105 @@ namespace firestarter::optimizer::problem {
 
 class CLIArgumentProblem final : public firestarter::optimizer::Problem {
 public:
-  CLIArgumentProblem(std::function<void(std::vector<std::pair<std::string, unsigned>> const&)>&& changePayloadFunction,
-                     std::shared_ptr<firestarter::measurement::MeasurementWorker> const& measurementWorker,
-                     std::vector<std::string> const& metrics, std::chrono::seconds timeout,
-                     std::chrono::milliseconds startDelta, std::chrono::milliseconds stopDelta,
-                     std::vector<std::string> const& instructionGroups)
-      : _changePayloadFunction(changePayloadFunction)
-      , _measurementWorker(measurementWorker)
-      , _metrics(metrics)
-      , _timeout(timeout)
-      , _startDelta(startDelta)
-      , _stopDelta(stopDelta)
-      , _instructionGroups(instructionGroups) {
-    assert(_metrics.size() != 0);
+  CLIArgumentProblem(std::function<void(std::vector<std::pair<std::string, unsigned>> const&)>&& ChangePayloadFunction,
+                     std::shared_ptr<firestarter::measurement::MeasurementWorker> const& MeasurementWorker,
+                     std::vector<std::string> const& Metrics, std::chrono::seconds Timeout,
+                     std::chrono::milliseconds StartDelta, std::chrono::milliseconds StopDelta,
+                     std::vector<std::string> const& InstructionGroups)
+      : ChangePayloadFunction(ChangePayloadFunction)
+      , MeasurementWorker(MeasurementWorker)
+      , Metrics(Metrics)
+      , Timeout(Timeout)
+      , StartDelta(StartDelta)
+      , StopDelta(StopDelta)
+      , InstructionGroups(InstructionGroups) {
+    assert(Metrics.size() != 0);
   }
 
-  ~CLIArgumentProblem() {}
+  ~CLIArgumentProblem() override = default;
 
   // return all available metrics for the individual
-  std::map<std::string, firestarter::measurement::Summary> metrics(std::vector<unsigned> const& individual) override {
+  auto metrics(std::vector<unsigned> const& Individual)
+      -> std::map<std::string, firestarter::measurement::Summary> override {
     // increment evaluation idx
-    _fevals++;
+    Fevals++;
 
     // change the payload
-    assert(_instructionGroups.size() == individual.size());
-    std::vector<std::pair<std::string, unsigned>> payload = {};
-    auto it1 = _instructionGroups.begin();
-    auto it2 = individual.begin();
-    for (; it1 != _instructionGroups.end(); ++it1, ++it2) {
-      payload.push_back(std::make_pair(*it1, *it2));
+    assert(InstructionGroups.size() == Individual.size());
+    std::vector<std::pair<std::string, unsigned>> Payload = {};
+    auto It1 = InstructionGroups.begin();
+    auto It2 = Individual.begin();
+    for (; It1 != InstructionGroups.end(); ++It1, ++It2) {
+      Payload.emplace_back(*It1, *It2);
     }
-    _changePayloadFunction(payload);
+    ChangePayloadFunction(Payload);
 
     // start the measurement
     // NOTE: starting the measurement must happen after switching to not
     // mess up ipc-estimate metric
-    _measurementWorker->startMeasurement();
+    MeasurementWorker->startMeasurement();
 
     // wait for the measurement to finish
-    std::this_thread::sleep_for(_timeout);
+    std::this_thread::sleep_for(Timeout);
 
     // FIXME: this is an ugly workaround for the ipc-estimate metric
     // changeing the payload triggers a write of the iteration counter of
     // the last payload, which we use to estimate the ipc.
-    _changePayloadFunction(payload);
+    ChangePayloadFunction(Payload);
 
     // return the results
-    return _measurementWorker->getValues(_startDelta, _stopDelta);
+    return MeasurementWorker->getValues(StartDelta, StopDelta);
   }
 
-  std::vector<double> fitness(std::map<std::string, firestarter::measurement::Summary> const& summaries) override {
-    std::vector<double> values = {};
+  auto fitness(std::map<std::string, firestarter::measurement::Summary> const& Summaries)
+      -> std::vector<double> override {
+    std::vector<double> Values = {};
 
-    for (auto const& metricName : _metrics) {
-      auto findName = [metricName](auto const& summary) {
-        auto invertedName = "-" + summary.first;
-        return metricName.compare(summary.first) == 0 || metricName.compare(invertedName) == 0;
+    for (auto const& MetricName : Metrics) {
+      auto FindName = [MetricName](auto const& Summary) {
+        auto InvertedName = "-" + Summary.first;
+        return MetricName.compare(Summary.first) == 0 || MetricName.compare(InvertedName) == 0;
       };
 
-      auto it = std::find_if(summaries.begin(), summaries.end(), findName);
+      auto It = std::find_if(Summaries.begin(), Summaries.end(), FindName);
 
-      if (it == summaries.end()) {
+      if (It == Summaries.end()) {
         continue;
       }
 
       // round to two decimal places after the comma
-      auto value = std::round(it->second.average * 100.0) / 100.0;
+      auto Value = std::round(It->second.Average * 100.0) / 100.0;
 
       // invert metric
-      if (metricName[0] == '-') {
-        value *= -1.0;
+      if (MetricName[0] == '-') {
+        Value *= -1.0;
       }
 
-      values.push_back(value);
+      Values.push_back(Value);
     }
 
-    return values;
+    return Values;
   }
 
   // get the bounds of the problem
-  std::vector<std::tuple<unsigned, unsigned>> getBounds() const override {
-    std::vector<std::tuple<unsigned, unsigned>> vec(_instructionGroups.size(),
+  [[nodiscard]] auto getBounds() const -> std::vector<std::tuple<unsigned, unsigned>> override {
+    std::vector<std::tuple<unsigned, unsigned>> Vec(InstructionGroups.size(),
                                                     std::make_tuple<unsigned, unsigned>(0, 100));
 
-    return vec;
+    return Vec;
   }
 
   // get the number of objectives.
-  std::size_t getNobjs() const override { return _metrics.size(); }
+  [[nodiscard]] auto getNobjs() const -> std::size_t override { return Metrics.size(); }
 
 private:
-  std::function<void(std::vector<std::pair<std::string, unsigned>> const&)> _changePayloadFunction;
-  std::shared_ptr<firestarter::measurement::MeasurementWorker> _measurementWorker;
-  std::vector<std::string> _metrics;
-  std::chrono::seconds _timeout;
-  std::chrono::milliseconds _startDelta;
-  std::chrono::milliseconds _stopDelta;
-  std::vector<std::string> _instructionGroups;
+  std::function<void(std::vector<std::pair<std::string, unsigned>> const&)> ChangePayloadFunction;
+  std::shared_ptr<firestarter::measurement::MeasurementWorker> MeasurementWorker;
+  std::vector<std::string> Metrics;
+  std::chrono::seconds Timeout;
+  std::chrono::milliseconds StartDelta;
+  std::chrono::milliseconds StopDelta;
+  std::vector<std::string> InstructionGroups;
 };
 
 } // namespace firestarter::optimizer::problem
diff --git a/include/firestarter/Optimizer/Util/MultiObjective.hpp b/include/firestarter/Optimizer/Util/MultiObjective.hpp
index da61bf73..fab62be8 100644
--- a/include/firestarter/Optimizer/Util/MultiObjective.hpp
+++ b/include/firestarter/Optimizer/Util/MultiObjective.hpp
@@ -28,32 +28,31 @@
 
 namespace firestarter::optimizer::util {
 
-bool less_than_f(double a, double b);
+auto lessThanF(double A, double B) -> bool;
 
-bool greater_than_f(double a, double b);
+auto greaterThanF(double A, double B) -> bool;
 
-bool pareto_dominance(const std::vector<double>& obj1, const std::vector<double>& obj2);
+auto paretoDominance(const std::vector<double>& Obj1, const std::vector<double>& Obj2) -> bool;
 
-std::tuple<std::vector<std::vector<std::size_t>>, std::vector<std::vector<std::size_t>>, std::vector<std::size_t>,
-           std::vector<std::size_t>>
-fast_non_dominated_sorting(const std::vector<std::vector<double>>& points);
+auto fastNonDominatedSorting(const std::vector<std::vector<double>>& Points)
+    -> std::tuple<std::vector<std::vector<std::size_t>>, std::vector<std::vector<std::size_t>>,
+                  std::vector<std::size_t>, std::vector<std::size_t>>;
 
-std::vector<double> crowding_distance(const std::vector<std::vector<double>>& non_dom_front);
+auto crowdingDistance(const std::vector<std::vector<double>>& NonDomFront) -> std::vector<double>;
 
-std::vector<double>::size_type
-mo_tournament_selection(std::vector<double>::size_type idx1, std::vector<double>::size_type idx2,
-                        const std::vector<std::vector<double>::size_type>& non_domination_rank,
-                        const std::vector<double>& crowding_d, std::mt19937& mt);
+auto moTournamentSelection(std::vector<double>::size_type Idx1, std::vector<double>::size_type Idx2,
+                           const std::vector<std::vector<double>::size_type>& NonDominationRank,
+                           const std::vector<double>& CrowdingD, std::mt19937& Mt) -> std::vector<double>::size_type;
 
-std::pair<firestarter::optimizer::Individual, firestarter::optimizer::Individual>
-sbx_crossover(const firestarter::optimizer::Individual& parent1, const firestarter::optimizer::Individual& parent2,
-              const double p_cr, std::mt19937& mt);
+auto sbxCrossover(const firestarter::optimizer::Individual& Parent1, const firestarter::optimizer::Individual& Parent2,
+                  double PCr, std::mt19937& Mt)
+    -> std::pair<firestarter::optimizer::Individual, firestarter::optimizer::Individual>;
 
-void polynomial_mutation(firestarter::optimizer::Individual& child,
-                         const std::vector<std::tuple<unsigned, unsigned>>& bounds, const double p_m, std::mt19937& mt);
+void polynomialMutation(firestarter::optimizer::Individual& Child,
+                        const std::vector<std::tuple<unsigned, unsigned>>& Bounds, double PM, std::mt19937& Mt);
 
-std::vector<std::size_t> select_best_N_mo(const std::vector<std::vector<double>>& input_f, std::size_t N);
+auto selectBestNMo(const std::vector<std::vector<double>>& InputF, std::size_t N) -> std::vector<std::size_t>;
 
-std::vector<double> ideal(const std::vector<std::vector<double>>& points);
+auto ideal(const std::vector<std::vector<double>>& Points) -> std::vector<double>;
 
 } // namespace firestarter::optimizer::util
diff --git a/src/firestarter/Cuda/Cuda.cpp b/src/firestarter/Cuda/Cuda.cpp
index 8a17021f..2e5290a2 100644
--- a/src/firestarter/Cuda/Cuda.cpp
+++ b/src/firestarter/Cuda/Cuda.cpp
@@ -326,7 +326,7 @@ static CONCAT(FS_ACCEL_PREFIX_LC, randStatus_t)
 // GPU index. Used to pin this thread to the GPU.
 template <typename T>
 static void create_load(std::condition_variable& waitForInitCv, std::mutex& waitForInitCvMutex, int device_index,
-                        std::atomic<int>& initCount, volatile unsigned long long* loadVar, int matrixSize) {
+                        std::atomic<int>& initCount, volatile uint64_t* loadVar, int matrixSize) {
   static_assert(std::is_same<T, float>::value || std::is_same<T, double>::value,
                 "create_load<T>: Template argument T must be either float or double");
 
@@ -515,7 +515,7 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait
 #endif
 }
 
-Cuda::Cuda(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) {
+Cuda::Cuda(volatile uint64_t* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) {
   std::thread t(Cuda::initGpus, std::ref(_waitForInitCv), loadVar, useFloat, useDouble, matrixSize, gpus);
   _initThread = std::move(t);
 
@@ -524,7 +524,7 @@ Cuda::Cuda(volatile unsigned long long* loadVar, bool useFloat, bool useDouble,
   _waitForInitCv.wait(lk);
 }
 
-void Cuda::initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble,
+void Cuda::initGpus(std::condition_variable& cv, volatile uint64_t* loadVar, bool useFloat, bool useDouble,
                     unsigned matrixSize, int gpus) {
   std::condition_variable waitForInitCv;
   std::mutex waitForInitCvMutex;
diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp
index c5d7b34e..06d7e417 100644
--- a/src/firestarter/DumpRegisterWorker.cpp
+++ b/src/firestarter/DumpRegisterWorker.cpp
@@ -31,10 +31,10 @@
 using namespace firestarter;
 
 namespace {
-static unsigned hammingDistance(unsigned long long x, unsigned long long y) {
+static unsigned hammingDistance(uint64_t x, uint64_t y) {
   unsigned dist = 0;
 
-  for (unsigned long long val = x ^ y; val > 0; val >>= 1) {
+  for (uint64_t val = x ^ y; val > 0; val >>= 1) {
     dist += val & 1;
   }
 
@@ -57,34 +57,34 @@ static std::string registerNameBySize(unsigned registerSize) {
 
 int Firestarter::initDumpRegisterWorker(std::chrono::seconds dumpTimeDelta, std::string dumpFilePath) {
 
-  auto data = std::make_unique<DumpRegisterWorkerData>(this->loadThreads.begin()->second, dumpTimeDelta, dumpFilePath);
+  auto data = std::make_unique<DumpRegisterWorkerData>(this->LoadThreads.begin()->second, dumpTimeDelta, dumpFilePath);
 
-  this->dumpRegisterWorkerThread = std::thread(Firestarter::dumpRegisterWorker, std::move(data));
+  this->DumpRegisterWorkerThread = std::thread(Firestarter::dumpRegisterWorker, std::move(data));
 
   return EXIT_SUCCESS;
 }
 
-void Firestarter::joinDumpRegisterWorker() { this->dumpRegisterWorkerThread.join(); }
+void Firestarter::joinDumpRegisterWorker() { this->DumpRegisterWorkerThread.join(); }
 
 void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> data) {
 
   pthread_setname_np(pthread_self(), "DumpRegWorker");
 
-  int registerCount = data->loadWorkerData->config().payload().registerCount();
-  int registerSize = data->loadWorkerData->config().payload().registerSize();
+  int registerCount = data->LoadWorkerDataPtr->config().payload().registerCount();
+  int registerSize = data->LoadWorkerDataPtr->config().payload().registerSize();
   std::string registerPrefix = registerNameBySize(registerSize);
-  auto offset = sizeof(DumpRegisterStruct) / sizeof(unsigned long long);
+  auto offset = sizeof(DumpRegisterStruct) / sizeof(uint64_t);
 
-  auto dumpRegisterStruct = reinterpret_cast<DumpRegisterStruct*>(data->loadWorkerData->addrMem - offset);
+  auto dumpRegisterStruct = reinterpret_cast<DumpRegisterStruct*>(data->LoadWorkerDataPtr->AddrMem - offset);
 
-  auto dumpVar = reinterpret_cast<volatile unsigned long long*>(&dumpRegisterStruct->dumpVar);
+  auto dumpVar = reinterpret_cast<volatile uint64_t*>(&dumpRegisterStruct->DumpVar);
   // memory of simd variables is before the padding
-  volatile unsigned long long* dumpMemAddr = dumpRegisterStruct->padding - registerCount * registerSize;
+  volatile uint64_t* dumpMemAddr = dumpRegisterStruct->Padding - registerCount * registerSize;
 
   // TODO: maybe use aligned_malloc to make memcpy more efficient and don't
   // interrupt the workload as much?
-  unsigned long long* last = reinterpret_cast<unsigned long long*>(malloc(sizeof(unsigned long long) * offset));
-  unsigned long long* current = reinterpret_cast<unsigned long long*>(malloc(sizeof(unsigned long long) * offset));
+  uint64_t* last = reinterpret_cast<uint64_t*>(malloc(sizeof(uint64_t) * offset));
+  uint64_t* current = reinterpret_cast<uint64_t*>(malloc(sizeof(uint64_t) * offset));
 
   if (last == nullptr || current == nullptr) {
     log::error() << "Malloc failed in Firestarter::dumpRegisterWorker";
@@ -92,7 +92,7 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> dat
   }
 
   std::stringstream dumpFilePath;
-  dumpFilePath << data->dumpFilePath;
+  dumpFilePath << data->DumpFilePath;
 #if defined(__MINGW32__) || defined(__MINGW64__)
   dumpFilePath << "\\";
 #else
@@ -123,7 +123,7 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> dat
 
   // continue until stop and dump the registers every data->dumpTimeDelta
   // seconds
-  for (; *data->loadWorkerData->addrHigh != LOAD_STOP;) {
+  for (; *data->LoadWorkerDataPtr->AddrHigh != LOAD_STOP;) {
     // signal the thread to dump its largest SIMD registers
     *dumpVar = DumpVariable::Start;
     __asm__ __volatile__("mfence;");
@@ -132,7 +132,7 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> dat
     }
 
     // copy the register content to minimize the interruption of the load worker
-    std::memcpy(current, (void*)dumpMemAddr, sizeof(unsigned long long) * offset);
+    std::memcpy(current, (void*)dumpMemAddr, sizeof(uint64_t) * offset);
 
     // skip the first output, as we first have to get some valid values for last
     if (!skipFirst) {
@@ -150,7 +150,7 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> dat
 
         for (auto j = 0; j < registerSize; j++) {
           auto index = registerSize * i + j;
-          auto hd = static_cast<unsigned long long>(hammingDistance(current[index], last[index]));
+          auto hd = static_cast<uint64_t>(hammingDistance(current[index], last[index]));
 
           dumpFile << hd;
           if (j != registerSize - 1) {
@@ -168,9 +168,9 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> dat
       skipFirst = false;
     }
 
-    std::memcpy(last, current, sizeof(unsigned long long) * offset);
+    std::memcpy(last, current, sizeof(uint64_t) * offset);
 
-    std::this_thread::sleep_for(std::chrono::seconds(data->dumpTimeDelta));
+    std::this_thread::sleep_for(std::chrono::seconds(data->DumpTimeDelta));
   }
 
   dumpFile.close();
diff --git a/src/firestarter/Environment/CPUTopology.cpp b/src/firestarter/Environment/CPUTopology.cpp
index a21bd9b8..b3e9a862 100644
--- a/src/firestarter/Environment/CPUTopology.cpp
+++ b/src/firestarter/Environment/CPUTopology.cpp
@@ -19,20 +19,21 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <array>
 #include <firestarter/Environment/CPUTopology.hpp>
 #include <firestarter/Logging/Log.hpp>
 
-#include <array>
 #include <fstream>
 #include <regex>
+#include <utility>
 
 extern "C" {
 #include <stdio.h>
 }
 
-using namespace firestarter::environment;
+namespace firestarter::environment {
 
-std::ostream& CPUTopology::print(std::ostream& stream) const {
+auto CPUTopology::print(std::ostream& stream) const -> std::ostream& {
   stream << "  system summary:\n"
          << "    number of processors:        " << this->numPackages() << "\n"
          << "    number of cores (total)):    " << this->numCoresTotal() << "\n"
@@ -43,8 +44,8 @@ std::ostream& CPUTopology::print(std::ostream& stream) const {
 
   std::stringstream ss;
 
-  for (auto const& ent : this->features()) {
-    ss << ent << " ";
+  for (auto const& Entry : this->features()) {
+    ss << Entry << " ";
   }
 
   stream << "  processor characteristics:\n"
@@ -56,45 +57,42 @@ std::ostream& CPUTopology::print(std::ostream& stream) const {
          << "    supported features: " << ss.str() << "\n"
          << "    Caches:";
 
-  std::vector<hwloc_obj_type_t> caches = {
+  std::vector<hwloc_obj_type_t> Caches = {
       HWLOC_OBJ_L1CACHE, HWLOC_OBJ_L1ICACHE, HWLOC_OBJ_L2CACHE, HWLOC_OBJ_L2ICACHE,
       HWLOC_OBJ_L3CACHE, HWLOC_OBJ_L3ICACHE, HWLOC_OBJ_L4CACHE, HWLOC_OBJ_L5CACHE,
   };
 
-  std::vector<std::string> cacheStrings = {};
+  std::vector<std::string> CacheStrings = {};
 
-  for (hwloc_obj_type_t const& cache : caches) {
-    int width;
-    char string[128];
-    int shared;
-    hwloc_obj_t cacheObj;
+  for (hwloc_obj_type_t const& Cache : Caches) {
     std::stringstream ss;
 
-    width = hwloc_get_nbobjs_by_type(this->topology, cache);
+    auto Width = hwloc_get_nbobjs_by_type(this->topology, Cache);
 
-    if (width >= 1) {
+    if (Width >= 1) {
       ss << "\n      - ";
 
-      cacheObj = hwloc_get_obj_by_type(this->topology, cache, 0);
-      hwloc_obj_type_snprintf(string, sizeof(string), cacheObj, 0);
+      auto* CacheObj = hwloc_get_obj_by_type(this->topology, Cache, 0);
+      std::array<char, 128> String{};
+      hwloc_obj_type_snprintf(String.begin(), sizeof(String), CacheObj, 0);
 
-      switch (cacheObj->attr->cache.type) {
+      switch (CacheObj->attr->cache.type) {
       case HWLOC_OBJ_CACHE_DATA:
-        ss << "Level " << cacheObj->attr->cache.depth << " Data";
+        ss << "Level " << CacheObj->attr->cache.depth << " Data";
         break;
       case HWLOC_OBJ_CACHE_INSTRUCTION:
-        ss << "Level " << cacheObj->attr->cache.depth << " Instruction";
+        ss << "Level " << CacheObj->attr->cache.depth << " Instruction";
         break;
       case HWLOC_OBJ_CACHE_UNIFIED:
       default:
-        ss << "Unified Level " << cacheObj->attr->cache.depth;
+        ss << "Unified Level " << CacheObj->attr->cache.depth;
         break;
       }
 
-      ss << " Cache, " << cacheObj->attr->cache.size / 1024 << " KiB, " << cacheObj->attr->cache.linesize
+      ss << " Cache, " << CacheObj->attr->cache.size / 1024 << " KiB, " << CacheObj->attr->cache.linesize
          << " B Cacheline, ";
 
-      switch (cacheObj->attr->cache.associativity) {
+      switch (CacheObj->attr->cache.associativity) {
       case -1:
         ss << "full";
         break;
@@ -102,16 +100,16 @@ std::ostream& CPUTopology::print(std::ostream& stream) const {
         ss << "unknown";
         break;
       default:
-        ss << cacheObj->attr->cache.associativity << "-way set";
+        ss << CacheObj->attr->cache.associativity << "-way set";
         break;
       }
 
       ss << " associative, ";
 
-      shared = this->numThreads() / width;
+      auto Shared = this->numThreads() / Width;
 
-      if (shared > 1) {
-        ss << "shared among " << shared << " threads.";
+      if (Shared > 1) {
+        ss << "shared among " << Shared << " threads.";
       } else {
         ss << "per thread.";
       }
@@ -124,7 +122,7 @@ std::ostream& CPUTopology::print(std::ostream& stream) const {
 }
 
 CPUTopology::CPUTopology(std::string architecture)
-    : _architecture(architecture) {
+    : _architecture(std::move(architecture)) {
 
   hwloc_topology_init(&this->topology);
 
@@ -413,3 +411,5 @@ unsigned CPUTopology::maxNumThreads() const {
 
   return max;
 }
+
+}; // namespace firestarter::environment
\ No newline at end of file
diff --git a/src/firestarter/Environment/Environment.cpp b/src/firestarter/Environment/Environment.cpp
index 34022c93..67e62d9d 100644
--- a/src/firestarter/Environment/Environment.cpp
+++ b/src/firestarter/Environment/Environment.cpp
@@ -19,14 +19,14 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <algorithm>
 #include <firestarter/Environment/Environment.hpp>
 #include <firestarter/Logging/Log.hpp>
 
-#include <iterator>
 #include <regex>
 #include <string>
 
-using namespace firestarter::environment;
+namespace firestarter::environment {
 
 #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
 
@@ -55,70 +55,70 @@ extern "C" {
     }                                                                                                                  \
   } while (0)
 
-int Environment::cpuSet(unsigned id) {
-  cpu_set_t mask;
+auto Environment::cpuSet(unsigned Id) -> int {
+  cpu_set_t Mask;
 
-  CPU_ZERO(&mask);
-  CPU_SET(id, &mask);
+  CPU_ZERO(&Mask);
+  CPU_SET(Id, &Mask);
 
-  return sched_setaffinity(0, sizeof(cpu_set_t), &mask);
+  return sched_setaffinity(0, sizeof(cpu_set_t), &Mask);
 }
 
-int Environment::cpuAllowed(unsigned id) {
-  cpu_set_t mask;
+auto Environment::cpuAllowed(unsigned Id) -> int {
+  cpu_set_t Mask;
 
-  CPU_ZERO(&mask);
+  CPU_ZERO(&Mask);
 
-  if (!sched_getaffinity(0, sizeof(cpu_set_t), &mask)) {
-    return CPU_ISSET(id, &mask);
+  if (!sched_getaffinity(0, sizeof(cpu_set_t), &Mask)) {
+    return CPU_ISSET(Id, &Mask);
   }
 
   return 0;
 }
 #endif
 
-int Environment::evaluateCpuAffinity(unsigned requestedNumThreads, std::string cpuBind) {
+auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, std::string cpuBind) -> int {
 #if not((defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY))
   (void)cpuBind;
 #endif
 
-  if (requestedNumThreads > 0 && requestedNumThreads > this->topology().numThreads()) {
+  if (RequestedNumThreads > 0 && RequestedNumThreads > this->topology().numThreads()) {
     log::warn() << "Not enough CPUs for requested number of threads";
   }
 
 #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
-  cpu_set_t cpuset;
+  cpu_set_t Cpuset;
 
-  CPU_ZERO(&cpuset);
+  CPU_ZERO(&Cpuset);
 
   if (cpuBind.empty()) {
     // no cpu binding defined
 
     // use all CPUs if not defined otherwise
-    if (requestedNumThreads == 0) {
-      for (unsigned i = 0; i < this->topology().maxNumThreads(); i++) {
-        if (this->cpuAllowed(i)) {
-          CPU_SET(i, &cpuset);
-          requestedNumThreads++;
+    if (RequestedNumThreads == 0) {
+      for (unsigned I = 0; I < this->topology().maxNumThreads(); I++) {
+        if (this->cpuAllowed(I)) {
+          CPU_SET(I, &Cpuset);
+          RequestedNumThreads++;
         }
       }
     } else {
       // if -n / --threads is set
-      unsigned cpu_count = 0;
-      for (unsigned i = 0; i < this->topology().maxNumThreads(); i++) {
+      unsigned CpuCount = 0;
+      for (unsigned I = 0; I < this->topology().maxNumThreads(); I++) {
         // skip if cpu is not available
-        if (!this->cpuAllowed(i)) {
+        if (!this->cpuAllowed(I)) {
           continue;
         }
-        ADD_CPU_SET(i, cpuset);
-        cpu_count++;
+        ADD_CPU_SET(I, Cpuset);
+        CpuCount++;
         // we reached the desired amounts of threads
-        if (cpu_count >= requestedNumThreads) {
+        if (CpuCount >= RequestedNumThreads) {
           break;
         }
       }
       // requested to many threads
-      if (cpu_count < requestedNumThreads) {
+      if (CpuCount < RequestedNumThreads) {
         log::error() << "You are requesting more threads than "
                         "there are CPUs available in the given cpuset.\n"
                      << "This can be caused by the taskset tool, cgrous, "
@@ -130,41 +130,42 @@ int Environment::evaluateCpuAffinity(unsigned requestedNumThreads, std::string c
     }
   } else {
     // parse CPULIST for binding
-    const std::string delimiter = ",";
-    const std::regex re("^(?:(\\d+)(?:-([1-9]\\d*)(?:\\/([1-9]\\d*))?)?)$");
+    const std::string Delimiter = ",";
+    const std::regex Re(R"(^(?:(\d+)(?:-([1-9]\d*)(?:\/([1-9]\d*))?)?)$)");
 
-    std::stringstream ss(cpuBind);
+    std::stringstream Ss(cpuBind);
 
-    while (ss.good()) {
-      std::string token;
-      std::smatch m;
-      std::getline(ss, token, ',');
+    while (Ss.good()) {
+      std::string Token;
+      std::smatch M;
+      std::getline(Ss, Token, ',');
       ;
 
-      if (std::regex_match(token, m, re)) {
-        unsigned long x, y, s;
+      if (std::regex_match(Token, M, Re)) {
+        unsigned long Y;
+        unsigned long S;
 
-        x = std::stoul(m[1].str());
-        if (m[2].matched) {
-          y = std::stoul(m[2].str());
+        unsigned long X = std::stoul(M[1].str());
+        if (M[2].matched) {
+          Y = std::stoul(M[2].str());
         } else {
-          y = x;
+          Y = X;
         }
-        if (m[3].matched) {
-          s = std::stoul(m[3].str());
+        if (M[3].matched) {
+          S = std::stoul(M[3].str());
         } else {
-          s = 1;
+          S = 1;
         }
-        if (y < x) {
-          log::error() << "y has to be >= x in x-y expressions of CPU list: " << token;
+        if (Y < X) {
+          log::error() << "y has to be >= x in x-y expressions of CPU list: " << Token;
           return EXIT_FAILURE;
         }
-        for (unsigned long i = x; i <= y; i += s) {
-          ADD_CPU_SET(i, cpuset);
-          requestedNumThreads++;
+        for (unsigned long I = X; I <= Y; I += S) {
+          ADD_CPU_SET(I, Cpuset);
+          RequestedNumThreads++;
         }
       } else {
-        log::error() << "Invalid symbols in CPU list: " << token;
+        log::error() << "Invalid symbols in CPU list: " << Token;
         return EXIT_FAILURE;
       }
     }
@@ -175,25 +176,22 @@ int Environment::evaluateCpuAffinity(unsigned requestedNumThreads, std::string c
   }
 #endif
 
-  if (requestedNumThreads == 0) {
+  if (RequestedNumThreads == 0) {
     log::error() << "Found no usable CPUs!";
     return 127;
   }
 #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
-  else {
-    for (unsigned i = 0; i < this->topology().maxNumThreads(); i++) {
-      if (CPU_ISSET(i, &cpuset)) {
-        this->cpuBind.push_back(i);
-      }
+  for (unsigned I = 0; I < this->topology().maxNumThreads(); I++) {
+    if (CPU_ISSET(I, &Cpuset)) {
+      this->CpuBind.push_back(I);
     }
   }
+
 #endif
 
-  if (requestedNumThreads > this->topology().maxNumThreads()) {
-    requestedNumThreads = this->topology().maxNumThreads();
-  }
+  RequestedNumThreads = std::min(RequestedNumThreads, this->topology().maxNumThreads());
 
-  this->_requestedNumThreads = requestedNumThreads;
+  this->RequestedNumThreads = RequestedNumThreads;
 
   return EXIT_SUCCESS;
 }
@@ -202,38 +200,40 @@ void Environment::printThreadSummary() {
   log::info() << "\n  using " << this->requestedNumThreads() << " threads";
 
 #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
-  bool printCoreIdInfo = false;
+  bool PrintCoreIdInfo = false;
   size_t i = 0;
 
-  std::vector<unsigned> cpuBind(this->cpuBind);
-  cpuBind.resize(this->requestedNumThreads());
-  for (auto const& bind : cpuBind) {
-    int coreId = this->topology().getCoreIdFromPU(bind);
-    int pkgId = this->topology().getPkgIdFromPU(bind);
+  std::vector<unsigned> CpuBind(this->CpuBind);
+  CpuBind.resize(this->requestedNumThreads());
+  for (auto const& Bind : CpuBind) {
+    int CoreId = this->topology().getCoreIdFromPU(Bind);
+    int PkgId = this->topology().getPkgIdFromPU(Bind);
 
-    if (coreId != -1 && pkgId != -1) {
-      log::info() << "    - Thread " << i << " run on CPU " << bind << ", core " << coreId << " in package: " << pkgId;
-      printCoreIdInfo = true;
+    if (CoreId != -1 && PkgId != -1) {
+      log::info() << "    - Thread " << i << " run on CPU " << Bind << ", core " << CoreId << " in package: " << PkgId;
+      PrintCoreIdInfo = true;
     }
 
     i++;
   }
 
-  if (printCoreIdInfo) {
+  if (PrintCoreIdInfo) {
     log::info() << "  The cores are numbered using the logical_index from hwloc.";
   }
 #endif
 }
 
-int Environment::setCpuAffinity(unsigned thread) {
-  if (thread >= this->requestedNumThreads()) {
+auto Environment::setCpuAffinity(unsigned Thread) -> int {
+  if (Thread >= this->requestedNumThreads()) {
     log::error() << "Trying to set more CPUs than available.";
     return EXIT_FAILURE;
   }
 
 #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
-  this->cpuSet(this->cpuBind.at(thread));
+  this->cpuSet(this->CpuBind.at(Thread));
 #endif
 
   return EXIT_SUCCESS;
 }
+
+}; // namespace firestarter::environment
\ No newline at end of file
diff --git a/src/firestarter/Environment/Payload/Payload.cpp b/src/firestarter/Environment/Payload/Payload.cpp
index 5cda6abc..39c0e6a2 100644
--- a/src/firestarter/Environment/Payload/Payload.cpp
+++ b/src/firestarter/Environment/Payload/Payload.cpp
@@ -24,73 +24,76 @@
 
 #include <firestarter/Environment/Payload/Payload.hpp>
 
-using namespace firestarter::environment::payload;
+namespace firestarter::environment::payload {
 
-unsigned Payload::getSequenceStartCount(const std::vector<std::string>& sequence, const std::string start) {
-  unsigned i = 0;
+auto Payload::getSequenceStartCount(const std::vector<std::string>& Sequence, const std::string& Start) -> unsigned {
+  unsigned I = 0;
 
-  for (const auto& item : sequence) {
-    if (0 == item.rfind(start, 0)) {
-      i++;
+  for (const auto& Item : Sequence) {
+    if (0 == Item.rfind(Start, 0)) {
+      I++;
     }
   }
 
-  return i;
+  return I;
 }
 
-std::vector<std::string> Payload::generateSequence(std::vector<std::pair<std::string, unsigned>> const& proportions) {
-  std::vector<std::pair<std::string, unsigned>> prop = proportions;
+auto Payload::generateSequence(std::vector<std::pair<std::string, unsigned>> const& Proportions)
+    -> std::vector<std::string> {
+  std::vector<std::pair<std::string, unsigned>> Prop = Proportions;
 
-  prop.erase(std::remove_if(prop.begin(), prop.end(), [](auto const& pair) { return pair.second == 0; }), prop.end());
+  Prop.erase(std::remove_if(Prop.begin(), Prop.end(), [](auto const& Pair) { return Pair.second == 0; }), Prop.end());
 
-  std::vector<std::string> sequence = {};
+  std::vector<std::string> Sequence = {};
 
-  if (prop.size() == 0) {
-    return sequence;
+  if (Prop.size() == 0) {
+    return Sequence;
   }
 
-  auto it = prop.begin();
-  auto insertIt = sequence.begin();
+  auto It = Prop.begin();
+  auto InsertIt = Sequence.begin();
 
-  sequence.insert(insertIt, it->second, it->first);
+  Sequence.insert(InsertIt, It->second, It->first);
 
-  for (++it; it != prop.end(); ++it) {
-    for (unsigned i = 0; i < it->second; i++) {
-      insertIt = sequence.begin();
-      std::advance(insertIt, 1 + floor(i * (sequence.size() + it->second - i) / (float)it->second));
-      sequence.insert(insertIt, it->first);
+  for (++It; It != Prop.end(); ++It) {
+    for (unsigned I = 0; I < It->second; I++) {
+      InsertIt = Sequence.begin();
+      std::advance(InsertIt, 1 + std::floor(I * (Sequence.size() + It->second - I) / static_cast<float>(It->second)));
+      Sequence.insert(InsertIt, It->first);
     }
   }
 
-  return sequence;
+  return Sequence;
 }
 
-unsigned Payload::getL2LoopCount(const std::vector<std::string>& sequence, const unsigned numberOfLines,
-                                 const unsigned size, const unsigned threads) {
-  if (this->getL2SequenceCount(sequence) == 0) {
+auto Payload::getL2LoopCount(const std::vector<std::string>& Sequence, const unsigned NumberOfLines,
+                             const unsigned Size, const unsigned Threads) -> unsigned {
+  if (getL2SequenceCount(Sequence) == 0) {
     return 0;
   }
-  return (
-      0.8 * size / 64 / threads /
-      (this->getL2SequenceCount(sequence) * this->getNumberOfSequenceRepetitions(sequence, numberOfLines / threads)));
+  return static_cast<unsigned>(
+      (0.8 * Size / 64 / Threads /
+       (getL2SequenceCount(Sequence) * getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Threads))));
 }
 
-unsigned Payload::getL3LoopCount(const std::vector<std::string>& sequence, const unsigned numberOfLines,
-                                 const unsigned size, const unsigned threads) {
-  if (this->getL3SequenceCount(sequence) == 0) {
+auto Payload::getL3LoopCount(const std::vector<std::string>& Sequence, const unsigned NumberOfLines,
+                             const unsigned Size, const unsigned Threads) -> unsigned {
+  if (getL3SequenceCount(Sequence) == 0) {
     return 0;
   }
-  return (
-      0.8 * size / 64 / threads /
-      (this->getL3SequenceCount(sequence) * this->getNumberOfSequenceRepetitions(sequence, numberOfLines / threads)));
+  return static_cast<unsigned>(
+      (0.8 * Size / 64 / Threads /
+       (getL3SequenceCount(Sequence) * getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Threads))));
 }
 
-unsigned Payload::getRAMLoopCount(const std::vector<std::string>& sequence, const unsigned numberOfLines,
-                                  const unsigned size, const unsigned threads) {
-  if (this->getRAMSequenceCount(sequence) == 0) {
+auto Payload::getRAMLoopCount(const std::vector<std::string>& Sequence, const unsigned NumberOfLines,
+                              const unsigned Size, const unsigned Threads) -> unsigned {
+  if (getRAMSequenceCount(Sequence) == 0) {
     return 0;
   }
-  return (
-      1.0 * size / 64 / threads /
-      (this->getRAMSequenceCount(sequence) * this->getNumberOfSequenceRepetitions(sequence, numberOfLines / threads)));
+  return static_cast<unsigned>(
+      (1.0 * Size / 64 / Threads /
+       (getRAMSequenceCount(Sequence) * getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Threads))));
 }
+
+}; // namespace firestarter::environment::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index 2c23d1c4..8e29715f 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -25,61 +25,61 @@ using namespace firestarter::environment::x86::payload;
 using namespace asmjit;
 using namespace asmjit::x86;
 
-int AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion,
-                                  unsigned instructionCacheSize, std::list<unsigned> const& dataCacheBufferSize,
-                                  unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-                                  bool errorDetection) {
+auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
+                                   unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
+                                   unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+                                   bool ErrorDetection) -> int {
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto sequence = this->generateSequence(proportion);
-  auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
+  auto sequence = this->generateSequence(Proportion);
+  auto repetitions = this->getNumberOfSequenceRepetitions(sequence, NumberOfLines / Thread);
 
   // compute count of flops and memory access for performance report
   unsigned flops = 0;
   unsigned bytes = 0;
 
   for (const auto& item : sequence) {
-    auto it = this->instructionFlops.find(item);
+    auto it = this->InstructionFlops.find(item);
 
-    if (it == this->instructionFlops.end()) {
+    if (it == this->InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
     flops += it->second;
 
-    it = this->instructionMemory.find(item);
+    it = this->InstructionMemory.find(item);
 
-    if (it != this->instructionMemory.end()) {
+    if (it != this->InstructionMemory.end()) {
       bytes += it->second;
     }
   }
 
-  this->_flops = repetitions * flops;
-  this->_bytes = repetitions * bytes;
-  this->_instructions = repetitions * sequence.size() * 4 + 6;
+  this->Flops = repetitions * flops;
+  this->Bytes = repetitions * bytes;
+  this->Instructions = repetitions * sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
-  auto l1i_cache_size = instructionCacheSize / thread;
-  auto dataCacheBufferSizeIterator = dataCacheBufferSize.begin();
-  auto l1_size = *dataCacheBufferSizeIterator / thread;
+  auto l1i_cache_size = InstructionCacheSize / Thread;
+  auto dataCacheBufferSizeIterator = DataCacheBufferSize.begin();
+  auto l1_size = *dataCacheBufferSizeIterator / Thread;
   std::advance(dataCacheBufferSizeIterator, 1);
-  auto l2_size = *dataCacheBufferSizeIterator / thread;
+  auto l2_size = *dataCacheBufferSizeIterator / Thread;
   std::advance(dataCacheBufferSizeIterator, 1);
-  auto l3_size = *dataCacheBufferSizeIterator / thread;
-  auto ram_size = ramBufferSize / thread;
+  auto l3_size = *dataCacheBufferSizeIterator / Thread;
+  auto ram_size = RamBufferSize / Thread;
 
   // calculate the reset counters for the buffers
-  auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
-  auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
-  auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
+  auto l2_loop_count = getL2LoopCount(sequence, NumberOfLines, l2_size * Thread, Thread);
+  auto l3_loop_count = getL3LoopCount(sequence, NumberOfLines, l3_size * Thread, Thread);
+  auto ram_loop_count = getRAMLoopCount(sequence, NumberOfLines, ram_size * Thread, Thread);
 
   CodeHolder code;
-  code.init(this->rt.environment());
+  code.init(this->Rt.environment());
 
-  if (nullptr != this->loadFunction) {
-    this->rt.release(&this->loadFunction);
+  if (nullptr != this->LoadFunction) {
+    this->Rt.release(&this->LoadFunction);
   }
 
   Builder cb(&code);
@@ -108,9 +108,8 @@ int AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   auto ram_reg = zmm30;
 
   FuncDetail func;
-  func.init(FuncSignatureT<unsigned long long, unsigned long long*, volatile unsigned long long*, unsigned long long>(
-                CallConvId::kCDecl),
-            this->rt.environment());
+  func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl),
+            this->Rt.environment());
 
   FuncFrame frame;
   frame.init(func);
@@ -306,7 +305,7 @@ int AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   }
 
   cb.movq(temp_reg, iter_reg); // restore iteration counter
-  if (this->getRAMSequenceCount(sequence) > 0) {
+  if (getRAMSequenceCount(sequence) > 0) {
     // reset RAM counter
     auto NoRamReset = cb.newLabel();
 
@@ -317,10 +316,10 @@ int AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     cb.add(ram_addr, Imm(l3_size));
     cb.bind(NoRamReset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   cb.inc(temp_reg); // increment iteration counter
-  if (this->getL2SequenceCount(sequence) > 0) {
+  if (getL2SequenceCount(sequence) > 0) {
     // reset L2-Cache counter
     auto NoL2Reset = cb.newLabel();
 
@@ -331,10 +330,10 @@ int AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     cb.add(l2_addr, Imm(l1_size));
     cb.bind(NoL2Reset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   cb.movq(iter_reg, temp_reg); // store iteration counter
-  if (this->getL3SequenceCount(sequence) > 0) {
+  if (getL3SequenceCount(sequence) > 0) {
     // reset L3-Cache counter
     auto NoL3Reset = cb.newLabel();
 
@@ -345,11 +344,11 @@ int AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     cb.add(l3_addr, Imm(l2_size));
     cb.bind(NoL3Reset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   cb.mov(l1_addr, pointer_reg);
 
-  if (dumpRegisters) {
+  if (DumpRegisters) {
     auto SkipRegistersDump = cb.newLabel();
 
     cb.test(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait));
@@ -366,7 +365,7 @@ int AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     cb.bind(SkipRegistersDump);
   }
 
-  if (errorDetection) {
+  if (ErrorDetection) {
     this->emitErrorDetectionCode<decltype(iter_reg), Zmm>(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
   }
 
@@ -384,7 +383,7 @@ int AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   // String sb;
   // cb.dump(sb);
 
-  Error err = this->rt.add(&this->loadFunction, &code);
+  Error err = this->Rt.add(&this->LoadFunction, &code);
   if (err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
@@ -408,15 +407,15 @@ int AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   return EXIT_SUCCESS;
 }
 
-std::list<std::string> AVX512Payload::getAvailableInstructions() const {
-  std::list<std::string> instructions;
+auto AVX512Payload::getAvailableInstructions() const -> std::list<std::string> {
+  std::list<std::string> Instructions;
 
-  transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions),
+  transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(Instructions),
             [](const auto& item) { return item.first; });
 
-  return instructions;
+  return Instructions;
 }
 
-void AVX512Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) {
-  X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4);
+void AVX512Payload::init(uint64_t* MemoryAddr, uint64_t BufferSize) {
+  X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
index c925f538..f3905ff0 100644
--- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
@@ -19,6 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <cstdint>
 #include <firestarter/Environment/X86/Payload/AVXPayload.hpp>
 #include <firestarter/Logging/Log.hpp>
 
@@ -43,25 +44,25 @@ int AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
   unsigned bytes = 0;
 
   for (const auto& item : sequence) {
-    auto it = this->instructionFlops.find(item);
+    auto it = this->InstructionFlops.find(item);
 
-    if (it == this->instructionFlops.end()) {
+    if (it == this->InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
     flops += it->second;
 
-    it = this->instructionMemory.find(item);
+    it = this->InstructionMemory.find(item);
 
-    if (it != this->instructionMemory.end()) {
+    if (it != this->InstructionMemory.end()) {
       bytes += it->second;
     }
   }
 
-  this->_flops = repetitions * flops;
-  this->_bytes = repetitions * bytes;
-  this->_instructions = repetitions * sequence.size() * 2 + 4;
+  this->Flops = repetitions * flops;
+  this->Bytes = repetitions * bytes;
+  this->Instructions = repetitions * sequence.size() * 2 + 4;
 
   // calculate the buffer sizes
   auto l1i_cache_size = instructionCacheSize / thread;
@@ -79,10 +80,10 @@ int AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
   auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
 
   CodeHolder code;
-  code.init(this->rt.environment());
+  code.init(this->Rt.environment());
 
-  if (nullptr != this->loadFunction) {
-    this->rt.release(&this->loadFunction);
+  if (nullptr != this->LoadFunction) {
+    this->Rt.release(&this->LoadFunction);
   }
 
   Builder cb(&code);
@@ -107,9 +108,8 @@ int AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
   auto trans_regs = 6;
 
   FuncDetail func;
-  func.init(FuncSignatureT<unsigned long long, unsigned long long*, volatile unsigned long long*, unsigned long long>(
-                CallConvId::kCDecl),
-            this->rt.environment());
+  func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl),
+            this->Rt.environment());
 
   FuncFrame frame;
   frame.init(func);
@@ -244,12 +244,12 @@ int AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.vmovapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest));
         L1_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "L1_LS") {
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32));
         cb.vmovapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest));
         L1_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "L2_L") {
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l2_addr, 64));
         L2_INCREMENT();
@@ -257,12 +257,12 @@ int AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.vmovapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest));
         L2_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "L2_LS") {
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l2_addr, 64));
         cb.vmovapd(xmmword_ptr(l2_addr, 96), Xmm(add_dest));
         L2_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "L3_L") {
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l3_addr, 64));
         L3_INCREMENT();
@@ -270,17 +270,17 @@ int AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
         L3_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "L3_LS") {
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l3_addr, 64));
         cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
         L3_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "L3_P") {
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32));
         cb.prefetcht0(ptr(l3_addr));
         L3_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "RAM_L") {
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(ram_addr, 64));
         RAM_INCREMENT();
@@ -288,24 +288,24 @@ int AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
         RAM_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "RAM_LS") {
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l3_addr, 64));
         cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
         RAM_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "RAM_P") {
         cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32));
         cb.prefetcht2(ptr(ram_addr));
         RAM_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else {
         workerLog::error() << "Instruction group " << item << " not found in " << this->name() << ".";
         return EXIT_FAILURE;
       }
 
       if (shift_regs > 1) {
-        this->_instructions++;
+        this->Instructions++;
         if (left) {
           cb.psrlw(Mm(shift_start + (shift_dst - shift_start + 3) % shift_regs), Mm(shift_dst));
         } else {
@@ -348,7 +348,7 @@ int AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
     cb.add(ram_addr, Imm(l3_size));
     cb.bind(NoRamReset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   if (this->getL2SequenceCount(sequence) > 0) {
     // reset L2-Cache counter
@@ -361,7 +361,7 @@ int AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
     cb.add(l2_addr, Imm(l1_size));
     cb.bind(NoL2Reset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   if (this->getL3SequenceCount(sequence) > 0) {
     // reset L3-Cache counter
@@ -374,7 +374,7 @@ int AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
     cb.add(l3_addr, Imm(l2_size));
     cb.bind(NoL3Reset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   cb.inc(iter_reg); // increment iteration counter
   cb.mov(l1_addr, pointer_reg);
@@ -414,7 +414,7 @@ int AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
   // String sb;
   // cb.dump(sb);
 
-  Error err = this->rt.add(&this->loadFunction, &code);
+  Error err = this->Rt.add(&this->LoadFunction, &code);
   if (err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
@@ -441,12 +441,12 @@ int AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
 std::list<std::string> AVXPayload::getAvailableInstructions() const {
   std::list<std::string> instructions;
 
-  transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions),
+  transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions),
             [](const auto& item) { return item.first; });
 
   return instructions;
 }
 
-void AVXPayload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) {
+void AVXPayload::init(uint64_t* memoryAddr, uint64_t bufferSize) {
   X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10, 1.654738925401e-15);
 }
diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
index 1e5ffa85..9df404e2 100644
--- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
@@ -19,6 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <cstdint>
 #include <firestarter/Environment/X86/Payload/FMA4Payload.hpp>
 #include <firestarter/Logging/Log.hpp>
 
@@ -43,25 +44,25 @@ int FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   unsigned bytes = 0;
 
   for (const auto& item : sequence) {
-    auto it = this->instructionFlops.find(item);
+    auto it = this->InstructionFlops.find(item);
 
-    if (it == this->instructionFlops.end()) {
+    if (it == this->InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
     flops += it->second;
 
-    it = this->instructionMemory.find(item);
+    it = this->InstructionMemory.find(item);
 
-    if (it != this->instructionMemory.end()) {
+    if (it != this->InstructionMemory.end()) {
       bytes += it->second;
     }
   }
 
-  this->_flops = repetitions * flops;
-  this->_bytes = repetitions * bytes;
-  this->_instructions = repetitions * sequence.size() * 4 + 6;
+  this->Flops = repetitions * flops;
+  this->Bytes = repetitions * bytes;
+  this->Instructions = repetitions * sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
   auto l1i_cache_size = instructionCacheSize / thread;
@@ -79,10 +80,10 @@ int FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
 
   CodeHolder code;
-  code.init(this->rt.environment());
+  code.init(this->Rt.environment());
 
-  if (nullptr != this->loadFunction) {
-    this->rt.release(&this->loadFunction);
+  if (nullptr != this->LoadFunction) {
+    this->Rt.release(&this->LoadFunction);
   }
 
   Builder cb(&code);
@@ -111,9 +112,8 @@ int FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   auto ram_reg = xmm15;
 
   FuncDetail func;
-  func.init(FuncSignatureT<unsigned long long, unsigned long long*, volatile unsigned long long*, unsigned long long>(
-                CallConvId::kCDecl),
-            this->rt.environment());
+  func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl),
+            this->Rt.environment());
 
   FuncFrame frame;
   frame.init(func);
@@ -325,7 +325,7 @@ int FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     cb.add(ram_addr, Imm(l3_size));
     cb.bind(NoRamReset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   cb.inc(temp_reg); // increment iteration counter
   if (this->getL2SequenceCount(sequence) > 0) {
@@ -339,7 +339,7 @@ int FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     cb.add(l2_addr, Imm(l1_size));
     cb.bind(NoL2Reset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   cb.movq(iter_reg, temp_reg); // store iteration counter
   if (this->getL3SequenceCount(sequence) > 0) {
@@ -353,7 +353,7 @@ int FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     cb.add(l3_addr, Imm(l2_size));
     cb.bind(NoL3Reset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   cb.mov(l1_addr, pointer_reg);
 
@@ -392,7 +392,7 @@ int FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   // String sb;
   // cb.dump(sb);
 
-  Error err = this->rt.add(&this->loadFunction, &code);
+  Error err = this->Rt.add(&this->LoadFunction, &code);
   if (err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
@@ -419,12 +419,12 @@ int FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
 std::list<std::string> FMA4Payload::getAvailableInstructions() const {
   std::list<std::string> instructions;
 
-  transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions),
+  transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions),
             [](const auto& item) { return item.first; });
 
   return instructions;
 }
 
-void FMA4Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) {
+void FMA4Payload::init(uint64_t* memoryAddr, uint64_t bufferSize) {
   X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
index 3a432bfb..ba6534a9 100644
--- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
@@ -43,25 +43,25 @@ int FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
   unsigned bytes = 0;
 
   for (const auto& item : sequence) {
-    auto it = this->instructionFlops.find(item);
+    auto it = this->InstructionFlops.find(item);
 
-    if (it == this->instructionFlops.end()) {
+    if (it == this->InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
     flops += it->second;
 
-    it = this->instructionMemory.find(item);
+    it = this->InstructionMemory.find(item);
 
-    if (it != this->instructionMemory.end()) {
+    if (it != this->InstructionMemory.end()) {
       bytes += it->second;
     }
   }
 
-  this->_flops = repetitions * flops;
-  this->_bytes = repetitions * bytes;
-  this->_instructions = repetitions * sequence.size() * 4 + 6;
+  this->Flops = repetitions * flops;
+  this->Bytes = repetitions * bytes;
+  this->Instructions = repetitions * sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
   auto l1i_cache_size = instructionCacheSize / thread;
@@ -79,10 +79,10 @@ int FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
   auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
 
   CodeHolder code;
-  code.init(this->rt.environment());
+  code.init(this->Rt.environment());
 
-  if (nullptr != this->loadFunction) {
-    this->rt.release(&this->loadFunction);
+  if (nullptr != this->LoadFunction) {
+    this->Rt.release(&this->LoadFunction);
   }
 
   Builder cb(&code);
@@ -111,9 +111,8 @@ int FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
   auto ram_reg = ymm15;
 
   FuncDetail func;
-  func.init(FuncSignatureT<unsigned long long, unsigned long long*, volatile unsigned long long*, unsigned long long>(
-                CallConvId::kCDecl),
-            this->rt.environment());
+  func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl),
+            this->Rt.environment());
 
   FuncFrame frame;
   frame.init(func);
@@ -353,7 +352,7 @@ int FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
     cb.add(ram_addr, Imm(l3_size));
     cb.bind(NoRamReset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   cb.inc(temp_reg); // increment iteration counter
   if (this->getL2SequenceCount(sequence) > 0) {
@@ -367,7 +366,7 @@ int FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
     cb.add(l2_addr, Imm(l1_size));
     cb.bind(NoL2Reset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   cb.movq(iter_reg, temp_reg); // store iteration counter
   if (this->getL3SequenceCount(sequence) > 0) {
@@ -381,7 +380,7 @@ int FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
     cb.add(l3_addr, Imm(l2_size));
     cb.bind(NoL3Reset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   cb.mov(l1_addr, pointer_reg);
 
@@ -420,7 +419,7 @@ int FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
   // String sb;
   // cb.dump(sb);
 
-  Error err = this->rt.add(&this->loadFunction, &code);
+  Error err = this->Rt.add(&this->LoadFunction, &code);
   if (err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
@@ -447,12 +446,12 @@ int FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> con
 std::list<std::string> FMAPayload::getAvailableInstructions() const {
   std::list<std::string> instructions;
 
-  transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions),
+  transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions),
             [](const auto& item) { return item.first; });
 
   return instructions;
 }
 
-void FMAPayload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) {
+void FMAPayload::init(uint64_t* memoryAddr, uint64_t bufferSize) {
   X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index d3d0147f..60a98ef1 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -43,25 +43,25 @@ int SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   unsigned bytes = 0;
 
   for (const auto& item : sequence) {
-    auto it = this->instructionFlops.find(item);
+    auto it = this->InstructionFlops.find(item);
 
-    if (it == this->instructionFlops.end()) {
+    if (it == this->InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
     flops += it->second;
 
-    it = this->instructionMemory.find(item);
+    it = this->InstructionMemory.find(item);
 
-    if (it != this->instructionMemory.end()) {
+    if (it != this->InstructionMemory.end()) {
       bytes += it->second;
     }
   }
 
-  this->_flops = repetitions * flops;
-  this->_bytes = repetitions * bytes;
-  this->_instructions = repetitions * sequence.size() * 2 + 4;
+  this->Flops = repetitions * flops;
+  this->Bytes = repetitions * bytes;
+  this->Instructions = repetitions * sequence.size() * 2 + 4;
 
   // calculate the buffer sizes
   auto l1i_cache_size = instructionCacheSize / thread;
@@ -79,10 +79,10 @@ int SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
 
   CodeHolder code;
-  code.init(this->rt.environment());
+  code.init(this->Rt.environment());
 
-  if (nullptr != this->loadFunction) {
-    this->rt.release(&this->loadFunction);
+  if (nullptr != this->LoadFunction) {
+    this->Rt.release(&this->LoadFunction);
   }
 
   Builder cb(&code);
@@ -107,9 +107,9 @@ int SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   auto trans_regs = 2;
 
   FuncDetail func;
-  func.init(FuncSignatureT<unsigned long long, unsigned long long*, volatile unsigned long long*, unsigned long long>(
-                CallConvId::kCDecl),
-            this->rt.environment());
+  // Same (uint64_t*, volatile uint64_t*, uint64_t) -> uint64_t signature that X86Payload::highLoadFunction calls.
+  func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl),
+            this->Rt.environment());
 
   FuncFrame frame;
   frame.init(func);
@@ -241,12 +239,12 @@ int SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
         cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.movapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest));
         L1_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "L1_LS") {
         cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32));
         cb.movapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest));
         L1_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "L2_L") {
         cb.addpd(Xmm(add_dest), xmmword_ptr(l2_addr, 64));
         L2_INCREMENT();
@@ -254,12 +252,12 @@ int SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
         cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.movapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest));
         L2_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "L2_LS") {
         cb.addpd(Xmm(add_dest), xmmword_ptr(l2_addr, 64));
         cb.movapd(xmmword_ptr(l2_addr, 96), Xmm(add_dest));
         L2_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "L3_L") {
         cb.addpd(Xmm(add_dest), xmmword_ptr(l3_addr, 64));
         L3_INCREMENT();
@@ -267,17 +265,17 @@ int SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
         cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.movapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
         L3_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "L3_LS") {
         cb.addpd(Xmm(add_dest), xmmword_ptr(l3_addr, 64));
         cb.movapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
         L3_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "L3_P") {
         cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32));
         cb.prefetcht0(ptr(l3_addr));
         L3_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "RAM_L") {
         cb.addpd(Xmm(add_dest), xmmword_ptr(ram_addr, 64));
         RAM_INCREMENT();
@@ -285,24 +283,24 @@ int SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
         cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
         cb.movapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
         RAM_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "RAM_LS") {
         cb.addpd(Xmm(add_dest), xmmword_ptr(l3_addr, 64));
         cb.movapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
         RAM_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else if (item == "RAM_P") {
         cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32));
         cb.prefetcht2(ptr(ram_addr));
         RAM_INCREMENT();
-        this->_instructions++;
+        this->Instructions++;
       } else {
         workerLog::error() << "Instruction group " << item << " not found in " << this->name() << ".";
         return EXIT_FAILURE;
       }
 
       if (mov_regs > 0) {
-        this->_instructions++;
+        this->Instructions++;
         cb.movq(Mm(mov_start + (movq_dst - mov_start + mov_regs - 1) % mov_regs), Mm(movq_dst));
       }
 
@@ -340,7 +338,7 @@ int SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     cb.add(ram_addr, Imm(l3_size));
     cb.bind(NoRamReset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   if (this->getL2SequenceCount(sequence) > 0) {
     // reset L2-Cache counter
@@ -353,7 +351,7 @@ int SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     cb.add(l2_addr, Imm(l1_size));
     cb.bind(NoL2Reset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   if (this->getL3SequenceCount(sequence) > 0) {
     // reset L3-Cache counter
@@ -366,7 +364,7 @@ int SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     cb.add(l3_addr, Imm(l2_size));
     cb.bind(NoL3Reset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   cb.inc(iter_reg); // increment iteration counter
   cb.mov(l1_addr, pointer_reg);
@@ -406,7 +404,7 @@ int SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   // String sb;
   // cb.dump(sb);
 
-  Error err = this->rt.add(&this->loadFunction, &code);
+  Error err = this->Rt.add(&this->LoadFunction, &code);
   if (err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
@@ -433,12 +431,12 @@ int SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
 std::list<std::string> SSE2Payload::getAvailableInstructions() const {
   std::list<std::string> instructions;
 
-  transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions),
+  transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions),
             [](const auto& item) { return item.first; });
 
   return instructions;
 }
 
-void SSE2Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) {
+void SSE2Payload::init(uint64_t* memoryAddr, uint64_t bufferSize) {
   X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10, 1.654738925401e-15);
 }
diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index 8d85dc2d..73175bd5 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -32,7 +32,7 @@
 
 using namespace firestarter::environment::x86::payload;
 
-void X86Payload::lowLoadFunction(volatile unsigned long long* addrHigh, unsigned long long period) {
+void X86Payload::lowLoadFunction(volatile uint64_t* addrHigh, uint64_t period) {
   int nap;
 #ifdef _MSC_VER
   std::array<int, 4> cpuid;
@@ -69,53 +69,52 @@ void X86Payload::lowLoadFunction(volatile unsigned long long* addrHigh, unsigned
   }
 }
 
-void X86Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize, double firstValue,
-                      double lastValue) {
-  unsigned long long i = 0;
+void X86Payload::init(uint64_t* memoryAddr, uint64_t bufferSize, double firstValue, double lastValue) {
+  uint64_t i = 0;
 
   for (; i < INIT_BLOCKSIZE; i++)
     *((double*)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * firstValue;
   for (; i <= bufferSize - INIT_BLOCKSIZE; i += INIT_BLOCKSIZE)
-    std::memcpy(memoryAddr + i, memoryAddr + i - INIT_BLOCKSIZE, sizeof(unsigned long long) * INIT_BLOCKSIZE);
+    std::memcpy(memoryAddr + i, memoryAddr + i - INIT_BLOCKSIZE, sizeof(uint64_t) * INIT_BLOCKSIZE);
   for (; i < bufferSize; i++)
     *((double*)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * lastValue;
 }
 
-unsigned long long X86Payload::highLoadFunction(unsigned long long* addrMem, volatile unsigned long long* addrHigh,
-                                                unsigned long long iterations) {
-  return this->loadFunction(addrMem, addrHigh, iterations);
+uint64_t X86Payload::highLoadFunction(uint64_t* addrMem, volatile uint64_t* addrHigh, uint64_t iterations) {
+  return this->LoadFunction(addrMem, addrHigh, iterations);
 }
 
 // add MM regs to dirty regs
 // zmm31 is used for backup if VectorReg is of type asmjit::x86::Zmm
-template <class IterReg, class VectorReg>
-void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder& cb, IterReg iter_reg, asmjit::x86::Gpq addrHigh_reg,
+template <class IterRegT, class VectorRegT>
+void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder& Cb, IterRegT IterReg, asmjit::x86::Gpq addrHigh_reg,
                                         asmjit::x86::Gpq pointer_reg, asmjit::x86::Gpq temp_reg,
                                         asmjit::x86::Gpq temp_reg2) {
   // we don't want anything to break... so we use asserts for everything that
   // could break it
-  static_assert(std::is_base_of<asmjit::x86::Vec, VectorReg>::value, "VectorReg must be of asmjit::asmjit::x86::Vec");
-  static_assert(std::is_same<asmjit::x86::Xmm, VectorReg>::value || std::is_same<asmjit::x86::Ymm, VectorReg>::value ||
-                    std::is_same<asmjit::x86::Zmm, VectorReg>::value,
+  static_assert(std::is_base_of<asmjit::x86::Vec, VectorRegT>::value, "VectorReg must be of asmjit::asmjit::x86::Vec");
+  static_assert(std::is_same<asmjit::x86::Xmm, VectorRegT>::value ||
+                    std::is_same<asmjit::x86::Ymm, VectorRegT>::value ||
+                    std::is_same<asmjit::x86::Zmm, VectorRegT>::value,
                 "VectorReg ist not of any supported type");
-  static_assert(std::is_same<asmjit::x86::Mm, IterReg>::value || std::is_same<asmjit::x86::Gpq, IterReg>::value,
+  static_assert(std::is_same<asmjit::x86::Mm, IterRegT>::value || std::is_same<asmjit::x86::Gpq, IterRegT>::value,
                 "IterReg is not of any supported type");
 
-  if constexpr (std::is_same<asmjit::x86::Mm, IterReg>::value) {
-    assert((iter_reg == asmjit::x86::mm0, "iter_reg must be mm0"));
+  if constexpr (std::is_same<asmjit::x86::Mm, IterRegT>::value) {
+    assert((IterReg == asmjit::x86::mm0, "iter_reg must be mm0"));
   }
 
-  assert((iter_reg != temp_reg, "iter_reg must be != temp_reg"));
+  assert((IterReg != temp_reg, "iter_reg must be != temp_reg"));
   assert((temp_reg != temp_reg2, "temp_reg must be != temp_reg2"));
   assert((temp_reg != addrHigh_reg, "temp_reg must be != addrHigh_reg"));
   assert((temp_reg != pointer_reg, "temp_reg must be != pointer_reg"));
 
-  assert((iter_reg != asmjit::x86::r8, "iter_reg must be != r8"));
-  assert((iter_reg != asmjit::x86::r9, "iter_reg must be != r9"));
-  assert((iter_reg != asmjit::x86::rax, "iter_reg must be != rax"));
-  assert((iter_reg != asmjit::x86::rbx, "iter_reg must be != rbx"));
-  assert((iter_reg != asmjit::x86::rcx, "iter_reg must be != rcx"));
-  assert((iter_reg != asmjit::x86::rdx, "iter_reg must be != rdx"));
+  assert((IterReg != asmjit::x86::r8, "iter_reg must be != r8"));
+  assert((IterReg != asmjit::x86::r9, "iter_reg must be != r9"));
+  assert((IterReg != asmjit::x86::rax, "iter_reg must be != rax"));
+  assert((IterReg != asmjit::x86::rbx, "iter_reg must be != rbx"));
+  assert((IterReg != asmjit::x86::rcx, "iter_reg must be != rcx"));
+  assert((IterReg != asmjit::x86::rdx, "iter_reg must be != rdx"));
 
   assert((temp_reg != asmjit::x86::r8, "temp_reg must be != r8"));
   assert((temp_reg != asmjit::x86::r9, "temp_reg must be != r9"));
@@ -138,172 +137,172 @@ void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder& cb, IterReg iter_r
   assert((addrHigh_reg != asmjit::x86::rcx, "addrHigh_reg must be != rcx"));
   assert((addrHigh_reg != asmjit::x86::rdx, "addrHigh_reg must be != rdx"));
 
-  auto SkipErrorDetection = cb.newLabel();
+  auto SkipErrorDetection = Cb.newLabel();
 
-  if constexpr (std::is_same<asmjit::x86::Mm, IterReg>::value) {
-    cb.movq(temp_reg, iter_reg);
+  if constexpr (std::is_same<asmjit::x86::Mm, IterRegT>::value) {
+    Cb.movq(temp_reg, IterReg);
   } else {
-    cb.mov(temp_reg, iter_reg);
+    Cb.mov(temp_reg, IterReg);
   }
   // round about 50-100 Hz
   // more or less, but this isn't really that relevant
-  cb.and_(temp_reg, asmjit::Imm(0x3fff));
-  cb.test(temp_reg, temp_reg);
-  cb.jnz(SkipErrorDetection);
+  Cb.and_(temp_reg, asmjit::Imm(0x3fff));
+  Cb.test(temp_reg, temp_reg);
+  Cb.jnz(SkipErrorDetection);
 
-  cb.mov(temp_reg, asmjit::Imm(0xffffffff));
+  Cb.mov(temp_reg, asmjit::Imm(0xffffffff));
 
   int registerCount = (int)this->registerCount();
 
   // Create a backup of VectorReg(0)
-  if constexpr (std::is_same<asmjit::x86::Xmm, VectorReg>::value) {
-    cb.movq(temp_reg2, asmjit::x86::xmm0);
-    cb.push(temp_reg2);
-    cb.crc32(temp_reg, temp_reg2);
-    cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-    cb.movq(temp_reg2, asmjit::x86::xmm0);
-    cb.push(temp_reg2);
-    cb.crc32(temp_reg, temp_reg2);
-
-  } else if constexpr (std::is_same<asmjit::x86::Ymm, VectorReg>::value &&
-                       std::is_same<asmjit::x86::Mm, IterReg>::value) {
-    cb.movq(temp_reg2, asmjit::x86::xmm0);
-    cb.movq(asmjit::x86::Mm(7), temp_reg2);
-    cb.crc32(temp_reg, temp_reg2);
-    cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-    cb.movq(temp_reg2, asmjit::x86::xmm0);
-    cb.movq(asmjit::x86::Mm(6), temp_reg2);
-    cb.crc32(temp_reg, temp_reg2);
-
-    cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1));
-
-    cb.movq(temp_reg2, asmjit::x86::xmm0);
-    cb.movq(asmjit::x86::Mm(5), temp_reg2);
-    cb.crc32(temp_reg, temp_reg2);
-    cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-    cb.movq(temp_reg2, asmjit::x86::xmm0);
-    cb.movq(asmjit::x86::Mm(4), temp_reg2);
-    cb.crc32(temp_reg, temp_reg2);
-  } else if constexpr (std::is_same<asmjit::x86::Zmm, VectorReg>::value &&
-                       std::is_same<asmjit::x86::Mm, IterReg>::value) {
+  if constexpr (std::is_same<asmjit::x86::Xmm, VectorRegT>::value) {
+    Cb.movq(temp_reg2, asmjit::x86::xmm0);
+    Cb.push(temp_reg2);
+    Cb.crc32(temp_reg, temp_reg2);
+    Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+    Cb.movq(temp_reg2, asmjit::x86::xmm0);
+    Cb.push(temp_reg2);
+    Cb.crc32(temp_reg, temp_reg2);
+
+  } else if constexpr (std::is_same<asmjit::x86::Ymm, VectorRegT>::value &&
+                       std::is_same<asmjit::x86::Mm, IterRegT>::value) {
+    Cb.movq(temp_reg2, asmjit::x86::xmm0);
+    Cb.movq(asmjit::x86::Mm(7), temp_reg2);
+    Cb.crc32(temp_reg, temp_reg2);
+    Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+    Cb.movq(temp_reg2, asmjit::x86::xmm0);
+    Cb.movq(asmjit::x86::Mm(6), temp_reg2);
+    Cb.crc32(temp_reg, temp_reg2);
+
+    Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1));
+
+    Cb.movq(temp_reg2, asmjit::x86::xmm0);
+    Cb.movq(asmjit::x86::Mm(5), temp_reg2);
+    Cb.crc32(temp_reg, temp_reg2);
+    Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+    Cb.movq(temp_reg2, asmjit::x86::xmm0);
+    Cb.movq(asmjit::x86::Mm(4), temp_reg2);
+    Cb.crc32(temp_reg, temp_reg2);
+  } else if constexpr (std::is_same<asmjit::x86::Zmm, VectorRegT>::value &&
+                       std::is_same<asmjit::x86::Mm, IterRegT>::value) {
     // We use vector registers zmm31 for our backup
-    cb.vmovapd(asmjit::x86::zmm31, asmjit::x86::zmm0);
+    Cb.vmovapd(asmjit::x86::zmm31, asmjit::x86::zmm0);
     registerCount--;
   }
 
   // Calculate the hash of the remaining VectorReg
   // use VectorReg(0) as a temporary place to unpack values
   for (int i = 1; i < registerCount; i++) {
-    if constexpr (std::is_same<asmjit::x86::Xmm, VectorReg>::value) {
-      cb.vmovapd(asmjit::x86::xmm0, asmjit::x86::Xmm(i));
-
-      cb.movq(temp_reg2, asmjit::x86::xmm0);
-      cb.crc32(temp_reg, temp_reg2);
-      cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-      cb.movq(temp_reg2, asmjit::x86::xmm0);
-      cb.crc32(temp_reg, temp_reg2);
-    } else if constexpr (std::is_same<asmjit::x86::Ymm, VectorReg>::value) {
-      cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(i));
-
-      cb.movq(temp_reg2, asmjit::x86::xmm0);
-      cb.crc32(temp_reg, temp_reg2);
-      cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-      cb.movq(temp_reg2, asmjit::x86::xmm0);
-      cb.crc32(temp_reg, temp_reg2);
-
-      cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1));
-
-      cb.movq(temp_reg2, asmjit::x86::xmm0);
-      cb.crc32(temp_reg, temp_reg2);
-      cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-      cb.movq(temp_reg2, asmjit::x86::xmm0);
-      cb.crc32(temp_reg, temp_reg2);
-    } else if constexpr (std::is_same<asmjit::x86::Zmm, VectorReg>::value) {
-      cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(i));
-
-      cb.movq(temp_reg2, asmjit::x86::xmm0);
-      cb.crc32(temp_reg, temp_reg2);
-      cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-      cb.movq(temp_reg2, asmjit::x86::xmm0);
-      cb.crc32(temp_reg, temp_reg2);
-
-      cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1));
-
-      cb.movq(temp_reg2, asmjit::x86::xmm0);
-      cb.crc32(temp_reg, temp_reg2);
-      cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-      cb.movq(temp_reg2, asmjit::x86::xmm0);
-      cb.crc32(temp_reg, temp_reg2);
-
-      cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(i), asmjit::Imm(2));
-
-      cb.movq(temp_reg2, asmjit::x86::xmm0);
-      cb.crc32(temp_reg, temp_reg2);
-      cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-      cb.movq(temp_reg2, asmjit::x86::xmm0);
-      cb.crc32(temp_reg, temp_reg2);
-
-      cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(i), asmjit::Imm(3));
-
-      cb.movq(temp_reg2, asmjit::x86::xmm0);
-      cb.crc32(temp_reg, temp_reg2);
-      cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-      cb.movq(temp_reg2, asmjit::x86::xmm0);
-      cb.crc32(temp_reg, temp_reg2);
+    if constexpr (std::is_same<asmjit::x86::Xmm, VectorRegT>::value) {
+      Cb.vmovapd(asmjit::x86::xmm0, asmjit::x86::Xmm(i));
+
+      Cb.movq(temp_reg2, asmjit::x86::xmm0);
+      Cb.crc32(temp_reg, temp_reg2);
+      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+      Cb.movq(temp_reg2, asmjit::x86::xmm0);
+      Cb.crc32(temp_reg, temp_reg2);
+    } else if constexpr (std::is_same<asmjit::x86::Ymm, VectorRegT>::value) {
+      Cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(i));
+
+      Cb.movq(temp_reg2, asmjit::x86::xmm0);
+      Cb.crc32(temp_reg, temp_reg2);
+      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+      Cb.movq(temp_reg2, asmjit::x86::xmm0);
+      Cb.crc32(temp_reg, temp_reg2);
+
+      Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1));
+
+      Cb.movq(temp_reg2, asmjit::x86::xmm0);
+      Cb.crc32(temp_reg, temp_reg2);
+      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+      Cb.movq(temp_reg2, asmjit::x86::xmm0);
+      Cb.crc32(temp_reg, temp_reg2);
+    } else if constexpr (std::is_same<asmjit::x86::Zmm, VectorRegT>::value) {
+      Cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(i));
+
+      Cb.movq(temp_reg2, asmjit::x86::xmm0);
+      Cb.crc32(temp_reg, temp_reg2);
+      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+      Cb.movq(temp_reg2, asmjit::x86::xmm0);
+      Cb.crc32(temp_reg, temp_reg2);
+
+      Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1));
+
+      Cb.movq(temp_reg2, asmjit::x86::xmm0);
+      Cb.crc32(temp_reg, temp_reg2);
+      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+      Cb.movq(temp_reg2, asmjit::x86::xmm0);
+      Cb.crc32(temp_reg, temp_reg2);
+
+      Cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(i), asmjit::Imm(2));
+
+      Cb.movq(temp_reg2, asmjit::x86::xmm0);
+      Cb.crc32(temp_reg, temp_reg2);
+      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+      Cb.movq(temp_reg2, asmjit::x86::xmm0);
+      Cb.crc32(temp_reg, temp_reg2);
+
+      Cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(i), asmjit::Imm(3));
+
+      Cb.movq(temp_reg2, asmjit::x86::xmm0);
+      Cb.crc32(temp_reg, temp_reg2);
+      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+      Cb.movq(temp_reg2, asmjit::x86::xmm0);
+      Cb.crc32(temp_reg, temp_reg2);
     }
   }
 
   // Restore VectorReg(0) from backup
-  if constexpr (std::is_same<asmjit::x86::Xmm, VectorReg>::value) {
-    cb.pop(temp_reg2);
-    cb.movq(asmjit::x86::xmm0, temp_reg2);
-    cb.movlhps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-    cb.pop(temp_reg2);
-    cb.pinsrw(asmjit::x86::xmm0, temp_reg2.r32(), asmjit::Imm(0));
-    cb.shr(temp_reg2, asmjit::Imm(32));
-    cb.movd(temp_reg2.r32(), asmjit::x86::Mm(7));
-    cb.pinsrw(asmjit::x86::xmm0, temp_reg2.r32(), asmjit::Imm(1));
-  } else if constexpr (std::is_same<asmjit::x86::Ymm, VectorReg>::value &&
-                       std::is_same<asmjit::x86::Mm, IterReg>::value) {
-    cb.movq(temp_reg2, asmjit::x86::Mm(5));
-    cb.movq(asmjit::x86::xmm0, temp_reg2);
-    cb.movq(temp_reg2, asmjit::x86::Mm(4));
-    cb.pinsrq(asmjit::x86::xmm0, temp_reg2, asmjit::Imm(1));
-
-    cb.vinsertf128(asmjit::x86::ymm0, asmjit::x86::ymm0, asmjit::x86::xmm0, asmjit::Imm(1));
-
-    cb.movq(temp_reg2, asmjit::x86::Mm(7));
-    cb.movq(asmjit::x86::xmm0, temp_reg2);
-    cb.movq(temp_reg2, asmjit::x86::Mm(6));
-    cb.pinsrq(asmjit::x86::xmm0, temp_reg2, asmjit::Imm(1));
-  } else if constexpr (std::is_same<asmjit::x86::Zmm, VectorReg>::value &&
-                       std::is_same<asmjit::x86::Mm, IterReg>::value) {
+  if constexpr (std::is_same<asmjit::x86::Xmm, VectorRegT>::value) {
+    Cb.pop(temp_reg2);
+    Cb.movq(asmjit::x86::xmm0, temp_reg2);
+    Cb.movlhps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+    Cb.pop(temp_reg2);
+    Cb.pinsrw(asmjit::x86::xmm0, temp_reg2.r32(), asmjit::Imm(0));
+    Cb.shr(temp_reg2, asmjit::Imm(32));
+    Cb.movd(temp_reg2.r32(), asmjit::x86::Mm(7));
+    Cb.pinsrw(asmjit::x86::xmm0, temp_reg2.r32(), asmjit::Imm(1));
+  } else if constexpr (std::is_same<asmjit::x86::Ymm, VectorRegT>::value &&
+                       std::is_same<asmjit::x86::Mm, IterRegT>::value) {
+    Cb.movq(temp_reg2, asmjit::x86::Mm(5));
+    Cb.movq(asmjit::x86::xmm0, temp_reg2);
+    Cb.movq(temp_reg2, asmjit::x86::Mm(4));
+    Cb.pinsrq(asmjit::x86::xmm0, temp_reg2, asmjit::Imm(1));
+
+    Cb.vinsertf128(asmjit::x86::ymm0, asmjit::x86::ymm0, asmjit::x86::xmm0, asmjit::Imm(1));
+
+    Cb.movq(temp_reg2, asmjit::x86::Mm(7));
+    Cb.movq(asmjit::x86::xmm0, temp_reg2);
+    Cb.movq(temp_reg2, asmjit::x86::Mm(6));
+    Cb.pinsrq(asmjit::x86::xmm0, temp_reg2, asmjit::Imm(1));
+  } else if constexpr (std::is_same<asmjit::x86::Zmm, VectorRegT>::value &&
+                       std::is_same<asmjit::x86::Mm, IterRegT>::value) {
     // We use vector registers zmm31 for our backup
-    cb.vmovapd(asmjit::x86::zmm0, asmjit::x86::zmm31);
+    Cb.vmovapd(asmjit::x86::zmm0, asmjit::x86::zmm31);
   }
 
   // before starting the communication, backup r8, r9, rax, rbx, rcx and rdx
-  if constexpr (std::is_same<asmjit::x86::Mm, IterReg>::value) {
-    cb.movq(asmjit::x86::Mm(7), asmjit::x86::rax);
-    cb.movq(asmjit::x86::Mm(6), asmjit::x86::rbx);
-    cb.movq(asmjit::x86::Mm(5), asmjit::x86::rcx);
-    cb.movq(asmjit::x86::Mm(4), asmjit::x86::rdx);
-    cb.movq(asmjit::x86::Mm(3), asmjit::x86::r8);
-    cb.movq(asmjit::x86::Mm(2), asmjit::x86::r9);
+  if constexpr (std::is_same<asmjit::x86::Mm, IterRegT>::value) {
+    Cb.movq(asmjit::x86::Mm(7), asmjit::x86::rax);
+    Cb.movq(asmjit::x86::Mm(6), asmjit::x86::rbx);
+    Cb.movq(asmjit::x86::Mm(5), asmjit::x86::rcx);
+    Cb.movq(asmjit::x86::Mm(4), asmjit::x86::rdx);
+    Cb.movq(asmjit::x86::Mm(3), asmjit::x86::r8);
+    Cb.movq(asmjit::x86::Mm(2), asmjit::x86::r9);
   } else {
-    cb.push(asmjit::x86::rax);
-    cb.push(asmjit::x86::rbx);
-    cb.push(asmjit::x86::rcx);
-    cb.push(asmjit::x86::rdx);
-    cb.push(asmjit::x86::r8);
-    cb.push(asmjit::x86::r9);
+    Cb.push(asmjit::x86::rax);
+    Cb.push(asmjit::x86::rbx);
+    Cb.push(asmjit::x86::rcx);
+    Cb.push(asmjit::x86::rdx);
+    Cb.push(asmjit::x86::r8);
+    Cb.push(asmjit::x86::r9);
   }
 
   // do the actual communication
   // temp_reg contains our hash
 
   // save the pointer_reg. it might be any of r8, r9, rax, rbx, rcx or rdx
-  cb.mov(temp_reg2, pointer_reg);
+  Cb.mov(temp_reg2, pointer_reg);
 
   // Don't touch me!
   // This sychronization and communication works even if the threads run at
@@ -311,144 +310,144 @@ void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder& cb, IterReg iter_r
   // by a few hours of headache for two people.
   auto communication = [&](auto offset) {
     // communication
-    cb.mov(asmjit::x86::r8, asmjit::x86::ptr_64(temp_reg2, offset));
+    Cb.mov(asmjit::x86::r8, asmjit::x86::ptr_64(temp_reg2, offset));
 
     // temp data
-    cb.mov(asmjit::x86::r9, temp_reg2);
-    cb.add(asmjit::x86::r9, asmjit::Imm(offset + 8));
+    Cb.mov(asmjit::x86::r9, temp_reg2);
+    Cb.add(asmjit::x86::r9, asmjit::Imm(offset + 8));
 
-    cb.mov(asmjit::x86::rdx, asmjit::x86::ptr_64(asmjit::x86::r9, 0));
-    cb.mov(asmjit::x86::rax, asmjit::x86::ptr_64(asmjit::x86::r9, 8));
+    Cb.mov(asmjit::x86::rdx, asmjit::x86::ptr_64(asmjit::x86::r9, 0));
+    Cb.mov(asmjit::x86::rax, asmjit::x86::ptr_64(asmjit::x86::r9, 8));
 
-    auto L0 = cb.newLabel();
-    cb.bind(L0);
+    auto L0 = Cb.newLabel();
+    Cb.bind(L0);
 
-    cb.lock();
-    cb.cmpxchg16b(asmjit::x86::ptr(asmjit::x86::r8));
+    Cb.lock();
+    Cb.cmpxchg16b(asmjit::x86::ptr(asmjit::x86::r8));
 
-    auto L1 = cb.newLabel();
-    cb.jnz(L1);
+    auto L1 = Cb.newLabel();
+    Cb.jnz(L1);
 
-    cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx);
-    cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx);
-    cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
-    cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
+    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx);
+    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx);
+    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
+    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
 
-    cb.mov(asmjit::x86::rax, asmjit::Imm(2));
+    Cb.mov(asmjit::x86::rax, asmjit::Imm(2));
 
-    auto L6 = cb.newLabel();
-    cb.jmp(L6);
+    auto L6 = Cb.newLabel();
+    Cb.jmp(L6);
 
-    cb.bind(L1);
+    Cb.bind(L1);
 
-    cb.cmp(asmjit::x86::rcx, asmjit::x86::rdx);
+    Cb.cmp(asmjit::x86::rcx, asmjit::x86::rdx);
 
-    auto L2 = cb.newLabel();
-    cb.jle(L2);
+    auto L2 = Cb.newLabel();
+    Cb.jle(L2);
 
-    cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx);
-    cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx);
+    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx);
+    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx);
 
-    cb.jmp(L0);
+    Cb.jmp(L0);
 
-    cb.bind(L2);
+    Cb.bind(L2);
 
-    auto L3 = cb.newLabel();
+    auto L3 = Cb.newLabel();
 
-    cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
-    cb.jne(L3);
-    cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
-    cb.jne(L3);
+    Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
+    Cb.jne(L3);
+    Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
+    Cb.jne(L3);
 
-    cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::x86::rdx);
-    cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::x86::rax);
+    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::x86::rdx);
+    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::x86::rax);
 
-    cb.bind(L3);
+    Cb.bind(L3);
 
-    cb.cmp(asmjit::x86::rcx, asmjit::x86::ptr_64(asmjit::x86::r9, 16));
-    cb.mov(asmjit::x86::rax, asmjit::Imm(4));
-    cb.jne(L6);
+    Cb.cmp(asmjit::x86::rcx, asmjit::x86::ptr_64(asmjit::x86::r9, 16));
+    Cb.mov(asmjit::x86::rax, asmjit::Imm(4));
+    Cb.jne(L6);
 
-    cb.cmp(asmjit::x86::rbx, asmjit::x86::ptr_64(asmjit::x86::r9, 24));
-    auto L4 = cb.newLabel();
-    cb.jne(L4);
+    Cb.cmp(asmjit::x86::rbx, asmjit::x86::ptr_64(asmjit::x86::r9, 24));
+    auto L4 = Cb.newLabel();
+    Cb.jne(L4);
 
-    cb.mov(asmjit::x86::rax, asmjit::Imm(0));
+    Cb.mov(asmjit::x86::rax, asmjit::Imm(0));
 
-    auto L5 = cb.newLabel();
-    cb.jmp(L5);
+    auto L5 = Cb.newLabel();
+    Cb.jmp(L5);
 
-    cb.bind(L4);
+    Cb.bind(L4);
 
-    cb.mov(asmjit::x86::rax, asmjit::Imm(1));
+    Cb.mov(asmjit::x86::rax, asmjit::Imm(1));
 
-    cb.bind(L5);
+    Cb.bind(L5);
 
-    cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
-    cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
+    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
+    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
 
-    cb.bind(L6);
+    Cb.bind(L6);
 
     // if check failed
-    cb.cmp(asmjit::x86::rax, asmjit::Imm(1));
-    auto L7 = cb.newLabel();
-    cb.jne(L7);
+    Cb.cmp(asmjit::x86::rax, asmjit::Imm(1));
+    auto L7 = Cb.newLabel();
+    Cb.jne(L7);
 
     // write the error flag
-    cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 32), asmjit::Imm(1));
+    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 32), asmjit::Imm(1));
 
     // stop the execution after some time
-    cb.mov(asmjit::x86::ptr_64(addrHigh_reg), asmjit::Imm(LOAD_STOP));
-    cb.mfence();
+    Cb.mov(asmjit::x86::ptr_64(addrHigh_reg), asmjit::Imm(LOAD_STOP));
+    Cb.mfence();
 
-    cb.bind(L7);
+    Cb.bind(L7);
 
-    auto L9 = cb.newLabel();
-    cb.jmp(L9);
+    auto L9 = Cb.newLabel();
+    Cb.jmp(L9);
   };
 
   // left communication
   // move hash
-  cb.mov(asmjit::x86::rbx, temp_reg);
+  Cb.mov(asmjit::x86::rbx, temp_reg);
   // move iterations counter
-  if constexpr (std::is_same<asmjit::x86::Mm, IterReg>::value) {
-    cb.movq(asmjit::x86::rcx, iter_reg);
+  if constexpr (std::is_same<asmjit::x86::Mm, IterRegT>::value) {
+    Cb.movq(asmjit::x86::rcx, IterReg);
   } else {
-    cb.mov(asmjit::x86::rcx, iter_reg);
+    Cb.mov(asmjit::x86::rcx, IterReg);
   }
 
   communication(-128);
 
   // right communication
   // move hash
-  cb.mov(asmjit::x86::rbx, temp_reg);
+  Cb.mov(asmjit::x86::rbx, temp_reg);
   // move iterations counter
-  if constexpr (std::is_same<asmjit::x86::Mm, IterReg>::value) {
-    cb.movq(asmjit::x86::rcx, iter_reg);
+  if constexpr (std::is_same<asmjit::x86::Mm, IterRegT>::value) {
+    Cb.movq(asmjit::x86::rcx, IterReg);
   } else {
-    cb.mov(asmjit::x86::rcx, iter_reg);
+    Cb.mov(asmjit::x86::rcx, IterReg);
   }
 
   communication(-64);
 
   // restore r8, r9, rax, rbx, rcx and rdx
-  if constexpr (std::is_same<asmjit::x86::Mm, IterReg>::value) {
-    cb.movq(asmjit::x86::rax, asmjit::x86::Mm(7));
-    cb.movq(asmjit::x86::rbx, asmjit::x86::Mm(6));
-    cb.movq(asmjit::x86::rcx, asmjit::x86::Mm(5));
-    cb.movq(asmjit::x86::rdx, asmjit::x86::Mm(4));
-    cb.movq(asmjit::x86::r8, asmjit::x86::Mm(3));
-    cb.movq(asmjit::x86::r9, asmjit::x86::Mm(2));
+  if constexpr (std::is_same<asmjit::x86::Mm, IterRegT>::value) {
+    Cb.movq(asmjit::x86::rax, asmjit::x86::Mm(7));
+    Cb.movq(asmjit::x86::rbx, asmjit::x86::Mm(6));
+    Cb.movq(asmjit::x86::rcx, asmjit::x86::Mm(5));
+    Cb.movq(asmjit::x86::rdx, asmjit::x86::Mm(4));
+    Cb.movq(asmjit::x86::r8, asmjit::x86::Mm(3));
+    Cb.movq(asmjit::x86::r9, asmjit::x86::Mm(2));
   } else {
-    cb.pop(asmjit::x86::r9);
-    cb.pop(asmjit::x86::r8);
-    cb.pop(asmjit::x86::rdx);
-    cb.pop(asmjit::x86::rcx);
-    cb.pop(asmjit::x86::rbx);
-    cb.pop(asmjit::x86::rax);
+    Cb.pop(asmjit::x86::r9);
+    Cb.pop(asmjit::x86::r8);
+    Cb.pop(asmjit::x86::rdx);
+    Cb.pop(asmjit::x86::rcx);
+    Cb.pop(asmjit::x86::rbx);
+    Cb.pop(asmjit::x86::rax);
   }
 
-  cb.bind(SkipErrorDetection);
+  Cb.bind(SkipErrorDetection);
 }
 
 template void X86Payload::emitErrorDetectionCode<asmjit::x86::Gpq, asmjit::x86::Xmm>(
diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
index b933dcd1..ac7550e1 100644
--- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
@@ -43,25 +43,25 @@ int ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   unsigned bytes = 0;
 
   for (const auto& item : sequence) {
-    auto it = this->instructionFlops.find(item);
+    auto it = this->InstructionFlops.find(item);
 
-    if (it == this->instructionFlops.end()) {
+    if (it == this->InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
     flops += it->second;
 
-    it = this->instructionMemory.find(item);
+    it = this->InstructionMemory.find(item);
 
-    if (it != this->instructionMemory.end()) {
+    if (it != this->InstructionMemory.end()) {
       bytes += it->second;
     }
   }
 
-  this->_flops = repetitions * flops;
-  this->_bytes = repetitions * bytes;
-  this->_instructions = repetitions * sequence.size() * 4 + 6;
+  this->Flops = repetitions * flops;
+  this->Bytes = repetitions * bytes;
+  this->Instructions = repetitions * sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
   auto l1i_cache_size = instructionCacheSize / thread;
@@ -79,10 +79,10 @@ int ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
 
   CodeHolder code;
-  code.init(this->rt.environment());
+  code.init(this->Rt.environment());
 
-  if (nullptr != this->loadFunction) {
-    this->rt.release(&this->loadFunction);
+  if (nullptr != this->LoadFunction) {
+    this->Rt.release(&this->LoadFunction);
   }
 
   Builder cb(&code);
@@ -108,9 +108,8 @@ int ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   auto ram_reg = ymm15;
 
   FuncDetail func;
-  func.init(FuncSignatureT<unsigned long long, unsigned long long*, volatile unsigned long long*, unsigned long long>(
-                CallConvId::kCDecl),
-            this->rt.environment());
+  func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl),
+            this->Rt.environment());
 
   FuncFrame frame;
   frame.init(func);
@@ -304,7 +303,7 @@ int ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     cb.add(ram_addr, Imm(l3_size));
     cb.bind(NoRamReset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   cb.inc(temp_reg); // increment iteration counter
   if (this->getL2SequenceCount(sequence) > 0) {
@@ -318,7 +317,7 @@ int ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     cb.add(l2_addr, Imm(l1_size));
     cb.bind(NoL2Reset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   cb.movq(iter_reg, temp_reg); // store iteration counter
   if (this->getL3SequenceCount(sequence) > 0) {
@@ -332,7 +331,7 @@ int ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     cb.add(l3_addr, Imm(l2_size));
     cb.bind(NoL3Reset);
     // adds always two instruction
-    this->_instructions += 2;
+    this->Instructions += 2;
   }
   cb.mov(l1_addr, pointer_reg);
 
@@ -371,7 +370,7 @@ int ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   // String sb;
   // cb.dump(sb);
 
-  Error err = this->rt.add(&this->loadFunction, &code);
+  Error err = this->Rt.add(&this->LoadFunction, &code);
   if (err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
@@ -398,12 +397,12 @@ int ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
 std::list<std::string> ZENFMAPayload::getAvailableInstructions() const {
   std::list<std::string> instructions;
 
-  transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions),
+  transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions),
             [](const auto& item) { return item.first; });
 
   return instructions;
 }
 
-void ZENFMAPayload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) {
+void ZENFMAPayload::init(uint64_t* memoryAddr, uint64_t bufferSize) {
   X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
diff --git a/src/firestarter/Environment/X86/X86CPUTopology.cpp b/src/firestarter/Environment/X86/X86CPUTopology.cpp
index 6e7eb288..dae61165 100644
--- a/src/firestarter/Environment/X86/X86CPUTopology.cpp
+++ b/src/firestarter/Environment/X86/X86CPUTopology.cpp
@@ -35,29 +35,29 @@ using namespace firestarter::environment::x86;
 
 X86CPUTopology::X86CPUTopology()
     : CPUTopology("x86_64")
-    , cpuInfo(asmjit::CpuInfo::host())
-    , _vendor(this->cpuInfo.vendor()) {
+    , CpuInfo(asmjit::CpuInfo::host())
+    , Vendor(this->CpuInfo.vendor()) {
 
   std::stringstream ss;
   ss << "Family " << this->familyId() << ", Model " << this->modelId() << ", Stepping " << this->stepping();
-  this->_model = ss.str();
+  this->Model = ss.str();
 
   for (int i = 0; i <= (int)asmjit::CpuFeatures::X86::Id::kMaxValue; i++) {
-    if (!this->cpuInfo.hasFeature(i)) {
+    if (!this->CpuInfo.hasFeature(i)) {
       continue;
     }
 
     asmjit::String sb;
 
-    auto error = asmjit::Formatter::formatFeature(sb, this->cpuInfo.arch(), i);
+    auto error = asmjit::Formatter::formatFeature(sb, this->CpuInfo.arch(), i);
     if (error != asmjit::ErrorCode::kErrorOk) {
       log::warn() << "Formatting cpu features got asmjit error: " << error;
     }
 
-    this->featureList.push_back(std::string(sb.data()));
+    this->FeatureList.push_back(std::string(sb.data()));
   }
 
-  unsigned long long a = 0, b = 0, c = 0, d = 0;
+  uint64_t a = 0, b = 0, c = 0, d = 0;
 
   // check if we have rdtsc
   this->cpuid(&a, &b, &c, &d);
@@ -65,9 +65,9 @@ X86CPUTopology::X86CPUTopology()
     a = 1;
     this->cpuid(&a, &b, &c, &d);
     if ((int)d & (1 << 4)) {
-      this->_hasRdtsc = true;
+      this->HasRdtsc = true;
     } else {
-      this->_hasRdtsc = false;
+      this->HasRdtsc = false;
     }
   }
 
@@ -75,7 +75,7 @@ X86CPUTopology::X86CPUTopology()
   if (this->hasRdtsc()) {
     a = 0, b = 0, c = 0, d = 0;
 
-    this->_hasInvariantRdtsc = true;
+    this->HasInvariantRdtsc = true;
 
     /* TSCs are usable if CPU supports only one frequency in C0 (no
        speedstep/Cool'n'Quite)
@@ -88,7 +88,7 @@ X86CPUTopology::X86CPUTopology()
       this->cpuid(&a, &b, &c, &d);
       /* no Frequency control */
       if ((!(d & (1 << 22))) && (!(c & (1 << 7)))) {
-        this->_hasInvariantRdtsc = true;
+        this->HasInvariantRdtsc = true;
       } else {
         a = 0x80000000;
         this->cpuid(&a, &b, &c, &d);
@@ -97,7 +97,7 @@ X86CPUTopology::X86CPUTopology()
           this->cpuid(&a, &b, &c, &d);
           /* invariant TSC */
           if (d & (1 << 8)) {
-            this->_hasInvariantRdtsc = true;
+            this->HasInvariantRdtsc = true;
           }
         }
       }
@@ -113,17 +113,17 @@ X86CPUTopology::X86CPUTopology()
 
         /* no Frequency control */
         if ((!(d & (1 << 7))) && (!(d & (1 << 1)))) {
-          this->_hasInvariantRdtsc = true;
+          this->HasInvariantRdtsc = true;
         }
         /* invariant TSC */
         if (d & (1 << 8)) {
-          this->_hasInvariantRdtsc = true;
+          this->HasInvariantRdtsc = true;
         }
       }
       /* assuming no frequency control if cpuid does not provide the extended
          function to test for it */
       else {
-        this->_hasInvariantRdtsc = true;
+        this->HasInvariantRdtsc = true;
       }
     }
   }
@@ -133,14 +133,14 @@ X86CPUTopology::X86CPUTopology()
 // only constant TSCs will be used (i.e. power management indepent TSCs)
 // save frequency in highest P-State or use generic fallback if no invarient TSC
 // is available
-unsigned long long X86CPUTopology::clockrate() const {
+uint64_t X86CPUTopology::clockrate() const {
   typedef std::chrono::high_resolution_clock Clock;
   typedef std::chrono::microseconds ticks;
 
-  unsigned long long start1_tsc, start2_tsc, end1_tsc, end2_tsc;
-  unsigned long long time_diff;
-  unsigned long long clock_lower_bound, clock_upper_bound, clock;
-  unsigned long long clockrate = 0;
+  uint64_t start1_tsc, start2_tsc, end1_tsc, end2_tsc;
+  uint64_t time_diff;
+  uint64_t clock_lower_bound, clock_upper_bound, clock;
+  uint64_t clockrate = 0;
   int i, num_measurements = 0, min_measurements;
 
   Clock::time_point start_time, end_time;
@@ -207,11 +207,11 @@ unsigned long long X86CPUTopology::clockrate() const {
   return clockrate;
 }
 
-unsigned long long X86CPUTopology::timestamp() const {
+uint64_t X86CPUTopology::timestamp() const {
 #ifndef _MSC_VER
-  unsigned long long reg_a, reg_d;
+  uint64_t reg_a, reg_d;
 #else
-  unsigned long long i;
+  uint64_t i;
 #endif
 
   if (!this->hasRdtsc()) {
@@ -227,11 +227,9 @@ unsigned long long X86CPUTopology::timestamp() const {
 #endif
 }
 
-void X86CPUTopology::cpuid(unsigned long long* a, unsigned long long* b, unsigned long long* c,
-                           unsigned long long* d) const {
+void X86CPUTopology::cpuid(uint64_t* a, uint64_t* b, uint64_t* c, uint64_t* d) const {
 #ifndef _MSC_VER
-  unsigned long long reg_a, reg_b, reg_c, reg_d;
-
+  uint64_t reg_a, reg_b, reg_c, reg_d;
   __asm__ __volatile__("cpuid;"
                        : "=a"(reg_a), "=b"(reg_b), "=c"(reg_c), "=d"(reg_d)
                        : "a"(*a), "b"(*b), "c"(*c), "d"(*d));
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index b923fbf4..508b01c6 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -29,14 +29,14 @@
 using namespace firestarter::environment::x86;
 
 void X86Environment::evaluateFunctions() {
-  for (auto ctor : this->platformConfigsCtor) {
+  for (auto ctor : this->PlatformConfigsCtor) {
     // add asmjit for model and family detection
-    this->platformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(),
+    this->PlatformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(),
                                          this->topology().modelId(), this->topology().numThreadsPerCore()));
   }
 
-  for (auto ctor : this->fallbackPlatformConfigsCtor) {
-    this->fallbackPlatformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(),
+  for (auto ctor : this->FallbackPlatformConfigsCtor) {
+    this->FallbackPlatformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(),
                                                  this->topology().modelId(), this->topology().numThreadsPerCore()));
   }
 }
@@ -46,7 +46,7 @@ int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePay
   std::string defaultPayloadName("");
 
   // if functionId is 0 get the default or fallback
-  for (auto config : this->platformConfigs) {
+  for (auto config : this->PlatformConfigs) {
     for (auto const& [thread, functionName] : config->getThreadMap()) {
       // the selected function
       if (id == functionId) {
@@ -58,14 +58,14 @@ int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePay
           }
         }
         // found function
-        this->_selectedConfig = new ::firestarter::environment::platform::RuntimeConfig(
+        this->SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(
             *config, thread, this->topology().instructionCacheSize());
         return EXIT_SUCCESS;
       }
       // default function
       if (0 == functionId && config->isDefault()) {
         if (thread == this->topology().numThreadsPerCore()) {
-          this->_selectedConfig = new ::firestarter::environment::platform::RuntimeConfig(
+          this->SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(
               *config, thread, this->topology().instructionCacheSize());
           return EXIT_SUCCESS;
         } else {
@@ -91,7 +91,7 @@ int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePay
 
     // loop over available implementation and check if they are marked as
     // fallback
-    for (auto config : this->fallbackPlatformConfigs) {
+    for (auto config : this->FallbackPlatformConfigs) {
       if (config->isAvailable()) {
         auto selectedThread = 0;
         auto selectedFunctionName = std::string("");
@@ -105,7 +105,7 @@ int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePay
           selectedThread = config->getThreadMap().begin()->first;
           selectedFunctionName = config->getThreadMap().begin()->second;
         }
-        this->_selectedConfig = new ::firestarter::environment::platform::RuntimeConfig(
+        this->SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(
             *config, selectedThread, this->topology().instructionCacheSize());
         log::warn() << "Using function " << selectedFunctionName << " as fallback.\n"
                     << "You can use the parameter --function to try other "
@@ -200,7 +200,7 @@ void X86Environment::printFunctionSummary() {
 
   unsigned id = 1;
 
-  for (auto const& config : this->platformConfigs) {
+  for (auto const& config : this->PlatformConfigs) {
     for (auto const& [thread, functionName] : config->getThreadMap()) {
       const char* available = config->isAvailable() ? "yes" : "no";
       const char* fmt = "  %4u | %-30s | %-24s | %s";
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 7dd511f5..0df2c6c3 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -40,81 +40,81 @@ extern "C" {
 
 using namespace firestarter;
 
-Firestarter::Firestarter(const int argc, const char** argv, std::chrono::seconds const& timeout, unsigned loadPercent,
-                         std::chrono::microseconds const& period, unsigned requestedNumThreads,
-                         std::string const& cpuBind, bool printFunctionSummary, unsigned functionId,
-                         bool listInstructionGroups, std::string const& instructionGroups, unsigned lineCount,
-                         bool allowUnavailablePayload, bool dumpRegisters,
-                         std::chrono::seconds const& dumpRegistersTimeDelta, std::string const& dumpRegistersOutpath,
-                         bool errorDetection, int gpus, unsigned gpuMatrixSize, bool gpuUseFloat, bool gpuUseDouble,
-                         bool listMetrics, bool measurement, std::chrono::milliseconds const& startDelta,
-                         std::chrono::milliseconds const& stopDelta,
-                         std::chrono::milliseconds const& measurementInterval,
-                         std::vector<std::string> const& metricPaths, std::vector<std::string> const& stdinMetrics,
-                         bool optimize, std::chrono::seconds const& preheat, std::string const& optimizationAlgorithm,
-                         std::vector<std::string> const& optimizationMetrics,
-                         std::chrono::seconds const& evaluationDuration, unsigned individuals,
-                         std::string const& optimizeOutfile, unsigned generations, double nsga2_cr, double nsga2_m)
-    : _argc(argc)
-    , _argv(argv)
-    , _timeout(timeout)
-    , _loadPercent(loadPercent)
-    , _period(period)
-    , _dumpRegisters(dumpRegisters)
-    , _dumpRegistersTimeDelta(dumpRegistersTimeDelta)
-    , _dumpRegistersOutpath(dumpRegistersOutpath)
-    , _errorDetection(errorDetection)
-    , _gpus(gpus)
-    , _gpuMatrixSize(gpuMatrixSize)
-    , _gpuUseFloat(gpuUseFloat)
-    , _gpuUseDouble(gpuUseDouble)
-    , _startDelta(startDelta)
-    , _stopDelta(stopDelta)
-    , _measurement(measurement)
-    , _optimize(optimize)
-    , _preheat(preheat)
-    , _optimizationAlgorithm(optimizationAlgorithm)
-    , _optimizationMetrics(optimizationMetrics)
-    , _evaluationDuration(evaluationDuration)
-    , _individuals(individuals)
-    , _optimizeOutfile(optimizeOutfile)
-    , _generations(generations)
-    , _nsga2_cr(nsga2_cr)
-    , _nsga2_m(nsga2_m) {
+Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds const& Timeout, unsigned LoadPercent,
+                         std::chrono::microseconds const& Period, unsigned RequestedNumThreads,
+                         std::string const& CpuBind, bool PrintFunctionSummary, unsigned FunctionId,
+                         bool ListInstructionGroups, std::string const& InstructionGroups, unsigned LineCount,
+                         bool AllowUnavailablePayload, bool DumpRegisters,
+                         std::chrono::seconds const& DumpRegistersTimeDelta, std::string const& DumpRegistersOutpath,
+                         bool ErrorDetection, int Gpus, unsigned GpuMatrixSize, bool GpuUseFloat, bool GpuUseDouble,
+                         bool ListMetrics, bool Measurement, std::chrono::milliseconds const& StartDelta,
+                         std::chrono::milliseconds const& StopDelta,
+                         std::chrono::milliseconds const& MeasurementInterval,
+                         std::vector<std::string> const& MetricPaths, std::vector<std::string> const& StdinMetrics,
+                         bool Optimize, std::chrono::seconds const& Preheat, std::string const& OptimizationAlgorithm,
+                         std::vector<std::string> const& OptimizationMetrics,
+                         std::chrono::seconds const& EvaluationDuration, unsigned Individuals,
+                         std::string const& OptimizeOutfile, unsigned Generations, double Nsga2Cr, double Nsga2M)
+    : Argc(Argc)
+    , Argv(Argv)
+    , Timeout(Timeout)
+    , LoadPercent(LoadPercent)
+    , Period(Period)
+    , DumpRegisters(DumpRegisters)
+    , DumpRegistersTimeDelta(DumpRegistersTimeDelta)
+    , DumpRegistersOutpath(DumpRegistersOutpath)
+    , ErrorDetection(ErrorDetection)
+    , Gpus(Gpus)
+    , GpuMatrixSize(GpuMatrixSize)
+    , GpuUseFloat(GpuUseFloat)
+    , GpuUseDouble(GpuUseDouble)
+    , StartDelta(StartDelta)
+    , StopDelta(StopDelta)
+    , Measurement(Measurement)
+    , Optimize(Optimize)
+    , Preheat(Preheat)
+    , OptimizationAlgorithm(OptimizationAlgorithm)
+    , OptimizationMetrics(OptimizationMetrics)
+    , EvaluationDuration(EvaluationDuration)
+    , Individuals(Individuals)
+    , OptimizeOutfile(OptimizeOutfile)
+    , Generations(Generations)
+    , Nsga2Cr(Nsga2Cr)
+    , Nsga2M(Nsga2M) {
   int returnCode;
 
-  _load = (_period * _loadPercent) / 100;
-  if (_loadPercent == 100 || _load == std::chrono::microseconds::zero()) {
-    _period = std::chrono::microseconds::zero();
+  Load = (Period * LoadPercent) / 100;
+  if (LoadPercent == 100 || Load == std::chrono::microseconds::zero()) {
+    this->Period = std::chrono::microseconds::zero();
   }
 
 #if defined(linux) || defined(__linux__)
 #else
-  (void)listMetrics;
-  (void)measurementInterval;
-  (void)metricPaths;
-  (void)stdinMetrics;
+  (void)ListMetrics;
+  (void)MeasurementInterval;
+  (void)MetricPaths;
+  (void)StdinMetrics;
 #endif
 
 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
-  this->_environment = new environment::x86::X86Environment();
+  this->Environment = new environment::x86::X86Environment();
 #endif
 
-  if (EXIT_SUCCESS != (returnCode = this->environment().evaluateCpuAffinity(requestedNumThreads, cpuBind))) {
+  if (EXIT_SUCCESS != (returnCode = this->environment().evaluateCpuAffinity(RequestedNumThreads, CpuBind))) {
     std::exit(returnCode);
   }
 
 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
   // Error detection uses crc32 instruction added by the SSE4.2 extension to x86
-  if (_errorDetection) {
-    if (!_environment->topology().featuresAsmjit().has(asmjit::CpuFeatures::X86::kSSE4_2)) {
+  if (ErrorDetection) {
+    if (!Environment->topology().featuresAsmjit().has(asmjit::CpuFeatures::X86::kSSE4_2)) {
       throw std::invalid_argument("Option --error-detection requires the crc32 "
                                   "instruction added with SSE_4_2.\n");
     }
   }
 #endif
 
-  if (_errorDetection && this->environment().requestedNumThreads() < 2) {
+  if (ErrorDetection && this->environment().requestedNumThreads() < 2) {
     throw std::invalid_argument("Option --error-detection must run with 2 or more threads. Number of "
                                 "threads is " +
                                 std::to_string(this->environment().requestedNumThreads()) + "\n");
@@ -122,43 +122,43 @@ Firestarter::Firestarter(const int argc, const char** argv, std::chrono::seconds
 
   this->environment().evaluateFunctions();
 
-  if (printFunctionSummary) {
+  if (PrintFunctionSummary) {
     this->environment().printFunctionSummary();
     std::exit(EXIT_SUCCESS);
   }
 
-  if (EXIT_SUCCESS != (returnCode = this->environment().selectFunction(functionId, allowUnavailablePayload))) {
+  if (EXIT_SUCCESS != (returnCode = this->environment().selectFunction(FunctionId, AllowUnavailablePayload))) {
     std::exit(returnCode);
   }
 
-  if (listInstructionGroups) {
+  if (ListInstructionGroups) {
     this->environment().printAvailableInstructionGroups();
     std::exit(EXIT_SUCCESS);
   }
 
-  if (!instructionGroups.empty()) {
-    if (EXIT_SUCCESS != (returnCode = this->environment().selectInstructionGroups(instructionGroups))) {
+  if (!InstructionGroups.empty()) {
+    if (EXIT_SUCCESS != (returnCode = this->environment().selectInstructionGroups(InstructionGroups))) {
       std::exit(returnCode);
     }
   }
 
-  if (lineCount != 0) {
-    this->environment().setLineCount(lineCount);
+  if (LineCount != 0) {
+    this->environment().setLineCount(LineCount);
   }
 
 #if defined(linux) || defined(__linux__)
-  if (_measurement || listMetrics || _optimize) {
-    _measurementWorker = std::make_shared<measurement::MeasurementWorker>(
-        measurementInterval, this->environment().requestedNumThreads(), metricPaths, stdinMetrics);
+  if (Measurement || ListMetrics || Optimize) {
+    MeasurementWorker = std::make_shared<measurement::MeasurementWorker>(
+        MeasurementInterval, this->environment().requestedNumThreads(), MetricPaths, StdinMetrics);
 
-    if (listMetrics) {
-      log::info() << _measurementWorker->availableMetrics();
+    if (ListMetrics) {
+      log::info() << MeasurementWorker->availableMetrics();
       std::exit(EXIT_SUCCESS);
     }
 
     // init all metrics
-    auto all = _measurementWorker->metricNames();
-    auto initialized = _measurementWorker->initMetrics(all);
+    auto all = MeasurementWorker->metricNames();
+    auto initialized = MeasurementWorker->initMetrics(all);
 
     if (initialized.size() == 0) {
       log::error() << "No metrics initialized";
@@ -166,7 +166,7 @@ Firestarter::Firestarter(const int argc, const char** argv, std::chrono::seconds
     }
 
     // check if selected metrics are initialized
-    for (auto const& optimizationMetric : optimizationMetrics) {
+    for (auto const& optimizationMetric : OptimizationMetrics) {
       auto nameEqual = [optimizationMetric](auto const& name) {
         auto invertedName = "-" + name;
         return name.compare(optimizationMetric) == 0 || invertedName.compare(optimizationMetric) == 0;
@@ -184,71 +184,71 @@ Firestarter::Firestarter(const int argc, const char** argv, std::chrono::seconds
     }
   }
 
-  if (_optimize) {
+  if (Optimize) {
     auto applySettings = std::bind(
         [this](std::vector<std::pair<std::string, unsigned>> const& setting) {
           using Clock = std::chrono::high_resolution_clock;
           auto start = Clock::now();
 
-          for (auto& thread : this->loadThreads) {
+          for (auto& thread : this->LoadThreads) {
             auto td = thread.second;
 
             td->config().setPayloadSettings(setting);
           }
 
-          for (auto const& thread : this->loadThreads) {
+          for (auto const& thread : this->LoadThreads) {
             auto td = thread.second;
 
-            td->mutex.lock();
+            td->Mutex.lock();
           }
 
-          for (auto const& thread : this->loadThreads) {
+          for (auto const& thread : this->LoadThreads) {
             auto td = thread.second;
 
-            td->comm = THREAD_SWITCH;
-            td->mutex.unlock();
+            td->Comm = THREAD_SWITCH;
+            td->Mutex.unlock();
           }
 
-          this->loadVar = LOAD_SWITCH;
+          this->LoadVar = LOAD_SWITCH;
 
-          for (auto const& thread : this->loadThreads) {
+          for (auto const& thread : this->LoadThreads) {
             auto td = thread.second;
             bool ack;
 
             do {
-              td->mutex.lock();
-              ack = td->ack;
-              td->mutex.unlock();
+              td->Mutex.lock();
+              ack = td->Ack;
+              td->Mutex.unlock();
             } while (!ack);
 
-            td->mutex.lock();
-            td->ack = false;
-            td->mutex.unlock();
+            td->Mutex.lock();
+            td->Ack = false;
+            td->Mutex.unlock();
           }
 
-          this->loadVar = LOAD_HIGH;
+          this->LoadVar = LOAD_HIGH;
 
           this->signalWork();
 
-          unsigned long long startTimestamp = 0xffffffffffffffff;
-          unsigned long long stopTimestamp = 0;
+          uint64_t startTimestamp = 0xffffffffffffffff;
+          uint64_t stopTimestamp = 0;
 
-          for (auto const& thread : this->loadThreads) {
+          for (auto const& thread : this->LoadThreads) {
             auto td = thread.second;
 
-            if (startTimestamp > td->lastStartTsc) {
-              startTimestamp = td->lastStartTsc;
+            if (startTimestamp > td->LastStartTsc) {
+              startTimestamp = td->LastStartTsc;
             }
-            if (stopTimestamp < td->lastStopTsc) {
-              stopTimestamp = td->lastStopTsc;
+            if (stopTimestamp < td->LastStopTsc) {
+              stopTimestamp = td->LastStopTsc;
             }
           }
 
-          for (auto const& thread : this->loadThreads) {
+          for (auto const& thread : this->LoadThreads) {
             auto td = thread.second;
-            ipc_estimate_metric_insert((double)td->lastIterations *
-                                       (double)this->loadThreads.front().second->config().payload().instructions() /
-                                       (double)(stopTimestamp - startTimestamp));
+            ipcEstimateMetricInsert((double)td->LastIterations *
+                                    (double)this->LoadThreads.front().second->config().payload().instructions() /
+                                    (double)(stopTimestamp - startTimestamp));
           }
 
           auto end = Clock::now();
@@ -259,18 +259,18 @@ Firestarter::Firestarter(const int argc, const char** argv, std::chrono::seconds
         std::placeholders::_1);
 
     auto prob = std::make_shared<firestarter::optimizer::problem::CLIArgumentProblem>(
-        std::move(applySettings), _measurementWorker, _optimizationMetrics, _evaluationDuration, _startDelta,
-        _stopDelta, this->environment().selectedConfig().payloadItems());
+        std::move(applySettings), MeasurementWorker, OptimizationMetrics, EvaluationDuration, StartDelta, StopDelta,
+        this->environment().selectedConfig().payloadItems());
 
-    _population = firestarter::optimizer::Population(std::move(prob));
+    Population = firestarter::optimizer::Population(std::move(prob));
 
-    if (_optimizationAlgorithm == "NSGA2") {
-      _algorithm = std::make_unique<firestarter::optimizer::algorithm::NSGA2>(_generations, _nsga2_cr, _nsga2_m);
+    if (OptimizationAlgorithm == "NSGA2") {
+      Algorithm = std::make_unique<firestarter::optimizer::algorithm::NSGA2>(Generations, Nsga2Cr, Nsga2M);
     } else {
-      throw std::invalid_argument("Algorithm " + _optimizationAlgorithm + " unknown.");
+      throw std::invalid_argument("Algorithm " + OptimizationAlgorithm + " unknown.");
     }
 
-    _algorithm->checkPopulation(static_cast<firestarter::optimizer::Population const&>(_population), _individuals);
+    Algorithm->checkPopulation(static_cast<firestarter::optimizer::Population const&>(Population), Individuals);
   }
 #endif
 
@@ -280,7 +280,7 @@ Firestarter::Firestarter(const int argc, const char** argv, std::chrono::seconds
 
   // setup thread with either high or low load configured at the start
   // low loads has to know the length of the period
-  if (EXIT_SUCCESS != (returnCode = this->initLoadWorkers((_loadPercent == 0), _period.count()))) {
+  if (EXIT_SUCCESS != (returnCode = this->initLoadWorkers((LoadPercent == 0), Period.count()))) {
     std::exit(returnCode);
   }
 
@@ -301,7 +301,7 @@ Firestarter::~Firestarter() {
   _oneapi.reset();
 #endif
 
-  delete _environment;
+  delete Environment;
 }
 
 void Firestarter::mainThread() {
@@ -317,42 +317,42 @@ void Firestarter::mainThread() {
 
 #if defined(linux) || defined(__linux__)
   // if measurement is enabled, start it here
-  if (_measurement) {
-    _measurementWorker->startMeasurement();
+  if (Measurement) {
+    MeasurementWorker->startMeasurement();
   }
 #endif
 
   this->signalWork();
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
-  if (_dumpRegisters) {
+  if (DumpRegisters) {
     int returnCode;
-    if (EXIT_SUCCESS != (returnCode = this->initDumpRegisterWorker(_dumpRegistersTimeDelta, _dumpRegistersOutpath))) {
+    if (EXIT_SUCCESS != (returnCode = this->initDumpRegisterWorker(DumpRegistersTimeDelta, DumpRegistersOutpath))) {
       std::exit(returnCode);
     }
   }
 #endif
 
   // worker thread for load control
-  this->watchdogWorker(_period, _load, _timeout);
+  this->watchdogWorker(Period, Load, Timeout);
 
 #if defined(linux) || defined(__linux__)
   // check if optimization is selected
-  if (_optimize) {
+  if (Optimize) {
     auto startTime = optimizer::History::getTime();
 
-    Firestarter::_optimizer = std::make_unique<optimizer::OptimizerWorker>(
-        std::move(_algorithm), _population, _optimizationAlgorithm, _individuals, _preheat);
+    Firestarter::Optimizer = std::make_unique<optimizer::OptimizerWorker>(std::move(Algorithm), Population,
+                                                                          OptimizationAlgorithm, Individuals, Preheat);
 
     // wait here until optimizer thread terminates
-    Firestarter::_optimizer->join();
+    Firestarter::Optimizer->join();
 
     auto payloadItems = this->environment().selectedConfig().payloadItems();
 
-    firestarter::optimizer::History::save(_optimizeOutfile, startTime, payloadItems, _argc, _argv);
+    firestarter::optimizer::History::save(OptimizeOutfile, startTime, payloadItems, Argc, Argv);
 
     // print the best 20 according to each metric
-    firestarter::optimizer::History::printBest(_optimizationMetrics, payloadItems);
+    firestarter::optimizer::History::printBest(OptimizationMetrics, payloadItems);
 
     // stop all the load threads
     std::raise(SIGTERM);
@@ -362,35 +362,35 @@ void Firestarter::mainThread() {
   // wait for watchdog to timeout or until user terminates
   this->joinLoadWorkers();
 #ifdef FIRESTARTER_DEBUG_FEATURES
-  if (_dumpRegisters) {
+  if (DumpRegisters) {
     this->joinDumpRegisterWorker();
   }
 #endif
 
-  if (!_optimize) {
+  if (!Optimize) {
     this->printPerformanceReport();
   }
 
 #if defined(linux) || defined(__linux__)
   // if measurment is enabled, stop it here
-  if (_measurement) {
+  if (Measurement) {
     // TODO: clear this up
     log::info() << "metric,num_timepoints,duration_ms,average,stddev";
-    for (auto const& [name, sum] : _measurementWorker->getValues(_startDelta, _stopDelta)) {
-      log::info() << std::quoted(name) << "," << sum.num_timepoints << "," << sum.duration.count() << "," << sum.average
-                  << "," << sum.stddev;
+    for (auto const& [name, sum] : MeasurementWorker->getValues(StartDelta, StopDelta)) {
+      log::info() << std::quoted(name) << "," << sum.NumTimepoints << "," << sum.Duration.count() << "," << sum.Average
+                  << "," << sum.Stddev;
     }
   }
 #endif
 
-  if (_errorDetection) {
+  if (ErrorDetection) {
     this->printThreadErrorReport();
   }
 }
 
-void Firestarter::setLoad(unsigned long long value) {
+void Firestarter::setLoad(uint64_t value) {
   // signal load change to workers
-  Firestarter::loadVar = value;
+  Firestarter::LoadVar = value;
 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
 #ifndef _MSC_VER
   __asm__ __volatile__("mfence;");
@@ -412,15 +412,15 @@ void Firestarter::sigtermHandler(int signum) {
   // used in case of 0 < load < 100
   // or interrupt sleep for timeout
   {
-    std::lock_guard<std::mutex> lk(Firestarter::_watchdogTerminateMutex);
-    Firestarter::_watchdog_terminate = true;
+    std::lock_guard<std::mutex> lk(Firestarter::WatchdogTerminateMutex);
+    Firestarter::WatchdogTerminate = true;
   }
-  Firestarter::_watchdogTerminateAlert.notify_all();
+  Firestarter::WatchdogTerminateAlert.notify_all();
 
 #if defined(linux) || defined(__linux__)
   // if we have optimization running stop it
-  if (Firestarter::_optimizer) {
-    Firestarter::_optimizer->kill();
+  if (Firestarter::Optimizer) {
+    Firestarter::Optimizer->kill();
   }
 #endif
 }
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index 53323187..ed925cf1 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -45,7 +45,7 @@ using namespace firestarter;
 
 auto aligned_free_deleter = [](void* p) { ALIGNED_FREE(p); };
 
-int Firestarter::initLoadWorkers(bool lowLoad, unsigned long long period) {
+int Firestarter::initLoadWorkers(bool lowLoad, uint64_t period) {
   int returnCode;
 
   if (EXIT_SUCCESS != (returnCode = this->environment().setCpuAffinity(0))) {
@@ -54,40 +54,39 @@ int Firestarter::initLoadWorkers(bool lowLoad, unsigned long long period) {
 
   // setup load variable to execute low or high load once the threads switch to
   // work.
-  this->loadVar = lowLoad ? LOAD_LOW : LOAD_HIGH;
+  this->LoadVar = lowLoad ? LOAD_LOW : LOAD_HIGH;
 
   auto numThreads = this->environment().requestedNumThreads();
 
   // create a std::vector<std::shared_ptr<>> of requestenNumThreads()
   // communication pointers and add these to the threaddata
-  if (_errorDetection) {
-    for (unsigned long long i = 0; i < numThreads; i++) {
-      auto commPtr = reinterpret_cast<unsigned long long*>(ALIGNED_MALLOC(2 * sizeof(unsigned long long), 64));
+  if (ErrorDetection) {
+    for (uint64_t i = 0; i < numThreads; i++) {
+      auto commPtr = reinterpret_cast<uint64_t*>(ALIGNED_MALLOC(2 * sizeof(uint64_t), 64));
       assert(commPtr);
-      this->errorCommunication.push_back(std::shared_ptr<unsigned long long>(commPtr, aligned_free_deleter));
+      this->ErrorCommunication.push_back(std::shared_ptr<uint64_t>(commPtr, aligned_free_deleter));
       log::debug() << "Threads " << (i + numThreads - 1) % numThreads << " and " << i << " commPtr = 0x"
-                   << std::setfill('0') << std::setw(sizeof(unsigned long long) * 2) << std::hex
-                   << (unsigned long long)commPtr;
+                   << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex << (uint64_t)commPtr;
     }
   }
 
-  for (unsigned long long i = 0; i < numThreads; i++) {
-    auto td = std::make_shared<LoadWorkerData>(i, this->environment(), &this->loadVar, period, _dumpRegisters,
-                                               _errorDetection);
+  for (uint64_t i = 0; i < numThreads; i++) {
+    auto td =
+        std::make_shared<LoadWorkerData>(i, this->environment(), &this->LoadVar, period, DumpRegisters, ErrorDetection);
 
-    if (_errorDetection) {
+    if (ErrorDetection) {
       // distribute pointers for error deteciton. (set threads in a ring)
       // give this thread the left pointer i and right pointer (i+1) %
       // requestedNumThreads().
-      td->setErrorCommunication(this->errorCommunication[i], this->errorCommunication[(i + 1) % numThreads]);
+      td->setErrorCommunication(this->ErrorCommunication[i], this->ErrorCommunication[(i + 1) % numThreads]);
     }
 
     auto dataCacheSizeIt = td->config().platformConfig().dataCacheBufferSize().begin();
     auto ramBufferSize = td->config().platformConfig().ramBufferSize();
 
-    td->buffersizeMem =
+    td->BuffersizeMem =
         (*dataCacheSizeIt + *std::next(dataCacheSizeIt, 1) + *std::next(dataCacheSizeIt, 2) + ramBufferSize) /
-        td->config().thread() / sizeof(unsigned long long);
+        td->config().thread() / sizeof(uint64_t);
 
     // create the thread
     std::thread t(Firestarter::loadThreadWorker, td);
@@ -99,7 +98,7 @@ int Firestarter::initLoadWorkers(bool lowLoad, unsigned long long period) {
       firestarter::logging::FirstWorkerThreadFilter<firestarter::logging::record>::setFirstThread(t.get_id());
     }
 
-    this->loadThreads.push_back(std::make_pair(std::move(t), td));
+    this->LoadThreads.push_back(std::make_pair(std::move(t), td));
   }
 
   this->signalLoadWorkers(THREAD_INIT);
@@ -111,54 +110,54 @@ void Firestarter::signalLoadWorkers(int comm) {
   bool ack;
 
   // start the work
-  for (auto const& thread : this->loadThreads) {
+  for (auto const& thread : this->LoadThreads) {
     auto td = thread.second;
 
-    td->mutex.lock();
+    td->Mutex.lock();
   }
 
-  for (auto const& thread : this->loadThreads) {
+  for (auto const& thread : this->LoadThreads) {
     auto td = thread.second;
 
-    td->comm = comm;
-    td->mutex.unlock();
+    td->Comm = comm;
+    td->Mutex.unlock();
   }
 
-  for (auto const& thread : this->loadThreads) {
+  for (auto const& thread : this->LoadThreads) {
     auto td = thread.second;
 
     do {
-      td->mutex.lock();
-      ack = td->ack;
-      td->mutex.unlock();
+      td->Mutex.lock();
+      ack = td->Ack;
+      td->Mutex.unlock();
     } while (!ack);
 
-    td->mutex.lock();
-    td->ack = false;
-    td->mutex.unlock();
+    td->Mutex.lock();
+    td->Ack = false;
+    td->Mutex.unlock();
   }
 }
 
 void Firestarter::joinLoadWorkers() {
   // wait for threads after watchdog has requested termination
-  for (auto& thread : this->loadThreads) {
+  for (auto& thread : this->LoadThreads) {
     thread.first.join();
   }
 }
 
 void Firestarter::printThreadErrorReport() {
-  if (_errorDetection) {
-    auto maxSize = this->loadThreads.size();
+  if (ErrorDetection) {
+    auto maxSize = this->LoadThreads.size();
 
     std::vector<bool> errors(maxSize, false);
 
     for (decltype(maxSize) i = 0; i < maxSize; i++) {
-      auto errorDetectionStruct = this->loadThreads[i].second->errorDetectionStruct();
+      auto errorDetectionStruct = this->LoadThreads[i].second->errorDetectionStruct();
 
-      if (errorDetectionStruct->errorLeft) {
+      if (errorDetectionStruct->ErrorLeft) {
         errors[(i + maxSize - 1) % maxSize] = true;
       }
-      if (errorDetectionStruct->errorRight) {
+      if (errorDetectionStruct->ErrorRight) {
         errors[i] = true;
       }
     }
@@ -174,44 +173,44 @@ void Firestarter::printThreadErrorReport() {
 
 void Firestarter::printPerformanceReport() {
   // performance report
-  unsigned long long startTimestamp = 0xffffffffffffffff;
-  unsigned long long stopTimestamp = 0;
+  uint64_t startTimestamp = 0xffffffffffffffff;
+  uint64_t stopTimestamp = 0;
 
-  unsigned long long iterations = 0;
+  uint64_t iterations = 0;
 
   log::debug() << "\nperformance report:\n";
 
-  for (auto const& thread : this->loadThreads) {
+  for (auto const& thread : this->LoadThreads) {
     auto td = thread.second;
 
-    log::debug() << "Thread " << td->id() << ": " << td->iterations
-                 << " iterations, tsc_delta: " << td->stopTsc - td->startTsc;
+    log::debug() << "Thread " << td->id() << ": " << td->Iterations
+                 << " iterations, tsc_delta: " << td->StopTsc - td->StartTsc;
 
-    if (startTimestamp > td->startTsc) {
-      startTimestamp = td->startTsc;
+    if (startTimestamp > td->StartTsc) {
+      startTimestamp = td->StartTsc;
     }
-    if (stopTimestamp < td->stopTsc) {
-      stopTimestamp = td->stopTsc;
+    if (stopTimestamp < td->StopTsc) {
+      stopTimestamp = td->StopTsc;
     }
 
-    iterations += td->iterations;
+    iterations += td->Iterations;
   }
 
   double runtime = (double)(stopTimestamp - startTimestamp) / (double)this->environment().topology().clockrate();
   double gFlops =
-      (double)this->loadThreads.front().second->config().payload().flops() * 0.000000001 * (double)iterations / runtime;
+      (double)this->LoadThreads.front().second->config().payload().flops() * 0.000000001 * (double)iterations / runtime;
   double bandwidth =
-      (double)this->loadThreads.front().second->config().payload().bytes() * 0.000000001 * (double)iterations / runtime;
+      (double)this->LoadThreads.front().second->config().payload().bytes() * 0.000000001 * (double)iterations / runtime;
 
   // insert values for ipc-estimate metric
   // if we are on linux
 #if defined(linux) || defined(__linux__)
-  if (_measurement) {
-    for (auto const& thread : this->loadThreads) {
+  if (Measurement) {
+    for (auto const& thread : this->LoadThreads) {
       auto td = thread.second;
-      ipc_estimate_metric_insert((double)td->iterations *
-                                 (double)this->loadThreads.front().second->config().payload().instructions() /
-                                 (double)(stopTimestamp - startTimestamp));
+      ipcEstimateMetricInsert((double)td->Iterations *
+                              (double)this->LoadThreads.front().second->config().payload().instructions() /
+                              (double)(stopTimestamp - startTimestamp));
     }
   }
 #endif
@@ -256,16 +255,16 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
 #endif
 
   for (;;) {
-    td->mutex.lock();
-    int comm = td->comm;
-    td->mutex.unlock();
+    td->Mutex.lock();
+    int comm = td->Comm;
+    td->Mutex.unlock();
 
     if (comm != old) {
       old = comm;
 
-      td->mutex.lock();
-      td->ack = true;
-      td->mutex.unlock();
+      td->Mutex.lock();
+      td->Ack = true;
+      td->Mutex.unlock();
     } else {
       std::this_thread::sleep_for(std::chrono::microseconds(1));
       continue;
@@ -280,47 +279,47 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
       // compile payload
       td->config().payload().compilePayload(td->config().payloadSettings(), td->config().instructionCacheSize(),
                                             td->config().dataCacheBufferSize(), td->config().ramBufferSize(),
-                                            td->config().thread(), td->config().lines(), td->dumpRegisters,
-                                            td->errorDetection);
+                                            td->config().thread(), td->config().lines(), td->DumpRegisters,
+                                            td->ErrorDetection);
 
       // allocate memory
       // if we should dump some registers, we use the first part of the memory
       // for them.
-      td->addrMem = reinterpret_cast<unsigned long long*>(
-                        ALIGNED_MALLOC((td->buffersizeMem + td->addrOffset) * sizeof(unsigned long long), 64)) +
-                    td->addrOffset;
+      td->AddrMem =
+          reinterpret_cast<uint64_t*>(ALIGNED_MALLOC((td->BuffersizeMem + td->AddrOffset) * sizeof(uint64_t), 64)) +
+          td->AddrOffset;
 
       // exit application on error
-      if (td->addrMem - td->addrOffset == nullptr) {
+      if (td->AddrMem - td->AddrOffset == nullptr) {
         workerLog::error() << "Could not allocate memory for CPU load thread " << td->id() << "\n";
         exit(ENOMEM);
       }
 
-      if (td->dumpRegisters) {
-        reinterpret_cast<DumpRegisterStruct*>(td->addrMem - td->addrOffset)->dumpVar = DumpVariable::Wait;
+      if (td->DumpRegisters) {
+        reinterpret_cast<DumpRegisterStruct*>(td->AddrMem - td->AddrOffset)->DumpVar = DumpVariable::Wait;
       }
 
-      if (td->errorDetection) {
-        auto errorDetectionStruct = reinterpret_cast<ErrorDetectionStruct*>(td->addrMem - td->addrOffset);
+      if (td->ErrorDetection) {
+        auto errorDetectionStruct = reinterpret_cast<ErrorDetectionStruct*>(td->AddrMem - td->AddrOffset);
 
         std::memset(errorDetectionStruct, 0, sizeof(ErrorDetectionStruct));
 
         // distribute left and right communication pointers
-        errorDetectionStruct->communicationLeft = td->communicationLeft.get();
-        errorDetectionStruct->communicationRight = td->communicationRight.get();
+        errorDetectionStruct->CommunicationLeft = td->CommunicationLeft.get();
+        errorDetectionStruct->CommunicationRight = td->CommunicationRight.get();
 
         // do first touch memset 0 for the communication pointers
-        std::memset((void*)errorDetectionStruct->communicationLeft, 0, sizeof(unsigned long long) * 2);
+        std::memset((void*)errorDetectionStruct->CommunicationLeft, 0, sizeof(uint64_t) * 2);
       }
 
       // call init function
-      td->config().payload().init(td->addrMem, td->buffersizeMem);
+      td->config().payload().init(td->AddrMem, td->BuffersizeMem);
 
       break;
     // perform stress test
     case THREAD_WORK:
       // record threads start timestamp
-      td->startTsc = td->environment().topology().timestamp();
+      td->StartTsc = td->environment().topology().timestamp();
 
       // will be terminated by watchdog
       for (;;) {
@@ -331,7 +330,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
 #ifdef ENABLE_SCOREP
         SCOREP_USER_REGION_BY_NAME_BEGIN("HIGH", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
-        td->iterations = td->config().payload().highLoadFunction(td->addrMem, td->addrHigh, td->iterations);
+        td->Iterations = td->config().payload().highLoadFunction(td->AddrMem, td->AddrHigh, td->Iterations);
 
         // call low load function
 #ifdef ENABLE_VTRACING
@@ -342,7 +341,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
         SCOREP_USER_REGION_BY_NAME_END("HIGH");
         SCOREP_USER_REGION_BY_NAME_BEGIN("LOW", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
-        td->config().payload().lowLoadFunction(td->addrHigh, td->period);
+        td->config().payload().lowLoadFunction(td->AddrHigh, td->Period);
 #ifdef ENABLE_VTRACING
         VT_USER_END("LOW_LOAD_FUNC");
 #endif
@@ -351,14 +350,14 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
 #endif
 
         // terminate if master signals end of run and record stop timestamp
-        if (*td->addrHigh == LOAD_STOP) {
-          td->stopTsc = td->environment().topology().timestamp();
+        if (*td->AddrHigh == LOAD_STOP) {
+          td->StopTsc = td->environment().topology().timestamp();
 
           return;
         }
 
-        if (*td->addrHigh == LOAD_SWITCH) {
-          td->stopTsc = td->environment().topology().timestamp();
+        if (*td->AddrHigh == LOAD_SWITCH) {
+          td->StopTsc = td->environment().topology().timestamp();
 
           break;
         }
@@ -368,17 +367,17 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
       // compile payload
       td->config().payload().compilePayload(td->config().payloadSettings(), td->config().instructionCacheSize(),
                                             td->config().dataCacheBufferSize(), td->config().ramBufferSize(),
-                                            td->config().thread(), td->config().lines(), td->dumpRegisters,
-                                            td->errorDetection);
+                                            td->config().thread(), td->config().lines(), td->DumpRegisters,
+                                            td->ErrorDetection);
 
       // call init function
-      td->config().payload().init(td->addrMem, td->buffersizeMem);
+      td->config().payload().init(td->AddrMem, td->BuffersizeMem);
 
       // save old iteration count
-      td->lastIterations = td->iterations;
-      td->lastStartTsc = td->startTsc;
-      td->lastStopTsc = td->stopTsc;
-      td->iterations = 0;
+      td->LastIterations = td->Iterations;
+      td->LastStartTsc = td->StartTsc;
+      td->LastStopTsc = td->StopTsc;
+      td->Iterations = 0;
       break;
     case THREAD_WAIT:
       break;
diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp
index efd7a4bc..36405051 100644
--- a/src/firestarter/Measurement/MeasurementWorker.cpp
+++ b/src/firestarter/Measurement/MeasurementWorker.cpp
@@ -36,11 +36,11 @@ void insertCallback(void* cls, const char* metricName, int64_t timeSinceEpoch, d
 
 using namespace firestarter::measurement;
 
-MeasurementWorker::MeasurementWorker(std::chrono::milliseconds updateInterval, unsigned long long numThreads,
+MeasurementWorker::MeasurementWorker(std::chrono::milliseconds updateInterval, uint64_t numThreads,
                                      std::vector<std::string> const& metricDylibs,
                                      std::vector<std::string> const& stdinMetrics)
-    : updateInterval(updateInterval)
-    , numThreads(numThreads) {
+    : UpdateInterval(updateInterval)
+    , NumThreads(numThreads) {
 
 #ifndef FIRESTARTER_LINK_STATIC
   // open dylibs and find metric symbol.
@@ -92,18 +92,18 @@ MeasurementWorker::MeasurementWorker(std::chrono::milliseconds updateInterval, u
       continue;
     }
 
-    this->_stdinMetrics.push_back(name);
+    this->StdinMetrics.push_back(name);
   }
 
   std::stringstream ss;
   unsigned maxLength = 0;
   std::map<std::string, bool> available;
 
-  for (auto const& metric : this->metrics) {
-    std::string name(metric->name);
+  for (auto const& metric : this->Metrics) {
+    std::string name(metric->Name);
     maxLength = maxLength < name.size() ? name.size() : maxLength;
-    int returnCode = metric->init();
-    metric->fini();
+    int returnCode = metric->Init();
+    metric->Fini();
     available[name] = returnCode == EXIT_SUCCESS ? true : false;
   }
 
@@ -115,36 +115,36 @@ MeasurementWorker::MeasurementWorker(std::chrono::milliseconds updateInterval, u
     ss << (value ? "yes" : "no") << "\n";
   }
 
-  this->availableMetricsString = ss.str();
+  this->AvailableMetricsString = ss.str();
 
-  pthread_create(&this->workerThread, NULL,
+  pthread_create(&this->WorkerThread, NULL,
                  reinterpret_cast<void* (*)(void*)>(MeasurementWorker::dataAcquisitionWorker), this);
 
   // create a worker for getting metric values from stdin
-  if (this->_stdinMetrics.size() > 0) {
-    pthread_create(&this->stdinThread, NULL,
+  if (this->StdinMetrics.size() > 0) {
+    pthread_create(&this->StdinThread, NULL,
                    reinterpret_cast<void* (*)(void*)>(MeasurementWorker::stdinDataAcquisitionWorker), this);
   }
 }
 
 MeasurementWorker::~MeasurementWorker() {
-  pthread_cancel(this->workerThread);
+  pthread_cancel(this->WorkerThread);
 
-  pthread_join(this->workerThread, NULL);
+  pthread_join(this->WorkerThread, NULL);
 
-  if (this->_stdinMetrics.size() > 0) {
-    pthread_cancel(this->stdinThread);
+  if (this->StdinMetrics.size() > 0) {
+    pthread_cancel(this->StdinThread);
 
-    pthread_join(this->stdinThread, NULL);
+    pthread_join(this->StdinThread, NULL);
   }
 
-  for (auto const& [key, value] : this->values) {
+  for (auto const& [key, value] : this->Values) {
     auto metric = this->findMetricByName(key);
     if (metric == nullptr) {
       continue;
     }
 
-    metric->fini();
+    metric->Fini();
   }
 
 #ifndef FIRESTARTER_LINK_STATIC
@@ -156,104 +156,104 @@ MeasurementWorker::~MeasurementWorker() {
 
 std::vector<std::string> MeasurementWorker::metricNames() {
   std::vector<std::string> metrics;
-  std::transform(this->metrics.begin(), this->metrics.end(), std::back_inserter(metrics),
-                 [](auto& metric) -> std::string { return std::string(metric->name); });
-  for (auto const& name : this->_stdinMetrics) {
+  std::transform(this->Metrics.begin(), this->Metrics.end(), std::back_inserter(metrics),
+                 [](auto& metric) -> std::string { return std::string(metric->Name); });
+  for (auto const& name : this->StdinMetrics) {
     metrics.push_back(name);
   }
 
   return metrics;
 }
 
-const metric_interface_t* MeasurementWorker::findMetricByName(std::string metricName) {
-  auto name_equal = [metricName](auto& metricInterface) { return metricName.compare(metricInterface->name) == 0; };
-  auto metric = std::find_if(this->metrics.begin(), this->metrics.end(), name_equal);
+auto MeasurementWorker::findMetricByName(std::string MetricName) -> const MetricInterface* {
+  auto NameEqual = [MetricName](auto& MetricInterface) { return MetricName.compare(MetricInterface->Name) == 0; };
+  auto Metric = std::find_if(this->Metrics.begin(), this->Metrics.end(), NameEqual);
 
   // metric not found
-  if (metric == this->metrics.end()) {
+  if (Metric == this->Metrics.end()) {
     return nullptr;
   }
   // metric found
-  return const_cast<const metric_interface_t*>(*metric);
+  return const_cast<const MetricInterface*>(*Metric);
 }
 
 // this must be called by the main thread.
 // if not done so things like perf_event_attr.inherit might not work as expected
-std::vector<std::string> MeasurementWorker::initMetrics(std::vector<std::string> const& metricNames) {
-  this->values_mutex.lock();
+auto MeasurementWorker::initMetrics(std::vector<std::string> const& MetricNames) -> std::vector<std::string> {
+  this->ValuesMutex.lock();
 
   std::vector<std::string> initialized = {};
 
   // try to find each metric and initialize it
-  for (auto const& metricName : metricNames) {
+  for (auto const& metricName : MetricNames) {
     // init values map with empty vector
     auto name_equal = [metricName](auto const& pair) { return metricName.compare(pair.first) == 0; };
-    auto pair = std::find_if(this->values.begin(), this->values.end(), name_equal);
-    if (pair != this->values.end()) {
+    auto pair = std::find_if(this->Values.begin(), this->Values.end(), name_equal);
+    if (pair != this->Values.end()) {
       pair->second.clear();
     } else {
       auto metric = this->findMetricByName(metricName);
       if (metric != nullptr) {
-        int returnValue = metric->init();
+        int returnValue = metric->Init();
         if (returnValue != EXIT_SUCCESS) {
-          log::error() << "Metric " << metric->name << ": " << metric->get_error();
+          log::error() << "Metric " << metric->Name << ": " << metric->GetError();
           continue;
         }
       }
-      this->values[metricName] = std::vector<TimeValue>();
+      this->Values[metricName] = std::vector<TimeValue>();
       if (metric != nullptr) {
-        if (metric->type.insert_callback) {
-          metric->register_insert_callback(::insertCallback, this);
+        if (metric->Type.InsertCallback) {
+          metric->RegisterInsertCallback(::insertCallback, this);
         }
       }
       initialized.push_back(metricName);
     }
   }
 
-  this->values_mutex.unlock();
+  this->ValuesMutex.unlock();
 
   return initialized;
 }
 
 void MeasurementWorker::insertCallback(const char* metricName, int64_t timeSinceEpoch, double value) {
-  this->values_mutex.lock();
+  this->ValuesMutex.lock();
 
   using Duration = std::chrono::duration<int64_t, std::nano>;
   auto time = std::chrono::time_point<std::chrono::high_resolution_clock, Duration>(Duration(timeSinceEpoch));
   auto name_equal = [metricName](auto const& pair) { return std::string(metricName).compare(pair.first) == 0; };
-  auto pair = std::find_if(this->values.begin(), this->values.end(), name_equal);
+  auto pair = std::find_if(this->Values.begin(), this->Values.end(), name_equal);
 
-  if (pair != this->values.end()) {
+  if (pair != this->Values.end()) {
     pair->second.push_back(TimeValue(time, value));
   }
 
-  this->values_mutex.unlock();
+  this->ValuesMutex.unlock();
 }
 
-void MeasurementWorker::startMeasurement() { this->startTime = std::chrono::high_resolution_clock::now(); }
+void MeasurementWorker::startMeasurement() { this->StartTime = std::chrono::high_resolution_clock::now(); }
 
 std::map<std::string, Summary> MeasurementWorker::getValues(std::chrono::milliseconds startDelta,
                                                             std::chrono::milliseconds stopDelta) {
   std::map<std::string, Summary> measurment = {};
 
-  this->values_mutex.lock();
+  this->ValuesMutex.lock();
 
-  for (auto& [key, values] : this->values) {
-    auto startTime = this->startTime;
+  for (auto& [key, values] : this->Values) {
+    auto startTime = this->StartTime;
     auto endTime = std::chrono::high_resolution_clock::now();
     auto metric = this->findMetricByName(key);
 
-    metric_type_t type;
+    MetricType type;
     std::memset(&type, 0, sizeof(type));
     if (metric == nullptr) {
-      type.absolute = 1;
+      type.Absolute = 1;
 
       startTime += startDelta;
       endTime -= stopDelta;
     } else {
-      std::memcpy(&type, &metric->type, sizeof(type));
+      std::memcpy(&type, &metric->Type, sizeof(type));
 
-      if (metric->type.ignore_start_stop_delta == 0) {
+      if (metric->Type.IgnoreStartStopDelta == 0) {
         startTime += startDelta;
         endTime -= stopDelta;
       }
@@ -261,16 +261,16 @@ std::map<std::string, Summary> MeasurementWorker::getValues(std::chrono::millise
 
     decltype(values) croppedValues(values.size());
 
-    auto findAll = [startTime, endTime](auto const& tv) { return startTime <= tv.time && tv.time <= endTime; };
+    auto findAll = [startTime, endTime](auto const& tv) { return startTime <= tv.Time && tv.Time <= endTime; };
     auto it = std::copy_if(values.begin(), values.end(), croppedValues.begin(), findAll);
     croppedValues.resize(std::distance(croppedValues.begin(), it));
 
-    Summary sum = Summary::calculate(croppedValues.begin(), croppedValues.end(), type, this->numThreads);
+    Summary sum = Summary::calculate(croppedValues.begin(), croppedValues.end(), type, this->NumThreads);
 
     measurment[key] = sum;
   }
 
-  this->values_mutex.unlock();
+  this->ValuesMutex.unlock();
 
   return measurment;
 }
@@ -299,36 +299,36 @@ int* MeasurementWorker::dataAcquisitionWorker(void* measurementWorker) {
   std::priority_queue<callbackTuple, std::vector<callbackTuple>, decltype(callbackTupleComparator)> callbackQueue(
       callbackTupleComparator);
 
-  _this->values_mutex.lock();
+  _this->ValuesMutex.lock();
 
-  for (auto const& [key, value] : _this->values) {
+  for (auto const& [key, value] : _this->Values) {
     auto metric_interface = _this->findMetricByName(key);
 
     if (metric_interface == nullptr) {
       continue;
     }
 
-    auto callbackTime = std::chrono::microseconds(metric_interface->callback_time);
+    auto callbackTime = std::chrono::microseconds(metric_interface->CallbackTime);
     if (callbackTime.count() == 0) {
       continue;
     }
 
     auto currentTime = clock::now();
 
-    callbackQueue.push(std::make_tuple(metric_interface->callback, callbackTime, currentTime));
+    callbackQueue.push(std::make_tuple(metric_interface->Callback, callbackTime, currentTime));
   }
 
-  _this->values_mutex.unlock();
+  _this->ValuesMutex.unlock();
 
-  auto nextFetch = clock::now() + _this->updateInterval;
+  auto nextFetch = clock::now() + _this->UpdateInterval;
 
   for (;;) {
     auto now = clock::now();
 
     if (nextFetch <= now) {
-      _this->values_mutex.lock();
+      _this->ValuesMutex.lock();
 
-      for (auto& [metricName, values] : _this->values) {
+      for (auto& [metricName, values] : _this->Values) {
         auto metric_interface = _this->findMetricByName(metricName);
 
         if (metric_interface == nullptr) {
@@ -337,17 +337,17 @@ int* MeasurementWorker::dataAcquisitionWorker(void* measurementWorker) {
 
         double value;
 
-        if (!metric_interface->type.insert_callback && metric_interface->get_reading != nullptr) {
-          if (EXIT_SUCCESS == metric_interface->get_reading(&value)) {
+        if (!metric_interface->Type.InsertCallback && metric_interface->GetReading != nullptr) {
+          if (EXIT_SUCCESS == metric_interface->GetReading(&value)) {
             auto tv = TimeValue(std::chrono::high_resolution_clock::now(), value);
             values.push_back(tv);
           }
         }
       }
 
-      _this->values_mutex.unlock();
+      _this->ValuesMutex.unlock();
 
-      nextFetch = now + _this->updateInterval;
+      nextFetch = now + _this->UpdateInterval;
     }
 
     auto nextWake = nextFetch;
diff --git a/src/firestarter/Measurement/Metric/IPCEstimate.cpp b/src/firestarter/Measurement/Metric/IPCEstimate.cpp
index 145f02ae..9e18a6be 100644
--- a/src/firestarter/Measurement/Metric/IPCEstimate.cpp
+++ b/src/firestarter/Measurement/Metric/IPCEstimate.cpp
@@ -57,7 +57,7 @@ static int32_t register_insert_callback(void (*c)(void*, const char*, int64_t, d
   return EXIT_SUCCESS;
 }
 
-void ipc_estimate_metric_insert(double value) {
+void ipcEstimateMetricInsert(double Value) {
   if (callback == nullptr || callback_arg == nullptr) {
     return;
   }
@@ -66,23 +66,23 @@ void ipc_estimate_metric_insert(double value) {
       std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch())
           .count();
 
-  callback(callback_arg, "ipc-estimate", t, value);
+  callback(callback_arg, "ipc-estimate", t, Value);
 }
 
-metric_interface_t ipc_estimate_metric = {
-    .name = "ipc-estimate",
-    .type = {.absolute = 1,
-             .accumalative = 0,
-             .divide_by_thread_count = 0,
-             .insert_callback = 1,
-             .ignore_start_stop_delta = 1,
-             .__reserved = 0},
-    .unit = "IPC",
-    .callback_time = 0,
-    .callback = nullptr,
-    .init = init,
-    .fini = fini,
-    .get_reading = nullptr,
-    .get_error = get_error,
-    .register_insert_callback = register_insert_callback,
+MetricInterface IpcEstimateMetric = {
+    .Name = "ipc-estimate",
+    .Type = {.Absolute = 1,
+             .Accumalative = 0,
+             .DivideByThreadCount = 0,
+             .InsertCallback = 1,
+             .IgnoreStartStopDelta = 1,
+             .Reserved = 0},
+    .Unit = "IPC",
+    .CallbackTime = 0,
+    .Callback = nullptr,
+    .Init = init,
+    .Fini = fini,
+    .GetReading = nullptr,
+    .GetError = get_error,
+    .RegisterInsertCallback = register_insert_callback,
 };
diff --git a/src/firestarter/Measurement/Metric/Perf.cpp b/src/firestarter/Measurement/Metric/Perf.cpp
index a7266db2..0d7a0225 100644
--- a/src/firestarter/Measurement/Metric/Perf.cpp
+++ b/src/firestarter/Measurement/Metric/Perf.cpp
@@ -230,38 +230,38 @@ static const char* get_error(void) {
 }
 }
 
-metric_interface_t perf_ipc_metric = {
-    .name = "perf-ipc",
-    .type = {.absolute = 1,
-             .accumalative = 0,
-             .divide_by_thread_count = 0,
-             .insert_callback = 0,
-             .ignore_start_stop_delta = 0,
-             .__reserved = 0},
-    .unit = "IPC",
-    .callback_time = 0,
-    .callback = nullptr,
-    .init = init,
-    .fini = fini,
-    .get_reading = get_reading_ipc,
-    .get_error = get_error,
-    .register_insert_callback = nullptr,
+MetricInterface PerfIpcMetric = {
+    .Name = "perf-ipc",
+    .Type = {.Absolute = 1,
+             .Accumalative = 0,
+             .DivideByThreadCount = 0,
+             .InsertCallback = 0,
+             .IgnoreStartStopDelta = 0,
+             .Reserved = 0},
+    .Unit = "IPC",
+    .CallbackTime = 0,
+    .Callback = nullptr,
+    .Init = init,
+    .Fini = fini,
+    .GetReading = get_reading_ipc,
+    .GetError = get_error,
+    .RegisterInsertCallback = nullptr,
 };
 
-metric_interface_t perf_freq_metric = {
-    .name = "perf-freq",
-    .type = {.absolute = 0,
-             .accumalative = 1,
-             .divide_by_thread_count = 1,
-             .insert_callback = 0,
-             .ignore_start_stop_delta = 0,
-             .__reserved = 0},
-    .unit = "GHz",
-    .callback_time = 0,
-    .callback = nullptr,
-    .init = init,
-    .fini = fini,
-    .get_reading = get_reading_freq,
-    .get_error = get_error,
-    .register_insert_callback = nullptr,
+MetricInterface PerfFreqMetric = {
+    .Name = "perf-freq",
+    .Type = {.Absolute = 0,
+             .Accumalative = 1,
+             .DivideByThreadCount = 1,
+             .InsertCallback = 0,
+             .IgnoreStartStopDelta = 0,
+             .Reserved = 0},
+    .Unit = "GHz",
+    .CallbackTime = 0,
+    .Callback = nullptr,
+    .Init = init,
+    .Fini = fini,
+    .GetReading = get_reading_freq,
+    .GetError = get_error,
+    .RegisterInsertCallback = nullptr,
 };
diff --git a/src/firestarter/Measurement/Metric/RAPL.cpp b/src/firestarter/Measurement/Metric/RAPL.cpp
index e6d28f1d..c73ef004 100644
--- a/src/firestarter/Measurement/Metric/RAPL.cpp
+++ b/src/firestarter/Measurement/Metric/RAPL.cpp
@@ -137,27 +137,27 @@ static int32_t init(void) {
       break;
     }
 
-    unsigned long long reading;
-    unsigned long long max;
+    uint64_t reading;
+    uint64_t max;
     std::string buffer;
     int read;
 
     std::getline(energyReadingStream, buffer);
-    read = std::sscanf(buffer.c_str(), "%llu", &reading);
+    read = std::sscanf(buffer.c_str(), "%lu", &reading);
 
     if (read == 0) {
       std::stringstream ss;
-      ss << "Contents in file " << energyUjPath.str() << " do not conform to mask (unsigned long long)";
+      ss << "Contents in file " << energyUjPath.str() << " do not conform to mask (uint64_t)";
       errorString = ss.str();
       break;
     }
 
     std::getline(maxEnergyReadingStream, buffer);
-    read = std::sscanf(buffer.c_str(), "%llu", &max);
+    read = std::sscanf(buffer.c_str(), "%lu", &max);
 
     if (read == 0) {
       std::stringstream ss;
-      ss << "Contents in file " << maxEnergyUjRangePath.str() << " do not conform to mask (unsigned long long)";
+      ss << "Contents in file " << maxEnergyUjRangePath.str() << " do not conform to mask (uint64_t)";
       errorString = ss.str();
       break;
     }
@@ -220,23 +220,23 @@ static const char* get_error(void) {
 
 // this function will be called periodically to make sure we do not miss an
 // overflow of the counter
-static void callback(void) { get_reading(nullptr); }
+static void callback() { get_reading(nullptr); }
 }
 
-metric_interface_t rapl_metric = {
-    .name = "sysfs-powercap-rapl",
-    .type = {.absolute = 0,
-             .accumalative = 1,
-             .divide_by_thread_count = 0,
-             .insert_callback = 0,
-             .ignore_start_stop_delta = 0,
-             .__reserved = 0},
-    .unit = "J",
-    .callback_time = 30000000,
-    .callback = callback,
-    .init = init,
-    .fini = fini,
-    .get_reading = get_reading,
-    .get_error = get_error,
-    .register_insert_callback = nullptr,
+MetricInterface RaplMetric = {
+    .Name = "sysfs-powercap-rapl",
+    .Type = {.Absolute = 0,
+             .Accumalative = 1,
+             .DivideByThreadCount = 0,
+             .InsertCallback = 0,
+             .IgnoreStartStopDelta = 0,
+             .Reserved = 0},
+    .Unit = "J",
+    .CallbackTime = 30000000,
+    .Callback = callback,
+    .Init = init,
+    .Fini = fini,
+    .GetReading = get_reading,
+    .GetError = get_error,
+    .RegisterInsertCallback = nullptr,
 };
diff --git a/src/firestarter/Measurement/Summary.cpp b/src/firestarter/Measurement/Summary.cpp
index 2d1bd8f4..730775be 100644
--- a/src/firestarter/Measurement/Summary.cpp
+++ b/src/firestarter/Measurement/Summary.cpp
@@ -28,73 +28,73 @@ using namespace firestarter::measurement;
 
 // this functions borows a lot of code from
 // https://github.com/metricq/metricq-cpp/blob/master/tools/metricq-summary/src/summary.cpp
-Summary Summary::calculate(std::vector<TimeValue>::iterator begin, std::vector<TimeValue>::iterator end,
-                           metric_type_t metricType, unsigned long long numThreads) {
-  std::vector<TimeValue> values = {};
+auto Summary::calculate(std::vector<TimeValue>::iterator Begin, std::vector<TimeValue>::iterator End,
+                        MetricType MetricType, uint64_t NumThreads) -> Summary {
+  std::vector<TimeValue> Values = {};
 
   // TODO: i would really like to make this code a bit more readable, but i
   // could not find a way yet.
-  if (metricType.accumalative) {
+  if (MetricType.Accumalative) {
     TimeValue prev;
 
-    if (begin != end) {
-      prev = *begin++;
-      for (auto it = begin; it != end; ++it) {
+    if (Begin != End) {
+      prev = *Begin++;
+      for (auto it = Begin; it != End; ++it) {
         auto time_diff =
-            1e-6 * (double)std::chrono::duration_cast<std::chrono::microseconds>(it->time - prev.time).count();
-        auto value_diff = it->value - prev.value;
+            1e-6 * (double)std::chrono::duration_cast<std::chrono::microseconds>(it->Time - prev.Time).count();
+        auto value_diff = it->Value - prev.Value;
 
         double value = value_diff / time_diff;
 
-        if (metricType.divide_by_thread_count) {
-          value /= numThreads;
+        if (MetricType.DivideByThreadCount) {
+          value /= NumThreads;
         }
 
-        values.push_back(TimeValue(prev.time, value));
+        Values.emplace_back(prev.Time, value);
         prev = *it;
       }
     }
-  } else if (metricType.absolute) {
-    for (auto it = begin; it != end; ++it) {
-      double value = it->value;
+  } else if (MetricType.Absolute) {
+    for (auto it = Begin; it != End; ++it) {
+      double value = it->Value;
 
-      if (metricType.divide_by_thread_count) {
-        value /= numThreads;
+      if (MetricType.DivideByThreadCount) {
+        value /= NumThreads;
       }
 
-      values.push_back(TimeValue(it->time, value));
+      Values.emplace_back(it->Time, value);
     }
   } else {
     assert(false);
   }
 
-  begin = values.begin();
-  end = values.end();
+  Begin = Values.begin();
+  End = Values.end();
 
-  Summary summary{};
+  Summary SummaryVal{};
 
-  summary.num_timepoints = std::distance(begin, end);
+  SummaryVal.NumTimepoints = std::distance(Begin, End);
 
-  if (summary.num_timepoints > 0) {
+  if (SummaryVal.NumTimepoints > 0) {
 
-    auto last = begin;
-    std::advance(last, summary.num_timepoints - 1);
-    summary.duration = std::chrono::duration_cast<std::chrono::milliseconds>(last->time - begin->time);
+    auto last = Begin;
+    std::advance(last, SummaryVal.NumTimepoints - 1);
+    SummaryVal.Duration = std::chrono::duration_cast<std::chrono::milliseconds>(last->Time - Begin->Time);
 
-    auto sum_over_nths = [&begin, end, summary](auto fn) {
+    auto sum_over_nths = [&Begin, End, SummaryVal](auto fn) {
       double acc = 0.0;
-      for (auto it = begin; it != end; ++it) {
-        acc += fn(it->value);
+      for (auto it = Begin; it != End; ++it) {
+        acc += fn(it->Value);
       }
-      return acc / summary.num_timepoints;
+      return acc / SummaryVal.NumTimepoints;
     };
 
-    summary.average = sum_over_nths([](double v) { return v; });
-    summary.stddev = std::sqrt(sum_over_nths([&summary](double v) {
-      double centered = v - summary.average;
+    SummaryVal.Average = sum_over_nths([](double v) { return v; });
+    SummaryVal.Stddev = std::sqrt(sum_over_nths([&SummaryVal](double v) {
+      double centered = v - SummaryVal.Average;
       return centered * centered;
     }));
   }
 
-  return summary;
+  return SummaryVal;
 }
diff --git a/src/firestarter/OneAPI/OneAPI.cpp b/src/firestarter/OneAPI/OneAPI.cpp
index c31ae6cd..6ebb2da3 100644
--- a/src/firestarter/OneAPI/OneAPI.cpp
+++ b/src/firestarter/OneAPI/OneAPI.cpp
@@ -116,7 +116,7 @@ static int round_up(int num_to_round, int multiple) {
 // GPU index. Used to pin this thread to the GPU.
 template <typename T>
 static void create_load(std::condition_variable& waitForInitCv, std::mutex& waitForInitCvMutex, int device_index,
-                        std::atomic<int>& initCount, volatile unsigned long long* loadVar, int matrixSize) {
+                        std::atomic<int>& initCount, volatile uint64_t* loadVar, int matrixSize) {
   static_assert(std::is_same<T, float>::value || std::is_same<T, double>::value,
                 "create_load<T>: Template argument T must be either float or double");
 
@@ -236,7 +236,7 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait
   }
 }
 
-OneAPI::OneAPI(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) {
+OneAPI::OneAPI(volatile uint64_t* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) {
   std::thread t(OneAPI::initGpus, std::ref(_waitForInitCv), loadVar, useFloat, useDouble, matrixSize, gpus);
   _initThread = std::move(t);
 
@@ -245,7 +245,7 @@ OneAPI::OneAPI(volatile unsigned long long* loadVar, bool useFloat, bool useDoub
   _waitForInitCv.wait(lk);
 }
 
-void OneAPI::initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble,
+void OneAPI::initGpus(std::condition_variable& cv, volatile uint64_t* loadVar, bool useFloat, bool useDouble,
                       unsigned matrixSize, int gpus) {
   std::condition_variable waitForInitCv;
   std::mutex waitForInitCvMutex;
diff --git a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
index 972c0d0a..8b9a7b02 100644
--- a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
+++ b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
@@ -30,47 +30,46 @@
 
 using namespace firestarter::optimizer::algorithm;
 
-NSGA2::NSGA2(unsigned gen, double cr, double m)
-    : Algorithm()
-    , _gen(gen)
-    , _cr(cr)
-    , _m(m) {
-  if (cr >= 1. || cr < 0.) {
+NSGA2::NSGA2(unsigned Gen, double Cr, double M)
+    : Gen(Gen)
+    , Cr(Cr)
+    , M(M) {
+  if (Cr >= 1. || Cr < 0.) {
     throw std::invalid_argument("The crossover probability must be in the "
                                 "[0,1[ range, while a value of " +
-                                std::to_string(cr) + " was detected");
+                                std::to_string(Cr) + " was detected");
   }
-  if (m < 0. || m > 1.) {
+  if (M < 0. || M > 1.) {
     throw std::invalid_argument("The mutation probability must be in the [0,1] "
                                 "range, while a value of " +
-                                std::to_string(m) + " was detected");
+                                std::to_string(M) + " was detected");
   }
 }
 
-void NSGA2::checkPopulation(firestarter::optimizer::Population const& pop, std::size_t populationSize) {
-  const auto& prob = pop.problem();
+void NSGA2::checkPopulation(firestarter::optimizer::Population const& Pop, std::size_t PopulationSize) {
+  const auto& Prob = Pop.problem();
 
-  if (!prob.isMO()) {
+  if (!Prob.isMO()) {
     throw std::invalid_argument("NSGA2 is a multiobjective algorithms, while number of objectives is " +
-                                std::to_string(prob.getNobjs()));
+                                std::to_string(Prob.getNobjs()));
   }
 
-  if (populationSize < 5u || (populationSize % 4 != 0u)) {
+  if (PopulationSize < 5u || (PopulationSize % 4 != 0u)) {
     throw std::invalid_argument("for NSGA-II at least 5 individuals in the "
                                 "population are needed and the "
                                 "population size must be a multiple of 4. "
                                 "Detected input population size is: " +
-                                std::to_string(populationSize));
+                                std::to_string(PopulationSize));
   }
 }
 
-firestarter::optimizer::Population NSGA2::evolve(firestarter::optimizer::Population& pop) {
-  const auto& prob = pop.problem();
+auto NSGA2::evolve(firestarter::optimizer::Population& Pop) -> firestarter::optimizer::Population {
+  const auto& prob = Pop.problem();
   const auto bounds = prob.getBounds();
-  auto NP = pop.size();
+  auto NP = Pop.size();
   auto fevals0 = prob.getFevals();
 
-  this->checkPopulation(const_cast<firestarter::optimizer::Population const&>(pop), NP);
+  this->checkPopulation(const_cast<firestarter::optimizer::Population const&>(Pop), NP);
 
   std::random_device rd;
   std::mt19937 rng(rd());
@@ -92,10 +91,10 @@ firestarter::optimizer::Population NSGA2::evolve(firestarter::optimizer::Populat
     firestarter::log::info() << ss.str();
   }
 
-  for (decltype(_gen) gen = 1u; gen <= _gen; ++gen) {
+  for (decltype(Gen) gen = 1u; gen <= Gen; ++gen) {
     {
       // Print the logs
-      std::vector<double> idealPoint = util::ideal(pop.f());
+      std::vector<double> idealPoint = util::ideal(Pop.f());
       std::stringstream ss;
 
       ss << std::setw(7) << gen << std::setw(15) << prob.getFevals() - fevals0;
@@ -107,7 +106,7 @@ firestarter::optimizer::Population NSGA2::evolve(firestarter::optimizer::Populat
     }
 
     // At each generation we make a copy of the population into popnew
-    firestarter::optimizer::Population popnew(pop);
+    firestarter::optimizer::Population popnew(Pop);
 
     // We create some pseudo-random permutation of the poulation indexes
     std::random_shuffle(shuffle1.begin(), shuffle1.end());
@@ -115,7 +114,7 @@ firestarter::optimizer::Population NSGA2::evolve(firestarter::optimizer::Populat
 
     // We compute crowding distance and non dominated rank for the current
     // population
-    auto fnds_res = util::fast_non_dominated_sorting(pop.f());
+    auto fnds_res = util::fastNonDominatedSorting(Pop.f());
     auto ndf = std::get<0>(fnds_res); // non dominated fronts [[0,3,2],[1,5,6],[4],...]
     std::vector<double> pop_cd(NP);   // crowding distances of the whole population
     auto ndr = std::get<3>(fnds_res); // non domination rank [0,1,0,0,2,1,1, ... ]
@@ -129,9 +128,9 @@ firestarter::optimizer::Population NSGA2::evolve(firestarter::optimizer::Populat
       } else {
         std::vector<std::vector<double>> front;
         for (auto idx : front_idxs) {
-          front.push_back(pop.f()[idx]);
+          front.push_back(Pop.f()[idx]);
         }
-        auto cd = util::crowding_distance(front);
+        auto cd = util::crowdingDistance(front);
         for (decltype(cd.size()) i = 0u; i < cd.size(); ++i) {
           pop_cd[front_idxs[i]] = cd[i];
         }
@@ -142,33 +141,33 @@ firestarter::optimizer::Population NSGA2::evolve(firestarter::optimizer::Populat
     // of parents that will each create 2 new offspring
     for (decltype(NP) i = 0u; i < NP; i += 4) {
       // We create two offsprings using the shuffled list 1
-      parent1_idx = util::mo_tournament_selection(shuffle1[i], shuffle1[i + 1], ndr, pop_cd, rng);
-      parent2_idx = util::mo_tournament_selection(shuffle1[i + 2], shuffle1[i + 3], ndr, pop_cd, rng);
-      children = util::sbx_crossover(pop.x()[parent1_idx], pop.x()[parent2_idx], _cr, rng);
-      util::polynomial_mutation(children.first, bounds, _m, rng);
-      util::polynomial_mutation(children.second, bounds, _m, rng);
+      parent1_idx = util::moTournamentSelection(shuffle1[i], shuffle1[i + 1], ndr, pop_cd, rng);
+      parent2_idx = util::moTournamentSelection(shuffle1[i + 2], shuffle1[i + 3], ndr, pop_cd, rng);
+      children = util::sbxCrossover(Pop.x()[parent1_idx], Pop.x()[parent2_idx], Cr, rng);
+      util::polynomialMutation(children.first, bounds, M, rng);
+      util::polynomialMutation(children.second, bounds, M, rng);
 
       popnew.append(children.first);
       popnew.append(children.second);
 
       // We repeat with the shuffled list 2
-      parent1_idx = util::mo_tournament_selection(shuffle2[i], shuffle2[i + 1], ndr, pop_cd, rng);
-      parent2_idx = util::mo_tournament_selection(shuffle2[i + 2], shuffle2[i + 3], ndr, pop_cd, rng);
-      children = util::sbx_crossover(pop.x()[parent1_idx], pop.x()[parent2_idx], _cr, rng);
-      util::polynomial_mutation(children.first, bounds, _m, rng);
-      util::polynomial_mutation(children.second, bounds, _m, rng);
+      parent1_idx = util::moTournamentSelection(shuffle2[i], shuffle2[i + 1], ndr, pop_cd, rng);
+      parent2_idx = util::moTournamentSelection(shuffle2[i + 2], shuffle2[i + 3], ndr, pop_cd, rng);
+      children = util::sbxCrossover(Pop.x()[parent1_idx], Pop.x()[parent2_idx], Cr, rng);
+      util::polynomialMutation(children.first, bounds, M, rng);
+      util::polynomialMutation(children.second, bounds, M, rng);
 
       popnew.append(children.first);
       popnew.append(children.second);
     } // popnew now contains 2NP individuals
     // This method returns the sorted N best individuals in the population
     // according to the crowded comparison operator
-    best_idx = util::select_best_N_mo(popnew.f(), NP);
+    best_idx = util::selectBestNMo(popnew.f(), NP);
     // We insert into the population
     for (decltype(NP) i = 0; i < NP; ++i) {
-      pop.insert(i, popnew.x()[best_idx[i]], popnew.f()[best_idx[i]]);
+      Pop.insert(i, popnew.x()[best_idx[i]], popnew.f()[best_idx[i]]);
     }
   }
 
-  return pop;
+  return Pop;
 }
diff --git a/src/firestarter/Optimizer/OptimizerWorker.cpp b/src/firestarter/Optimizer/OptimizerWorker.cpp
index 7cb98cce..610b8cbd 100644
--- a/src/firestarter/Optimizer/OptimizerWorker.cpp
+++ b/src/firestarter/Optimizer/OptimizerWorker.cpp
@@ -29,22 +29,22 @@ OptimizerWorker::OptimizerWorker(std::unique_ptr<firestarter::optimizer::Algorit
                                  firestarter::optimizer::Population& population,
                                  std::string const& optimizationAlgorithm, unsigned individuals,
                                  std::chrono::seconds const& preheat)
-    : _algorithm(std::move(algorithm))
-    , _population(population)
-    , _optimizationAlgorithm(optimizationAlgorithm)
-    , _individuals(individuals)
-    , _preheat(preheat) {
-  pthread_create(&this->workerThread, NULL, reinterpret_cast<void* (*)(void*)>(OptimizerWorker::optimizerThread), this);
+    : Algorithm(std::move(algorithm))
+    , Population(population)
+    , OptimizationAlgorithm(optimizationAlgorithm)
+    , Individuals(individuals)
+    , Preheat(preheat) {
+  pthread_create(&this->WorkerThread, NULL, reinterpret_cast<void* (*)(void*)>(OptimizerWorker::optimizerThread), this);
 }
 
 void OptimizerWorker::kill() {
   // we ignore ESRCH errno if thread already exited
-  pthread_cancel(this->workerThread);
+  pthread_cancel(this->WorkerThread);
 }
 
 void OptimizerWorker::join() {
   // we ignore ESRCH errno if thread already exited
-  pthread_join(this->workerThread, NULL);
+  pthread_join(this->WorkerThread, NULL);
 }
 
 void* OptimizerWorker::optimizerThread(void* optimizerWorker) {
@@ -57,14 +57,14 @@ void* OptimizerWorker::optimizerThread(void* optimizerWorker) {
 #endif
 
   // heat the cpu before attempting to optimize
-  std::this_thread::sleep_for(_this->_preheat);
+  std::this_thread::sleep_for(_this->Preheat);
 
   // For NSGA2 we start with a initial population
-  if (_this->_optimizationAlgorithm == "NSGA2") {
-    _this->_population.generateInitialPopulation(_this->_individuals);
+  if (_this->OptimizationAlgorithm == "NSGA2") {
+    _this->Population.generateInitialPopulation(_this->Individuals);
   }
 
-  _this->_algorithm->evolve(_this->_population);
+  _this->Algorithm->evolve(_this->Population);
 
   return NULL;
 }
diff --git a/src/firestarter/Optimizer/Population.cpp b/src/firestarter/Optimizer/Population.cpp
index d7915bd7..35c5ef04 100644
--- a/src/firestarter/Optimizer/Population.cpp
+++ b/src/firestarter/Optimizer/Population.cpp
@@ -52,7 +52,7 @@ void Population::generateInitialPopulation(std::size_t populationSize) {
   }
 }
 
-std::size_t Population::size() const { return _x.size(); }
+std::size_t Population::size() const { return X.size(); }
 
 void Population::append(Individual const& ind) {
   assert(this->problem().getDims() == ind.size());
@@ -64,10 +64,10 @@ void Population::append(Individual const& ind) {
   if (optional_metric.has_value()) {
     metrics = optional_metric.value();
   } else {
-    metrics = this->_problem->metrics(ind);
+    metrics = this->ProblemPtr->metrics(ind);
   }
 
-  auto fitness = this->_problem->fitness(metrics);
+  auto fitness = this->ProblemPtr->fitness(metrics);
 
   this->append(ind, fitness);
 
@@ -87,16 +87,16 @@ void Population::append(Individual const& ind, std::vector<double> const& fit) {
   assert(this->problem().getNobjs() == fit.size());
   assert(this->problem().getDims() == ind.size());
 
-  this->_x.push_back(ind);
-  this->_f.push_back(fit);
+  this->X.push_back(ind);
+  this->F.push_back(fit);
 }
 
 void Population::insert(std::size_t idx, Individual const& ind, std::vector<double> const& fit) {
   // assert that population is big enough
-  assert(_x.size() > idx);
+  assert(X.size() > idx);
 
-  _x[idx] = ind;
-  _f[idx] = fit;
+  X[idx] = ind;
+  F[idx] = fit;
 }
 
 Individual Population::getRandomIndividual() {
@@ -111,7 +111,7 @@ Individual Population::getRandomIndividual() {
     auto const lb = std::get<0>(bounds[i]);
     auto const ub = std::get<1>(bounds[i]);
 
-    out[i] = std::uniform_int_distribution<unsigned>(lb, ub)(this->gen);
+    out[i] = std::uniform_int_distribution<unsigned>(lb, ub)(this->Gen);
 
     firestarter::log::trace() << "  - " << i << ": [" << lb << "," << ub << "]: " << out[i];
   }
@@ -127,11 +127,11 @@ std::optional<Individual> Population::bestIndividual() const {
   }
 
   // assert that we have individuals
-  assert(this->_x.size() > 0);
+  assert(this->X.size() > 0);
 
-  auto best = std::max_element(this->_x.begin(), this->_x.end(), [](auto a, auto b) { return a < b; });
+  auto best = std::max_element(this->X.begin(), this->X.end(), [](auto a, auto b) { return a < b; });
 
-  assert(best != this->_x.end());
+  assert(best != this->X.end());
 
   return *best;
 }
diff --git a/src/firestarter/Optimizer/Util/MultiObjective.cpp b/src/firestarter/Optimizer/Util/MultiObjective.cpp
index df24effa..78092234 100644
--- a/src/firestarter/Optimizer/Util/MultiObjective.cpp
+++ b/src/firestarter/Optimizer/Util/MultiObjective.cpp
@@ -32,7 +32,7 @@ namespace firestarter::optimizer::util {
 // Less than compares floating point types placing nans after inf or before -inf
 // It is a useful function when calling e.g. std::sort to guarantee a weak
 // strict ordering and avoid an undefined behaviour
-bool less_than_f(double a, double b) {
+bool lessThanF(double a, double b) {
   if (!std::isnan(a)) {
     if (!std::isnan(b))
       return a < b; // a < b
@@ -49,7 +49,7 @@ bool less_than_f(double a, double b) {
 // Greater than compares floating point types placing nans after inf or before
 // -inf It is a useful function when calling e.g. std::sort to guarantee a weak
 // strict ordering and avoid an undefined behaviour
-bool greater_than_f(double a, double b) {
+bool greaterThanF(double a, double b) {
   if (!std::isnan(a)) {
     if (!std::isnan(b))
       return a > b; // a > b
@@ -81,7 +81,7 @@ bool greater_than_f(double a, double b) {
  * @throws std::invalid_argument if the dimensions of the two objectives are
  * different
  */
-bool pareto_dominance(const std::vector<double>& obj1, const std::vector<double>& obj2) {
+bool paretoDominance(const std::vector<double>& obj1, const std::vector<double>& obj2) {
   if (obj1.size() != obj2.size()) {
     throw std::invalid_argument(
         "Different number of objectives found in input fitnesses: " + std::to_string(obj1.size()) + " and " +
@@ -89,9 +89,9 @@ bool pareto_dominance(const std::vector<double>& obj1, const std::vector<double>
   }
   bool found_strictly_dominating_dimension = false;
   for (decltype(obj1.size()) i = 0u; i < obj1.size(); ++i) {
-    if (greater_than_f(obj2[i], obj1[i])) {
+    if (greaterThanF(obj2[i], obj1[i])) {
       return false;
-    } else if (less_than_f(obj2[i], obj1[i])) {
+    } else if (lessThanF(obj2[i], obj1[i])) {
       found_strictly_dominating_dimension = true;
     }
   }
@@ -130,7 +130,7 @@ bool pareto_dominance(const std::vector<double>& obj1, const std::vector<double>
  */
 std::tuple<std::vector<std::vector<std::size_t>>, std::vector<std::vector<std::size_t>>, std::vector<std::size_t>,
            std::vector<std::size_t>>
-fast_non_dominated_sorting(const std::vector<std::vector<double>>& points) {
+fastNonDominatedSorting(const std::vector<std::vector<double>>& points) {
   auto N = points.size();
   // We make sure to have two points at least (one could also be allowed)
   if (N < 2u) {
@@ -148,10 +148,10 @@ fast_non_dominated_sorting(const std::vector<std::vector<double>>& points) {
     dom_list[i].clear();
     dom_count[i] = 0u;
     for (decltype(N) j = 0u; j < i; ++j) {
-      if (pareto_dominance(points[i], points[j])) {
+      if (paretoDominance(points[i], points[j])) {
         dom_list[i].push_back(j);
         ++dom_count[j];
-      } else if (pareto_dominance(points[j], points[i])) {
+      } else if (paretoDominance(points[j], points[i])) {
         dom_list[j].push_back(i);
         ++dom_count[i];
       }
@@ -212,7 +212,7 @@ fast_non_dominated_sorting(const std::vector<std::vector<double>>& points) {
  * @throws std::invalid_argument If points in \p non_dom_front do not all have
  * the same dimensionality
  */
-std::vector<double> crowding_distance(const std::vector<std::vector<double>>& non_dom_front) {
+std::vector<double> crowdingDistance(const std::vector<std::vector<double>>& non_dom_front) {
   auto N = non_dom_front.size();
   // We make sure to have two points at least
   if (N < 2u) {
@@ -239,7 +239,7 @@ std::vector<double> crowding_distance(const std::vector<std::vector<double>>& no
   std::vector<double> retval(N, 0.);
   for (decltype(M) i = 0u; i < M; ++i) {
     std::sort(indexes.begin(), indexes.end(), [i, &non_dom_front](std::size_t idx1, std::size_t idx2) {
-      return less_than_f(non_dom_front[idx1][i], non_dom_front[idx2][i]);
+      return lessThanF(non_dom_front[idx1][i], non_dom_front[idx2][i]);
     });
     retval[indexes[0]] = std::numeric_limits<double>::infinity();
     retval[indexes[N - 1u]] = std::numeric_limits<double>::infinity();
@@ -254,9 +254,9 @@ std::vector<double> crowding_distance(const std::vector<std::vector<double>>& no
 // Multi-objective tournament selection. Requires all sizes to be consistent.
 // Does not check if input is well formed.
 std::vector<double>::size_type
-mo_tournament_selection(std::vector<double>::size_type idx1, std::vector<double>::size_type idx2,
-                        const std::vector<std::vector<double>::size_type>& non_domination_rank,
-                        const std::vector<double>& crowding_d, std::mt19937& mt) {
+moTournamentSelection(std::vector<double>::size_type idx1, std::vector<double>::size_type idx2,
+                      const std::vector<std::vector<double>::size_type>& non_domination_rank,
+                      const std::vector<double>& crowding_d, std::mt19937& mt) {
   if (non_domination_rank[idx1] < non_domination_rank[idx2])
     return idx1;
   if (non_domination_rank[idx1] > non_domination_rank[idx2])
@@ -275,8 +275,8 @@ mo_tournament_selection(std::vector<double>::size_type idx1, std::vector<double>
 // bound reads. nix is the integer dimension (integer alleles assumed at the end
 // of the chromosome)
 std::pair<firestarter::optimizer::Individual, firestarter::optimizer::Individual>
-sbx_crossover(const firestarter::optimizer::Individual& parent1, const firestarter::optimizer::Individual& parent2,
-              const double p_cr, std::mt19937& mt) {
+sbxCrossover(const firestarter::optimizer::Individual& parent1, const firestarter::optimizer::Individual& parent2,
+             const double p_cr, std::mt19937& mt) {
   // Decision vector dimensions
   auto nix = parent1.size();
   firestarter::optimizer::Individual::size_type site1, site2;
@@ -309,9 +309,8 @@ sbx_crossover(const firestarter::optimizer::Individual& parent1, const firestart
 
 // Performs polynomial mutation. Requires all sizes to be consistent. Does not
 // check if input is well formed. p_m is the mutation probability
-void polynomial_mutation(firestarter::optimizer::Individual& child,
-                         const std::vector<std::tuple<unsigned, unsigned>>& bounds, const double p_m,
-                         std::mt19937& mt) {
+void polynomialMutation(firestarter::optimizer::Individual& child,
+                        const std::vector<std::tuple<unsigned, unsigned>>& bounds, const double p_m, std::mt19937& mt) {
   // Decision vector dimensions
   auto nix = child.size();
   // Random distributions
@@ -362,7 +361,7 @@ void polynomial_mutation(firestarter::optimizer::Individual& child,
  * @throws unspecified all exceptions thrown by
  * pagmo::fast_non_dominated_sorting and pagmo::crowding_distance
  */
-std::vector<std::size_t> select_best_N_mo(const std::vector<std::vector<double>>& input_f, std::size_t N) {
+std::vector<std::size_t> selectBestNMo(const std::vector<std::vector<double>>& input_f, std::size_t N) {
   if (N == 0u) { // corner case
     return {};
   }
@@ -380,7 +379,7 @@ std::vector<std::size_t> select_best_N_mo(const std::vector<std::vector<double>>
   std::vector<std::size_t> retval;
   std::vector<std::size_t>::size_type front_id(0u);
   // Run fast-non-dominated sorting
-  auto tuple = fast_non_dominated_sorting(input_f);
+  auto tuple = fastNonDominatedSorting(input_f);
   // Insert all non dominated fronts if not more than N
   for (const auto& front : std::get<0>(tuple)) {
     if (retval.size() + front.size() <= N) {
@@ -401,13 +400,13 @@ std::vector<std::size_t> select_best_N_mo(const std::vector<std::vector<double>>
   for (decltype(front.size()) i = 0u; i < front.size(); ++i) {
     non_dom_fits[i] = input_f[front[i]];
   }
-  std::vector<double> cds(crowding_distance(non_dom_fits));
+  std::vector<double> cds(crowdingDistance(non_dom_fits));
   // We now have front and crowding distance, we sort the front w.r.t. the
   // crowding
   std::vector<std::size_t> idxs(front.size());
   std::iota(idxs.begin(), idxs.end(), std::size_t(0u));
   std::sort(idxs.begin(), idxs.end(), [&cds](std::size_t idx1, std::size_t idx2) {
-    return greater_than_f(cds[idx1], cds[idx2]);
+    return greaterThanF(cds[idx1], cds[idx2]);
   }); // Descending order1
   auto remaining = N - retval.size();
   for (decltype(remaining) i = 0u; i < remaining; ++i) {
@@ -453,7 +452,7 @@ std::vector<double> ideal(const std::vector<std::vector<double>>& points) {
   for (decltype(M) i = 0u; i < M; ++i) {
     retval[i] = (*std::min_element(points.begin(), points.end(),
                                    [i](const std::vector<double>& f1, const std::vector<double>& f2) {
-                                     return util::greater_than_f(f1[i], f2[i]);
+                                     return util::greaterThanF(f1[i], f2[i]);
                                    }))[i];
   }
   return retval;
diff --git a/src/firestarter/WatchdogWorker.cpp b/src/firestarter/WatchdogWorker.cpp
index f5091fa2..8d8218eb 100644
--- a/src/firestarter/WatchdogWorker.cpp
+++ b/src/firestarter/WatchdogWorker.cpp
@@ -80,11 +80,11 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::m
       SCOREP_USER_REGION_BY_NAME_BEGIN("WD_HIGH", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
       {
-        std::unique_lock<std::mutex> lk(this->_watchdogTerminateMutex);
+        std::unique_lock<std::mutex> lk(this->WatchdogTerminateMutex);
         // abort waiting if we get the interrupt signal
-        this->_watchdogTerminateAlert.wait_for(lk, load_nsec, [this]() { return this->_watchdog_terminate; });
+        this->WatchdogTerminateAlert.wait_for(lk, load_nsec, [this]() { return this->WatchdogTerminate; });
         // terminate on interrupt
-        if (this->_watchdog_terminate) {
+        if (this->WatchdogTerminate) {
           return EXIT_SUCCESS;
         }
       }
@@ -109,11 +109,11 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::m
       SCOREP_USER_REGION_BY_NAME_BEGIN("WD_LOW", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
       {
-        std::unique_lock<std::mutex> lk(this->_watchdogTerminateMutex);
+        std::unique_lock<std::mutex> lk(this->WatchdogTerminateMutex);
         // abort waiting if we get the interrupt signal
-        this->_watchdogTerminateAlert.wait_for(lk, idle_nsec, [this]() { return this->_watchdog_terminate; });
+        this->WatchdogTerminateAlert.wait_for(lk, idle_nsec, [this]() { return this->WatchdogTerminate; });
         // terminate on interrupt
-        if (this->_watchdog_terminate) {
+        if (this->WatchdogTerminate) {
           return EXIT_SUCCESS;
         }
       }
@@ -129,8 +129,8 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::m
 
       // exit when termination signal is received or timeout is reached
       {
-        std::lock_guard<std::mutex> lk(this->_watchdogTerminateMutex);
-        if (this->_watchdog_terminate || (timeout > sec::zero() && (time > timeout))) {
+        std::lock_guard<std::mutex> lk(this->WatchdogTerminateMutex);
+        if (this->WatchdogTerminate || (timeout > sec::zero() && (time > timeout))) {
           this->setLoad(LOAD_STOP);
 
           return EXIT_SUCCESS;
@@ -143,9 +143,9 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::m
   // else return and wait for sigterm handler to request threads to stop.
   if (timeout > sec::zero()) {
     {
-      std::unique_lock<std::mutex> lk(Firestarter::_watchdogTerminateMutex);
+      std::unique_lock<std::mutex> lk(Firestarter::WatchdogTerminateMutex);
       // abort waiting if we get the interrupt signal
-      Firestarter::_watchdogTerminateAlert.wait_for(lk, timeout, []() { return Firestarter::_watchdog_terminate; });
+      Firestarter::WatchdogTerminateAlert.wait_for(lk, timeout, []() { return Firestarter::WatchdogTerminate; });
     }
 
     this->setLoad(LOAD_STOP);

From 37a8518fd294a6bdc36e5eb4e5ccfaf8df639148 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 27 Sep 2024 16:17:02 +0200
Subject: [PATCH 011/167] update .clang-tidy

---
 .clang-tidy | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/.clang-tidy b/.clang-tidy
index c3a4c174..f61bbcf4 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -6,6 +6,13 @@
 
 #  -boost-use-ranges: crash of clangd https://github.com/llvm/llvm-project/issues/109037
 
+#  -readability-identifier-length length of at least 3 does not make sense for some variables
+
+#  -cppcoreguidelines-avoid-magic-numbers
+#  -readability-magic-numbers currently we have too many numbers in this code
+
+#  -bugprone-easily-swappable-parameters we are not using strong typedefs
+
 Checks: >
   -*,
   boost-*,
@@ -22,7 +29,11 @@ Checks: >
   readability-*,
   -bugprone-narrowing-conversions,
   -cppcoreguidelines-special-member-functions,
-  -boost-use-ranges
+  -boost-use-ranges,
+  -readability-identifier-length,
+  -cppcoreguidelines-avoid-magic-numbers,
+  -readability-magic-numbers,
+  -bugprone-easily-swappable-parameters
   
 # Turn all the warnings from the checks above into errors.
 WarningsAsErrors: "*"

From 90beb594232ebbc7460db3b8b193317a1d31a977 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 27 Sep 2024 16:18:23 +0200
Subject: [PATCH 012/167] add an empty file as a workaround for clangd issue

---
 src/CMakeLists.txt                            |  2 ++
 .../X86/Platform/X86PlatformConfig.cpp        | 25 +++++++++++++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 6136bb35..7d405dd9 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -4,6 +4,8 @@ SET(FIRESTARTER_FILES
 	firestarter/LoadWorker.cpp
 	firestarter/WatchdogWorker.cpp
 	firestarter/DumpRegisterWorker.cpp
+	
+	firestarter/Environment/X86/Platform/X86PlatformConfig.cpp
 
 	firestarter/Environment/Environment.cpp
 	firestarter/Environment/CPUTopology.cpp
diff --git a/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp b/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp
new file mode 100644
index 00000000..0cc5abef
--- /dev/null
+++ b/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp
@@ -0,0 +1,25 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2024 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+// This file exists to get an entry in the compile commands database. Clangd will interpolate the include directories
+// for header files based on the source file with the best matching score. This file should be the best score for the
+// included header. Therefore we should not see any errors in this file for missing includes. For more information
+// look in the LLVM code base: clang/lib/Tooling/InterpolatingCompilationDatabase.cpp
\ No newline at end of file

From ff6113bb00741efff872772dd2a3bd2856e42649 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 27 Sep 2024 16:21:48 +0200
Subject: [PATCH 013/167] second pass of clang-tidy for the header files. use
 unique_ptr for the payload instead of raw pointers

---
 .../firestarter/DumpRegisterWorkerData.hpp    |   6 +-
 .../firestarter/Environment/CPUTopology.hpp   |  60 +--
 .../firestarter/Environment/Environment.hpp   |  17 +-
 .../Environment/Payload/Payload.hpp           |   3 +-
 .../Environment/Platform/PlatformConfig.hpp   |  11 +-
 .../Environment/Platform/RuntimeConfig.hpp    |   5 +-
 .../Environment/X86/Payload/AVX512Payload.hpp |   6 +-
 .../Environment/X86/Payload/AVXPayload.hpp    |   6 +-
 .../Environment/X86/Payload/FMA4Payload.hpp   |   6 +-
 .../Environment/X86/Payload/FMAPayload.hpp    |   6 +-
 .../Environment/X86/Payload/SSE2Payload.hpp   |   7 +-
 .../Environment/X86/Payload/X86Payload.hpp    |  10 +-
 .../Environment/X86/Payload/ZENFMAPayload.hpp |   7 +-
 .../X86/Platform/BulldozerConfig.hpp          |  12 +-
 .../X86/Platform/HaswellConfig.hpp            |  10 +-
 .../X86/Platform/HaswellEPConfig.hpp          |  12 +-
 .../X86/Platform/KnightsLandingConfig.hpp     |  12 +-
 .../Environment/X86/Platform/NaplesConfig.hpp |  12 +-
 .../X86/Platform/NehalemConfig.hpp            |  12 +-
 .../X86/Platform/NehalemEPConfig.hpp          |  12 +-
 .../Environment/X86/Platform/RomeConfig.hpp   |  12 +-
 .../X86/Platform/SandyBridgeConfig.hpp        |  12 +-
 .../X86/Platform/SandyBridgeEPConfig.hpp      |  12 +-
 .../X86/Platform/SkylakeConfig.hpp            |  12 +-
 .../X86/Platform/SkylakeSPConfig.hpp          |  12 +-
 .../X86/Platform/X86PlatformConfig.hpp        |  45 +-
 .../Environment/X86/X86CPUTopology.hpp        |   9 +-
 .../Environment/X86/X86Environment.hpp        |  58 ++-
 include/firestarter/Firestarter.hpp           |  20 +-
 include/firestarter/Json/Summary.hpp          |  25 +-
 include/firestarter/LoadWorkerData.hpp        |   8 +-
 .../Logging/FirstWorkerThreadFilter.hpp       |   5 +-
 include/firestarter/Logging/Log.hpp           |  42 +-
 .../Measurement/MeasurementWorker.hpp         |  13 +-
 .../Measurement/Metric/IPCEstimate.h          |   2 +-
 include/firestarter/Measurement/Metric/Perf.h |   2 +-
 include/firestarter/Measurement/Metric/RAPL.h |   2 +-
 include/firestarter/Measurement/Summary.hpp   |   7 +-
 include/firestarter/Measurement/TimeValue.hpp |   2 +-
 include/firestarter/Optimizer/Algorithm.hpp   |   2 +-
 .../firestarter/Optimizer/Algorithm/NSGA2.hpp |   2 +-
 include/firestarter/Optimizer/History.hpp     | 158 +++----
 .../firestarter/Optimizer/OptimizerWorker.hpp |   4 +-
 include/firestarter/Optimizer/Population.hpp  |   5 +-
 include/firestarter/Optimizer/Problem.hpp     |   4 +-
 .../Optimizer/Problem/CLIArgumentProblem.hpp  |   6 +-
 .../Optimizer/Util/MultiObjective.hpp         |   2 +-
 src/firestarter/Environment/CPUTopology.cpp   | 401 +++++++++---------
 .../Environment/X86/Payload/AVX512Payload.cpp |   9 +-
 .../Environment/X86/Payload/X86Payload.cpp    |   1 +
 .../Environment/X86/X86CPUTopology.cpp        | 228 +++++-----
 .../Environment/X86/X86Environment.cpp        |  86 ++--
 src/firestarter/LoadWorker.cpp                |   4 +-
 src/firestarter/Main.cpp                      |   8 +-
 .../Measurement/MeasurementWorker.cpp         |   2 +
 src/firestarter/Optimizer/Algorithm/NSGA2.cpp |   2 +
 src/firestarter/Optimizer/Population.cpp      |   2 +-
 57 files changed, 723 insertions(+), 735 deletions(-)

diff --git a/include/firestarter/DumpRegisterWorkerData.hpp b/include/firestarter/DumpRegisterWorkerData.hpp
index a05863d5..cf5e22ba 100644
--- a/include/firestarter/DumpRegisterWorkerData.hpp
+++ b/include/firestarter/DumpRegisterWorkerData.hpp
@@ -21,9 +21,9 @@
 
 #pragma once
 
+#include "LoadWorkerData.hpp"
+#include "Logging/Log.hpp"
 #include <chrono>
-#include <firestarter/DumpRegisterStruct.hpp>
-#include <firestarter/LoadWorkerData.hpp>
 #include <utility>
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
@@ -32,6 +32,8 @@ namespace firestarter {
 
 class DumpRegisterWorkerData {
 public:
+  DumpRegisterWorkerData() = delete;
+
   DumpRegisterWorkerData(std::shared_ptr<LoadWorkerData> LoadWorkerDataPtr, std::chrono::seconds DumpTimeDelta,
                          const std::string& DumpFilePath)
       : LoadWorkerDataPtr(std::move(LoadWorkerDataPtr))
diff --git a/include/firestarter/Environment/CPUTopology.hpp b/include/firestarter/Environment/CPUTopology.hpp
index ba10df3c..c58933db 100644
--- a/include/firestarter/Environment/CPUTopology.hpp
+++ b/include/firestarter/Environment/CPUTopology.hpp
@@ -34,50 +34,52 @@ namespace firestarter::environment {
 
 class CPUTopology {
 public:
-  CPUTopology(std::string architecture);
+  explicit CPUTopology(std::string Architecture);
   virtual ~CPUTopology();
 
-  unsigned numThreads() const { return _numThreadsPerCore * _numCoresTotal; }
-  unsigned maxNumThreads() const;
-  unsigned numThreadsPerCore() const { return _numThreadsPerCore; }
-  unsigned numCoresTotal() const { return _numCoresTotal; }
-  unsigned numPackages() const { return _numPackages; }
+  [[nodiscard]] auto numThreads() const -> unsigned { return NumThreadsPerCore * NumCoresTotal; }
+  [[nodiscard]] auto maxNumThreads() const -> unsigned;
+  [[nodiscard]] auto numThreadsPerCore() const -> unsigned { return NumThreadsPerCore; }
+  [[nodiscard]] auto numCoresTotal() const -> unsigned { return NumCoresTotal; }
+  [[nodiscard]] auto numPackages() const -> unsigned { return NumPackages; }
 
-  std::string const& architecture() const { return _architecture; }
-  virtual std::string const& vendor() const { return _vendor; }
-  virtual std::string const& processorName() const { return _processorName; }
-  virtual std::string const& model() const = 0;
+  [[nodiscard]] auto architecture() const -> std::string const& { return Architecture; }
+  [[nodiscard]] virtual auto vendor() const -> std::string const& { return Vendor; }
+  [[nodiscard]] virtual auto processorName() const -> std::string const& { return ProcessorName; }
+  [[nodiscard]] virtual auto model() const -> std::string const& { return Model; }
 
   // get the size of the L1i-cache in bytes
-  unsigned instructionCacheSize() const { return _instructionCacheSize; }
+  [[nodiscard]] auto instructionCacheSize() const -> unsigned { return InstructionCacheSize; }
 
   // return the cpu clockrate in Hz
-  virtual uint64_t clockrate() const { return _clockrate; }
+  [[nodiscard]] virtual auto clockrate() const -> uint64_t { return Clockrate; }
   // return the cpu features
-  virtual std::list<std::string> const& features() const = 0;
+  [[nodiscard]] virtual auto features() const -> std::list<std::string> const& = 0;
 
   // get a timestamp
-  virtual uint64_t timestamp() const = 0;
+  [[nodiscard]] virtual auto timestamp() const -> uint64_t = 0;
 
-  int getPkgIdFromPU(unsigned pu) const;
-  int getCoreIdFromPU(unsigned pu) const;
+  [[nodiscard]] auto getPkgIdFromPU(unsigned Pu) const -> int;
+  [[nodiscard]] auto getCoreIdFromPU(unsigned Pu) const -> int;
 
 protected:
-  std::string scalingGovernor() const;
-  std::ostream& print(std::ostream& stream) const;
+  [[nodiscard]] static auto scalingGovernor() -> std::string;
+  [[nodiscard]] auto print(std::ostream& Stream) const -> std::ostream&;
+
+  std::string Vendor;
+  std::string Model;
 
 private:
-  static std::stringstream getFileAsStream(std::string const& filePath);
-
-  unsigned _numThreadsPerCore;
-  unsigned _numCoresTotal;
-  unsigned _numPackages;
-  std::string _architecture;
-  std::string _vendor = "";
-  std::string _processorName = "";
-  unsigned _instructionCacheSize = 0;
-  uint64_t _clockrate = 0;
-  hwloc_topology_t topology;
+  [[nodiscard]] static auto getFileAsStream(std::string const& FilePath) -> std::stringstream;
+
+  unsigned NumThreadsPerCore;
+  unsigned NumCoresTotal;
+  unsigned NumPackages;
+  std::string Architecture;
+  std::string ProcessorName;
+  unsigned InstructionCacheSize = 0;
+  uint64_t Clockrate = 0;
+  hwloc_topology_t Topology;
 };
 
 } // namespace firestarter::environment
diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index 5f204ccc..57e60094 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -21,11 +21,10 @@
 
 #pragma once
 
+#include "CPUTopology.hpp"
+#include "Platform/RuntimeConfig.hpp"
 #include <cassert>
 #include <cstdint>
-#include <firestarter/Environment/CPUTopology.hpp>
-#include <firestarter/Environment/Platform/PlatformConfig.hpp>
-#include <firestarter/Environment/Platform/RuntimeConfig.hpp>
 #include <vector>
 
 namespace firestarter::environment {
@@ -33,13 +32,9 @@ namespace firestarter::environment {
 class Environment {
 public:
   Environment() = delete;
-  explicit Environment(CPUTopology* Topology)
-      : Topology(Topology) {}
-  virtual ~Environment() {
-    delete this->Topology;
-
-    delete SelectedConfig;
-  }
+  explicit Environment(std::unique_ptr<CPUTopology>&& Topology)
+      : Topology(std::move(Topology)) {}
+  virtual ~Environment() { delete SelectedConfig; }
 
   auto evaluateCpuAffinity(unsigned RequestedNumThreads, std::string CpuBind) -> int;
   auto setCpuAffinity(unsigned Thread) -> int;
@@ -77,7 +72,7 @@ class Environment {
 
 protected:
   platform::RuntimeConfig* SelectedConfig = nullptr;
-  CPUTopology* Topology = nullptr;
+  std::unique_ptr<CPUTopology> Topology;
 
 private:
   uint64_t RequestedNumThreads = 0;
diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index 3871400f..9c37bdfc 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include <list>
+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
@@ -101,7 +102,7 @@ class Payload {
   [[nodiscard]] virtual auto highLoadFunction(uint64_t* AddrMem, volatile uint64_t* AddrHigh, uint64_t Iterations)
       -> uint64_t = 0;
 
-  [[nodiscard]] virtual auto clone() const -> Payload* = 0;
+  [[nodiscard]] virtual auto clone() const -> std::unique_ptr<Payload> = 0;
 };
 
 } // namespace firestarter::environment::payload
diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp
index 5588cb8a..954b6682 100644
--- a/include/firestarter/Environment/Platform/PlatformConfig.hpp
+++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp
@@ -21,8 +21,7 @@
 
 #pragma once
 
-#include <firestarter/Environment/Payload/Payload.hpp>
-#include <firestarter/Logging/Log.hpp>
+#include "../Payload/Payload.hpp"
 #include <initializer_list>
 #include <map>
 #include <sstream>
@@ -35,7 +34,7 @@ class PlatformConfig {
 private:
   std::string Name;
   std::list<unsigned> Threads;
-  payload::Payload* Payload;
+  std::unique_ptr<payload::Payload> Payload;
 
 protected:
   unsigned InstructionCacheSize;
@@ -48,15 +47,15 @@ class PlatformConfig {
 
   PlatformConfig(std::string Name, std::list<unsigned> Threads, unsigned InstructionCacheSize,
                  std::initializer_list<unsigned> DataCacheBufferSize, unsigned RamBufferSize, unsigned Lines,
-                 payload::Payload* Payload)
+                 std::unique_ptr<payload::Payload>&& Payload)
       : Name(std::move(Name))
       , Threads(std::move(Threads))
-      , Payload(Payload)
+      , Payload(std::move(Payload))
       , InstructionCacheSize(InstructionCacheSize)
       , DataCacheBufferSize(DataCacheBufferSize)
       , RamBufferSize(RamBufferSize)
       , Lines(Lines) {}
-  virtual ~PlatformConfig() { delete Payload; }
+  virtual ~PlatformConfig() = default;
 
   [[nodiscard]] auto name() const -> const std::string& { return Name; }
   [[nodiscard]] auto instructionCacheSize() const -> unsigned { return InstructionCacheSize; }
diff --git a/include/firestarter/Environment/Platform/RuntimeConfig.hpp b/include/firestarter/Environment/Platform/RuntimeConfig.hpp
index 3d1d1786..17770e97 100644
--- a/include/firestarter/Environment/Platform/RuntimeConfig.hpp
+++ b/include/firestarter/Environment/Platform/RuntimeConfig.hpp
@@ -21,8 +21,9 @@
 
 #pragma once
 
+#include "../../Logging/Log.hpp"
+#include "../Platform/PlatformConfig.hpp"
 #include <cassert>
-#include <firestarter/Environment/Platform/PlatformConfig.hpp>
 
 namespace firestarter::environment::platform {
 
@@ -62,7 +63,7 @@ class RuntimeConfig {
       , RamBufferSize(Other.ramBufferSize())
       , Lines(Other.lines()) {}
 
-  ~RuntimeConfig() { Payload.reset(); }
+  ~RuntimeConfig() = default;
 
   [[nodiscard]] auto platformConfig() const -> PlatformConfig const& { return PlatformConfigValue; }
   [[nodiscard]] auto payload() const -> payload::Payload& {
diff --git a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
index 317b8196..9478353e 100644
--- a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include <firestarter/Environment/X86/Payload/X86Payload.hpp>
+#include "X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 class AVX512Payload final : public X86Payload {
@@ -37,8 +37,8 @@ class AVX512Payload final : public X86Payload {
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
   void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
 
-  [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override {
-    return new AVX512Payload(this->supportedFeatures());
+  [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
+    return std::make_unique<AVX512Payload>(this->supportedFeatures());
   };
 
 private:
diff --git a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
index 6516c0de..ca6cac6d 100644
--- a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include <firestarter/Environment/X86/Payload/X86Payload.hpp>
+#include "X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 class AVXPayload final : public X86Payload {
@@ -37,8 +37,8 @@ class AVXPayload final : public X86Payload {
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
   void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
 
-  [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override {
-    return new AVXPayload(this->supportedFeatures());
+  [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
+    return std::make_unique<AVXPayload>(this->supportedFeatures());
   };
 
 private:
diff --git a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
index bb623e68..ccc43d00 100644
--- a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include <firestarter/Environment/X86/Payload/X86Payload.hpp>
+#include "X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 
@@ -39,8 +39,8 @@ class FMA4Payload final : public X86Payload {
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
   void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
 
-  [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override {
-    return new FMA4Payload(this->supportedFeatures());
+  [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
+    return std::make_unique<FMA4Payload>(this->supportedFeatures());
   };
 
 private:
diff --git a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
index b610a838..a8443b97 100644
--- a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include <firestarter/Environment/X86/Payload/X86Payload.hpp>
+#include "X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 class FMAPayload final : public X86Payload {
@@ -37,8 +37,8 @@ class FMAPayload final : public X86Payload {
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
   void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
 
-  [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override {
-    return new FMAPayload(this->supportedFeatures());
+  [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
+    return std::make_unique<FMAPayload>(this->supportedFeatures());
   };
 
 private:
diff --git a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
index 538837b4..5e363432 100644
--- a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
@@ -21,8 +21,7 @@
 
 #pragma once
 
-#include <cstdint>
-#include <firestarter/Environment/X86/Payload/X86Payload.hpp>
+#include "X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 class SSE2Payload final : public X86Payload {
@@ -38,8 +37,8 @@ class SSE2Payload final : public X86Payload {
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
   void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
 
-  [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override {
-    return new SSE2Payload(this->supportedFeatures());
+  [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
+    return std::make_unique<SSE2Payload>(this->supportedFeatures());
   };
 
 private:
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index 9e947143..2e38b855 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -21,13 +21,13 @@
 
 #pragma once
 
+#include "../../../Constants.hpp"          // IWYU pragma: keep
+#include "../../../DumpRegisterStruct.hpp" // IWYU pragma: keep
+#include "../../../Logging/Log.hpp"        // IWYU pragma: keep
+#include "../../Payload/Payload.hpp"
 #include <asmjit/x86.h>
-
 #include <cstdint>
-#include <firestarter/DumpRegisterWorkerData.hpp>
-#include <firestarter/Environment/Payload/Payload.hpp>
-#include <firestarter/LoadWorkerData.hpp>
-#include <firestarter/Logging/Log.hpp>
+#include <map> // IWYU pragma: keep
 #include <utility>
 
 #define INIT_BLOCKSIZE 1024
diff --git a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
index 425dd600..35746efc 100644
--- a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
@@ -21,8 +21,7 @@
 
 #pragma once
 
-#include <cstdint>
-#include <firestarter/Environment/X86/Payload/X86Payload.hpp>
+#include "X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 class ZENFMAPayload final : public X86Payload {
@@ -39,8 +38,8 @@ class ZENFMAPayload final : public X86Payload {
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
   void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
 
-  [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override {
-    return new ZENFMAPayload(this->supportedFeatures());
+  [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
+    return std::make_unique<ZENFMAPayload>(this->supportedFeatures());
   };
 
 private:
diff --git a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
index 4cc4b811..fc7e5337 100644
--- a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
@@ -21,17 +21,17 @@
 
 #pragma once
 
-#include <firestarter/Environment/X86/Payload/FMA4Payload.hpp>
-#include <firestarter/Environment/X86/Platform/X86PlatformConfig.hpp>
+#include "../Payload/FMA4Payload.hpp"
+#include "X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class BulldozerConfig final : public X86PlatformConfig {
 public:
-  BulldozerConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("BLD_OPTERON", 21, {1, 2, 3}, {1}, 0, {16384, 1048576, 786432}, 104857600, 1536, family,
-                          model, threads, new payload::FMA4Payload(supportedFeatures)) {}
+  BulldozerConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+      : X86PlatformConfig("BLD_OPTERON", 21, {1, 2, 3}, {1}, 0, {16384, 1048576, 786432}, 104857600, 1536, Family,
+                          Model, Threads, std::make_unique<payload::FMA4Payload>(SupportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
         {{"RAM_L", 1}, {"L3_L", 1}, {"L2_LS", 5}, {"L1_L", 90}, {"REG", 45}});
   }
diff --git a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
index 5b30d6a0..00bf3199 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
@@ -21,17 +21,17 @@
 
 #pragma once
 
-#include <firestarter/Environment/X86/Payload/FMAPayload.hpp>
-#include <firestarter/Environment/X86/Platform/X86PlatformConfig.hpp>
+#include "../Payload/FMAPayload.hpp"
+#include "X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class HaswellConfig final : public X86PlatformConfig {
 public:
-  HaswellConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
+  HaswellConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
       : X86PlatformConfig("HSW_COREI", 6, {60, 61, 69, 70, 71}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
-                          family, model, threads, new payload::FMAPayload(supportedFeatures)) {}
+                          Family, Model, Threads, std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
         {{"RAM_L", 2}, {"L3_LS", 3}, {"L2_LS", 9}, {"L1_LS", 90}, {"REG", 40}});
   }
diff --git a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
index 106dd0e3..d5df3341 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
@@ -21,17 +21,17 @@
 
 #pragma once
 
-#include <firestarter/Environment/X86/Payload/FMAPayload.hpp>
-#include <firestarter/Environment/X86/Platform/X86PlatformConfig.hpp>
+#include "../Payload/FMAPayload.hpp"
+#include "X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class HaswellEPConfig final : public X86PlatformConfig {
 public:
-  HaswellEPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("HSW_XEONEP", 6, {63, 79}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, family,
-                          model, threads, new payload::FMAPayload(supportedFeatures)) {}
+  HaswellEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+      : X86PlatformConfig("HSW_XEONEP", 6, {63, 79}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, Family,
+                          Model, Threads, std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
         {{"RAM_L", 8}, {"L3_LS", 1}, {"L2_LS", 29}, {"L1_LS", 100}, {"REG", 100}});
   }
diff --git a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
index 709ef934..cbc7d976 100644
--- a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
@@ -21,17 +21,17 @@
 
 #pragma once
 
-#include <firestarter/Environment/X86/Payload/AVX512Payload.hpp>
-#include <firestarter/Environment/X86/Platform/X86PlatformConfig.hpp>
+#include "../Payload/AVX512Payload.hpp"
+#include "X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class KnightsLandingConfig final : public X86PlatformConfig {
 public:
-  KnightsLandingConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("KNL_XEONPHI", 6, {87}, {4}, 0, {32768, 524288, 236279125}, 26214400, 1536, family, model,
-                          threads, new payload::AVX512Payload(supportedFeatures)) {}
+  KnightsLandingConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+      : X86PlatformConfig("KNL_XEONPHI", 6, {87}, {4}, 0, {32768, 524288, 236279125}, 26214400, 1536, Family, Model,
+                          Threads, std::make_unique<payload::AVX512Payload>(SupportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 3}, {"L2_S", 8}, {"L1_L", 40}, {"REG", 10}});
   }
 };
diff --git a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
index 5ad0a065..56f5bdc6 100644
--- a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
@@ -21,17 +21,17 @@
 
 #pragma once
 
-#include <firestarter/Environment/X86/Payload/ZENFMAPayload.hpp>
-#include <firestarter/Environment/X86/Platform/X86PlatformConfig.hpp>
+#include "../Payload/ZENFMAPayload.hpp"
+#include "X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class NaplesConfig final : public X86PlatformConfig {
 public:
-  NaplesConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24}, {1, 2}, 0, {65536, 524288, 2097152}, 104857600, 1536, family,
-                          model, threads, new payload::ZENFMAPayload(supportedFeatures)) {}
+  NaplesConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+      : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24}, {1, 2}, 0, {65536, 524288, 2097152}, 104857600, 1536, Family,
+                          Model, Threads, std::make_unique<payload::ZENFMAPayload>(SupportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
         {{"RAM_L", 3}, {"L3_L", 14}, {"L2_L", 75}, {"L1_LS", 81}, {"REG", 100}});
   }
diff --git a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
index 3f0748de..320d29f4 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
@@ -21,17 +21,17 @@
 
 #pragma once
 
-#include <firestarter/Environment/X86/Payload/SSE2Payload.hpp>
-#include <firestarter/Environment/X86/Platform/X86PlatformConfig.hpp>
+#include "../Payload/SSE2Payload.hpp"
+#include "X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class NehalemConfig final : public X86PlatformConfig {
 public:
-  NehalemConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, family,
-                          model, threads, new payload::SSE2Payload(supportedFeatures)) {}
+  NehalemConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+      : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, Family,
+                          Model, Threads, std::make_unique<payload::SSE2Payload>(SupportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 1}, {"L1_LS", 70}, {"REG", 2}});
   }
 };
diff --git a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
index a738fb7f..c9d032da 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
@@ -21,17 +21,17 @@
 
 #pragma once
 
-#include <firestarter/Environment/X86/Payload/SSE2Payload.hpp>
-#include <firestarter/Environment/X86/Platform/X86PlatformConfig.hpp>
+#include "../Payload/SSE2Payload.hpp"
+#include "X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class NehalemEPConfig final : public X86PlatformConfig {
 public:
-  NehalemEPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("NHM_XEONEP", 6, {26, 44}, {1, 2}, 0, {32768, 262144, 2097152}, 104857600, 1536, family,
-                          model, threads, new payload::SSE2Payload(supportedFeatures)) {}
+  NehalemEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+      : X86PlatformConfig("NHM_XEONEP", 6, {26, 44}, {1, 2}, 0, {32768, 262144, 2097152}, 104857600, 1536, Family,
+                          Model, Threads, std::make_unique<payload::SSE2Payload>(SupportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 1}, {"L1_LS", 60}, {"REG", 2}});
   }
 };
diff --git a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
index 230d91ba..c8c1ea73 100644
--- a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
@@ -21,17 +21,17 @@
 
 #pragma once
 
-#include <firestarter/Environment/X86/Payload/FMAPayload.hpp>
-#include <firestarter/Environment/X86/Platform/X86PlatformConfig.hpp>
+#include "../Payload/FMAPayload.hpp"
+#include "X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class RomeConfig final : public X86PlatformConfig {
 public:
-  RomeConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("ZEN_2_EPYC", 23, {49}, {1, 2}, 0, {32768, 524288, 2097152}, 104857600, 1536, family, model,
-                          threads, new payload::FMAPayload(supportedFeatures)) {}
+  RomeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+      : X86PlatformConfig("ZEN_2_EPYC", 23, {49}, {1, 2}, 0, {32768, 524288, 2097152}, 104857600, 1536, Family, Model,
+                          Threads, std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
         {{"RAM_L", 10}, {"L3_L", 25}, {"L2_L", 91}, {"L1_2LS_256", 72}, {"L1_LS_256", 82}, {"REG", 75}});
   }
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
index a58e193a..5ceef9ff 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
@@ -21,17 +21,17 @@
 
 #pragma once
 
-#include <firestarter/Environment/X86/Payload/AVXPayload.hpp>
-#include <firestarter/Environment/X86/Platform/X86PlatformConfig.hpp>
+#include "../Payload/AVXPayload.hpp"
+#include "X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class SandyBridgeConfig final : public X86PlatformConfig {
 public:
-  SandyBridgeConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("SNB_COREI", 6, {42, 58}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, family, model,
-                          threads, new payload::AVXPayload(supportedFeatures)) {}
+  SandyBridgeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+      : X86PlatformConfig("SNB_COREI", 6, {42, 58}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, Family, Model,
+                          Threads, std::make_unique<payload::AVXPayload>(SupportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
         {{"RAM_L", 2}, {"L3_LS", 4}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 45}});
   }
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
index 3f4f6303..8449bee1 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
@@ -22,17 +22,17 @@
 #ifndef INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SANDYBRIDGEEPCONFIG_H
 #define INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SANDYBRIDGEEPCONFIG_H
 
-#include <firestarter/Environment/X86/Payload/AVXPayload.hpp>
-#include <firestarter/Environment/X86/Platform/X86PlatformConfig.hpp>
+#include "../Payload/AVXPayload.hpp"
+#include "X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class SandyBridgeEPConfig final : public X86PlatformConfig {
 public:
-  SandyBridgeEPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("SNB_XEONEP", 6, {45, 62}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, family,
-                          model, threads, new payload::AVXPayload(supportedFeatures)) {}
+  SandyBridgeEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+      : X86PlatformConfig("SNB_XEONEP", 6, {45, 62}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, Family,
+                          Model, Threads, std::make_unique<payload::AVXPayload>(SupportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
         {{"RAM_L", 3}, {"L3_LS", 2}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 30}});
   }
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
index c533c3a5..1437ce98 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
@@ -22,17 +22,17 @@
 #ifndef INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SKYLAKECONFIG_H
 #define INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SKYLAKECONFIG_H
 
-#include <firestarter/Environment/X86/Payload/FMAPayload.hpp>
-#include <firestarter/Environment/X86/Platform/X86PlatformConfig.hpp>
+#include "../Payload/FMAPayload.hpp"
+#include "X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class SkylakeConfig final : public X86PlatformConfig {
 public:
-  SkylakeConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("SKL_COREI", 6, {78, 94}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, family, model,
-                          threads, new payload::FMAPayload(supportedFeatures)) {}
+  SkylakeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+      : X86PlatformConfig("SKL_COREI", 6, {78, 94}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, Family, Model,
+                          Threads, std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
         {{"RAM_L", 3}, {"L3_LS_256", 5}, {"L2_LS_256", 18}, {"L1_2LS_256", 78}, {"REG", 40}});
   }
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
index 8243d9d6..8a91b6de 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
@@ -21,17 +21,17 @@
 
 #pragma once
 
-#include <firestarter/Environment/X86/Payload/AVX512Payload.hpp>
-#include <firestarter/Environment/X86/Platform/X86PlatformConfig.hpp>
+#include "../Payload/AVX512Payload.hpp"
+#include "X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class SkylakeSPConfig final : public X86PlatformConfig {
 public:
-  SkylakeSPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads)
-      : X86PlatformConfig("SKL_XEONEP", 6, {85}, {1, 2}, 0, {32768, 1048576, 1441792}, 1048576000, 1536, family, model,
-                          threads, new payload::AVX512Payload(supportedFeatures)) {}
+  SkylakeSPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+      : X86PlatformConfig("SKL_XEONEP", 6, {85}, {1, 2}, 0, {32768, 1048576, 1441792}, 1048576000, 1536, Family, Model,
+                          Threads, std::make_unique<payload::AVX512Payload>(SupportedFeatures)) {}
 
-  std::vector<std::pair<std::string, unsigned>> getDefaultPayloadSettings() const override {
+  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_S", 3},
                                                           {"RAM_P", 1},
                                                           {"L3_S", 1},
diff --git a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
index 648346d8..3d33756c 100644
--- a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
@@ -21,34 +21,46 @@
 
 #pragma once
 
-#include <firestarter/Environment/Platform/PlatformConfig.hpp>
-#include <firestarter/Environment/X86/Payload/X86Payload.hpp>
+#include "../../Platform/PlatformConfig.hpp"
+#include "../Payload/X86Payload.hpp"
+#include <algorithm>
+#include <initializer_list>
+#include <list>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector> // IWYU pragma: keep
 
 namespace firestarter::environment::x86::platform {
 
+// PlatformConfig for x86. Stores a CPU family and a list of models next to the family, model and thread values
+// that the caller passes in as "current"; isDefault() compares the two sets.
 class X86PlatformConfig : public environment::platform::PlatformConfig {
 private:
-  unsigned _family;
-  std::list<unsigned> _models;
-  unsigned _currentFamily;
-  unsigned _currentModel;
-  unsigned _currentThreads;
+  unsigned Family;
+  std::list<unsigned> Models;
+  unsigned CurrentFamily;
+  unsigned CurrentModel;
+  unsigned CurrentThreads;
 
 public:
-  X86PlatformConfig(std::string name, unsigned family, std::initializer_list<unsigned> models,
-                    std::initializer_list<unsigned> threads, unsigned instructionCacheSize,
-                    std::initializer_list<unsigned> dataCacheBufferSize, unsigned ramBuffersize, unsigned lines,
-                    unsigned currentFamily, unsigned currentModel, unsigned currentThreads,
-                    payload::X86Payload* payload)
-      : PlatformConfig(name, threads, instructionCacheSize, dataCacheBufferSize, ramBuffersize, lines, payload)
-      , _family(family)
-      , _models(models)
-      , _currentFamily(currentFamily)
-      , _currentModel(currentModel)
-      , _currentThreads(currentThreads) {}
+  // Name and Payload are moved into the PlatformConfig base; all other base arguments are forwarded as given.
+  X86PlatformConfig(std::string Name, unsigned Family, std::initializer_list<unsigned> Models,
+                    std::initializer_list<unsigned> Threads, unsigned InstructionCacheSize,
+                    std::initializer_list<unsigned> DataCacheBufferSize, unsigned RamBuffersize, unsigned Lines,
+                    unsigned CurrentFamily, unsigned CurrentModel, unsigned CurrentThreads,
+                    std::unique_ptr<payload::X86Payload>&& Payload)
+      : PlatformConfig(std::move(Name), Threads, InstructionCacheSize, DataCacheBufferSize, RamBuffersize, Lines,
+                       std::move(Payload))
+      , Family(Family)
+      , Models(Models)
+      , CurrentFamily(CurrentFamily)
+      , CurrentModel(CurrentModel)
+      , CurrentThreads(CurrentThreads) {}
 
-  bool isDefault() const override {
-    return _family == _currentFamily && (std::find(_models.begin(), _models.end(), _currentModel) != _models.end()) &&
+  // True when Family equals CurrentFamily, CurrentModel is one of Models, and isAvailable() returns true.
+  [[nodiscard]] auto isDefault() const -> bool override {
+    return Family == CurrentFamily && (std::find(Models.begin(), Models.end(), CurrentModel) != Models.end()) &&
            isAvailable();
   }
 };
diff --git a/include/firestarter/Environment/X86/X86CPUTopology.hpp b/include/firestarter/Environment/X86/X86CPUTopology.hpp
index d9ca6393..44875e03 100644
--- a/include/firestarter/Environment/X86/X86CPUTopology.hpp
+++ b/include/firestarter/Environment/X86/X86CPUTopology.hpp
@@ -23,7 +23,7 @@
 
 #include <asmjit/asmjit.h>
 
-#include <firestarter/Environment/CPUTopology.hpp>
+#include "../CPUTopology.hpp"
 
 namespace firestarter::environment::x86 {
 
@@ -36,9 +36,6 @@ class X86CPUTopology final : public CPUTopology {
   [[nodiscard]] auto features() const -> std::list<std::string> const& override { return this->FeatureList; }
   [[nodiscard]] auto featuresAsmjit() const -> const asmjit::CpuFeatures& { return this->CpuInfo.features(); }
 
-  [[nodiscard]] auto vendor() const -> std::string const& override { return this->Vendor; }
-  [[nodiscard]] auto model() const -> std::string const& override { return this->Model; }
-
   [[nodiscard]] auto clockrate() const -> uint64_t override;
 
   [[nodiscard]] auto timestamp() const -> uint64_t override;
@@ -50,15 +47,13 @@ class X86CPUTopology final : public CPUTopology {
 private:
   [[nodiscard]] auto hasRdtsc() const -> bool { return this->HasRdtsc; }
   [[nodiscard]] auto hasInvariantRdtsc() const -> bool { return this->HasInvariantRdtsc; }
-  void cpuid(uint64_t* A, uint64_t* B, uint64_t* C, uint64_t* D) const;
+  static void cpuid(uint64_t* Rax, uint64_t* Rbx, uint64_t* Rcx, uint64_t* Rdx);
 
   asmjit::CpuInfo CpuInfo;
   std::list<std::string> FeatureList;
 
   bool HasRdtsc;
   bool HasInvariantRdtsc;
-  std::string Vendor;
-  std::string Model;
 };
 
 inline auto operator<<(std::ostream& Stream, X86CPUTopology const& CpuTopology) -> std::ostream& {
diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp
index 7873c9c4..3e358d90 100644
--- a/include/firestarter/Environment/X86/X86Environment.hpp
+++ b/include/firestarter/Environment/X86/X86Environment.hpp
@@ -23,27 +23,29 @@
 
 #include <asmjit/asmjit.h>
 
-#include <firestarter/Environment/Environment.hpp>
-#include <firestarter/Environment/X86/Platform/BulldozerConfig.hpp>
-#include <firestarter/Environment/X86/Platform/HaswellConfig.hpp>
-#include <firestarter/Environment/X86/Platform/HaswellEPConfig.hpp>
-#include <firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp>
-#include <firestarter/Environment/X86/Platform/NaplesConfig.hpp>
-#include <firestarter/Environment/X86/Platform/NehalemConfig.hpp>
-#include <firestarter/Environment/X86/Platform/NehalemEPConfig.hpp>
-#include <firestarter/Environment/X86/Platform/RomeConfig.hpp>
-#include <firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp>
-#include <firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp>
-#include <firestarter/Environment/X86/Platform/SkylakeConfig.hpp>
-#include <firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp>
-#include <firestarter/Environment/X86/Platform/X86PlatformConfig.hpp>
-#include <firestarter/Environment/X86/X86CPUTopology.hpp>
+#include "../Environment.hpp"
+#include "Platform/BulldozerConfig.hpp"
+#include "Platform/HaswellConfig.hpp"
+#include "Platform/HaswellEPConfig.hpp"
+#include "Platform/KnightsLandingConfig.hpp"
+#include "Platform/NaplesConfig.hpp"
+#include "Platform/NehalemConfig.hpp"
+#include "Platform/NehalemEPConfig.hpp"
+#include "Platform/RomeConfig.hpp"
+#include "Platform/SandyBridgeConfig.hpp"
+#include "Platform/SandyBridgeEPConfig.hpp"
+#include "Platform/SkylakeConfig.hpp"
+#include "Platform/SkylakeSPConfig.hpp"
+#include "Platform/X86PlatformConfig.hpp"
+#include "X86CPUTopology.hpp"
 #include <functional>
+#include <list>
+#include <memory>
 
 #define REGISTER(NAME)                                                                                                 \
   [](asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model,                                    \
-     unsigned threads) -> platform::X86PlatformConfig* {                                                               \
-    return new platform::NAME(supportedFeatures, family, model, threads);                                              \
+     unsigned threads) -> std::unique_ptr<platform::X86PlatformConfig> {                                               \
+    return std::make_unique<platform::NAME>(supportedFeatures, family, model, threads);                                \
   }
 
 namespace firestarter::environment::x86 {
@@ -51,18 +53,11 @@ namespace firestarter::environment::x86 {
 class X86Environment final : public Environment {
 public:
   X86Environment()
-      : Environment(new X86CPUTopology()) {}
+      : Environment(std::make_unique<X86CPUTopology>()) {}
 
-  ~X86Environment() {
-    for (auto const& Config : PlatformConfigs) {
-      delete Config;
-    }
-    for (auto const& Config : FallbackPlatformConfigs) {
-      delete Config;
-    }
-  }
-
-  auto topology() -> X86CPUTopology const& { return *reinterpret_cast<X86CPUTopology*>(this->Topology); }
+  // The constructor hands an X86CPUTopology to Environment, so this downcast is expected to succeed. The reference
+  // form of dynamic_cast throws std::bad_cast on a type mismatch instead of dereferencing a null pointer.
+  auto topology() -> X86CPUTopology const& { return dynamic_cast<X86CPUTopology const&>(*Topology); }
 
   void evaluateFunctions() override;
   auto selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) -> int override;
@@ -76,16 +71,18 @@ class X86Environment final : public Environment {
   // The available function IDs are generated by iterating through this list
   // of PlatformConfig. Add new PlatformConfig at the bottom to maintain
   // stable IDs.
-  const std::list<std::function<platform::X86PlatformConfig*(asmjit::CpuFeatures const&, unsigned, unsigned, unsigned)>>
+  const std::list<std::function<std::unique_ptr<platform::X86PlatformConfig>(asmjit::CpuFeatures const&, unsigned,
+                                                                             unsigned, unsigned)>>
       PlatformConfigsCtor = {REGISTER(KnightsLandingConfig), REGISTER(SkylakeConfig),   REGISTER(SkylakeSPConfig),
                              REGISTER(HaswellConfig),        REGISTER(HaswellEPConfig), REGISTER(SandyBridgeConfig),
                              REGISTER(SandyBridgeEPConfig),  REGISTER(NehalemConfig),   REGISTER(NehalemEPConfig),
                              REGISTER(BulldozerConfig),      REGISTER(NaplesConfig),    REGISTER(RomeConfig)};
 
-  std::list<platform::X86PlatformConfig*> PlatformConfigs;
+  std::list<std::unique_ptr<platform::X86PlatformConfig>> PlatformConfigs;
 
   // List of fallback PlatformConfig. Add one for each x86 extension.
-  const std::list<std::function<platform::X86PlatformConfig*(asmjit::CpuFeatures const&, unsigned, unsigned, unsigned)>>
+  const std::list<std::function<std::unique_ptr<platform::X86PlatformConfig>(asmjit::CpuFeatures const&, unsigned,
+                                                                             unsigned, unsigned)>>
       FallbackPlatformConfigsCtor = {
           REGISTER(SkylakeSPConfig),   // AVX512
           REGISTER(BulldozerConfig),   // FMA4
@@ -94,7 +91,7 @@ class X86Environment final : public Environment {
           REGISTER(NehalemConfig)      // SSE2
       };
 
-  std::list<platform::X86PlatformConfig*> FallbackPlatformConfigs;
+  std::list<std::unique_ptr<platform::X86PlatformConfig>> FallbackPlatformConfigs;
 
 #undef REGISTER
 };
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 8009c1c9..0e4c7ef5 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -22,27 +22,27 @@
 #pragma once
 
 #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
-#include <firestarter/Cuda/Cuda.hpp>
+#include "Cuda/Cuda.hpp"
 #endif
 
 #ifdef FIRESTARTER_BUILD_ONEAPI
-#include <firestarter/OneAPI/OneAPI.hpp>
+#include "OneAPI/OneAPI.hpp"
 #endif
 
-#include <firestarter/Constants.hpp>
+#include "Constants.hpp"
 
 #if defined(linux) || defined(__linux__)
-#include <firestarter/Measurement/MeasurementWorker.hpp>
-#include <firestarter/Optimizer/Algorithm.hpp>
-#include <firestarter/Optimizer/OptimizerWorker.hpp>
-#include <firestarter/Optimizer/Population.hpp>
+#include "Measurement/MeasurementWorker.hpp"
+#include "Optimizer/Algorithm.hpp"
+#include "Optimizer/OptimizerWorker.hpp"
+#include "Optimizer/Population.hpp"
 #endif
 
-#include <firestarter/DumpRegisterWorkerData.hpp>
-#include <firestarter/LoadWorkerData.hpp>
+#include "DumpRegisterWorkerData.hpp"
+#include "LoadWorkerData.hpp"
 
 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
-#include <firestarter/Environment/X86/X86Environment.hpp>
+#include "Environment/X86/X86Environment.hpp"
 #endif
 
 #include <chrono>
diff --git a/include/firestarter/Json/Summary.hpp b/include/firestarter/Json/Summary.hpp
index a2e8e03a..8fe781e9 100644
--- a/include/firestarter/Json/Summary.hpp
+++ b/include/firestarter/Json/Summary.hpp
@@ -21,23 +21,29 @@
 
 #pragma once
 
-#include <firestarter/Measurement/Summary.hpp>
+#include "../Measurement/Summary.hpp"
 
 namespace nlohmann {
 template <> struct adl_serializer<firestarter::measurement::Summary> {
-  static firestarter::measurement::Summary from_json(const json& j) {
-    return {j["num_timepoints"].get<size_t>(),
-            std::chrono::milliseconds(j["duration"].get<std::chrono::milliseconds::rep>()), j["average"].get<double>(),
-            j["stddev"].get<double>()};
+  // functions for nlohmann json do not follow LLVM code style
+  // NOLINTBEGIN(readability-identifier-naming)
+  // Builds a Summary from the keys "num_timepoints", "duration" (read as a millisecond count), "average" and
+  // "stddev". at() is used so that a missing key throws instead of hitting the unchecked const operator[].
+  static auto from_json(const json& J) -> firestarter::measurement::Summary {
+    return {J.at("num_timepoints").get<size_t>(),
+            std::chrono::milliseconds(J.at("duration").get<std::chrono::milliseconds::rep>()),
+            J.at("average").get<double>(), J.at("stddev").get<double>()};
   }
 
-  static void to_json(json& j, firestarter::measurement::Summary s) {
-    j = json::object();
+  // Replaces J with an object holding the same four keys; "duration" is written as S.Duration.count().
+  static void to_json(json& J, const firestarter::measurement::Summary& S) {
+    J = json::object();
 
-    j["num_timepoints"] = s.NumTimepoints;
-    j["duration"] = s.Duration.count();
-    j["average"] = s.Average;
-    j["stddev"] = s.Stddev;
+    J["num_timepoints"] = S.NumTimepoints;
+    J["duration"] = S.Duration.count();
+    J["average"] = S.Average;
+    J["stddev"] = S.Stddev;
   }
+  // NOLINTEND(readability-identifier-naming)
 };
 } // namespace nlohmann
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index eb7e0c3c..6aa5b40f 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -21,11 +21,11 @@
 
 #pragma once
 
+#include "Constants.hpp"
+#include "DumpRegisterStruct.hpp"
+#include "Environment/Environment.hpp"
+#include "ErrorDetectionStruct.hpp"
 #include <atomic>
-#include <firestarter/Constants.hpp>
-#include <firestarter/DumpRegisterStruct.hpp>
-#include <firestarter/Environment/Environment.hpp>
-#include <firestarter/ErrorDetectionStruct.hpp>
 #include <memory>
 #include <mutex>
 #include <utility>
diff --git a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
index 3a0e68fc..2a1a51f1 100644
--- a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
+++ b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
@@ -21,7 +21,6 @@
 
 #pragma once
 
-#include <nitro/log/log.hpp>
 #include <nitro/log/severity.hpp>
 #include <thread>
 
@@ -33,8 +32,8 @@ template <typename Record> class FirstWorkerThreadFilter {
 
   static void setFirstThread(std::thread::id NewFirstThread) { FirstThread = NewFirstThread; }
 
-  auto filter(Record& r) const -> bool {
-    return r.std_thread_id() == FirstThread || r.severity() >= nitro::log::severity_level::error;
+  auto filter(Record& R) const -> bool {
+    return R.std_thread_id() == FirstThread || R.severity() >= nitro::log::severity_level::error;
   }
 
 private:
diff --git a/include/firestarter/Logging/Log.hpp b/include/firestarter/Logging/Log.hpp
index 74cc3e1a..a2874ebb 100644
--- a/include/firestarter/Logging/Log.hpp
+++ b/include/firestarter/Logging/Log.hpp
@@ -21,9 +21,7 @@
 
 #pragma once
 
-#include <firestarter/Logging/FirstWorkerThreadFilter.hpp>
-#include <iomanip>
-#include <ios>
+#include "FirstWorkerThreadFilter.hpp"
 #include <iostream>
 #include <nitro/log/attribute/message.hpp>
 #include <nitro/log/attribute/severity.hpp>
@@ -42,61 +40,61 @@ namespace logging {
 
 class StdOut {
 public:
-  void sink(nitro::log::severity_level severity, const std::string& formatted_record) {
-    switch (severity) {
+  static void sink(nitro::log::severity_level Severity, const std::string& FormattedRecord) {
+    switch (Severity) {
     case nitro::log::severity_level::warn:
     case nitro::log::severity_level::error:
     case nitro::log::severity_level::fatal:
-      std::cerr << formatted_record << std::endl << std::flush;
+      std::cerr << FormattedRecord << '\n' << std::flush;
       break;
     default:
-      std::cout << formatted_record << std::endl << std::flush;
+      std::cout << FormattedRecord << '\n' << std::flush;
       break;
     }
   }
 };
 
-using record = nitro::log::record<nitro::log::severity_attribute, nitro::log::message_attribute,
+using Record = nitro::log::record<nitro::log::severity_attribute, nitro::log::message_attribute,
                                   nitro::log::timestamp_attribute, nitro::log::std_thread_id_attribute>;
 
-template <typename Record> class formater {
+template <typename Record> class Formater {
 public:
-  std::string format(Record& r) {
-    std::stringstream s;
+  auto format(Record& R) -> std::string {
+    std::stringstream S;
 
-    switch (r.severity()) {
+    switch (R.severity()) {
     case nitro::log::severity_level::warn:
-      s << "Warning: ";
+      S << "Warning: ";
       break;
     case nitro::log::severity_level::error:
-      s << "Error: ";
+      S << "Error: ";
       break;
     case nitro::log::severity_level::fatal:
-      s << "Fatal: ";
+      S << "Fatal: ";
       break;
     case nitro::log::severity_level::trace:
-      s << "Debug: ";
+      S << "Debug: ";
       break;
     default:
       break;
     }
 
-    s << r.message();
+    S << R.message();
 
-    return s.str();
+    return S.str();
   }
 };
 
-template <typename Record> using filter = nitro::log::filter::severity_filter<Record>;
+template <typename Record> using Filter = nitro::log::filter::severity_filter<Record>;
 
 template <typename Record>
-using workerFilter = nitro::log::filter::and_filter<filter<Record>, FirstWorkerThreadFilter<Record>>;
+using WorkerFilter = nitro::log::filter::and_filter<Filter<Record>, FirstWorkerThreadFilter<Record>>;
 
 } // namespace logging
 
-using log = nitro::log::logger<logging::record, logging::formater, firestarter::logging::StdOut, logging::filter>;
+using log = nitro::log::logger<logging::Record, logging::Formater, firestarter::logging::StdOut, logging::Filter>;
 
 using workerLog =
-    nitro::log::logger<logging::record, logging::formater, firestarter::logging::StdOut, logging::workerFilter>;
+    nitro::log::logger<logging::Record, logging::Formater, firestarter::logging::StdOut, logging::WorkerFilter>;
 
 } // namespace firestarter
diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp
index 2045bd43..e9e18b76 100644
--- a/include/firestarter/Measurement/MeasurementWorker.hpp
+++ b/include/firestarter/Measurement/MeasurementWorker.hpp
@@ -21,14 +21,13 @@
 
 #pragma once
 
+#include "Metric/IPCEstimate.h"
+#include "Metric/Perf.h"
+#include "Metric/RAPL.h"
+#include "MetricInterface.h"
+#include "Summary.hpp"
+#include "TimeValue.hpp"
 #include <chrono>
-#include <firestarter/Logging/Log.hpp>
-#include <firestarter/Measurement/Metric/IPCEstimate.h>
-#include <firestarter/Measurement/Metric/Perf.h>
-#include <firestarter/Measurement/Metric/RAPL.h>
-#include <firestarter/Measurement/MetricInterface.h>
-#include <firestarter/Measurement/Summary.hpp>
-#include <firestarter/Measurement/TimeValue.hpp>
 #include <map>
 #include <mutex>
 
diff --git a/include/firestarter/Measurement/Metric/IPCEstimate.h b/include/firestarter/Measurement/Metric/IPCEstimate.h
index 360c1d91..63dcb26b 100644
--- a/include/firestarter/Measurement/Metric/IPCEstimate.h
+++ b/include/firestarter/Measurement/Metric/IPCEstimate.h
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include <firestarter/Measurement/MetricInterface.h>
+#include "../MetricInterface.h"
 
 extern "C" {
 
diff --git a/include/firestarter/Measurement/Metric/Perf.h b/include/firestarter/Measurement/Metric/Perf.h
index 39a070f6..2702cd94 100644
--- a/include/firestarter/Measurement/Metric/Perf.h
+++ b/include/firestarter/Measurement/Metric/Perf.h
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include <firestarter/Measurement/MetricInterface.h>
+#include "../MetricInterface.h"
 
 extern "C" {
 
diff --git a/include/firestarter/Measurement/Metric/RAPL.h b/include/firestarter/Measurement/Metric/RAPL.h
index 726ff61a..017373a1 100644
--- a/include/firestarter/Measurement/Metric/RAPL.h
+++ b/include/firestarter/Measurement/Metric/RAPL.h
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include <firestarter/Measurement/MetricInterface.h>
+#include "../MetricInterface.h"
 
 extern "C" {
 
diff --git a/include/firestarter/Measurement/Summary.hpp b/include/firestarter/Measurement/Summary.hpp
index 09c91016..019a73eb 100644
--- a/include/firestarter/Measurement/Summary.hpp
+++ b/include/firestarter/Measurement/Summary.hpp
@@ -21,15 +21,12 @@
 
 #pragma once
 
+#include "MetricInterface.h"
+#include "TimeValue.hpp"
 #include <chrono>
-#include <firestarter/Measurement/TimeValue.hpp>
 #include <nlohmann/json.hpp>
 #include <vector>
 
-extern "C" {
-#include <firestarter/Measurement/MetricInterface.h>
-}
-
 namespace firestarter::measurement {
 
 struct Summary {
diff --git a/include/firestarter/Measurement/TimeValue.hpp b/include/firestarter/Measurement/TimeValue.hpp
index cc168ad2..10b31e8b 100644
--- a/include/firestarter/Measurement/TimeValue.hpp
+++ b/include/firestarter/Measurement/TimeValue.hpp
@@ -33,7 +33,7 @@ struct TimeValue {
       , Value(Value){};
 
   std::chrono::high_resolution_clock::time_point Time;
-  double Value;
+  double Value{};
 };
 
 } // namespace firestarter::measurement
diff --git a/include/firestarter/Optimizer/Algorithm.hpp b/include/firestarter/Optimizer/Algorithm.hpp
index 4cdae1ec..8bae8bd3 100644
--- a/include/firestarter/Optimizer/Algorithm.hpp
+++ b/include/firestarter/Optimizer/Algorithm.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include <firestarter/Optimizer/Population.hpp>
+#include "Population.hpp"
 
 namespace firestarter::optimizer {
 
diff --git a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
index e02e7e14..70c2aac0 100644
--- a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
+++ b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include <firestarter/Optimizer/Algorithm.hpp>
+#include "../Algorithm.hpp"
 
 namespace firestarter::optimizer::algorithm {
 
diff --git a/include/firestarter/Optimizer/History.hpp b/include/firestarter/Optimizer/History.hpp
index 332b49c5..8c573d72 100644
--- a/include/firestarter/Optimizer/History.hpp
+++ b/include/firestarter/Optimizer/History.hpp
@@ -21,14 +21,14 @@
 
 #pragma once
 
+#include "../Json/Summary.hpp" // IWYU pragma: keep
+#include "../Logging/Log.hpp"
+#include "../Measurement/Summary.hpp"
+#include "Individual.hpp"
 #include <algorithm>
 #include <cassert>
 #include <cstring>
 #include <ctime>
-#include <firestarter/Json/Summary.hpp>
-#include <firestarter/Logging/Log.hpp>
-#include <firestarter/Measurement/Summary.hpp>
-#include <firestarter/Optimizer/Individual.hpp>
 #include <fstream>
 #include <iomanip>
 #include <nlohmann/json.hpp>
@@ -88,129 +88,129 @@ struct History {
 
     // print the best 20 individuals for each metric in a format
     // where the user can give it to --run-instruction-groups directly
-    std::map<std::string, std::size_t> columnWidth;
+    std::map<std::string, std::size_t> ColumnWidth;
 
-    for (auto const& metric : OptimizationMetrics) {
-      columnWidth[metric] = (std::max)(metric.size(), MinColumnWidth);
-      firestarter::log::trace() << metric << ": " << columnWidth[metric];
+    for (auto const& Metric : OptimizationMetrics) {
+      ColumnWidth[Metric] = (std::max)(Metric.size(), MinColumnWidth);
+      firestarter::log::trace() << Metric << ": " << ColumnWidth[Metric];
     }
 
-    for (auto const& metric : OptimizationMetrics) {
+    for (auto const& Metric : OptimizationMetrics) {
       using SummaryMap = std::map<std::string, firestarter::measurement::Summary>;
-      auto compareIndividual = [&metric](SummaryMap const& mapA, SummaryMap const& mapB) {
-        auto summaryA = mapA.find(metric);
-        auto summaryB = mapB.find(metric);
-
-        if (summaryA == mapA.end() || summaryB == mapB.end()) {
-          summaryA = mapA.find(metric.substr(1));
-          summaryB = mapB.find(metric.substr(1));
-          assert(summaryA != mapA.end());
-          assert(summaryB != mapB.end());
-          return summaryA->second.Average < summaryB->second.Average;
+      auto CompareIndividual = [&Metric](SummaryMap const& MapA, SummaryMap const& MapB) {
+        auto SummaryA = MapA.find(Metric);
+        auto SummaryB = MapB.find(Metric);
+
+        if (SummaryA == MapA.end() || SummaryB == MapB.end()) {
+          SummaryA = MapA.find(Metric.substr(1));
+          SummaryB = MapB.find(Metric.substr(1));
+          assert(SummaryA != MapA.end());
+          assert(SummaryB != MapB.end());
+          return SummaryA->second.Average < SummaryB->second.Average;
         }
 
-        assert(summaryA != mapA.end());
-        assert(summaryB != mapB.end());
-        return summaryA->second.Average > summaryB->second.Average;
+        assert(SummaryA != MapA.end());
+        assert(SummaryB != MapB.end());
+        return SummaryA->second.Average > SummaryB->second.Average;
       };
 
-      auto perm = sortPermutation(F, compareIndividual);
+      auto Perm = sortPermutation(F, CompareIndividual);
 
-      auto formatIndividual = [&PayloadItems](std::vector<unsigned> const& individual) {
-        std::string result = "";
-        assert(PayloadItems.size() == individual.size());
+      auto FormatIndividual = [&PayloadItems](std::vector<unsigned> const& Individual) {
+        std::string Result;
+        assert(PayloadItems.size() == Individual.size());
 
-        for (std::size_t i = 0; i < individual.size(); ++i) {
+        for (std::size_t I = 0; I < Individual.size(); ++I) {
           // skip zero values
-          if (individual[i] == 0) {
+          if (Individual[I] == 0) {
             continue;
           }
 
-          if (result.size() != 0) {
-            result += ",";
+          if (Result.size() != 0) {
+            Result += ",";
           }
-          result += PayloadItems[i] + ":" + std::to_string(individual[i]);
+          Result += PayloadItems[I] + ":" + std::to_string(Individual[I]);
         }
 
-        return result;
+        return Result;
       };
 
-      auto begin = perm.begin();
-      auto end = perm.end();
+      auto Begin = Perm.begin();
+      auto End = Perm.end();
 
       // stop printing at a max of MaxElementPrintCount
-      if (std::distance(begin, end) > MaxElementPrintCount) {
-        end = perm.begin();
-        std::advance(end, MaxElementPrintCount);
+      if (std::distance(Begin, End) > MaxElementPrintCount) {
+        End = Perm.begin();
+        std::advance(End, MaxElementPrintCount);
       }
 
       // print each of the best elements
-      std::size_t max = 0;
-      for (auto it = begin; it != end; ++it) {
-        max = (std::max)(max, formatIndividual(X[*it]).size());
+      std::size_t Max = 0;
+      for (auto It = Begin; It != End; ++It) {
+        Max = (std::max)(Max, FormatIndividual(X[*It]).size());
       }
 
-      std::stringstream firstLine;
-      std::stringstream secondLine;
-      std::string ind = "INDIVIDUAL";
+      std::stringstream FirstLine;
+      std::stringstream SecondLine;
+      std::string Ind = "INDIVIDUAL";
 
-      firstLine << "  " << ind;
-      padding(firstLine, max, ind.size(), ' ');
+      FirstLine << "  " << Ind;
+      padding(FirstLine, Max, Ind.size(), ' ');
 
-      secondLine << "  ";
-      padding(secondLine, (std::max)(max, ind.size()), 0, '-');
+      SecondLine << "  ";
+      padding(SecondLine, (std::max)(Max, Ind.size()), 0, '-');
 
-      for (auto const& metric : OptimizationMetrics) {
-        auto width = columnWidth[metric];
+      for (auto const& Metric : OptimizationMetrics) {
+        auto Width = ColumnWidth[Metric];
 
-        firstLine << " | ";
-        secondLine << "---";
+        FirstLine << " | ";
+        SecondLine << "---";
 
-        firstLine << metric;
-        padding(firstLine, width, metric.size(), ' ');
-        padding(secondLine, width, 0, '-');
+        FirstLine << Metric;
+        padding(FirstLine, Width, Metric.size(), ' ');
+        padding(SecondLine, Width, 0, '-');
       }
 
-      std::stringstream ss;
+      std::stringstream Ss;
 
-      ss << "\n Best individuals sorted by metric " << metric << " "
-         << ((metric[0] == '-') ? "ascending" : "descending") << ":\n"
-         << firstLine.str() << "\n"
-         << secondLine.str() << "\n";
+      Ss << "\n Best individuals sorted by metric " << Metric << " "
+         << ((Metric[0] == '-') ? "ascending" : "descending") << ":\n"
+         << FirstLine.str() << "\n"
+         << SecondLine.str() << "\n";
 
       // print INDIVIDUAL | metric 1 | metric 2 | ... | metric N
-      for (auto it = begin; it != end; ++it) {
-        auto const fitness = F[*it];
-        auto const ind = formatIndividual(X[*it]);
+      for (auto It = Begin; It != End; ++It) {
+        auto const& Fitness = F[*It];
+        auto const Ind = FormatIndividual(X[*It]);
 
-        ss << "  " << ind;
-        padding(ss, max, ind.size(), ' ');
+        Ss << "  " << Ind;
+        padding(Ss, Max, Ind.size(), ' ');
 
-        for (auto const& metric : OptimizationMetrics) {
-          auto width = columnWidth[metric];
-          std::string value;
+        for (auto const& Metric : OptimizationMetrics) {
+          auto Width = ColumnWidth[Metric];
+          std::string Value;
 
-          auto fitnessOfMetric = fitness.find(metric);
-          auto invertedMetric = metric.substr(1);
-          auto fitnessOfInvertedMetric = fitness.find(invertedMetric);
+          auto FitnessOfMetric = Fitness.find(Metric);
+          auto InvertedMetric = Metric.substr(1);
+          auto FitnessOfInvertedMetric = Fitness.find(InvertedMetric);
 
-          if (fitnessOfMetric != fitness.end()) {
-            value = std::to_string(fitnessOfMetric->second.Average);
-          } else if (fitnessOfInvertedMetric != fitness.end()) {
-            value = std::to_string(fitnessOfInvertedMetric->second.Average);
+          if (FitnessOfMetric != Fitness.end()) {
+            Value = std::to_string(FitnessOfMetric->second.Average);
+          } else if (FitnessOfInvertedMetric != Fitness.end()) {
+            Value = std::to_string(FitnessOfInvertedMetric->second.Average);
           } else {
             assert(false);
           }
 
-          ss << " | " << value;
-          padding(ss, width, value.size(), ' ');
+          Ss << " | " << Value;
+          padding(Ss, Width, Value.size(), ' ');
         }
-        ss << "\n";
+        Ss << "\n";
       }
 
-      ss << "\n";
+      Ss << "\n";
 
-      firestarter::log::info() << ss.str();
+      firestarter::log::info() << Ss.str();
     }
 
     firestarter::log::info() << "To run FIRESTARTER with the best individual of a given metric "
diff --git a/include/firestarter/Optimizer/OptimizerWorker.hpp b/include/firestarter/Optimizer/OptimizerWorker.hpp
index e98c25b9..ba106595 100644
--- a/include/firestarter/Optimizer/OptimizerWorker.hpp
+++ b/include/firestarter/Optimizer/OptimizerWorker.hpp
@@ -19,9 +19,9 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "Algorithm.hpp"
+#include "Population.hpp"
 #include <chrono>
-#include <firestarter/Optimizer/Algorithm.hpp>
-#include <firestarter/Optimizer/Population.hpp>
 #include <memory>
 
 extern "C" {
diff --git a/include/firestarter/Optimizer/Population.hpp b/include/firestarter/Optimizer/Population.hpp
index 757a2e46..2d904467 100644
--- a/include/firestarter/Optimizer/Population.hpp
+++ b/include/firestarter/Optimizer/Population.hpp
@@ -22,10 +22,9 @@
 #ifndef FIRESTARTER_OPTIMIZER_POPULATION_HPP
 #define FIRESTARTER_OPTIMIZER_POPULATION_HPP
 
+#include "Individual.hpp"
+#include "Problem.hpp"
 #include <cstring>
-#include <firestarter/Optimizer/History.hpp>
-#include <firestarter/Optimizer/Individual.hpp>
-#include <firestarter/Optimizer/Problem.hpp>
 #include <memory>
 #include <optional>
 #include <random>
diff --git a/include/firestarter/Optimizer/Problem.hpp b/include/firestarter/Optimizer/Problem.hpp
index df31ec98..ae0d285d 100644
--- a/include/firestarter/Optimizer/Problem.hpp
+++ b/include/firestarter/Optimizer/Problem.hpp
@@ -21,9 +21,9 @@
 
 #pragma once
 
+#include "../Measurement/Summary.hpp"
+#include "Individual.hpp"
 #include <cstring>
-#include <firestarter/Measurement/Summary.hpp>
-#include <firestarter/Optimizer/Individual.hpp>
 #include <map>
 #include <tuple>
 #include <vector>
diff --git a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
index 74346a74..9d3c4ed7 100644
--- a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
+++ b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
@@ -21,11 +21,9 @@
 
 #pragma once
 
+#include "../../Measurement/MeasurementWorker.hpp"
+#include "../Problem.hpp"
 #include <cassert>
-#include <chrono>
-#include <cmath>
-#include <firestarter/Measurement/MeasurementWorker.hpp>
-#include <firestarter/Optimizer/Problem.hpp>
 #include <functional>
 #include <thread>
 #include <tuple>
diff --git a/include/firestarter/Optimizer/Util/MultiObjective.hpp b/include/firestarter/Optimizer/Util/MultiObjective.hpp
index fab62be8..1b3a1873 100644
--- a/include/firestarter/Optimizer/Util/MultiObjective.hpp
+++ b/include/firestarter/Optimizer/Util/MultiObjective.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include <firestarter/Optimizer/Individual.hpp>
+#include "../Individual.hpp"
 #include <random>
 #include <utility>
 #include <vector>
diff --git a/src/firestarter/Environment/CPUTopology.cpp b/src/firestarter/Environment/CPUTopology.cpp
index b3e9a862..62c9224c 100644
--- a/src/firestarter/Environment/CPUTopology.cpp
+++ b/src/firestarter/Environment/CPUTopology.cpp
@@ -27,34 +27,30 @@
 #include <regex>
 #include <utility>
 
-extern "C" {
-#include <stdio.h>
-}
-
 namespace firestarter::environment {
 
-auto CPUTopology::print(std::ostream& stream) const -> std::ostream& {
-  stream << "  system summary:\n"
-         << "    number of processors:        " << this->numPackages() << "\n"
-         << "    number of cores (total)):    " << this->numCoresTotal() << "\n"
+auto CPUTopology::print(std::ostream& Stream) const -> std::ostream& {
+  Stream << "  system summary:\n"
+         << "    number of processors:        " << numPackages() << "\n"
+         << "    number of cores (total)):    " << numCoresTotal() << "\n"
          << "  (this includes only cores in the cgroup)"
          << "\n"
-         << "    number of threads per core:  " << this->numThreadsPerCore() << "\n"
-         << "    total number of threads:     " << this->numThreads() << "\n\n";
+         << "    number of threads per core:  " << numThreadsPerCore() << "\n"
+         << "    total number of threads:     " << numThreads() << "\n\n";
 
-  std::stringstream ss;
+  std::stringstream Ss;
 
-  for (auto const& Entry : this->features()) {
-    ss << Entry << " ";
+  for (auto const& Entry : features()) {
+    Ss << Entry << " ";
   }
 
-  stream << "  processor characteristics:\n"
-         << "    architecture:       " << this->architecture() << "\n"
-         << "    vendor:             " << this->vendor() << "\n"
-         << "    processor-name:     " << this->processorName() << "\n"
-         << "    model:              " << this->model() << "\n"
-         << "    frequency:          " << this->clockrate() / 1000000 << " MHz\n"
-         << "    supported features: " << ss.str() << "\n"
+  Stream << "  processor characteristics:\n"
+         << "    architecture:       " << architecture() << "\n"
+         << "    vendor:             " << vendor() << "\n"
+         << "    processor-name:     " << processorName() << "\n"
+         << "    model:              " << model() << "\n"
+         << "    frequency:          " << clockrate() / 1000000 << " MHz\n"
+         << "    supported features: " << Ss.str() << "\n"
          << "    Caches:";
 
   std::vector<hwloc_obj_type_t> Caches = {
@@ -65,76 +61,76 @@ auto CPUTopology::print(std::ostream& stream) const -> std::ostream& {
   std::vector<std::string> CacheStrings = {};
 
   for (hwloc_obj_type_t const& Cache : Caches) {
-    std::stringstream ss;
+    std::stringstream Ss;
 
-    auto Width = hwloc_get_nbobjs_by_type(this->topology, Cache);
+    auto Width = hwloc_get_nbobjs_by_type(Topology, Cache);
 
     if (Width >= 1) {
-      ss << "\n      - ";
+      Ss << "\n      - ";
 
-      auto* CacheObj = hwloc_get_obj_by_type(this->topology, Cache, 0);
+      auto* CacheObj = hwloc_get_obj_by_type(Topology, Cache, 0);
       std::array<char, 128> String{};
       hwloc_obj_type_snprintf(String.begin(), sizeof(String), CacheObj, 0);
 
       switch (CacheObj->attr->cache.type) {
       case HWLOC_OBJ_CACHE_DATA:
-        ss << "Level " << CacheObj->attr->cache.depth << " Data";
+        Ss << "Level " << CacheObj->attr->cache.depth << " Data";
         break;
       case HWLOC_OBJ_CACHE_INSTRUCTION:
-        ss << "Level " << CacheObj->attr->cache.depth << " Instruction";
+        Ss << "Level " << CacheObj->attr->cache.depth << " Instruction";
         break;
       case HWLOC_OBJ_CACHE_UNIFIED:
       default:
-        ss << "Unified Level " << CacheObj->attr->cache.depth;
+        Ss << "Unified Level " << CacheObj->attr->cache.depth;
         break;
       }
 
-      ss << " Cache, " << CacheObj->attr->cache.size / 1024 << " KiB, " << CacheObj->attr->cache.linesize
+      Ss << " Cache, " << CacheObj->attr->cache.size / 1024 << " KiB, " << CacheObj->attr->cache.linesize
          << " B Cacheline, ";
 
       switch (CacheObj->attr->cache.associativity) {
       case -1:
-        ss << "full";
+        Ss << "full";
         break;
       case 0:
-        ss << "unknown";
+        Ss << "unknown";
         break;
       default:
-        ss << CacheObj->attr->cache.associativity << "-way set";
+        Ss << CacheObj->attr->cache.associativity << "-way set";
         break;
       }
 
-      ss << " associative, ";
+      Ss << " associative, ";
 
-      auto Shared = this->numThreads() / Width;
+      auto Shared = numThreads() / Width;
 
       if (Shared > 1) {
-        ss << "shared among " << Shared << " threads.";
+        Ss << "shared among " << Shared << " threads.";
       } else {
-        ss << "per thread.";
+        Ss << "per thread.";
       }
 
-      stream << ss.str();
+      Stream << Ss.str();
     }
   }
 
-  return stream;
+  return Stream;
 }
 
-CPUTopology::CPUTopology(std::string architecture)
-    : _architecture(std::move(architecture)) {
+CPUTopology::CPUTopology(std::string Architecture)
+    : Architecture(std::move(Architecture)) {
 
-  hwloc_topology_init(&this->topology);
+  hwloc_topology_init(&Topology);
 
   // do not filter icaches
-  hwloc_topology_set_cache_types_filter(this->topology, HWLOC_TYPE_FILTER_KEEP_ALL);
+  hwloc_topology_set_cache_types_filter(Topology, HWLOC_TYPE_FILTER_KEEP_ALL);
 
-  hwloc_topology_load(this->topology);
+  hwloc_topology_load(Topology);
 
   // check for hybrid processor
-  int nr_cpukinds = hwloc_cpukinds_get_nr(this->topology, 0);
+  int NrCpukinds = hwloc_cpukinds_get_nr(Topology, 0);
 
-  switch (nr_cpukinds) {
+  switch (NrCpukinds) {
   case -1:
     log::warn() << "Hybrid core check failed";
     break;
@@ -142,203 +138,206 @@ CPUTopology::CPUTopology(std::string architecture)
     log::warn() << "Hybrid core check read no information";
     break;
   default:
-    log::trace() << "Number of CPU kinds:" << nr_cpukinds;
+    log::trace() << "Number of CPU kinds:" << NrCpukinds;
   }
-  if (nr_cpukinds > 1) {
+  if (NrCpukinds > 1) {
     log::warn() << "FIRESTARTER detected a hybrid CPU set-up";
   }
   // get number of packages
-  int depth = hwloc_get_type_depth(this->topology, HWLOC_OBJ_PACKAGE);
+  int Depth = hwloc_get_type_depth(Topology, HWLOC_OBJ_PACKAGE);
 
-  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
-    this->_numPackages = 1;
+  if (Depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
+    NumPackages = 1;
     log::warn() << "Could not get number of packages";
   } else {
-    this->_numPackages = hwloc_get_nbobjs_by_depth(this->topology, depth);
+    NumPackages = hwloc_get_nbobjs_by_depth(Topology, Depth);
   }
 
-  log::trace() << "Number of Packages:" << this->_numPackages;
+  log::trace() << "Number of Packages:" << NumPackages;
   // get number of cores per package
-  depth = hwloc_get_type_depth(this->topology, HWLOC_OBJ_CORE);
+  Depth = hwloc_get_type_depth(Topology, HWLOC_OBJ_CORE);
 
-  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
-    this->_numCoresTotal = 1;
+  if (Depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
+    NumCoresTotal = 1;
     log::warn() << "Could not get number of cores";
   } else {
-    this->_numCoresTotal = hwloc_get_nbobjs_by_depth(this->topology, depth);
-    if (this->_numCoresTotal == 0) {
+    NumCoresTotal = hwloc_get_nbobjs_by_depth(Topology, Depth);
+    if (NumCoresTotal == 0) {
       log::warn() << "Could not get number of cores";
-      this->_numCoresTotal = 1;
+      NumCoresTotal = 1;
     }
   }
-  log::trace() << "Number of Cores:" << this->_numCoresTotal;
+  log::trace() << "Number of Cores:" << NumCoresTotal;
 
   // get number of threads per core
-  depth = hwloc_get_type_depth(this->topology, HWLOC_OBJ_PU);
+  Depth = hwloc_get_type_depth(Topology, HWLOC_OBJ_PU);
 
-  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
-    this->_numThreadsPerCore = 1;
+  if (Depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
+    NumThreadsPerCore = 1;
     log::warn() << "Could not get number of threads";
   } else {
-    this->_numThreadsPerCore = hwloc_get_nbobjs_by_depth(this->topology, depth) / this->_numCoresTotal;
-    if (this->_numThreadsPerCore == 0) {
+    NumThreadsPerCore = hwloc_get_nbobjs_by_depth(Topology, Depth) / NumCoresTotal;
+    if (NumThreadsPerCore == 0) {
       log::warn() << "Could not get number of threads per core";
-      this->_numThreadsPerCore = 1;
+      NumThreadsPerCore = 1;
     }
   }
 
   // get vendor, processor name and clockrate for linux
 #if defined(linux) || defined(__linux__)
-  auto procCpuinfo = this->getFileAsStream("/proc/cpuinfo");
-  std::string line;
-  std::string clockrate = "0";
-
-  while (std::getline(procCpuinfo, line, '\n')) {
-    const std::regex vendorIdRe("^vendor_id.*:\\s*(.*)\\s*$");
-    const std::regex modelNameRe("^model name.*:\\s*(.*)\\s*$");
-    const std::regex cpuMHzRe("^cpu MHz.*:\\s*(.*)\\s*$");
-    std::smatch vendorIdM;
-    std::smatch modelNameM;
-    std::smatch cpuMHzM;
-
-    if (std::regex_match(line, vendorIdM, vendorIdRe)) {
-      this->_vendor = vendorIdM[1].str();
-    }
+  {
+    auto ProcCpuinfo = getFileAsStream("/proc/cpuinfo");
+    std::string Line;
+    std::string ClockrateStr = "0";
+
+    while (std::getline(ProcCpuinfo, Line, '\n')) {
+      const std::regex VendorIdRe("^vendor_id.*:\\s*(.*)\\s*$");
+      const std::regex ModelNameRe("^model name.*:\\s*(.*)\\s*$");
+      const std::regex CpuMHzRe("^cpu MHz.*:\\s*(.*)\\s*$");
+      std::smatch VendorIdMatch;
+      std::smatch ModelNameMatch;
+      std::smatch CpuMHzMatch;
+
+      if (std::regex_match(Line, VendorIdMatch, VendorIdRe)) {
+        Vendor = VendorIdMatch[1].str();
+      }
 
-    if (std::regex_match(line, modelNameM, modelNameRe)) {
-      this->_processorName = modelNameM[1].str();
-    }
+      if (std::regex_match(Line, ModelNameMatch, ModelNameRe)) {
+        ProcessorName = ModelNameMatch[1].str();
+      }
 
-    if (std::regex_match(line, cpuMHzM, cpuMHzRe)) {
-      clockrate = cpuMHzM[1].str();
+      if (std::regex_match(Line, CpuMHzMatch, CpuMHzRe)) {
+        ClockrateStr = CpuMHzMatch[1].str();
+      }
     }
-  }
 
-  if (this->_vendor == "") {
-    log::warn() << "Could determine vendor from /proc/cpuinfo";
-  }
+    if (Vendor.empty()) {
+      log::warn() << "Could determine vendor from /proc/cpuinfo";
+    }
 
-  if (this->_processorName == "") {
-    log::warn() << "Could determine processor-name from /proc/cpuinfo";
-  }
+    if (ProcessorName.empty()) {
+      log::warn() << "Could determine processor-name from /proc/cpuinfo";
+    }
 
-  if (clockrate == "0") {
-    firestarter::log::warn() << "Can't determine clockrate from /proc/cpuinfo";
-  } else {
-    firestarter::log::trace() << "Clockrate from /proc/cpuinfo is " << clockrate;
-    this->_clockrate = 1e6 * std::stoi(clockrate);
-  }
+    if (ClockrateStr == "0") {
+      firestarter::log::warn() << "Can't determine clockrate from /proc/cpuinfo";
+    } else {
+      firestarter::log::trace() << "Clockrate from /proc/cpuinfo is " << ClockrateStr;
+      Clockrate = 1e6 * std::stoi(ClockrateStr);
+    }
 
-  auto governor = this->scalingGovernor();
-  if (!governor.empty()) {
+    auto Governor = scalingGovernor();
+    if (!Governor.empty()) {
 
-    auto scalingCurFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq").str();
-    auto cpuinfoCurFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq").str();
-    auto scalingMaxFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq").str();
-    auto cpuinfoMaxFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq").str();
+      auto ScalingCurFreq = getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq").str();
+      auto CpuinfoCurFreq = getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq").str();
+      auto ScalingMaxFreq = getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq").str();
+      auto CpuinfoMaxFreq = getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq").str();
 
-    if (governor.compare("performance") || governor.compare("powersave")) {
-      if (scalingCurFreq.empty()) {
-        if (!cpuinfoCurFreq.empty()) {
-          clockrate = cpuinfoCurFreq;
+      if (Governor.compare("performance") || Governor.compare("powersave")) {
+        if (ScalingCurFreq.empty()) {
+          if (!CpuinfoCurFreq.empty()) {
+            ClockrateStr = CpuinfoCurFreq;
+          }
+        } else {
+          ClockrateStr = ScalingCurFreq;
         }
       } else {
-        clockrate = scalingCurFreq;
-      }
-    } else {
-      if (scalingMaxFreq.empty()) {
-        if (!cpuinfoMaxFreq.empty()) {
-          clockrate = cpuinfoMaxFreq;
+        if (ScalingMaxFreq.empty()) {
+          if (!CpuinfoMaxFreq.empty()) {
+            ClockrateStr = CpuinfoMaxFreq;
+          }
+        } else {
+          ClockrateStr = ScalingMaxFreq;
         }
-      } else {
-        clockrate = scalingMaxFreq;
       }
-    }
 
-    this->_clockrate = 1e3 * std::stoi(clockrate);
+      Clockrate = 1e3 * std::stoi(ClockrateStr);
+    }
   }
 #endif
 
   // try to detect processor name for macos
 #ifdef __APPLE__
-  // use sysctl to detect the name
-  std::array<char, 128> buffer;
-  auto cmd = "sysctl -n machdep.cpu.brand_string";
-  std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(cmd, "r"), pclose);
-  if (!pipe) {
-    log::warn() << "Could not determine processor-name";
-  }
-  if (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) {
-    auto str = std::string(buffer.data());
-    str.erase(std::remove(str.begin(), str.end(), '\n'), str.end());
-    this->_processorName = str;
+  {
+    // use sysctl to detect the name
+    std::array<char, 128> Buffer{};
+    const auto* Cmd = "sysctl -n machdep.cpu.brand_string";
+    std::unique_ptr<FILE, decltype(&pclose)> Pipe(popen(Cmd, "r"), pclose);
+    if (!Pipe) {
+      log::warn() << "Could not determine processor-name";
+    }
+    if (fgets(Buffer.data(), Buffer.size(), Pipe.get()) != nullptr) {
+      auto Str = std::string(Buffer.data());
+      Str.erase(std::remove(Str.begin(), Str.end(), '\n'), Str.end());
+      ProcessorName = Str;
+    }
   }
 #endif
 
 // try to detect processor name for windows
 #ifdef _WIN32
-  // use wmic
-  std::array<char, 128> buffer;
-  auto cmd = "wmic cpu get name";
-  std::unique_ptr<FILE, decltype(&_pclose)> pipe(_popen(cmd, "r"), _pclose);
-  if (!pipe) {
-    log::warn() << "Could not determine processor-name";
-  }
-  auto line = 0;
-  while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) {
-    if (line != 1) {
-      line++;
-      continue;
+  {
+    // use wmic
+    std::array<char, 128> Buffer{};
+    const auto* Cmd = "wmic cpu get name";
+    std::unique_ptr<FILE, decltype(&_pclose)> Pipe(_popen(Cmd, "r"), _pclose);
+    if (!Pipe) {
+      log::warn() << "Could not determine processor-name";
     }
+    auto Line = 0;
+    while (fgets(Buffer.data(), Buffer.size(), Pipe.get()) != nullptr) {
+      if (Line != 1) {
+        Line++;
+        continue;
+      }
 
-    auto str = std::string(buffer.data());
-    str.erase(std::remove(str.begin(), str.end(), '\n'), str.end());
-    this->_processorName = str;
+      auto Str = std::string(Buffer.data());
+      Str.erase(std::remove(Str.begin(), Str.end(), '\n'), Str.end());
+      ProcessorName = Str;
+    }
   }
 #endif
 
   // get L1i-Cache size
-  int width = hwloc_get_nbobjs_by_type(this->topology, HWLOC_OBJ_L1ICACHE);
+  int Width = hwloc_get_nbobjs_by_type(Topology, HWLOC_OBJ_L1ICACHE);
 
-  if (width >= 1) {
-    hwloc_obj_t cacheObj = hwloc_get_obj_by_type(this->topology, HWLOC_OBJ_L1ICACHE, 0);
-    this->_instructionCacheSize = cacheObj->attr->cache.size;
+  if (Width >= 1) {
+    hwloc_obj_t CacheObj = hwloc_get_obj_by_type(Topology, HWLOC_OBJ_L1ICACHE, 0);
+    InstructionCacheSize = CacheObj->attr->cache.size;
   }
 }
 
-CPUTopology::~CPUTopology() { hwloc_topology_destroy(this->topology); }
+CPUTopology::~CPUTopology() { hwloc_topology_destroy(Topology); }
 
-std::stringstream CPUTopology::getFileAsStream(std::string const& filePath) {
-  std::ifstream file(filePath);
-  std::stringstream ss;
+auto CPUTopology::getFileAsStream(std::string const& FilePath) -> std::stringstream {
+  std::ifstream File(FilePath);
+  std::stringstream Ss;
 
-  if (!file.is_open()) {
-    log::trace() << "Could not open " << filePath;
+  if (!File.is_open()) {
+    log::trace() << "Could not open " << FilePath;
   } else {
-    ss << file.rdbuf();
-    file.close();
+    Ss << File.rdbuf();
+    File.close();
   }
 
-  return ss;
+  return Ss;
 }
 
-std::string CPUTopology::scalingGovernor() const {
-  return this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor").str();
+auto CPUTopology::scalingGovernor() -> std::string {
+  return getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor").str();
 }
 
-int CPUTopology::getCoreIdFromPU(unsigned pu) const {
-  int width;
-  hwloc_obj_t obj;
-
-  width = hwloc_get_nbobjs_by_type(this->topology, HWLOC_OBJ_PU);
+auto CPUTopology::getCoreIdFromPU(unsigned Pu) const -> int {
+  auto Width = hwloc_get_nbobjs_by_type(Topology, HWLOC_OBJ_PU);
 
-  if (width >= 1) {
-    for (int i = 0; i < width; i++) {
-      obj = hwloc_get_obj_by_type(this->topology, HWLOC_OBJ_PU, i);
-      if (obj->os_index == pu) {
-        for (; obj; obj = obj->parent) {
-          if (obj->type == HWLOC_OBJ_CORE) {
-            return obj->logical_index;
+  if (Width >= 1) {
+    for (int I = 0; I < Width; I++) {
+      auto* Obj = hwloc_get_obj_by_type(Topology, HWLOC_OBJ_PU, I);
+      if (Obj->os_index == Pu) {
+        for (; Obj; Obj = Obj->parent) {
+          if (Obj->type == HWLOC_OBJ_CORE) {
+            return Obj->logical_index;
           }
         }
       }
@@ -348,19 +347,16 @@ int CPUTopology::getCoreIdFromPU(unsigned pu) const {
   return -1;
 }
 
-int CPUTopology::getPkgIdFromPU(unsigned pu) const {
-  int width;
-  hwloc_obj_t obj;
+auto CPUTopology::getPkgIdFromPU(unsigned Pu) const -> int {
+  auto Width = hwloc_get_nbobjs_by_type(Topology, HWLOC_OBJ_PU);
 
-  width = hwloc_get_nbobjs_by_type(this->topology, HWLOC_OBJ_PU);
-
-  if (width >= 1) {
-    for (int i = 0; i < width; i++) {
-      obj = hwloc_get_obj_by_type(this->topology, HWLOC_OBJ_PU, i);
-      if (obj->os_index == pu) {
-        for (; obj; obj = obj->parent) {
-          if (obj->type == HWLOC_OBJ_PACKAGE) {
-            return obj->logical_index;
+  if (Width >= 1) {
+    for (int I = 0; I < Width; I++) {
+      auto* Obj = hwloc_get_obj_by_type(Topology, HWLOC_OBJ_PU, I);
+      if (Obj->os_index == Pu) {
+        for (; Obj; Obj = Obj->parent) {
+          if (Obj->type == HWLOC_OBJ_PACKAGE) {
+            return Obj->logical_index;
           }
         }
       }
@@ -370,46 +366,45 @@ int CPUTopology::getPkgIdFromPU(unsigned pu) const {
   return -1;
 }
 
-unsigned CPUTopology::maxNumThreads() const {
-  unsigned max = 0;
+auto CPUTopology::maxNumThreads() const -> unsigned {
+  unsigned Max = 0;
 
   // There might be more then one kind of cores
-  int nr_cpukinds = hwloc_cpukinds_get_nr(this->topology, 0);
+  int NrCpukinds = hwloc_cpukinds_get_nr(Topology, 0);
 
   // fallback in case this did not work ... can happen on some platforms
   // already printed a warning earlier
-  if (nr_cpukinds < 1) {
-    hwloc_obj_t obj;
-    int width = hwloc_get_nbobjs_by_type(this->topology, HWLOC_OBJ_PU);
-    unsigned max = 0;
-
-    for (int i = 0; i < width; i++) {
-      obj = hwloc_get_obj_by_type(this->topology, HWLOC_OBJ_PU, i);
-      max = max < obj->os_index ? obj->os_index : max;
+  if (NrCpukinds < 1) {
+    auto Width = hwloc_get_nbobjs_by_type(Topology, HWLOC_OBJ_PU);
+    unsigned Max = 0;
+
+    for (int I = 0; I < Width; I++) {
+      auto* Obj = hwloc_get_obj_by_type(Topology, HWLOC_OBJ_PU, I);
+      Max = std::max(Max, Obj->os_index);
     }
 
-    return max + 1;
+    return Max + 1;
   }
 
   // Allocate bitmap to get CPUs later
-  hwloc_bitmap_t bitmap = hwloc_bitmap_alloc();
-  if (bitmap == NULL) {
+  hwloc_bitmap_t Bitmap = hwloc_bitmap_alloc();
+  if (Bitmap == nullptr) {
     log::error() << "Could not allocate memory for CPU bitmap";
     return 1;
   }
 
   // Find CPUs per kind
-  for (int kind_index = 0; kind_index < nr_cpukinds; kind_index++) {
-    int result = hwloc_cpukinds_get_info(this->topology, kind_index, bitmap, NULL, NULL, NULL, 0);
-    if (result) {
-      log::warn() << "Could not get information for CPU kind " << kind_index;
+  for (int KindIndex = 0; KindIndex < NrCpukinds; KindIndex++) {
+    int Result = hwloc_cpukinds_get_info(Topology, KindIndex, Bitmap, nullptr, nullptr, nullptr, 0);
+    if (Result) {
+      log::warn() << "Could not get information for CPU kind " << KindIndex;
     }
-    max += hwloc_bitmap_weight(bitmap);
+    Max += hwloc_bitmap_weight(Bitmap);
   }
 
-  hwloc_bitmap_free(bitmap);
+  hwloc_bitmap_free(Bitmap);
 
-  return max;
+  return Max;
 }
 
 }; // namespace firestarter::environment
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index 8e29715f..2325ed04 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -103,7 +103,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   auto shift_reg32 = std::vector<Gp>({edi, esi, edx});
   auto nr_shift_regs = 3;
   auto mul_regs = 3;
-  auto add_regs = 24;
+  auto add_regs = 22;
   auto alt_dst_regs = 5;
   auto ram_reg = zmm30;
 
@@ -123,7 +123,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   }
   // make all other used registers dirty except RAX
   frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg,
-                     offset_reg, addrHigh_reg, iter_reg, ram_addr);
+                     temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr);
   for (const auto& reg : shift_reg) {
     frame.addDirtyRegs(reg);
   }
@@ -190,7 +190,6 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   bool left = false;
   auto add_dest = add_start + 1;
   auto mov_dst = trans_start;
-  auto mov_src = mov_dst + 1;
   unsigned l1_offset = 0;
 
 #define L1_INCREMENT()                                                                                                 \
@@ -292,10 +291,6 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
       if (mov_dst > trans_end) {
         mov_dst = trans_start;
       }
-      mov_src++;
-      if (mov_src > trans_end) {
-        mov_src = trans_start;
-      }
       shift_pos++;
       if (shift_pos == nr_shift_regs) {
         shift_pos = 0;
diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index 73175bd5..93458d25 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -19,6 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <cassert>
 #include <chrono>
 #include <thread>
 #include <type_traits>
diff --git a/src/firestarter/Environment/X86/X86CPUTopology.cpp b/src/firestarter/Environment/X86/X86CPUTopology.cpp
index dae61165..283e7f61 100644
--- a/src/firestarter/Environment/X86/X86CPUTopology.cpp
+++ b/src/firestarter/Environment/X86/X86CPUTopology.cpp
@@ -31,99 +31,101 @@
 #pragma intrinsic(__rdtsc)
 #endif
 
-using namespace firestarter::environment::x86;
+namespace firestarter::environment::x86 {
 
 X86CPUTopology::X86CPUTopology()
     : CPUTopology("x86_64")
-    , CpuInfo(asmjit::CpuInfo::host())
-    , Vendor(this->CpuInfo.vendor()) {
+    , CpuInfo(asmjit::CpuInfo::host()) {
 
-  std::stringstream ss;
-  ss << "Family " << this->familyId() << ", Model " << this->modelId() << ", Stepping " << this->stepping();
-  this->Model = ss.str();
+  Vendor = CpuInfo.vendor();
 
-  for (int i = 0; i <= (int)asmjit::CpuFeatures::X86::Id::kMaxValue; i++) {
-    if (!this->CpuInfo.hasFeature(i)) {
+  {
+    std::stringstream Ss;
+    Ss << "Family " << familyId() << ", Model " << modelId() << ", Stepping " << stepping();
+    Model = Ss.str();
+  }
+
+  for (auto FeatureId = 0; FeatureId <= asmjit::CpuFeatures::X86::Id::kMaxValue; FeatureId++) {
+    if (!CpuInfo.hasFeature(FeatureId)) {
       continue;
     }
 
-    asmjit::String sb;
+    asmjit::String Sb;
 
-    auto error = asmjit::Formatter::formatFeature(sb, this->CpuInfo.arch(), i);
-    if (error != asmjit::ErrorCode::kErrorOk) {
-      log::warn() << "Formatting cpu features got asmjit error: " << error;
+    auto Error = asmjit::Formatter::formatFeature(Sb, CpuInfo.arch(), FeatureId);
+    if (Error != asmjit::ErrorCode::kErrorOk) {
+      log::warn() << "Formatting cpu features got asmjit error: " << Error;
     }
 
-    this->FeatureList.push_back(std::string(sb.data()));
+    FeatureList.emplace_back(Sb.data());
   }
 
-  uint64_t a = 0, b = 0, c = 0, d = 0;
+  uint64_t Rax = 0;
+  uint64_t Rbx = 0;
+  uint64_t Rcx = 0;
+  uint64_t Rdx = 0;
 
   // check if we have rdtsc
-  this->cpuid(&a, &b, &c, &d);
-  if (a >= 1) {
-    a = 1;
-    this->cpuid(&a, &b, &c, &d);
-    if ((int)d & (1 << 4)) {
-      this->HasRdtsc = true;
-    } else {
-      this->HasRdtsc = false;
-    }
+  cpuid(&Rax, &Rbx, &Rcx, &Rdx);
+  if (Rax >= 1) {
+    Rax = 1;
+    cpuid(&Rax, &Rbx, &Rcx, &Rdx);
+    HasRdtsc = (Rdx & (1 << 4)) != 0;
   }
 
   // check if we have invarant rdtsc
-  if (this->hasRdtsc()) {
-    a = 0, b = 0, c = 0, d = 0;
+  if (hasRdtsc()) {
+    Rax = 0, Rbx = 0, Rcx = 0, Rdx = 0;
 
-    this->HasInvariantRdtsc = true;
+    HasInvariantRdtsc = true;
 
     /* TSCs are usable if CPU supports only one frequency in C0 (no
        speedstep/Cool'n'Quite)
        or if multiple frequencies are available and the constant/invariant TSC
        feature flag is set */
 
-    if (0 == this->vendor().compare("INTEL")) {
+    if ("INTEL" == vendor()) {
       /*check if Powermanagement and invariant TSC are supported*/
-      a = 1;
-      this->cpuid(&a, &b, &c, &d);
+      Rax = 1;
+      cpuid(&Rax, &Rbx, &Rcx, &Rdx);
       /* no Frequency control */
-      if ((!(d & (1 << 22))) && (!(c & (1 << 7)))) {
-        this->HasInvariantRdtsc = true;
+      if ((!(Rdx & (1 << 22))) && (!(Rcx & (1 << 7)))) {
+        HasInvariantRdtsc = true;
       } else {
-        a = 0x80000000;
-        this->cpuid(&a, &b, &c, &d);
-        if (a >= 0x80000007) {
-          a = 0x80000007;
-          this->cpuid(&a, &b, &c, &d);
+        Rax = 0x80000000;
+        cpuid(&Rax, &Rbx, &Rcx, &Rdx);
+        if (Rax >= 0x80000007) {
+          Rax = 0x80000007;
+          cpuid(&Rax, &Rbx, &Rcx, &Rdx);
           /* invariant TSC */
-          if (d & (1 << 8)) {
-            this->HasInvariantRdtsc = true;
+          if (Rdx & (1 << 8)) {
+            HasInvariantRdtsc = true;
           }
         }
       }
     }
 
-    if (0 == this->vendor().compare("AMD")) {
+    if ("AMD" == vendor()) {
       /*check if Powermanagement and invariant TSC are supported*/
-      a = 0x80000000;
-      this->cpuid(&a, &b, &c, &d);
-      if (a >= 0x80000007) {
-        a = 0x80000007;
-        this->cpuid(&a, &b, &c, &d);
+      Rax = 0x80000000;
+      cpuid(&Rax, &Rbx, &Rcx, &Rdx);
+      if (Rax >= 0x80000007) {
+        Rax = 0x80000007;
+        cpuid(&Rax, &Rbx, &Rcx, &Rdx);
 
         /* no Frequency control */
-        if ((!(d & (1 << 7))) && (!(d & (1 << 1)))) {
-          this->HasInvariantRdtsc = true;
+        if ((!(Rdx & (1 << 7))) && (!(Rdx & (1 << 1)))) {
+          HasInvariantRdtsc = true;
         }
         /* invariant TSC */
-        if (d & (1 << 8)) {
-          this->HasInvariantRdtsc = true;
+        if (Rdx & (1 << 8)) {
+          HasInvariantRdtsc = true;
         }
       }
       /* assuming no frequency control if cpuid does not provide the extended
          function to test for it */
       else {
-        this->HasInvariantRdtsc = true;
+        HasInvariantRdtsc = true;
       }
     }
   }
@@ -133,118 +135,122 @@ X86CPUTopology::X86CPUTopology()
 // only constant TSCs will be used (i.e. power management indepent TSCs)
 // save frequency in highest P-State or use generic fallback if no invarient TSC
 // is available
-uint64_t X86CPUTopology::clockrate() const {
-  typedef std::chrono::high_resolution_clock Clock;
-  typedef std::chrono::microseconds ticks;
+auto X86CPUTopology::clockrate() const -> uint64_t {
+  using ClockT = std::chrono::high_resolution_clock;
+  using TicksT = std::chrono::microseconds;
 
-  uint64_t start1_tsc, start2_tsc, end1_tsc, end2_tsc;
-  uint64_t time_diff;
-  uint64_t clock_lower_bound, clock_upper_bound, clock;
-  uint64_t clockrate = 0;
-  int i, num_measurements = 0, min_measurements;
+  uint64_t TimeDiff = 0;
+  uint64_t Clockrate = 0;
+  int NumMeasurements = 0;
+  int MinMeasurements = 0;
 
-  Clock::time_point start_time, end_time;
+  ClockT::time_point StartTime;
+  ClockT::time_point EndTime;
 
 #if not(defined(__APPLE__) || defined(_WIN32))
-  auto governor = this->scalingGovernor();
+  auto governor = scalingGovernor();
   if (governor.empty()) {
     return CPUTopology::clockrate();
   }
 
   /* non invariant TSCs can be used if CPUs run at fixed frequency */
-  if (!this->hasInvariantRdtsc() && governor.compare("performance") && governor.compare("powersave")) {
+  if (!hasInvariantRdtsc() && governor.compare("performance") && governor.compare("powersave")) {
     return CPUTopology::clockrate();
   }
 
-  min_measurements = 5;
+  MinMeasurements = 5;
 #else
   min_measurements = 20;
 #endif
 
-  i = 3;
+  int I = 3;
 
   do {
+    uint64_t End1Tsc = 0;
+    uint64_t End2Tsc = 0;
+
     // start timestamp
-    start1_tsc = this->timestamp();
-    start_time = Clock::now();
-    start2_tsc = this->timestamp();
+    uint64_t Start1Tsc = timestamp();
+    StartTime = ClockT::now();
+    uint64_t Start2Tsc = timestamp();
 
     // waiting
     do {
-      end1_tsc = this->timestamp();
-    } while (end1_tsc < start2_tsc + 1000000 * i); /* busy waiting */
+      End1Tsc = timestamp();
+    } while (End1Tsc < Start2Tsc + 1000000 * I); /* busy waiting */
 
     // end timestamp
     do {
-      end1_tsc = this->timestamp();
-      end_time = Clock::now();
-      end2_tsc = this->timestamp();
+      End1Tsc = timestamp();
+      EndTime = ClockT::now();
+      End2Tsc = timestamp();
 
-      time_diff = std::chrono::duration_cast<ticks>(end_time - start_time).count();
-    } while (0 == time_diff);
+      TimeDiff = std::chrono::duration_cast<TicksT>(EndTime - StartTime).count();
+    } while (0 == TimeDiff);
 
-    clock_lower_bound = (((end1_tsc - start2_tsc) * 1000000) / (time_diff));
-    clock_upper_bound = (((end2_tsc - start1_tsc) * 1000000) / (time_diff));
+    uint64_t ClockLowerBound = (((End1Tsc - Start2Tsc) * 1000000) / (TimeDiff));
+    uint64_t ClockUpperBound = (((End2Tsc - Start1Tsc) * 1000000) / (TimeDiff));
 
     // if both values differ significantly, the measurement could have been
     // interrupted between 2 rdtsc's
-    if (((double)clock_lower_bound > (((double)clock_upper_bound) * 0.999)) && ((time_diff) > 2000)) {
-      num_measurements++;
-      clock = (clock_lower_bound + clock_upper_bound) / 2;
-      if (clockrate == 0)
-        clockrate = clock;
+    if ((static_cast<double>(ClockLowerBound) > ((static_cast<double>(ClockUpperBound)) * 0.999)) &&
+        ((TimeDiff) > 2000)) {
+      NumMeasurements++;
+      uint64_t Clock = (ClockLowerBound + ClockUpperBound) / 2;
+      bool ClockrateUpdateCondition = Clockrate == 0 ||
 #ifndef _WIN32
-      else if (clock < clockrate)
-        clockrate = clock;
+                                      Clock < Clockrate;
 #else
-      else if (clock > clockrate)
-        clockrate = clock;
+                                      Clock > Clockrate;
 #endif
+      if (ClockrateUpdateCondition) {
+        Clockrate = Clock;
+      }
     }
-    i += 2;
-  } while (((time_diff) < 10000) || (num_measurements < min_measurements));
+    I += 2;
+  } while (((TimeDiff) < 10000) || (NumMeasurements < MinMeasurements));
 
-  return clockrate;
+  return Clockrate;
 }
 
-uint64_t X86CPUTopology::timestamp() const {
-#ifndef _MSC_VER
-  uint64_t reg_a, reg_d;
-#else
-  uint64_t i;
-#endif
-
-  if (!this->hasRdtsc()) {
+auto X86CPUTopology::timestamp() const -> uint64_t {
+  if (!hasRdtsc()) {
     return 0;
   }
 
 #ifndef _MSC_VER
-  __asm__ __volatile__("rdtsc;" : "=a"(reg_a), "=d"(reg_d));
-  return (reg_d << 32) | (reg_a & 0xffffffffULL);
+  uint64_t Rax = 0;
+  uint64_t Rdx = 0;
+  __asm__ __volatile__("rdtsc;" : "=a"(Rax), "=d"(Rdx));
+  return (Rdx << 32) | (Rax & 0xffffffffULL);
 #else
-  i = __rdtsc();
-  return i;
+  return __rdtsc();
 #endif
 }
 
-void X86CPUTopology::cpuid(uint64_t* a, uint64_t* b, uint64_t* c, uint64_t* d) const {
+void X86CPUTopology::cpuid(uint64_t* Rax, uint64_t* Rbx, uint64_t* Rcx, uint64_t* Rdx) {
 #ifndef _MSC_VER
-  uint64_t reg_a, reg_b, reg_c, reg_d;
+  uint64_t RaxOut = 0;
+  uint64_t RbxOut = 0;
+  uint64_t RcxOut = 0;
+  uint64_t RdxOut = 0;
   __asm__ __volatile__("cpuid;"
-                       : "=a"(reg_a), "=b"(reg_b), "=c"(reg_c), "=d"(reg_d)
-                       : "a"(*a), "b"(*b), "c"(*c), "d"(*d));
-  *a = reg_a;
-  *b = reg_b;
-  *c = reg_c;
-  *d = reg_d;
+                       : "=a"(RaxOut), "=b"(RbxOut), "=c"(RcxOut), "=d"(RdxOut)
+                       : "a"(*Rax), "b"(*Rbx), "c"(*Rcx), "d"(*Rdx));
+  *Rax = RaxOut;
+  *Rbx = RbxOut;
+  *Rcx = RcxOut;
+  *Rdx = RdxOut;
 #else
   std::array<int, 4> cpuid;
 
-  __cpuidex(cpuid.data(), *a, *c);
+  __cpuidex(cpuid.data(), *Rax, *Rcx);
 
-  *a = cpuid[0];
-  *b = cpuid[1];
-  *c = cpuid[2];
-  *d = cpuid[3];
+  *Rax = cpuid[0];
+  *Rbx = cpuid[1];
+  *Rcx = cpuid[2];
+  *Rdx = cpuid[3];
 #endif
 }
+
+} // namespace firestarter::environment::x86
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index 508b01c6..2c2dabb0 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -26,50 +26,50 @@
 #include <cstdio>
 #include <regex>
 
-using namespace firestarter::environment::x86;
+namespace firestarter::environment::x86 {
 
 void X86Environment::evaluateFunctions() {
-  for (auto ctor : this->PlatformConfigsCtor) {
+  for (const auto& Ctor : PlatformConfigsCtor) {
     // add asmjit for model and family detection
-    this->PlatformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(),
-                                         this->topology().modelId(), this->topology().numThreadsPerCore()));
+    PlatformConfigs.emplace_back(
+        Ctor(topology().featuresAsmjit(), topology().familyId(), topology().modelId(), topology().numThreadsPerCore()));
   }
 
-  for (auto ctor : this->FallbackPlatformConfigsCtor) {
-    this->FallbackPlatformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(),
-                                                 this->topology().modelId(), this->topology().numThreadsPerCore()));
+  for (const auto& Ctor : FallbackPlatformConfigsCtor) {
+    FallbackPlatformConfigs.emplace_back(
+        Ctor(topology().featuresAsmjit(), topology().familyId(), topology().modelId(), topology().numThreadsPerCore()));
   }
 }
 
-int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePayload) {
+auto X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) -> int {
   unsigned id = 1;
   std::string defaultPayloadName("");
 
   // if functionId is 0 get the default or fallback
-  for (auto config : this->PlatformConfigs) {
-    for (auto const& [thread, functionName] : config->getThreadMap()) {
+  for (const auto& Config : PlatformConfigs) {
+    for (auto const& [thread, functionName] : Config->getThreadMap()) {
       // the selected function
-      if (id == functionId) {
-        if (!config->isAvailable()) {
-          log::error() << "Function " << functionId << " (\"" << functionName << "\") requires "
-                       << config->payload().name() << ", which is not supported by the processor.";
-          if (!allowUnavailablePayload) {
+      if (id == FunctionId) {
+        if (!Config->isAvailable()) {
+          log::error() << "Function " << FunctionId << " (\"" << functionName << "\") requires "
+                       << Config->payload().name() << ", which is not supported by the processor.";
+          if (!AllowUnavailablePayload) {
             return EXIT_FAILURE;
           }
         }
         // found function
-        this->SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(
-            *config, thread, this->topology().instructionCacheSize());
+        SelectedConfig =
+            new ::firestarter::environment::platform::RuntimeConfig(*Config, thread, topology().instructionCacheSize());
         return EXIT_SUCCESS;
       }
       // default function
-      if (0 == functionId && config->isDefault()) {
-        if (thread == this->topology().numThreadsPerCore()) {
-          this->SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(
-              *config, thread, this->topology().instructionCacheSize());
+      if (0 == FunctionId && Config->isDefault()) {
+        if (thread == topology().numThreadsPerCore()) {
+          SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(*Config, thread,
+                                                                                   topology().instructionCacheSize());
           return EXIT_SUCCESS;
         } else {
-          defaultPayloadName = config->payload().name();
+          defaultPayloadName = Config->payload().name();
         }
       }
       id++;
@@ -78,35 +78,35 @@ int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePay
 
   // no default found
   // use fallback
-  if (0 == functionId) {
+  if (0 == FunctionId) {
     if (!defaultPayloadName.empty()) {
       // default payload available, but number of threads per core is not
       // supported
-      log::warn() << "No " << defaultPayloadName << " code path for " << this->topology().numThreadsPerCore()
+      log::warn() << "No " << defaultPayloadName << " code path for " << topology().numThreadsPerCore()
                   << " threads per core!";
     }
-    log::warn() << this->topology().vendor() << " " << this->topology().model()
+    log::warn() << topology().vendor() << " " << topology().model()
                 << " is not supported by this version of FIRESTARTER!\n"
                 << "Check project website for updates.";
 
     // loop over available implementation and check if they are marked as
     // fallback
-    for (auto config : this->FallbackPlatformConfigs) {
-      if (config->isAvailable()) {
+    for (const auto& Config : FallbackPlatformConfigs) {
+      if (Config->isAvailable()) {
         auto selectedThread = 0;
         auto selectedFunctionName = std::string("");
-        for (auto const& [thread, functionName] : config->getThreadMap()) {
-          if (thread == this->topology().numThreadsPerCore()) {
+        for (auto const& [thread, functionName] : Config->getThreadMap()) {
+          if (thread == topology().numThreadsPerCore()) {
             selectedThread = thread;
             selectedFunctionName = functionName;
           }
         }
         if (selectedThread == 0) {
-          selectedThread = config->getThreadMap().begin()->first;
-          selectedFunctionName = config->getThreadMap().begin()->second;
+          selectedThread = Config->getThreadMap().begin()->first;
+          selectedFunctionName = Config->getThreadMap().begin()->second;
         }
-        this->SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(
-            *config, selectedThread, this->topology().instructionCacheSize());
+        SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(*Config, selectedThread,
+                                                                                 topology().instructionCacheSize());
         log::warn() << "Using function " << selectedFunctionName << " as fallback.\n"
                     << "You can use the parameter --function to try other "
                        "functions.";
@@ -120,14 +120,14 @@ int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePay
     return EXIT_FAILURE;
   }
 
-  log::error() << "unknown function id: " << functionId << ", see --avail for available ids";
+  log::error() << "unknown function id: " << FunctionId << ", see --avail for available ids";
   return EXIT_FAILURE;
 }
 
 int X86Environment::selectInstructionGroups(std::string groups) {
   const std::string delimiter = ",";
   const std::regex re("^(\\w+):(\\d+)$");
-  const auto availableInstructionGroups = this->selectedConfig().platformConfig().payload().getAvailableInstructions();
+  const auto availableInstructionGroups = selectedConfig().platformConfig().payload().getAvailableInstructions();
 
   std::stringstream ss(groups);
   std::vector<std::pair<std::string, unsigned>> payloadSettings = {};
@@ -161,7 +161,7 @@ int X86Environment::selectInstructionGroups(std::string groups) {
     }
   }
 
-  this->selectedConfig().setPayloadSettings(payloadSettings);
+  selectedConfig().setPayloadSettings(payloadSettings);
 
   log::info() << "  Running custom instruction group: " << groups;
 
@@ -171,7 +171,7 @@ int X86Environment::selectInstructionGroups(std::string groups) {
 void X86Environment::printAvailableInstructionGroups() {
   std::stringstream ss;
 
-  for (auto const& item : this->selectedConfig().platformConfig().payload().getAvailableInstructions()) {
+  for (auto const& item : selectedConfig().platformConfig().payload().getAvailableInstructions()) {
     ss << item << ",";
   }
 
@@ -180,14 +180,14 @@ void X86Environment::printAvailableInstructionGroups() {
     s.pop_back();
   }
 
-  log::info() << " available instruction-groups for payload "
-              << this->selectedConfig().platformConfig().payload().name() << ":\n"
+  log::info() << " available instruction-groups for payload " << selectedConfig().platformConfig().payload().name()
+              << ":\n"
               << "  " << s;
 }
 
-void X86Environment::setLineCount(unsigned lineCount) { this->selectedConfig().setLineCount(lineCount); }
+void X86Environment::setLineCount(unsigned lineCount) { selectedConfig().setLineCount(lineCount); }
 
-void X86Environment::printSelectedCodePathSummary() { this->selectedConfig().printCodePathSummary(); }
+void X86Environment::printSelectedCodePathSummary() { selectedConfig().printCodePathSummary(); }
 
 void X86Environment::printFunctionSummary() {
   log::info() << " available load-functions:\n"
@@ -200,7 +200,7 @@ void X86Environment::printFunctionSummary() {
 
   unsigned id = 1;
 
-  for (auto const& config : this->PlatformConfigs) {
+  for (auto const& config : PlatformConfigs) {
     for (auto const& [thread, functionName] : config->getThreadMap()) {
       const char* available = config->isAvailable() ? "yes" : "no";
       const char* fmt = "  %4u | %-30s | %-24s | %s";
@@ -214,3 +214,5 @@ void X86Environment::printFunctionSummary() {
     }
   }
 }
+
+} // namespace firestarter::environment::x86
\ No newline at end of file
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index ed925cf1..c5a998c5 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -22,6 +22,7 @@
 #include <firestarter/ErrorDetectionStruct.hpp>
 #include <firestarter/Firestarter.hpp>
 #include <firestarter/Logging/Log.hpp>
+#include <iomanip>
 
 #if defined(linux) || defined(__linux__)
 extern "C" {
@@ -95,7 +96,7 @@ int Firestarter::initLoadWorkers(bool lowLoad, uint64_t period) {
 
     if (i == 0) {
       // only show error for all worker threads except first.
-      firestarter::logging::FirstWorkerThreadFilter<firestarter::logging::record>::setFirstThread(t.get_id());
+      firestarter::logging::FirstWorkerThreadFilter<firestarter::logging::Record>::setFirstThread(t.get_id());
     }
 
     this->LoadThreads.push_back(std::make_pair(std::move(t), td));
@@ -383,6 +384,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
       break;
     case THREAD_STOP:
     default:
+      firestarter::log::debug() << "ERR" << '\n';
       return;
     }
   }
diff --git a/src/firestarter/Main.cpp b/src/firestarter/Main.cpp
index 62bcc426..51b53177 100644
--- a/src/firestarter/Main.cpp
+++ b/src/firestarter/Main.cpp
@@ -263,13 +263,13 @@ Config::Config(int argc, const char** argv) {
     auto options = parser.parse(argc, argv);
 
     if (options.count("quiet")) {
-      firestarter::logging::filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::warn);
+      firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::warn);
     } else if (options.count("report")) {
-      firestarter::logging::filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::debug);
+      firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::debug);
     } else if (options.count("debug")) {
-      firestarter::logging::filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::trace);
+      firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::trace);
     } else {
-      firestarter::logging::filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::info);
+      firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::info);
     }
 
     if (options.count("version")) {
diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp
index 36405051..0c880bbb 100644
--- a/src/firestarter/Measurement/MeasurementWorker.cpp
+++ b/src/firestarter/Measurement/MeasurementWorker.cpp
@@ -19,8 +19,10 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <firestarter/Logging/Log.hpp>
 #include <firestarter/Measurement/MeasurementWorker.hpp>
 
+#include <iostream>
 #include <queue>
 #include <thread>
 
diff --git a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
index 8b9a7b02..e6a703bb 100644
--- a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
+++ b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
@@ -21,11 +21,13 @@
 
 // This file borrows a lot of code from https://github.com/esa/pagmo2
 
+#include <firestarter/Logging/Log.hpp>
 #include <firestarter/Optimizer/Algorithm/NSGA2.hpp>
 #include <firestarter/Optimizer/Individual.hpp>
 #include <firestarter/Optimizer/Util/MultiObjective.hpp>
 
 #include <algorithm>
+#include <iomanip>
 #include <stdexcept>
 
 using namespace firestarter::optimizer::algorithm;
diff --git a/src/firestarter/Optimizer/Population.cpp b/src/firestarter/Optimizer/Population.cpp
index 35c5ef04..e136fda6 100644
--- a/src/firestarter/Optimizer/Population.cpp
+++ b/src/firestarter/Optimizer/Population.cpp
@@ -20,11 +20,11 @@
  *****************************************************************************/
 
 #include <firestarter/Logging/Log.hpp>
+#include <firestarter/Optimizer/History.hpp>
 #include <firestarter/Optimizer/Population.hpp>
 
 #include <algorithm>
 #include <cassert>
-#include <stdexcept>
 
 using namespace firestarter::optimizer;
 

From c33291f000ee4774cba315864b09fb1442ad7b87 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 27 Sep 2024 16:38:56 +0200
Subject: [PATCH 014/167] clang-tidy workflow: add .clang-tidy file location

---
 .github/workflows/clang-tidy.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml
index 562b5079..6cfe98ec 100644
--- a/.github/workflows/clang-tidy.yml
+++ b/.github/workflows/clang-tidy.yml
@@ -13,6 +13,7 @@ jobs:
       id: review
       with:
         split_workflow: true
+        config_file: '${{ github.workspace }}/.clang-tidy'
 
     - uses: ZedThree/clang-tidy-review/upload@v0.14.0
       id: upload-review

From a3b01b16692fe5ecb12bf2d895cc61030bb34757 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 27 Sep 2024 16:42:44 +0200
Subject: [PATCH 015/167] clang-tidy workflow: update .clang-tidy file location

---
 .github/workflows/clang-tidy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml
index 6cfe98ec..bc5520eb 100644
--- a/.github/workflows/clang-tidy.yml
+++ b/.github/workflows/clang-tidy.yml
@@ -13,7 +13,7 @@ jobs:
       id: review
       with:
         split_workflow: true
-        config_file: '${{ github.workspace }}/.clang-tidy'
+        config_file: '.clang-tidy'
 
     - uses: ZedThree/clang-tidy-review/upload@v0.14.0
       id: upload-review

From d53a1077679acd8a0b2ea5810034c6f70527424f Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 27 Sep 2024 18:11:23 +0200
Subject: [PATCH 016/167] clang-tidy workflow: add comment and run build before
 clang-tidy

---
 .github/workflows/clang-tidy.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml
index bc5520eb..325023ea 100644
--- a/.github/workflows/clang-tidy.yml
+++ b/.github/workflows/clang-tidy.yml
@@ -9,11 +9,15 @@ jobs:
     steps:
     - uses: actions/checkout@v4
 
+    # Ideally we would want to run the clang-tidy for every kind of build.
+    # This would make shure that we will check all platform dependent code parts.
+    # Here we only test the standard linux build.
     - uses: ZedThree/clang-tidy-review@v0.14.0
       id: review
       with:
         split_workflow: true
         config_file: '.clang-tidy'
+        cmake_command: 'cmake . && make -j2'
 
     - uses: ZedThree/clang-tidy-review/upload@v0.14.0
       id: upload-review

From fafd720812071d2ab94111e8620dae6381457515 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 27 Sep 2024 18:16:49 +0200
Subject: [PATCH 017/167] clang-tidy workflow: update fetch depth

---
 .github/workflows/clang-tidy.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml
index 325023ea..06bd903f 100644
--- a/.github/workflows/clang-tidy.yml
+++ b/.github/workflows/clang-tidy.yml
@@ -8,6 +8,8 @@ jobs:
 
     steps:
     - uses: actions/checkout@v4
+      with:
+        fetch-depth: '0'
 
     # Ideally we would want to run the clang-tidy for every kind of build.
     # This would make shure that we will check all platform dependent code parts.

From 6e4c880454baa73586273b82a6b407030b72b6e3 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 27 Sep 2024 18:36:54 +0200
Subject: [PATCH 018/167] clang-tidy workflow: clone with submodules

---
 .github/workflows/clang-tidy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml
index 06bd903f..a697042a 100644
--- a/.github/workflows/clang-tidy.yml
+++ b/.github/workflows/clang-tidy.yml
@@ -9,7 +9,7 @@ jobs:
     steps:
     - uses: actions/checkout@v4
       with:
-        fetch-depth: '0'
+        submodules: 'true'
 
     # Ideally we would want to run the clang-tidy for every kind of build.
     # This would make shure that we will check all platform dependent code parts.

From 770b405f52056a6dbc17b53d35db15be7afdf965 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Wed, 2 Oct 2024 19:55:57 +0200
Subject: [PATCH 019/167] first pass of clang-tidy for the source files

---
 .../Environment/X86/Payload/X86Payload.hpp    | 368 +++++++++-
 include/firestarter/Firestarter.hpp           |  23 +-
 .../Measurement/MeasurementWorker.hpp         |   4 +-
 .../Measurement/Metric/IPCEstimate.h          |   9 +-
 include/firestarter/Measurement/Metric/Perf.h |  11 +-
 include/firestarter/Measurement/Metric/RAPL.h |   9 +-
 .../firestarter/Optimizer/Algorithm/NSGA2.hpp |   6 +-
 .../firestarter/Optimizer/OptimizerWorker.hpp |   4 +-
 src/firestarter/DumpRegisterWorker.cpp        | 133 ++--
 .../Environment/X86/Payload/AVX512Payload.cpp | 549 ++++++++-------
 .../Environment/X86/Payload/AVXPayload.cpp    | 607 ++++++++--------
 .../Environment/X86/Payload/FMA4Payload.cpp   | 583 ++++++++--------
 .../Environment/X86/Payload/FMAPayload.cpp    | 645 +++++++++---------
 .../Environment/X86/Payload/SSE2Payload.cpp   | 593 ++++++++--------
 .../Environment/X86/Payload/X86Payload.cpp    | 420 +-----------
 .../Environment/X86/Payload/ZENFMAPayload.cpp | 502 +++++++-------
 .../X86/Platform/X86PlatformConfig.cpp        |   2 +-
 .../Environment/X86/X86CPUTopology.cpp        |   6 +-
 .../Environment/X86/X86Environment.cpp        | 119 ++--
 src/firestarter/Firestarter.cpp               | 215 +++---
 src/firestarter/LoadWorker.cpp                | 294 ++++----
 src/firestarter/Main.cpp                      | 297 ++++----
 .../Measurement/MeasurementWorker.cpp         | 329 +++++----
 .../Measurement/Metric/IPCEstimate.cpp        |  40 +-
 src/firestarter/Measurement/Metric/Perf.cpp   | 211 +++---
 src/firestarter/Measurement/Metric/RAPL.cpp   | 200 +++---
 src/firestarter/Measurement/Summary.cpp       |  56 +-
 src/firestarter/Optimizer/Algorithm/NSGA2.cpp | 131 ++--
 src/firestarter/Optimizer/OptimizerWorker.cpp |  44 +-
 src/firestarter/Optimizer/Population.cpp      | 112 +--
 .../Optimizer/Util/MultiObjective.cpp         | 357 +++++-----
 src/firestarter/WatchdogWorker.cpp            |  64 +-
 32 files changed, 3461 insertions(+), 3482 deletions(-)

diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index 2e38b855..33839135 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -26,8 +26,10 @@
 #include "../../../Logging/Log.hpp"        // IWYU pragma: keep
 #include "../../Payload/Payload.hpp"
 #include <asmjit/x86.h>
+#include <cassert>
 #include <cstdint>
 #include <map> // IWYU pragma: keep
+#include <type_traits>
 #include <utility>
 
 #define INIT_BLOCKSIZE 1024
@@ -49,9 +51,369 @@ class X86Payload : public environment::payload::Payload {
 
   [[nodiscard]] auto supportedFeatures() const -> asmjit::CpuFeatures const& { return this->SupportedFeatures; }
 
-  template <class IterRegT, class VectorRegT>
-  void emitErrorDetectionCode(asmjit::x86::Builder& Cb, IterRegT IterReg, asmjit::x86::Gpq AddrHighReg,
-                              asmjit::x86::Gpq PointerReg, asmjit::x86::Gpq TempReg, asmjit::x86::Gpq TempReg2);
+  // When (IterReg & 0x3fff) == 0: CRC32-hash the vector registers, then run the communication at PointerReg-128/-64.
+  // MM regs may be clobbered (add them to dirty regs); zmm31 backs up zmm0 if VectorRegT is Zmm and IterRegT is Mm.
+  template <class MaybeConstIterRegT, class MaybeConstVectorRegT>
+  void emitErrorDetectionCode(asmjit::x86::Builder& Cb, MaybeConstIterRegT& IterReg,
+                              const asmjit::x86::Gpq& AddrHighReg, const asmjit::x86::Gpq& PointerReg,
+                              const asmjit::x86::Gpq& TempReg, const asmjit::x86::Gpq& TempReg2) {
+    using IterRegT = std::remove_const_t<MaybeConstIterRegT>;
+    using VectorRegT = std::remove_const_t<MaybeConstVectorRegT>;
+
+    // we don't want anything to break... so we use asserts for everything that
+    // could break it (messages are attached with && so that the condition itself is what gets checked)
+    static_assert(std::is_base_of_v<asmjit::x86::Vec, VectorRegT>, "VectorReg must be of asmjit::asmjit::x86::Vec");
+    static_assert(std::is_same_v<asmjit::x86::Xmm, VectorRegT> || std::is_same_v<asmjit::x86::Ymm, VectorRegT> ||
+                      std::is_same_v<asmjit::x86::Zmm, VectorRegT>,
+                  "VectorReg ist not of any supported type");
+    static_assert(std::is_same_v<asmjit::x86::Mm, IterRegT> || std::is_same_v<asmjit::x86::Gpq, IterRegT>,
+                  "IterReg is not of any supported type");
+
+    if constexpr (std::is_same_v<asmjit::x86::Mm, IterRegT>) {
+      assert((IterReg == asmjit::x86::mm0) && "iter_reg must be mm0");
+    }
+
+    assert((IterReg != TempReg) && "iter_reg must be != temp_reg");
+    assert((TempReg != TempReg2) && "temp_reg must be != temp_reg2");
+    assert((TempReg != AddrHighReg) && "temp_reg must be != addrHigh_reg");
+    assert((TempReg != PointerReg) && "temp_reg must be != pointer_reg");
+
+    assert((IterReg != asmjit::x86::r8) && "iter_reg must be != r8");
+    assert((IterReg != asmjit::x86::r9) && "iter_reg must be != r9");
+    assert((IterReg != asmjit::x86::rax) && "iter_reg must be != rax");
+    assert((IterReg != asmjit::x86::rbx) && "iter_reg must be != rbx");
+    assert((IterReg != asmjit::x86::rcx) && "iter_reg must be != rcx");
+    assert((IterReg != asmjit::x86::rdx) && "iter_reg must be != rdx");
+
+    assert((TempReg != asmjit::x86::r8) && "temp_reg must be != r8");
+    assert((TempReg != asmjit::x86::r9) && "temp_reg must be != r9");
+    assert((TempReg != asmjit::x86::rax) && "temp_reg must be != rax");
+    assert((TempReg != asmjit::x86::rbx) && "temp_reg must be != rbx");
+    assert((TempReg != asmjit::x86::rcx) && "temp_reg must be != rcx");
+    assert((TempReg != asmjit::x86::rdx) && "temp_reg must be != rdx");
+
+    assert((TempReg2 != asmjit::x86::r8) && "temp_reg2 must be != r8");
+    assert((TempReg2 != asmjit::x86::r9) && "temp_reg2 must be != r9");
+    assert((TempReg2 != asmjit::x86::rax) && "temp_reg2 must be != rax");
+    assert((TempReg2 != asmjit::x86::rbx) && "temp_reg2 must be != rbx");
+    assert((TempReg2 != asmjit::x86::rcx) && "temp_reg2 must be != rcx");
+    assert((TempReg2 != asmjit::x86::rdx) && "temp_reg2 must be != rdx");
+
+    assert((AddrHighReg != asmjit::x86::r8) && "addrHigh_reg must be != r8");
+    assert((AddrHighReg != asmjit::x86::r9) && "addrHigh_reg must be != r9");
+    assert((AddrHighReg != asmjit::x86::rax) && "addrHigh_reg must be != rax");
+    assert((AddrHighReg != asmjit::x86::rbx) && "addrHigh_reg must be != rbx");
+    assert((AddrHighReg != asmjit::x86::rcx) && "addrHigh_reg must be != rcx");
+    assert((AddrHighReg != asmjit::x86::rdx) && "addrHigh_reg must be != rdx");
+
+    auto SkipErrorDetection = Cb.newLabel();
+
+    if constexpr (std::is_same<asmjit::x86::Mm, IterRegT>::value) {
+      Cb.movq(TempReg, IterReg);
+    } else {
+      Cb.mov(TempReg, IterReg);
+    }
+    // round about 50-100 Hz
+    // more or less, but this isn't really that relevant
+    Cb.and_(TempReg, asmjit::Imm(0x3fff));
+    Cb.test(TempReg, TempReg);
+    Cb.jnz(SkipErrorDetection);
+
+    Cb.mov(TempReg, asmjit::Imm(0xffffffff));
+
+    auto RegisterCount = registerCount();
+
+    // Create a backup of VectorReg(0)
+    if constexpr (std::is_same_v<asmjit::x86::Xmm, VectorRegT>) {
+      Cb.movq(TempReg2, asmjit::x86::xmm0);
+      Cb.push(TempReg2);
+      Cb.crc32(TempReg, TempReg2);
+      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+      Cb.movq(TempReg2, asmjit::x86::xmm0);
+      Cb.push(TempReg2);
+      Cb.crc32(TempReg, TempReg2);
+
+    } else if constexpr (std::is_same_v<asmjit::x86::Ymm, VectorRegT> && std::is_same_v<asmjit::x86::Mm, IterRegT>) {
+      Cb.movq(TempReg2, asmjit::x86::xmm0);
+      Cb.movq(asmjit::x86::Mm(7), TempReg2);
+      Cb.crc32(TempReg, TempReg2);
+      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+      Cb.movq(TempReg2, asmjit::x86::xmm0);
+      Cb.movq(asmjit::x86::Mm(6), TempReg2);
+      Cb.crc32(TempReg, TempReg2);
+
+      Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1));
+
+      Cb.movq(TempReg2, asmjit::x86::xmm0);
+      Cb.movq(asmjit::x86::Mm(5), TempReg2);
+      Cb.crc32(TempReg, TempReg2);
+      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+      Cb.movq(TempReg2, asmjit::x86::xmm0);
+      Cb.movq(asmjit::x86::Mm(4), TempReg2);
+      Cb.crc32(TempReg, TempReg2);
+    } else if constexpr (std::is_same_v<asmjit::x86::Zmm, VectorRegT> && std::is_same_v<asmjit::x86::Mm, IterRegT>) {
+      // We use vector registers zmm31 for our backup
+      Cb.vmovapd(asmjit::x86::zmm31, asmjit::x86::zmm0);
+      RegisterCount--;
+    }
+
+    // Calculate the hash of the remaining VectorReg
+    // use VectorReg(0) as a temporary place to unpack values
+    for (unsigned I = 1; I < RegisterCount; I++) {
+      if constexpr (std::is_same_v<asmjit::x86::Xmm, VectorRegT>) {
+        Cb.vmovapd(asmjit::x86::xmm0, asmjit::x86::Xmm(I));
+
+        Cb.movq(TempReg2, asmjit::x86::xmm0);
+        Cb.crc32(TempReg, TempReg2);
+        Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+        Cb.movq(TempReg2, asmjit::x86::xmm0);
+        Cb.crc32(TempReg, TempReg2);
+      } else if constexpr (std::is_same_v<asmjit::x86::Ymm, VectorRegT>) {
+        Cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(I));
+
+        Cb.movq(TempReg2, asmjit::x86::xmm0);
+        Cb.crc32(TempReg, TempReg2);
+        Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+        Cb.movq(TempReg2, asmjit::x86::xmm0);
+        Cb.crc32(TempReg, TempReg2);
+
+        Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1));
+
+        Cb.movq(TempReg2, asmjit::x86::xmm0);
+        Cb.crc32(TempReg, TempReg2);
+        Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+        Cb.movq(TempReg2, asmjit::x86::xmm0);
+        Cb.crc32(TempReg, TempReg2);
+      } else if constexpr (std::is_same_v<asmjit::x86::Zmm, VectorRegT>) {
+        Cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(I));
+
+        Cb.movq(TempReg2, asmjit::x86::xmm0);
+        Cb.crc32(TempReg, TempReg2);
+        Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+        Cb.movq(TempReg2, asmjit::x86::xmm0);
+        Cb.crc32(TempReg, TempReg2);
+
+        Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1));
+
+        Cb.movq(TempReg2, asmjit::x86::xmm0);
+        Cb.crc32(TempReg, TempReg2);
+        Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+        Cb.movq(TempReg2, asmjit::x86::xmm0);
+        Cb.crc32(TempReg, TempReg2);
+
+        Cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(I), asmjit::Imm(2));
+
+        Cb.movq(TempReg2, asmjit::x86::xmm0);
+        Cb.crc32(TempReg, TempReg2);
+        Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+        Cb.movq(TempReg2, asmjit::x86::xmm0);
+        Cb.crc32(TempReg, TempReg2);
+
+        Cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(I), asmjit::Imm(3));
+
+        Cb.movq(TempReg2, asmjit::x86::xmm0);
+        Cb.crc32(TempReg, TempReg2);
+        Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+        Cb.movq(TempReg2, asmjit::x86::xmm0);
+        Cb.crc32(TempReg, TempReg2);
+      }
+    }
+
+    // Restore VectorReg(0) from backup
+    if constexpr (std::is_same_v<asmjit::x86::Xmm, VectorRegT>) {
+      Cb.pop(TempReg2);
+      Cb.movq(asmjit::x86::xmm0, TempReg2);
+      Cb.movlhps(asmjit::x86::xmm0, asmjit::x86::xmm0);
+      Cb.pop(TempReg2);
+      Cb.pinsrw(asmjit::x86::xmm0, TempReg2.r32(), asmjit::Imm(0));
+      Cb.shr(TempReg2, asmjit::Imm(32));
+      Cb.movd(TempReg2.r32(), asmjit::x86::Mm(7));
+      Cb.pinsrw(asmjit::x86::xmm0, TempReg2.r32(), asmjit::Imm(1));
+    } else if constexpr (std::is_same_v<asmjit::x86::Ymm, VectorRegT> && std::is_same_v<asmjit::x86::Mm, IterRegT>) {
+      Cb.movq(TempReg2, asmjit::x86::Mm(5));
+      Cb.movq(asmjit::x86::xmm0, TempReg2);
+      Cb.movq(TempReg2, asmjit::x86::Mm(4));
+      Cb.pinsrq(asmjit::x86::xmm0, TempReg2, asmjit::Imm(1));
+
+      Cb.vinsertf128(asmjit::x86::ymm0, asmjit::x86::ymm0, asmjit::x86::xmm0, asmjit::Imm(1));
+
+      Cb.movq(TempReg2, asmjit::x86::Mm(7));
+      Cb.movq(asmjit::x86::xmm0, TempReg2);
+      Cb.movq(TempReg2, asmjit::x86::Mm(6));
+      Cb.pinsrq(asmjit::x86::xmm0, TempReg2, asmjit::Imm(1));
+    } else if constexpr (std::is_same_v<asmjit::x86::Zmm, VectorRegT> && std::is_same_v<asmjit::x86::Mm, IterRegT>) {
+      // We use vector registers zmm31 for our backup
+      Cb.vmovapd(asmjit::x86::zmm0, asmjit::x86::zmm31);
+    }
+
+    // before starting the communication, backup r8, r9, rax, rbx, rcx and rdx
+    if constexpr (std::is_same_v<asmjit::x86::Mm, IterRegT>) {
+      Cb.movq(asmjit::x86::Mm(7), asmjit::x86::rax);
+      Cb.movq(asmjit::x86::Mm(6), asmjit::x86::rbx);
+      Cb.movq(asmjit::x86::Mm(5), asmjit::x86::rcx);
+      Cb.movq(asmjit::x86::Mm(4), asmjit::x86::rdx);
+      Cb.movq(asmjit::x86::Mm(3), asmjit::x86::r8);
+      Cb.movq(asmjit::x86::Mm(2), asmjit::x86::r9);
+    } else {
+      Cb.push(asmjit::x86::rax);
+      Cb.push(asmjit::x86::rbx);
+      Cb.push(asmjit::x86::rcx);
+      Cb.push(asmjit::x86::rdx);
+      Cb.push(asmjit::x86::r8);
+      Cb.push(asmjit::x86::r9);
+    }
+
+    // do the actual communication
+    // temp_reg contains our hash
+
+    // save the pointer_reg. it might be any of r8, r9, rax, rbx, rcx or rdx
+    Cb.mov(TempReg2, PointerReg);
+
+    // Don't touch me!
+    // This synchronization and communication works even if the threads run at
+    // different (changing) speed, with just one "lock cmpxchg16b". Brought to you
+    // by a few hours of headache for two people.
+    auto Communication = [&](auto Offset) {
+      // communication
+      Cb.mov(asmjit::x86::r8, asmjit::x86::ptr_64(TempReg2, Offset));
+
+      // temp data
+      Cb.mov(asmjit::x86::r9, TempReg2);
+      Cb.add(asmjit::x86::r9, asmjit::Imm(Offset + 8));
+
+      Cb.mov(asmjit::x86::rdx, asmjit::x86::ptr_64(asmjit::x86::r9, 0));
+      Cb.mov(asmjit::x86::rax, asmjit::x86::ptr_64(asmjit::x86::r9, 8));
+
+      auto L0 = Cb.newLabel();
+      Cb.bind(L0);
+
+      Cb.lock();
+      Cb.cmpxchg16b(asmjit::x86::ptr(asmjit::x86::r8));
+
+      auto L1 = Cb.newLabel();
+      Cb.jnz(L1);
+
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx);
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx);
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
+
+      Cb.mov(asmjit::x86::rax, asmjit::Imm(2));
+
+      auto L6 = Cb.newLabel();
+      Cb.jmp(L6);
+
+      Cb.bind(L1);
+
+      Cb.cmp(asmjit::x86::rcx, asmjit::x86::rdx);
+
+      auto L2 = Cb.newLabel();
+      Cb.jle(L2);
+
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx);
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx);
+
+      Cb.jmp(L0);
+
+      Cb.bind(L2);
+
+      auto L3 = Cb.newLabel();
+
+      Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
+      Cb.jne(L3);
+      Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
+      Cb.jne(L3);
+
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::x86::rdx);
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::x86::rax);
+
+      Cb.bind(L3);
+
+      Cb.cmp(asmjit::x86::rcx, asmjit::x86::ptr_64(asmjit::x86::r9, 16));
+      Cb.mov(asmjit::x86::rax, asmjit::Imm(4));
+      Cb.jne(L6);
+
+      Cb.cmp(asmjit::x86::rbx, asmjit::x86::ptr_64(asmjit::x86::r9, 24));
+      auto L4 = Cb.newLabel();
+      Cb.jne(L4);
+
+      Cb.mov(asmjit::x86::rax, asmjit::Imm(0));
+
+      auto L5 = Cb.newLabel();
+      Cb.jmp(L5);
+
+      Cb.bind(L4);
+
+      Cb.mov(asmjit::x86::rax, asmjit::Imm(1));
+
+      Cb.bind(L5);
+
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
+
+      Cb.bind(L6);
+
+      // if check failed
+      Cb.cmp(asmjit::x86::rax, asmjit::Imm(1));
+      auto L7 = Cb.newLabel();
+      Cb.jne(L7);
+
+      // write the error flag
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 32), asmjit::Imm(1));
+
+      // stop the execution after some time
+      Cb.mov(asmjit::x86::ptr_64(AddrHighReg), asmjit::Imm(LOAD_STOP));
+      Cb.mfence();
+
+      Cb.bind(L7);
+
+      auto L9 = Cb.newLabel();
+      Cb.jmp(L9);
+    };
+
+    // left communication
+    // move hash
+    Cb.mov(asmjit::x86::rbx, TempReg);
+    // move iterations counter
+    if constexpr (std::is_same_v<asmjit::x86::Mm, IterRegT>) {
+      Cb.movq(asmjit::x86::rcx, IterReg);
+    } else {
+      Cb.mov(asmjit::x86::rcx, IterReg);
+    }
+
+    Communication(-128);
+
+    // right communication
+    // move hash
+    Cb.mov(asmjit::x86::rbx, TempReg);
+    // move iterations counter
+    if constexpr (std::is_same_v<asmjit::x86::Mm, IterRegT>) {
+      Cb.movq(asmjit::x86::rcx, IterReg);
+    } else {
+      Cb.mov(asmjit::x86::rcx, IterReg);
+    }
+
+    Communication(-64);
+
+    // restore r8, r9, rax, rbx, rcx and rdx
+    if constexpr (std::is_same_v<asmjit::x86::Mm, IterRegT>) {
+      Cb.movq(asmjit::x86::rax, asmjit::x86::Mm(7));
+      Cb.movq(asmjit::x86::rbx, asmjit::x86::Mm(6));
+      Cb.movq(asmjit::x86::rcx, asmjit::x86::Mm(5));
+      Cb.movq(asmjit::x86::rdx, asmjit::x86::Mm(4));
+      Cb.movq(asmjit::x86::r8, asmjit::x86::Mm(3));
+      Cb.movq(asmjit::x86::r9, asmjit::x86::Mm(2));
+    } else {
+      Cb.pop(asmjit::x86::r9);
+      Cb.pop(asmjit::x86::r8);
+      Cb.pop(asmjit::x86::rdx);
+      Cb.pop(asmjit::x86::rcx);
+      Cb.pop(asmjit::x86::rbx);
+      Cb.pop(asmjit::x86::rax);
+    }
+
+    Cb.bind(SkipErrorDetection);
+  }
 
 public:
   X86Payload(asmjit::CpuFeatures const& SupportedFeatures,
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 0e4c7ef5..6e9ad166 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -66,15 +66,14 @@ class Firestarter {
               std::chrono::microseconds const& Period, unsigned RequestedNumThreads, std::string const& CpuBind,
               bool PrintFunctionSummary, unsigned FunctionId, bool ListInstructionGroups,
               std::string const& InstructionGroups, unsigned LineCount, bool AllowUnavailablePayload,
-              bool DumpRegisters, std::chrono::seconds const& DumpRegistersTimeDelta,
-              std::string const& DumpRegistersOutpath, bool ErrorDetection, int Gpus, unsigned GpuMatrixSize,
-              bool GpuUseFloat, bool GpuUseDouble, bool ListMetrics, bool Measurement,
-              std::chrono::milliseconds const& StartDelta, std::chrono::milliseconds const& StopDelta,
-              std::chrono::milliseconds const& MeasurementInterval, std::vector<std::string> const& MetricPaths,
-              std::vector<std::string> const& StdinMetrics, bool Optimize, std::chrono::seconds const& Preheat,
-              std::string const& OptimizationAlgorithm, std::vector<std::string> const& OptimizationMetrics,
-              std::chrono::seconds const& EvaluationDuration, unsigned Individuals, std::string const& OptimizeOutfile,
-              unsigned Generations, double Nsga2Cr, double Nsga2M);
+              bool DumpRegisters, std::chrono::seconds const& DumpRegistersTimeDelta, std::string DumpRegistersOutpath,
+              bool ErrorDetection, int Gpus, unsigned GpuMatrixSize, bool GpuUseFloat, bool GpuUseDouble,
+              bool ListMetrics, bool Measurement, std::chrono::milliseconds const& StartDelta,
+              std::chrono::milliseconds const& StopDelta, std::chrono::milliseconds const& MeasurementInterval,
+              std::vector<std::string> const& MetricPaths, std::vector<std::string> const& StdinMetrics, bool Optimize,
+              std::chrono::seconds const& Preheat, std::string const& OptimizationAlgorithm,
+              std::vector<std::string> const& OptimizationMetrics, std::chrono::seconds const& EvaluationDuration,
+              unsigned Individuals, std::string OptimizeOutfile, unsigned Generations, double Nsga2Cr, double Nsga2M);
 
   ~Firestarter();
 
@@ -141,12 +140,12 @@ class Firestarter {
   void signalWork() { signalLoadWorkers(THREAD_WORK); };
 
   // WatchdogWorker.cpp
-  auto watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load, std::chrono::seconds Timeout)
-      -> int;
+  static auto watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load,
+                             std::chrono::seconds Timeout) -> int;
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
   // DumpRegisterWorker.cpp
-  auto initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, std::string DumpFilePath) -> int;
+  auto initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, const std::string& DumpFilePath) -> int;
   void joinDumpRegisterWorker();
 #endif
 
diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp
index e9e18b76..0205bc03 100644
--- a/include/firestarter/Measurement/MeasurementWorker.hpp
+++ b/include/firestarter/Measurement/MeasurementWorker.hpp
@@ -44,7 +44,7 @@ class MeasurementWorker {
   pthread_t WorkerThread;
   pthread_t StdinThread;
 
-  std::vector<MetricInterface*> Metrics = {&RaplMetric, &PerfIpcMetric, &PerfFreqMetric, &IpcEstimateMetric};
+  std::vector<const MetricInterface*> Metrics = {&RaplMetric, &PerfIpcMetric, &PerfFreqMetric, &IpcEstimateMetric};
 
   std::mutex ValuesMutex;
   std::map<std::string, std::vector<TimeValue>> Values;
@@ -65,7 +65,7 @@ class MeasurementWorker {
   std::string AvailableMetricsString;
 
 #ifndef FIRESTARTER_LINK_STATIC
-  std::vector<void*> _metricDylibs = {};
+  std::vector<void*> MetricDylibs;
 #endif
 
   std::vector<std::string> StdinMetrics;
diff --git a/include/firestarter/Measurement/Metric/IPCEstimate.h b/include/firestarter/Measurement/Metric/IPCEstimate.h
index 63dcb26b..f5362f93 100644
--- a/include/firestarter/Measurement/Metric/IPCEstimate.h
+++ b/include/firestarter/Measurement/Metric/IPCEstimate.h
@@ -23,9 +23,14 @@
 
 #include "../MetricInterface.h"
 
+#ifdef __cplusplus
 extern "C" {
+#endif
 
-extern MetricInterface IpcEstimateMetric;
+extern const MetricInterface IpcEstimateMetric;
 
 extern void ipcEstimateMetricInsert(double Value);
-};
\ No newline at end of file
+
+#ifdef __cplusplus
+};
+#endif
\ No newline at end of file
diff --git a/include/firestarter/Measurement/Metric/Perf.h b/include/firestarter/Measurement/Metric/Perf.h
index 2702cd94..480fb808 100644
--- a/include/firestarter/Measurement/Metric/Perf.h
+++ b/include/firestarter/Measurement/Metric/Perf.h
@@ -23,9 +23,14 @@
 
 #include "../MetricInterface.h"
 
+#ifdef __cplusplus
 extern "C" {
+#endif
 
-extern MetricInterface PerfIpcMetric;
+extern const MetricInterface PerfIpcMetric;
 
-extern MetricInterface PerfFreqMetric;
-};
\ No newline at end of file
+extern const MetricInterface PerfFreqMetric;
+
+#ifdef __cplusplus
+};
+#endif
\ No newline at end of file
diff --git a/include/firestarter/Measurement/Metric/RAPL.h b/include/firestarter/Measurement/Metric/RAPL.h
index 017373a1..5076affe 100644
--- a/include/firestarter/Measurement/Metric/RAPL.h
+++ b/include/firestarter/Measurement/Metric/RAPL.h
@@ -23,7 +23,12 @@
 
 #include "../MetricInterface.h"
 
+#ifdef __cplusplus
 extern "C" {
+#endif
 
-extern MetricInterface RaplMetric;
-};
\ No newline at end of file
+extern const MetricInterface RaplMetric;
+
+#ifdef __cplusplus
+};
+#endif
\ No newline at end of file
diff --git a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
index 70c2aac0..acaa441f 100644
--- a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
+++ b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
@@ -35,9 +35,9 @@ class NSGA2 : public Algorithm {
   auto evolve(firestarter::optimizer::Population& Pop) -> firestarter::optimizer::Population override;
 
 private:
-  unsigned Gen;
-  double Cr;
-  double M;
+  const unsigned Gen;
+  const double Cr;
+  const double M;
 };
 
 } // namespace firestarter::optimizer::algorithm
diff --git a/include/firestarter/Optimizer/OptimizerWorker.hpp b/include/firestarter/Optimizer/OptimizerWorker.hpp
index ba106595..f6c3a37f 100644
--- a/include/firestarter/Optimizer/OptimizerWorker.hpp
+++ b/include/firestarter/Optimizer/OptimizerWorker.hpp
@@ -33,7 +33,7 @@ namespace firestarter::optimizer {
 class OptimizerWorker {
 public:
   OptimizerWorker(std::unique_ptr<firestarter::optimizer::Algorithm>&& Algorithm,
-                  firestarter::optimizer::Population& Population, std::string const& OptimizationAlgorithm,
+                  firestarter::optimizer::Population& Population, std::string OptimizationAlgorithm,
                   unsigned Individuals, std::chrono::seconds const& Preheat);
 
   ~OptimizerWorker() = default;
@@ -51,7 +51,7 @@ class OptimizerWorker {
   unsigned Individuals;
   std::chrono::seconds Preheat;
 
-  pthread_t WorkerThread;
+  pthread_t WorkerThread{};
 };
 
 } // namespace firestarter::optimizer
diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp
index 06d7e417..5dee113e 100644
--- a/src/firestarter/DumpRegisterWorker.cpp
+++ b/src/firestarter/DumpRegisterWorker.cpp
@@ -28,21 +28,19 @@
 #include <sstream>
 #include <thread>
 
-using namespace firestarter;
-
 namespace {
-static unsigned hammingDistance(uint64_t x, uint64_t y) {
-  unsigned dist = 0;
+auto hammingDistance(uint64_t X, uint64_t Y) -> unsigned {
+  unsigned Dist = 0;
 
-  for (uint64_t val = x ^ y; val > 0; val >>= 1) {
-    dist += val & 1;
+  for (uint64_t Val = X ^ Y; Val > 0; Val >>= 1) {
+    Dist += Val & 1;
   }
 
-  return dist;
+  return Dist;
 }
 
-static std::string registerNameBySize(unsigned registerSize) {
-  switch (registerSize) {
+auto registerNameBySize(unsigned RegisterSize) -> std::string {
+  switch (RegisterSize) {
   case 2:
     return "xmm";
   case 4:
@@ -55,128 +53,133 @@ static std::string registerNameBySize(unsigned registerSize) {
 }
 } // namespace
 
-int Firestarter::initDumpRegisterWorker(std::chrono::seconds dumpTimeDelta, std::string dumpFilePath) {
+namespace firestarter {
+
+auto Firestarter::initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, const std::string& DumpFilePath) -> int {
 
-  auto data = std::make_unique<DumpRegisterWorkerData>(this->LoadThreads.begin()->second, dumpTimeDelta, dumpFilePath);
+  auto Data = std::make_unique<DumpRegisterWorkerData>(this->LoadThreads.begin()->second, DumpTimeDelta, DumpFilePath);
 
-  this->DumpRegisterWorkerThread = std::thread(Firestarter::dumpRegisterWorker, std::move(data));
+  this->DumpRegisterWorkerThread = std::thread(Firestarter::dumpRegisterWorker, std::move(Data));
 
   return EXIT_SUCCESS;
 }
 
 void Firestarter::joinDumpRegisterWorker() { this->DumpRegisterWorkerThread.join(); }
 
-void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> data) {
+void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Data) {
 
   pthread_setname_np(pthread_self(), "DumpRegWorker");
 
-  int registerCount = data->LoadWorkerDataPtr->config().payload().registerCount();
-  int registerSize = data->LoadWorkerDataPtr->config().payload().registerSize();
-  std::string registerPrefix = registerNameBySize(registerSize);
-  auto offset = sizeof(DumpRegisterStruct) / sizeof(uint64_t);
+  auto RegisterCount = Data->LoadWorkerDataPtr->config().payload().registerCount();
+  auto RegisterSize = Data->LoadWorkerDataPtr->config().payload().registerSize();
+  std::string RegisterPrefix = registerNameBySize(RegisterSize);
+  auto Offset = sizeof(DumpRegisterStruct) / sizeof(uint64_t);
 
-  auto dumpRegisterStruct = reinterpret_cast<DumpRegisterStruct*>(data->LoadWorkerDataPtr->AddrMem - offset);
+  auto* DumpRegisterStruct = reinterpret_cast<struct DumpRegisterStruct*>(Data->LoadWorkerDataPtr->AddrMem - Offset);
 
-  auto dumpVar = reinterpret_cast<volatile uint64_t*>(&dumpRegisterStruct->DumpVar);
+  auto* DumpVar = reinterpret_cast<volatile uint64_t*>(&DumpRegisterStruct->DumpVar);
   // memory of simd variables is before the padding
-  volatile uint64_t* dumpMemAddr = dumpRegisterStruct->Padding - registerCount * registerSize;
+  auto* DumpMemAddr = static_cast<volatile uint64_t*>(DumpRegisterStruct->Padding) -
+                      (static_cast<size_t>(RegisterCount * RegisterSize));
 
-  // TODO: maybe use aligned_malloc to make memcpy more efficient and don't
+  // TODO(marenz): maybe use aligned_malloc to make memcpy more efficient and don't
   // interrupt the workload as much?
-  uint64_t* last = reinterpret_cast<uint64_t*>(malloc(sizeof(uint64_t) * offset));
-  uint64_t* current = reinterpret_cast<uint64_t*>(malloc(sizeof(uint64_t) * offset));
+  auto* Last = reinterpret_cast<uint64_t*>(malloc(sizeof(uint64_t) * Offset));
+  auto* Current = reinterpret_cast<uint64_t*>(malloc(sizeof(uint64_t) * Offset));
 
-  if (last == nullptr || current == nullptr) {
+  if (Last == nullptr || Current == nullptr) {
     log::error() << "Malloc failed in Firestarter::dumpRegisterWorker";
     exit(ENOMEM);
   }
 
-  std::stringstream dumpFilePath;
-  dumpFilePath << data->DumpFilePath;
+  std::stringstream DumpFilePath;
+  DumpFilePath << Data->DumpFilePath;
 #if defined(__MINGW32__) || defined(__MINGW64__)
-  dumpFilePath << "\\";
+  DumpFilePath << "\\";
 #else
-  dumpFilePath << "/";
+  DumpFilePath << "/";
 #endif
-  dumpFilePath << "hamming_distance.csv";
-  auto dumpFile = std::ofstream(dumpFilePath.str());
+  DumpFilePath << "hamming_distance.csv";
+  auto DumpFile = std::ofstream(DumpFilePath.str());
 
   // dump the header to the csv file
-  dumpFile << "total_hamming_distance,";
-  for (int i = 0; i < registerCount; i++) {
-    for (int j = 0; j < registerSize; j++) {
-      dumpFile << registerPrefix << i << "[" << j << "]";
+  DumpFile << "total_hamming_distance,";
+  for (auto I = 0U; I < RegisterCount; I++) {
+    for (auto J = 0U; J < RegisterSize; J++) {
+      DumpFile << RegisterPrefix << I << "[" << J << "]";
 
-      if (j != registerSize - 1) {
-        dumpFile << ",";
+      if (J != RegisterSize - 1) {
+        DumpFile << ",";
       }
     }
 
-    if (i != registerCount - 1) {
-      dumpFile << ",";
+    if (I != RegisterCount - 1) {
+      DumpFile << ",";
     }
   }
-  dumpFile << std::endl << std::flush;
+  DumpFile << '\n' << std::flush;
 
   // do not output the hamming distance for the first run
-  bool skipFirst = true;
+  bool SkipFirst = true;
 
   // continue until stop and dump the registers every data->dumpTimeDelta
   // seconds
-  for (; *data->LoadWorkerDataPtr->AddrHigh != LOAD_STOP;) {
+  for (; *Data->LoadWorkerDataPtr->AddrHigh != LOAD_STOP;) {
     // signal the thread to dump its largest SIMD registers
-    *dumpVar = DumpVariable::Start;
+    *DumpVar = DumpVariable::Start;
     __asm__ __volatile__("mfence;");
-    while (*dumpVar == DumpVariable::Start) {
+    while (*DumpVar == DumpVariable::Start) {
       std::this_thread::sleep_for(std::chrono::milliseconds(10));
     }
 
     // copy the register content to minimize the interruption of the load worker
-    std::memcpy(current, (void*)dumpMemAddr, sizeof(uint64_t) * offset);
+    std::memcpy(Current, (void*)DumpMemAddr, sizeof(uint64_t) * Offset);
 
     // skip the first output, as we first have to get some valid values for last
-    if (!skipFirst) {
+    if (!SkipFirst) {
       // calculate the total hamming distance
-      int totalHammingDistance = 0;
-      for (int i = 0; i < registerCount * registerSize; i++) {
-        totalHammingDistance += hammingDistance(current[i], last[i]);
+      auto TotalHammingDistance = 0U;
+      for (auto I = 0U; I < RegisterCount * RegisterSize; I++) {
+        TotalHammingDistance += hammingDistance(Current[I], Last[I]);
       }
 
-      dumpFile << totalHammingDistance << ",";
+      DumpFile << TotalHammingDistance << ",";
 
       // dump the hamming distance of each double (last, current) pair
-      for (int i = registerCount - 1; i >= 0; i--) {
+      for (int I = RegisterCount - 1; I >= 0; I--) {
         // auto registerNum = registerCount - 1 - i;
 
-        for (auto j = 0; j < registerSize; j++) {
-          auto index = registerSize * i + j;
-          auto hd = static_cast<uint64_t>(hammingDistance(current[index], last[index]));
+        for (auto J = 0U; J < RegisterSize; J++) {
+          auto Index = (RegisterSize * I) + J;
+          auto Hd = static_cast<uint64_t>(hammingDistance(Current[Index], Last[Index]));
 
-          dumpFile << hd;
-          if (j != registerSize - 1) {
-            dumpFile << ",";
+          DumpFile << Hd;
+          if (J != RegisterSize - 1) {
+            DumpFile << ",";
           }
         }
 
-        if (i != 0) {
-          dumpFile << ",";
+        if (I != 0) {
+          DumpFile << ",";
         }
       }
 
-      dumpFile << std::endl << std::flush;
+      DumpFile << '\n' << std::flush;
     } else {
-      skipFirst = false;
+      SkipFirst = false;
     }
 
-    std::memcpy(last, current, sizeof(uint64_t) * offset);
+    std::memcpy(Last, Current, sizeof(uint64_t) * Offset);
 
-    std::this_thread::sleep_for(std::chrono::seconds(data->DumpTimeDelta));
+    std::this_thread::sleep_for(std::chrono::seconds(Data->DumpTimeDelta));
   }
 
-  dumpFile.close();
+  DumpFile.close();
 
-  free(last);
-  free(current);
+  free(Last);
+  free(Current);
 }
 
-#endif
+} // namespace firestarter
+
+#endif
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index 2325ed04..3f866f70 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -21,382 +21,379 @@
 
 #include <firestarter/Environment/X86/Payload/AVX512Payload.hpp>
 
-using namespace firestarter::environment::x86::payload;
-using namespace asmjit;
-using namespace asmjit::x86;
+namespace firestarter::environment::x86::payload {
 
 auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
                                    bool ErrorDetection) -> int {
+  using namespace asmjit;
+  using namespace asmjit::x86;
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto sequence = this->generateSequence(Proportion);
-  auto repetitions = this->getNumberOfSequenceRepetitions(sequence, NumberOfLines / Thread);
+  auto Sequence = generateSequence(Proportion);
+  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
 
   // compute count of flops and memory access for performance report
-  unsigned flops = 0;
-  unsigned bytes = 0;
+  Flops = 0;
+  Bytes = 0;
 
-  for (const auto& item : sequence) {
-    auto it = this->InstructionFlops.find(item);
+  for (const auto& Item : Sequence) {
+    auto It = InstructionFlops.find(Item);
 
-    if (it == this->InstructionFlops.end()) {
-      workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
+    if (It == InstructionFlops.end()) {
+      workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
-    flops += it->second;
+    Flops += It->second;
 
-    it = this->InstructionMemory.find(item);
+    It = InstructionMemory.find(Item);
 
-    if (it != this->InstructionMemory.end()) {
-      bytes += it->second;
+    if (It != InstructionMemory.end()) {
+      Bytes += It->second;
     }
   }
 
-  this->Flops = repetitions * flops;
-  this->Bytes = repetitions * bytes;
-  this->Instructions = repetitions * sequence.size() * 4 + 6;
+  Flops *= Repetitions;
+  Bytes *= Repetitions;
+  Instructions = Repetitions * Sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
-  auto l1i_cache_size = InstructionCacheSize / Thread;
-  auto dataCacheBufferSizeIterator = DataCacheBufferSize.begin();
-  auto l1_size = *dataCacheBufferSizeIterator / Thread;
-  std::advance(dataCacheBufferSizeIterator, 1);
-  auto l2_size = *dataCacheBufferSizeIterator / Thread;
-  std::advance(dataCacheBufferSizeIterator, 1);
-  auto l3_size = *dataCacheBufferSizeIterator / Thread;
-  auto ram_size = RamBufferSize / Thread;
+  const auto L1iCacheSize = InstructionCacheSize / Thread;
+  auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin();
+  const auto L1Size = *DataCacheBufferSizeIterator / Thread;
+  std::advance(DataCacheBufferSizeIterator, 1);
+  const auto L2Size = *DataCacheBufferSizeIterator / Thread;
+  std::advance(DataCacheBufferSizeIterator, 1);
+  const auto L3Size = *DataCacheBufferSizeIterator / Thread;
+  const auto RamSize = RamBufferSize / Thread;
 
   // calculate the reset counters for the buffers
-  auto l2_loop_count = getL2LoopCount(sequence, NumberOfLines, l2_size * Thread, Thread);
-  auto l3_loop_count = getL3LoopCount(sequence, NumberOfLines, l3_size * Thread, Thread);
-  auto ram_loop_count = getRAMLoopCount(sequence, NumberOfLines, ram_size * Thread, Thread);
+  const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread);
+  const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
+  const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
-  CodeHolder code;
-  code.init(this->Rt.environment());
+  CodeHolder Code;
+  Code.init(Rt.environment());
 
-  if (nullptr != this->LoadFunction) {
-    this->Rt.release(&this->LoadFunction);
+  if (nullptr != LoadFunction) {
+    Rt.release(&LoadFunction);
   }
 
-  Builder cb(&code);
-  cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
+  Builder Cb(&Code);
+  Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
                           asmjit::DiagnosticOptions::kValidateIntermediate);
 
-  auto pointer_reg = rax;
-  auto l1_addr = rbx;
-  auto l2_addr = rcx;
-  auto l3_addr = r8;
-  auto ram_addr = r9;
-  auto l2_count_reg = r10;
-  auto l3_count_reg = r11;
-  auto ram_count_reg = r12;
-  auto temp_reg = r13;
-  auto temp_reg2 = rbp;
-  auto offset_reg = r14;
-  auto addrHigh_reg = r15;
-  auto iter_reg = mm0;
-  auto shift_reg = std::vector<Gp>({rdi, rsi, rdx});
-  auto shift_reg32 = std::vector<Gp>({edi, esi, edx});
-  auto nr_shift_regs = 3;
-  auto mul_regs = 3;
-  auto add_regs = 22;
-  auto alt_dst_regs = 5;
-  auto ram_reg = zmm30;
-
-  FuncDetail func;
-  func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl),
-            this->Rt.environment());
-
-  FuncFrame frame;
-  frame.init(func);
+  const auto PointerReg = rax;
+  const auto L1Addr = rbx;
+  const auto L2Addr = rcx;
+  const auto L3Addr = r8;
+  const auto RamAddr = r9;
+  const auto L2CountReg = r10;
+  const auto L3CountReg = r11;
+  const auto RamCountReg = r12;
+  const auto TempReg = r13;
+  const auto TempReg2 = rbp;
+  const auto OffsetReg = r14;
+  const auto AddrHighReg = r15;
+  const auto IterReg = mm0;
+  const auto ShiftReg = std::vector<Gp>({rdi, rsi, rdx});
+  const auto ShiftReg32 = std::vector<Gp>({edi, esi, edx});
+  const auto NrShiftRegs = 3;
+  const auto MulRegs = 3;
+  const auto AddRegs = 22;
+  const auto AltDstRegs = 5;
+  const auto RamReg = zmm30;
+
+  FuncDetail Func;
+  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl), Rt.environment());
+
+  FuncFrame Frame;
+  Frame.init(Func);
 
   // make zmm registers dirty
-  for (int i = 0; i < 32; i++) {
-    frame.addDirtyRegs(Zmm(i));
+  for (int I = 0; I < 32; I++) {
+    Frame.addDirtyRegs(Zmm(I));
   }
-  for (int i = 0; i < 8; i++) {
-    frame.addDirtyRegs(Mm(i));
+  for (int I = 0; I < 8; I++) {
+    Frame.addDirtyRegs(Mm(I));
   }
   // make all other used registers dirty except RAX
-  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg,
-                     temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr);
-  for (const auto& reg : shift_reg) {
-    frame.addDirtyRegs(reg);
+  Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg,
+                     AddrHighReg, IterReg, RamAddr);
+  for (const auto& Reg : ShiftReg) {
+    Frame.addDirtyRegs(Reg);
   }
 
-  FuncArgsAssignment args(&func);
+  FuncArgsAssignment Args(&Func);
   // FIXME: asmjit assigment to mm0 does not seem to be supported
-  args.assignAll(pointer_reg, addrHigh_reg, temp_reg);
-  args.updateFuncFrame(frame);
-  frame.finalize();
+  Args.assignAll(PointerReg, AddrHighReg, TempReg);
+  Args.updateFuncFrame(Frame);
+  Frame.finalize();
 
-  cb.emitProlog(frame);
-  cb.emitArgsAssignment(frame, args);
+  Cb.emitProlog(Frame);
+  Cb.emitArgsAssignment(Frame, Args);
 
   // FIXME: movq from temp_reg to iter_reg
-  cb.movq(iter_reg, temp_reg);
+  Cb.movq(IterReg, TempReg);
 
   // stop right away if low load is selected
-  auto FunctionExit = cb.newLabel();
+  auto FunctionExit = Cb.newLabel();
 
-  cb.mov(temp_reg, ptr_64(addrHigh_reg));
-  cb.test(temp_reg, temp_reg);
-  cb.jz(FunctionExit);
+  Cb.mov(TempReg, ptr_64(AddrHighReg));
+  Cb.test(TempReg, TempReg);
+  Cb.jz(FunctionExit);
 
-  cb.mov(offset_reg,
+  Cb.mov(OffsetReg,
          Imm(64)); // increment after each cache/memory access
   // Initialize registers for shift operations
-  for (auto const& reg : shift_reg32) {
-    cb.mov(reg, Imm(0xAAAAAAAA));
+  for (auto const& Reg : ShiftReg32) {
+    Cb.mov(Reg, Imm(0xAAAAAAAA));
   }
   // Initialize AVX512-Registers for FMA Operations
-  cb.vmovapd(zmm0, zmmword_ptr(pointer_reg));
-  cb.vmovapd(zmm1, zmmword_ptr(pointer_reg, 64));
-  cb.vmovapd(zmm2, zmmword_ptr(pointer_reg, 128));
-  auto add_start = mul_regs;
-  auto add_end = mul_regs + add_regs - 1;
-  auto trans_start = add_regs + mul_regs;
-  auto trans_end = add_regs + mul_regs + alt_dst_regs - 1;
-  for (int i = add_start; i <= trans_end; i++) {
-    cb.vmovapd(Zmm(i), zmmword_ptr(pointer_reg, 256 + i * 64));
+  Cb.vmovapd(zmm0, zmmword_ptr(PointerReg));
+  Cb.vmovapd(zmm1, zmmword_ptr(PointerReg, 64));
+  Cb.vmovapd(zmm2, zmmword_ptr(PointerReg, 128));
+  auto AddStart = MulRegs;
+  auto AddEnd = MulRegs + AddRegs - 1;
+  auto TransStart = AddRegs + MulRegs;
+  auto TransEnd = AddRegs + MulRegs + AltDstRegs - 1;
+  for (int I = AddStart; I <= TransEnd; I++) {
+    Cb.vmovapd(Zmm(I), zmmword_ptr(PointerReg, 256 + (I * 64)));
   }
-  cb.mov(l1_addr, pointer_reg); // address for L1-buffer
-  cb.mov(l2_addr, pointer_reg);
-  cb.add(l2_addr, Imm(l1_size)); // address for L2-buffer
-  cb.mov(l3_addr, pointer_reg);
-  cb.add(l3_addr, Imm(l2_size)); // address for L3-buffer
-  cb.mov(ram_addr, pointer_reg);
-  cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer
-  cb.mov(l2_count_reg, Imm(l2_loop_count));
-  workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop ("
-                     << l2_size / 1024 << ") KiB";
-  cb.mov(l3_count_reg, Imm(l3_loop_count));
-  workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop ("
-                     << l3_size / 1024 << ") KiB";
-  cb.mov(ram_count_reg, Imm(ram_loop_count));
-  workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop ("
-                     << ram_size / 1024 << ") KiB";
-
-  cb.align(AlignMode::kCode, 64);
-
-  auto Loop = cb.newLabel();
-  cb.bind(Loop);
-
-  auto shift_pos = 0;
-  bool left = false;
-  auto add_dest = add_start + 1;
-  auto mov_dst = trans_start;
-  unsigned l1_offset = 0;
-
-#define L1_INCREMENT()                                                                                                 \
-  l1_offset += 64;                                                                                                     \
-  if (l1_offset < l1_size * 0.5) {                                                                                     \
-    cb.add(l1_addr, offset_reg);                                                                                       \
-  } else {                                                                                                             \
-    l1_offset = 0;                                                                                                     \
-    cb.mov(l1_addr, pointer_reg);                                                                                      \
-  }
-
-#define L2_INCREMENT() cb.add(l2_addr, offset_reg)
-
-#define L3_INCREMENT() cb.add(l3_addr, offset_reg)
-
-#define RAM_INCREMENT() cb.add(ram_addr, offset_reg)
-
-  for (unsigned count = 0; count < repetitions; count++) {
-    for (const auto& item : sequence) {
-      if (item == "REG") {
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2);
-        cb.vfmadd231pd(Zmm(mov_dst), zmm2, zmm1);
-        cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], temp_reg);
-        mov_dst++;
-      } else if (item == "L1_L") {
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2);
-        cb.vfmadd231pd(Zmm(add_dest), zmm1, zmmword_ptr(l1_addr, 64));
-        L1_INCREMENT();
-      } else if (item == "L1_BROADCAST") {
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2);
-        cb.vbroadcastsd(Zmm(add_dest), ptr_64(l1_addr, 64));
-        L1_INCREMENT();
-      } else if (item == "L1_S") {
-        cb.vmovapd(zmmword_ptr(l1_addr, 64), Zmm(add_dest));
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2);
-        L1_INCREMENT();
-      } else if (item == "L1_LS") {
-        cb.vmovapd(zmmword_ptr(l1_addr, 64), Zmm(add_dest));
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmmword_ptr(l1_addr, 128));
-        L1_INCREMENT();
-      } else if (item == "L2_L") {
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2);
-        cb.vfmadd231pd(Zmm(add_dest), zmm1, zmmword_ptr(l2_addr, 64));
-        L2_INCREMENT();
-      } else if (item == "L2_S") {
-        cb.vmovapd(zmmword_ptr(l2_addr, 64), Zmm(add_dest));
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2);
-        L2_INCREMENT();
-      } else if (item == "L2_LS") {
-        cb.vmovapd(zmmword_ptr(l2_addr, 64), Zmm(add_dest));
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmmword_ptr(l2_addr, 128));
-        L2_INCREMENT();
-      } else if (item == "L3_L") {
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2);
-        cb.vfmadd231pd(Zmm(add_dest), zmm1, zmmword_ptr(l3_addr, 64));
-        L3_INCREMENT();
-      } else if (item == "L3_S") {
-        cb.vmovapd(zmmword_ptr(l3_addr, 64), Zmm(add_dest));
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2);
-        L3_INCREMENT();
-      } else if (item == "L3_LS") {
-        cb.vmovapd(zmmword_ptr(l3_addr, 64), Zmm(add_dest));
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmmword_ptr(l3_addr, 128));
-        L3_INCREMENT();
-      } else if (item == "L3_P") {
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmmword_ptr(l1_addr, 64));
-        cb.prefetcht2(ptr(l3_addr));
-        L3_INCREMENT();
-      } else if (item == "RAM_L") {
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2);
-        cb.vfmadd231pd(ram_reg, zmm1, zmmword_ptr(ram_addr, 64));
-        RAM_INCREMENT();
-      } else if (item == "RAM_S") {
-        cb.vmovapd(zmmword_ptr(ram_addr, 64), Zmm(add_dest));
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2);
-        RAM_INCREMENT();
-      } else if (item == "RAM_LS") {
-        cb.vmovapd(zmmword_ptr(ram_addr, 64), Zmm(add_dest));
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmmword_ptr(ram_addr, 128));
-        RAM_INCREMENT();
-      } else if (item == "RAM_P") {
-        cb.vfmadd231pd(Zmm(add_dest), zmm0, zmmword_ptr(l1_addr, 64));
-        cb.prefetcht2(ptr(ram_addr));
-        RAM_INCREMENT();
+  Cb.mov(L1Addr, PointerReg); // address for L1-buffer
+  Cb.mov(L2Addr, PointerReg);
+  Cb.add(L2Addr, Imm(L1Size)); // address for L2-buffer
+  Cb.mov(L3Addr, PointerReg);
+  Cb.add(L3Addr, Imm(L2Size)); // address for L3-buffer
+  Cb.mov(RamAddr, PointerReg);
+  Cb.add(RamAddr, Imm(L3Size)); // address for RAM-buffer
+  Cb.mov(L2CountReg, Imm(L2LoopCount));
+  workerLog::trace() << "reset counter for L2-buffer with " << L2LoopCount << " cache line accesses per loop ("
+                     << L2Size / 1024 << ") KiB";
+  Cb.mov(L3CountReg, Imm(L3LoopCount));
+  workerLog::trace() << "reset counter for L3-buffer with " << L3LoopCount << " cache line accesses per loop ("
+                     << L3Size / 1024 << ") KiB";
+  Cb.mov(RamCountReg, Imm(RamLoopCount));
+  workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop ("
+                     << RamSize / 1024 << ") KiB";
+
+  Cb.align(AlignMode::kCode, 64);
+
+  auto Loop = Cb.newLabel();
+  Cb.bind(Loop);
+
+  auto ShiftPos = 0;
+  bool Left = false;
+  auto AddDest = AddStart + 1;
+  auto MovDst = TransStart;
+  unsigned L1Offset = 0;
+
+  const auto L1Increment = [&Cb, &L1Offset, &L1Size, &L1Addr, &OffsetReg, &PointerReg]() {
+    L1Offset += 64;
+    if (L1Offset < L1Size * 0.5) {
+      Cb.add(L1Addr, OffsetReg);
+    } else {
+      L1Offset = 0;
+      Cb.mov(L1Addr, PointerReg);
+    }
+  };
+  const auto L2Increment = [&Cb, &L2Addr, &OffsetReg]() { Cb.add(L2Addr, OffsetReg); };
+  const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); };
+  const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); };
+
+  for (unsigned Count = 0; Count < Repetitions; Count++) {
+    for (const auto& Item : Sequence) {
+      if (Item == "REG") {
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2);
+        Cb.vfmadd231pd(Zmm(MovDst), zmm2, zmm1);
+        Cb.xor_(ShiftReg[(ShiftPos + NrShiftRegs - 1) % NrShiftRegs], TempReg);
+        MovDst++;
+      } else if (Item == "L1_L") {
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2);
+        Cb.vfmadd231pd(Zmm(AddDest), zmm1, zmmword_ptr(L1Addr, 64));
+        L1Increment();
+      } else if (Item == "L1_BROADCAST") {
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2);
+        Cb.vbroadcastsd(Zmm(AddDest), ptr_64(L1Addr, 64));
+        L1Increment();
+      } else if (Item == "L1_S") {
+        Cb.vmovapd(zmmword_ptr(L1Addr, 64), Zmm(AddDest));
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2);
+        L1Increment();
+      } else if (Item == "L1_LS") {
+        Cb.vmovapd(zmmword_ptr(L1Addr, 64), Zmm(AddDest));
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmmword_ptr(L1Addr, 128));
+        L1Increment();
+      } else if (Item == "L2_L") {
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2);
+        Cb.vfmadd231pd(Zmm(AddDest), zmm1, zmmword_ptr(L2Addr, 64));
+        L2Increment();
+      } else if (Item == "L2_S") {
+        Cb.vmovapd(zmmword_ptr(L2Addr, 64), Zmm(AddDest));
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2);
+        L2Increment();
+      } else if (Item == "L2_LS") {
+        Cb.vmovapd(zmmword_ptr(L2Addr, 64), Zmm(AddDest));
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmmword_ptr(L2Addr, 128));
+        L2Increment();
+      } else if (Item == "L3_L") {
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2);
+        Cb.vfmadd231pd(Zmm(AddDest), zmm1, zmmword_ptr(L3Addr, 64));
+        L3Increment();
+      } else if (Item == "L3_S") {
+        Cb.vmovapd(zmmword_ptr(L3Addr, 64), Zmm(AddDest));
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2);
+        L3Increment();
+      } else if (Item == "L3_LS") {
+        Cb.vmovapd(zmmword_ptr(L3Addr, 64), Zmm(AddDest));
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmmword_ptr(L3Addr, 128));
+        L3Increment();
+      } else if (Item == "L3_P") {
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmmword_ptr(L1Addr, 64));
+        Cb.prefetcht2(ptr(L3Addr));
+        L3Increment();
+      } else if (Item == "RAM_L") {
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2);
+        Cb.vfmadd231pd(RamReg, zmm1, zmmword_ptr(RamAddr, 64));
+        RamIncrement();
+      } else if (Item == "RAM_S") {
+        Cb.vmovapd(zmmword_ptr(RamAddr, 64), Zmm(AddDest));
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2);
+        RamIncrement();
+      } else if (Item == "RAM_LS") {
+        Cb.vmovapd(zmmword_ptr(RamAddr, 64), Zmm(AddDest));
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmmword_ptr(RamAddr, 128));
+        RamIncrement();
+      } else if (Item == "RAM_P") {
+        Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmmword_ptr(L1Addr, 64));
+        Cb.prefetcht2(ptr(RamAddr));
+        RamIncrement();
       } else {
-        workerLog::error() << "Instruction group " << item << " not found in " << this->name() << ".";
+        workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
         return EXIT_FAILURE;
       }
 
-      if (left) {
-        cb.shr(shift_reg32[shift_pos], Imm(1));
+      if (Left) {
+        Cb.shr(ShiftReg32[ShiftPos], Imm(1));
       } else {
-        cb.shl(shift_reg32[shift_pos], Imm(1));
+        Cb.shl(ShiftReg32[ShiftPos], Imm(1));
       }
-      add_dest++;
-      if (add_dest > add_end) {
-        add_dest = add_start;
+      AddDest++;
+      if (AddDest > AddEnd) {
+        AddDest = AddStart;
       }
-      if (mov_dst > trans_end) {
-        mov_dst = trans_start;
+      if (MovDst > TransEnd) {
+        MovDst = TransStart;
       }
-      shift_pos++;
-      if (shift_pos == nr_shift_regs) {
-        shift_pos = 0;
-        left = !left;
+      ShiftPos++;
+      if (ShiftPos == NrShiftRegs) {
+        ShiftPos = 0;
+        Left = !Left;
       }
     }
   }
 
-  cb.movq(temp_reg, iter_reg); // restore iteration counter
-  if (getRAMSequenceCount(sequence) > 0) {
+  Cb.movq(TempReg, IterReg); // restore iteration counter
+  if (getRAMSequenceCount(Sequence) > 0) {
     // reset RAM counter
-    auto NoRamReset = cb.newLabel();
-
-    cb.sub(ram_count_reg, Imm(1));
-    cb.jnz(NoRamReset);
-    cb.mov(ram_count_reg, Imm(ram_loop_count));
-    cb.mov(ram_addr, pointer_reg);
-    cb.add(ram_addr, Imm(l3_size));
-    cb.bind(NoRamReset);
+    auto NoRamReset = Cb.newLabel();
+
+    Cb.sub(RamCountReg, Imm(1));
+    Cb.jnz(NoRamReset);
+    Cb.mov(RamCountReg, Imm(RamLoopCount));
+    Cb.mov(RamAddr, PointerReg);
+    Cb.add(RamAddr, Imm(L3Size));
+    Cb.bind(NoRamReset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  cb.inc(temp_reg); // increment iteration counter
-  if (getL2SequenceCount(sequence) > 0) {
+  Cb.inc(TempReg); // increment iteration counter
+  if (getL2SequenceCount(Sequence) > 0) {
     // reset L2-Cache counter
-    auto NoL2Reset = cb.newLabel();
-
-    cb.sub(l2_count_reg, Imm(1));
-    cb.jnz(NoL2Reset);
-    cb.mov(l2_count_reg, Imm(l2_loop_count));
-    cb.mov(l2_addr, pointer_reg);
-    cb.add(l2_addr, Imm(l1_size));
-    cb.bind(NoL2Reset);
+    auto NoL2Reset = Cb.newLabel();
+
+    Cb.sub(L2CountReg, Imm(1));
+    Cb.jnz(NoL2Reset);
+    Cb.mov(L2CountReg, Imm(L2LoopCount));
+    Cb.mov(L2Addr, PointerReg);
+    Cb.add(L2Addr, Imm(L1Size));
+    Cb.bind(NoL2Reset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  cb.movq(iter_reg, temp_reg); // store iteration counter
-  if (getL3SequenceCount(sequence) > 0) {
+  Cb.movq(IterReg, TempReg); // store iteration counter
+  if (getL3SequenceCount(Sequence) > 0) {
     // reset L3-Cache counter
-    auto NoL3Reset = cb.newLabel();
-
-    cb.sub(l3_count_reg, Imm(1));
-    cb.jnz(NoL3Reset);
-    cb.mov(l3_count_reg, Imm(l3_loop_count));
-    cb.mov(l3_addr, pointer_reg);
-    cb.add(l3_addr, Imm(l2_size));
-    cb.bind(NoL3Reset);
+    auto NoL3Reset = Cb.newLabel();
+
+    Cb.sub(L3CountReg, Imm(1));
+    Cb.jnz(NoL3Reset);
+    Cb.mov(L3CountReg, Imm(L3LoopCount));
+    Cb.mov(L3Addr, PointerReg);
+    Cb.add(L3Addr, Imm(L2Size));
+    Cb.bind(NoL3Reset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  cb.mov(l1_addr, pointer_reg);
+  Cb.mov(L1Addr, PointerReg);
 
   if (DumpRegisters) {
-    auto SkipRegistersDump = cb.newLabel();
+    auto SkipRegistersDump = Cb.newLabel();
 
-    cb.test(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait));
-    cb.jnz(SkipRegistersDump);
+    Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
+    Cb.jnz(SkipRegistersDump);
 
     // dump all the ymm register
-    for (int i = 0; i < (int)this->registerCount(); i++) {
-      cb.vmovapd(zmmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Zmm(i));
+    for (unsigned I = 0; I < registerCount(); I++) {
+      Cb.vmovapd(zmmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Zmm(I));
     }
 
     // set read flag
-    cb.mov(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait));
+    Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
 
-    cb.bind(SkipRegistersDump);
+    Cb.bind(SkipRegistersDump);
   }
 
   if (ErrorDetection) {
-    this->emitErrorDetectionCode<decltype(iter_reg), Zmm>(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
+    emitErrorDetectionCode<decltype(IterReg), Zmm>(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2);
   }
 
-  cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH));
-  cb.jnz(Loop);
+  Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH));
+  Cb.jnz(Loop);
 
-  cb.bind(FunctionExit);
+  Cb.bind(FunctionExit);
 
-  cb.movq(rax, iter_reg);
+  Cb.movq(rax, IterReg);
 
-  cb.emitEpilog(frame);
+  Cb.emitEpilog(Frame);
 
-  cb.finalize();
+  Cb.finalize();
 
   // String sb;
   // cb.dump(sb);
 
-  Error err = this->Rt.add(&this->LoadFunction, &code);
-  if (err) {
+  Error Err = Rt.add(&LoadFunction, &Code);
+  if (Err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
   }
 
   // skip if we could not determine cache size
-  if (l1i_cache_size != 0) {
-    auto loopSize = code.labelOffset(FunctionExit) - code.labelOffset(Loop);
-    auto instructionCachePercentage = 100 * loopSize / l1i_cache_size;
+  if (L1iCacheSize != 0) {
+    auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop);
+    auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize;
 
-    if (loopSize > l1i_cache_size) {
+    if (LoopSize > L1iCacheSize) {
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage
+    workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
-    workerLog::trace() << "Sequence size: " << sequence.size();
-    workerLog::trace() << "Repetition count: " << repetitions;
+    workerLog::trace() << "Sequence size: " << Sequence.size();
+    workerLog::trace() << "Repetition count: " << Repetitions;
   }
 
   return EXIT_SUCCESS;
@@ -405,8 +402,8 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
 auto AVX512Payload::getAvailableInstructions() const -> std::list<std::string> {
   std::list<std::string> Instructions;
 
-  transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(Instructions),
-            [](const auto& item) { return item.first; });
+  transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions),
+            [](const auto& Item) { return Item.first; });
 
   return Instructions;
 }
@@ -414,3 +411,5 @@ auto AVX512Payload::getAvailableInstructions() const -> std::list<std::string> {
 void AVX512Payload::init(uint64_t* MemoryAddr, uint64_t BufferSize) {
   X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
+
+} // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
index f3905ff0..8ce30a9f 100644
--- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
@@ -19,434 +19,429 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <cstdint>
 #include <firestarter/Environment/X86/Payload/AVXPayload.hpp>
-#include <firestarter/Logging/Log.hpp>
 
-#include <iterator>
-#include <utility>
+namespace firestarter::environment::x86::payload {
 
-using namespace firestarter::environment::x86::payload;
-using namespace asmjit;
-using namespace asmjit::x86;
+auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
+                                unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
+                                unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+                                bool ErrorDetection) -> int {
+  using namespace asmjit;
+  using namespace asmjit::x86;
 
-int AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion,
-                               unsigned instructionCacheSize, std::list<unsigned> const& dataCacheBufferSize,
-                               unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-                               bool errorDetection) {
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto sequence = this->generateSequence(proportion);
-  auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
+  auto Sequence = generateSequence(Proportion);
+  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
 
   // compute count of flops and memory access for performance report
-  unsigned flops = 0;
-  unsigned bytes = 0;
+  Flops = 0;
+  Bytes = 0;
 
-  for (const auto& item : sequence) {
-    auto it = this->InstructionFlops.find(item);
+  for (const auto& Item : Sequence) {
+    auto It = InstructionFlops.find(Item);
 
-    if (it == this->InstructionFlops.end()) {
-      workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
+    if (It == InstructionFlops.end()) {
+      workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
-    flops += it->second;
+    Flops += It->second;
 
-    it = this->InstructionMemory.find(item);
+    It = InstructionMemory.find(Item);
 
-    if (it != this->InstructionMemory.end()) {
-      bytes += it->second;
+    if (It != InstructionMemory.end()) {
+      Bytes += It->second;
     }
   }
 
-  this->Flops = repetitions * flops;
-  this->Bytes = repetitions * bytes;
-  this->Instructions = repetitions * sequence.size() * 2 + 4;
+  Flops *= Repetitions;
+  Bytes *= Repetitions;
+  Instructions = Repetitions * Sequence.size() * 2 + 4;
 
   // calculate the buffer sizes
-  auto l1i_cache_size = instructionCacheSize / thread;
-  auto dataCacheBufferSizeIterator = dataCacheBufferSize.begin();
-  auto l1_size = *dataCacheBufferSizeIterator / thread;
-  std::advance(dataCacheBufferSizeIterator, 1);
-  auto l2_size = *dataCacheBufferSizeIterator / thread;
-  std::advance(dataCacheBufferSizeIterator, 1);
-  auto l3_size = *dataCacheBufferSizeIterator / thread;
-  auto ram_size = ramBufferSize / thread;
+  const auto L1iCacheSize = InstructionCacheSize / Thread;
+  auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin();
+  const auto L1Size = *DataCacheBufferSizeIterator / Thread;
+  std::advance(DataCacheBufferSizeIterator, 1);
+  const auto L2Size = *DataCacheBufferSizeIterator / Thread;
+  std::advance(DataCacheBufferSizeIterator, 1);
+  const auto L3Size = *DataCacheBufferSizeIterator / Thread;
+  const auto RamSize = RamBufferSize / Thread;
 
   // calculate the reset counters for the buffers
-  auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
-  auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
-  auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
+  const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread);
+  const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
+  const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
-  CodeHolder code;
-  code.init(this->Rt.environment());
+  CodeHolder Code;
+  Code.init(Rt.environment());
 
-  if (nullptr != this->LoadFunction) {
-    this->Rt.release(&this->LoadFunction);
+  if (nullptr != LoadFunction) {
+    Rt.release(&LoadFunction);
   }
 
-  Builder cb(&code);
-  cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
+  Builder Cb(&Code);
+  Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
                           asmjit::DiagnosticOptions::kValidateIntermediate);
 
-  auto pointer_reg = rax;
-  auto l1_addr = rbx;
-  auto l2_addr = rcx;
-  auto l3_addr = rdx;
-  auto ram_addr = rdi;
-  auto l2_count_reg = r8;
-  auto l3_count_reg = r9;
-  auto ram_count_reg = r10;
-  auto temp_reg = r11;
-  auto temp_reg2 = rbp;
-  auto offset_reg = r12;
-  auto addrHigh_reg = r13;
-  auto iter_reg = r14;
-  auto shift_regs = 6;
-  auto add_regs = 10;
-  auto trans_regs = 6;
-
-  FuncDetail func;
-  func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl),
-            this->Rt.environment());
-
-  FuncFrame frame;
-  frame.init(func);
+  const auto PointerReg = rax;
+  const auto L1Addr = rbx;
+  const auto L2Addr = rcx;
+  const auto L3Addr = rdx;
+  const auto RamAddr = rdi;
+  const auto L2CountReg = r8;
+  const auto L3CountReg = r9;
+  const auto RamCountReg = r10;
+  const auto TempReg = r11;
+  const auto TempReg2 = rbp;
+  const auto OffsetReg = r12;
+  const auto AddrHighReg = r13;
+  const auto IterReg = r14;
+  const auto ShiftRegs = 6;
+  const auto AddRegs = 10;
+  const auto TransRegs = 6;
+
+  FuncDetail Func;
+  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl), Rt.environment());
+
+  FuncFrame Frame;
+  Frame.init(Func);
 
   // make xmm registers dirty
-  for (int i = 0; i < 16; i++) {
-    frame.addDirtyRegs(Ymm(i));
+  for (int I = 0; I < 16; I++) {
+    Frame.addDirtyRegs(Ymm(I));
   }
   // make mmx registers dirty
-  for (int i = 0; i < 8; i++) {
-    frame.addDirtyRegs(Mm(i));
+  for (int I = 0; I < 8; I++) {
+    Frame.addDirtyRegs(Mm(I));
   }
   // make all other used registers dirty except RAX
-  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg,
-                     temp_reg2, offset_reg, addrHigh_reg, iter_reg);
+  Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg,
+                     AddrHighReg, IterReg);
 
-  FuncArgsAssignment args(&func);
-  args.assignAll(pointer_reg, addrHigh_reg, iter_reg);
-  args.updateFuncFrame(frame);
-  frame.finalize();
+  FuncArgsAssignment Args(&Func);
+  Args.assignAll(PointerReg, AddrHighReg, IterReg);
+  Args.updateFuncFrame(Frame);
+  Frame.finalize();
 
-  cb.emitProlog(frame);
-  cb.emitArgsAssignment(frame, args);
+  Cb.emitProlog(Frame);
+  Cb.emitArgsAssignment(Frame, Args);
 
   // stop right away if low load is selected
-  auto FunctionExit = cb.newLabel();
+  auto FunctionExit = Cb.newLabel();
 
-  cb.mov(temp_reg, ptr_64(addrHigh_reg));
-  cb.test(temp_reg, temp_reg);
-  cb.jz(FunctionExit);
+  Cb.mov(TempReg, ptr_64(AddrHighReg));
+  Cb.test(TempReg, TempReg);
+  Cb.jz(FunctionExit);
 
-  cb.mov(offset_reg,
+  Cb.mov(OffsetReg,
          Imm(64)); // increment after each cache/memory access
 
   // Initialize AVX-Registers for Addition
-  auto add_start = 0;
-  auto add_end = add_regs - 1;
-  auto trans_start = add_regs;
-  auto trans_end = add_regs + trans_regs - 1;
-  if (add_regs > 0) {
-    for (int i = add_start; i <= add_end; i++) {
-      cb.vmovapd(Ymm(i), ymmword_ptr(pointer_reg, 32 * i));
+  auto AddStart = 0;
+  auto AddEnd = AddRegs - 1;
+  auto TransStart = AddRegs;
+  auto TransEnd = AddRegs + TransRegs - 1;
+  if (AddRegs > 0) {
+    for (int I = AddStart; I <= AddEnd; I++) {
+      Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 32 * I));
     }
   }
 
   // Initialize MMX-Registers for shift operations
-  auto shift_start = 0;
-  auto shift_end = shift_regs - 1;
-  if (shift_regs > 1) {
-    cb.mov(temp_reg, Imm(0x5555555555555555));
-    cb.movq(Mm(shift_start), temp_reg);
-    for (int i = shift_start + 1; i <= shift_end; i++) {
-      cb.movq(Mm(i), Mm(shift_start));
+  auto ShiftStart = 0;
+  auto ShiftEnd = ShiftRegs - 1;
+  if (ShiftRegs > 1) {
+    Cb.mov(TempReg, Imm(0x5555555555555555));
+    Cb.movq(Mm(ShiftStart), TempReg);
+    for (int I = ShiftStart + 1; I <= ShiftEnd; I++) {
+      Cb.movq(Mm(I), Mm(ShiftStart));
     }
   }
 
   // Initialize AVX-Registers for Transfer-Operations
-  if (trans_regs > 0) {
-    if (trans_start % 2 == 0) {
-      cb.mov(temp_reg, Imm(0x0F0F0F0F0F0F0F0F));
+  if (TransRegs > 0) {
+    if (TransStart % 2 == 0) {
+      Cb.mov(TempReg, Imm(0x0F0F0F0F0F0F0F0F));
     } else {
-      cb.mov(temp_reg, Imm(0xF0F0F0F0F0F0F0F0));
+      Cb.mov(TempReg, Imm(0xF0F0F0F0F0F0F0F0));
     }
-    cb.pinsrq(Xmm(trans_start), temp_reg, Imm(0));
-    cb.pinsrq(Xmm(trans_start), temp_reg, Imm(1));
-    cb.vinsertf128(Ymm(trans_start), Ymm(trans_start), Xmm(trans_start), Imm(1));
-    for (int i = trans_start + 1; i <= trans_end; i++) {
-      if (i % 2 == 0) {
-        cb.shr(temp_reg, Imm(4));
+    Cb.pinsrq(Xmm(TransStart), TempReg, Imm(0));
+    Cb.pinsrq(Xmm(TransStart), TempReg, Imm(1));
+    Cb.vinsertf128(Ymm(TransStart), Ymm(TransStart), Xmm(TransStart), Imm(1));
+    for (int I = TransStart + 1; I <= TransEnd; I++) {
+      if (I % 2 == 0) {
+        Cb.shr(TempReg, Imm(4));
       } else {
-        cb.shl(temp_reg, Imm(4));
+        Cb.shl(TempReg, Imm(4));
       }
-      cb.pinsrq(Xmm(i), temp_reg, Imm(0));
-      cb.pinsrq(Xmm(i), temp_reg, Imm(1));
-      cb.vinsertf128(Ymm(i), Ymm(i), Xmm(i), Imm(1));
+      Cb.pinsrq(Xmm(I), TempReg, Imm(0));
+      Cb.pinsrq(Xmm(I), TempReg, Imm(1));
+      Cb.vinsertf128(Ymm(I), Ymm(I), Xmm(I), Imm(1));
     }
   }
 
-  cb.mov(l1_addr, pointer_reg); // address for L1-buffer
-  cb.mov(l2_addr, pointer_reg);
-  cb.add(l2_addr, Imm(l1_size)); // address for L2-buffer
-  cb.mov(l3_addr, pointer_reg);
-  cb.add(l3_addr, Imm(l2_size)); // address for L3-buffer
-  cb.mov(ram_addr, pointer_reg);
-  cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer
-  cb.mov(l2_count_reg, Imm(l2_loop_count));
-  workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop ("
-                     << l2_size / 1024 << ") KiB";
-  cb.mov(l3_count_reg, Imm(l3_loop_count));
-  workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop ("
-                     << l3_size / 1024 << ") KiB";
-  cb.mov(ram_count_reg, Imm(ram_loop_count));
-  workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop ("
-                     << ram_size / 1024 << ") KiB";
-
-  cb.align(AlignMode::kCode, 64);
-
-  auto Loop = cb.newLabel();
-  cb.bind(Loop);
-
-  auto left = false;
-  auto shift_dst = shift_start;
-  auto add_dest = add_start + 1;
-  auto mov_dst = trans_start;
-  auto mov_src = mov_dst + 1;
-  unsigned l1_offset = 0;
-
-#define L1_INCREMENT()                                                                                                 \
-  l1_offset += 64;                                                                                                     \
-  if (l1_offset < l1_size * 0.5) {                                                                                     \
-    cb.add(l1_addr, offset_reg);                                                                                       \
-  } else {                                                                                                             \
-    l1_offset = 0;                                                                                                     \
-    cb.mov(l1_addr, pointer_reg);                                                                                      \
-  }
-
-#define L2_INCREMENT() cb.add(l2_addr, offset_reg);
-
-#define L3_INCREMENT() cb.add(l3_addr, offset_reg)
-
-#define RAM_INCREMENT() cb.add(ram_addr, offset_reg)
-
-  for (unsigned count = 0; count < repetitions; count++) {
-    for (const auto& item : sequence) {
-      if (item == "REG") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        cb.vmovdqa(Ymm(mov_dst), Ymm(mov_src));
-      } else if (item == "L1_L") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32));
-        L1_INCREMENT();
-      } else if (item == "L1_S") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
-        cb.vmovapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest));
-        L1_INCREMENT();
-        this->Instructions++;
-      } else if (item == "L1_LS") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32));
-        cb.vmovapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest));
-        L1_INCREMENT();
-        this->Instructions++;
-      } else if (item == "L2_L") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l2_addr, 64));
-        L2_INCREMENT();
-      } else if (item == "L2_S") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
-        cb.vmovapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest));
-        L2_INCREMENT();
-        this->Instructions++;
-      } else if (item == "L2_LS") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l2_addr, 64));
-        cb.vmovapd(xmmword_ptr(l2_addr, 96), Xmm(add_dest));
-        L2_INCREMENT();
-        this->Instructions++;
-      } else if (item == "L3_L") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l3_addr, 64));
-        L3_INCREMENT();
-      } else if (item == "L3_S") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
-        cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
-        L3_INCREMENT();
-        this->Instructions++;
-      } else if (item == "L3_LS") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l3_addr, 64));
-        cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
-        L3_INCREMENT();
-        this->Instructions++;
-      } else if (item == "L3_P") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32));
-        cb.prefetcht0(ptr(l3_addr));
-        L3_INCREMENT();
-        this->Instructions++;
-      } else if (item == "RAM_L") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(ram_addr, 64));
-        RAM_INCREMENT();
-      } else if (item == "RAM_S") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
-        cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
-        RAM_INCREMENT();
-        this->Instructions++;
-      } else if (item == "RAM_LS") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l3_addr, 64));
-        cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
-        RAM_INCREMENT();
-        this->Instructions++;
-      } else if (item == "RAM_P") {
-        cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32));
-        cb.prefetcht2(ptr(ram_addr));
-        RAM_INCREMENT();
-        this->Instructions++;
+  Cb.mov(L1Addr, PointerReg); // address for L1-buffer
+  Cb.mov(L2Addr, PointerReg);
+  Cb.add(L2Addr, Imm(L1Size)); // address for L2-buffer
+  Cb.mov(L3Addr, PointerReg);
+  Cb.add(L3Addr, Imm(L2Size)); // address for L3-buffer
+  Cb.mov(RamAddr, PointerReg);
+  Cb.add(RamAddr, Imm(L3Size)); // address for RAM-buffer
+  Cb.mov(L2CountReg, Imm(L2LoopCount));
+  workerLog::trace() << "reset counter for L2-buffer with " << L2LoopCount << " cache line accesses per loop ("
+                     << L2Size / 1024 << ") KiB";
+  Cb.mov(L3CountReg, Imm(L3LoopCount));
+  workerLog::trace() << "reset counter for L3-buffer with " << L3LoopCount << " cache line accesses per loop ("
+                     << L3Size / 1024 << ") KiB";
+  Cb.mov(RamCountReg, Imm(RamLoopCount));
+  workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop ("
+                     << RamSize / 1024 << ") KiB";
+
+  Cb.align(AlignMode::kCode, 64);
+
+  auto Loop = Cb.newLabel();
+  Cb.bind(Loop);
+
+  auto Left = false;
+  auto ShiftDest = ShiftStart;
+  auto AddDest = AddStart + 1;
+  auto MovDest = TransStart;
+  auto MovSrc = MovDest + 1;
+  unsigned L1Offset = 0;
+
+  const auto L1Increment = [&Cb, &L1Offset, &L1Size, &L1Addr, &OffsetReg, &PointerReg]() {
+    L1Offset += 64;
+    if (L1Offset < L1Size * 0.5) {
+      Cb.add(L1Addr, OffsetReg);
+    } else {
+      L1Offset = 0;
+      Cb.mov(L1Addr, PointerReg);
+    }
+  };
+  const auto L2Increment = [&Cb, &L2Addr, &OffsetReg]() { Cb.add(L2Addr, OffsetReg); };
+  const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); };
+  const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); };
+
+  for (unsigned Count = 0; Count < Repetitions; Count++) {
+    for (const auto& Item : Sequence) {
+      if (Item == "REG") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
+        Cb.vmovdqa(Ymm(MovDest), Ymm(MovSrc));
+      } else if (Item == "L1_L") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L1Addr, 32));
+        L1Increment();
+      } else if (Item == "L1_S") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
+        Cb.vmovapd(xmmword_ptr(L1Addr, 32), Xmm(AddDest));
+        L1Increment();
+        Instructions++;
+      } else if (Item == "L1_LS") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L1Addr, 32));
+        Cb.vmovapd(xmmword_ptr(L1Addr, 64), Xmm(AddDest));
+        L1Increment();
+        Instructions++;
+      } else if (Item == "L2_L") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L2Addr, 64));
+        L2Increment();
+      } else if (Item == "L2_S") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
+        Cb.vmovapd(xmmword_ptr(L2Addr, 64), Xmm(AddDest));
+        L2Increment();
+        Instructions++;
+      } else if (Item == "L2_LS") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L2Addr, 64));
+        Cb.vmovapd(xmmword_ptr(L2Addr, 96), Xmm(AddDest));
+        L2Increment();
+        Instructions++;
+      } else if (Item == "L3_L") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L3Addr, 64));
+        L3Increment();
+      } else if (Item == "L3_S") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
+        Cb.vmovapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest));
+        L3Increment();
+        Instructions++;
+      } else if (Item == "L3_LS") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L3Addr, 64));
+        Cb.vmovapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest));
+        L3Increment();
+        Instructions++;
+      } else if (Item == "L3_P") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L1Addr, 32));
+        Cb.prefetcht0(ptr(L3Addr));
+        L3Increment();
+        Instructions++;
+      } else if (Item == "RAM_L") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(RamAddr, 64));
+        RamIncrement();
+      } else if (Item == "RAM_S") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
+        Cb.vmovapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest));
+        RamIncrement();
+        Instructions++;
+      } else if (Item == "RAM_LS") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L3Addr, 64));
+        Cb.vmovapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest));
+        RamIncrement();
+        Instructions++;
+      } else if (Item == "RAM_P") {
+        Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L1Addr, 32));
+        Cb.prefetcht2(ptr(RamAddr));
+        RamIncrement();
+        Instructions++;
       } else {
-        workerLog::error() << "Instruction group " << item << " not found in " << this->name() << ".";
+        workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
         return EXIT_FAILURE;
       }
 
-      if (shift_regs > 1) {
-        this->Instructions++;
-        if (left) {
-          cb.psrlw(Mm(shift_start + (shift_dst - shift_start + 3) % shift_regs), Mm(shift_dst));
+      if (ShiftRegs > 1) {
+        Instructions++;
+        if (Left) {
+          Cb.psrlw(Mm(ShiftStart + ((ShiftDest - ShiftStart + 3) % ShiftRegs)), Mm(ShiftDest));
         } else {
-          cb.psllw(Mm(shift_start + (shift_dst - shift_start + 3) % shift_regs), Mm(shift_dst));
+          Cb.psllw(Mm(ShiftStart + ((ShiftDest - ShiftStart + 3) % ShiftRegs)), Mm(ShiftDest));
         }
       }
 
-      add_dest++;
-      if (add_dest > add_end) {
+      AddDest++;
+      if (AddDest > AddEnd) {
         // DO NOT REMOVE the + 1. It serves for the good of ymm0. If it was to
         // be overriden, the values in the other registers would rise up to inf.
-        add_dest = add_start + 1;
+        AddDest = AddStart + 1;
       }
-      mov_dst++;
-      if (mov_dst > trans_end) {
-        mov_dst = trans_start;
+      MovDest++;
+      if (MovDest > TransEnd) {
+        MovDest = TransStart;
       }
-      mov_src++;
-      if (mov_src > trans_end) {
-        mov_src = trans_start;
+      MovSrc++;
+      if (MovSrc > TransEnd) {
+        MovSrc = TransStart;
       }
-      if (shift_regs > 1) {
-        shift_dst++;
-        if (shift_dst > shift_end) {
-          shift_dst = shift_start;
-          left = !left;
+      if (ShiftRegs > 1) {
+        ShiftDest++;
+        if (ShiftDest > ShiftEnd) {
+          ShiftDest = ShiftStart;
+          Left = !Left;
         }
       }
     }
   }
 
-  if (this->getRAMSequenceCount(sequence) > 0) {
+  if (getRAMSequenceCount(Sequence) > 0) {
     // reset RAM counter
-    auto NoRamReset = cb.newLabel();
-
-    cb.sub(ram_count_reg, Imm(1));
-    cb.jnz(NoRamReset);
-    cb.mov(ram_count_reg, Imm(ram_loop_count));
-    cb.mov(ram_addr, pointer_reg);
-    cb.add(ram_addr, Imm(l3_size));
-    cb.bind(NoRamReset);
+    auto NoRamReset = Cb.newLabel();
+
+    Cb.sub(RamCountReg, Imm(1));
+    Cb.jnz(NoRamReset);
+    Cb.mov(RamCountReg, Imm(RamLoopCount));
+    Cb.mov(RamAddr, PointerReg);
+    Cb.add(RamAddr, Imm(L3Size));
+    Cb.bind(NoRamReset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  if (this->getL2SequenceCount(sequence) > 0) {
+  if (getL2SequenceCount(Sequence) > 0) {
     // reset L2-Cache counter
-    auto NoL2Reset = cb.newLabel();
-
-    cb.sub(l2_count_reg, Imm(1));
-    cb.jnz(NoL2Reset);
-    cb.mov(l2_count_reg, Imm(l2_loop_count));
-    cb.mov(l2_addr, pointer_reg);
-    cb.add(l2_addr, Imm(l1_size));
-    cb.bind(NoL2Reset);
+    auto NoL2Reset = Cb.newLabel();
+
+    Cb.sub(L2CountReg, Imm(1));
+    Cb.jnz(NoL2Reset);
+    Cb.mov(L2CountReg, Imm(L2LoopCount));
+    Cb.mov(L2Addr, PointerReg);
+    Cb.add(L2Addr, Imm(L1Size));
+    Cb.bind(NoL2Reset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  if (this->getL3SequenceCount(sequence) > 0) {
+  if (getL3SequenceCount(Sequence) > 0) {
     // reset L3-Cache counter
-    auto NoL3Reset = cb.newLabel();
-
-    cb.sub(l3_count_reg, Imm(1));
-    cb.jnz(NoL3Reset);
-    cb.mov(l3_count_reg, Imm(l3_loop_count));
-    cb.mov(l3_addr, pointer_reg);
-    cb.add(l3_addr, Imm(l2_size));
-    cb.bind(NoL3Reset);
+    auto NoL3Reset = Cb.newLabel();
+
+    Cb.sub(L3CountReg, Imm(1));
+    Cb.jnz(NoL3Reset);
+    Cb.mov(L3CountReg, Imm(L3LoopCount));
+    Cb.mov(L3Addr, PointerReg);
+    Cb.add(L3Addr, Imm(L2Size));
+    Cb.bind(NoL3Reset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  cb.inc(iter_reg); // increment iteration counter
-  cb.mov(l1_addr, pointer_reg);
+  Cb.inc(IterReg); // increment iteration counter
+  Cb.mov(L1Addr, PointerReg);
 
-  if (dumpRegisters) {
-    auto SkipRegistersDump = cb.newLabel();
+  if (DumpRegisters) {
+    auto SkipRegistersDump = Cb.newLabel();
 
-    cb.test(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait));
-    cb.jnz(SkipRegistersDump);
+    Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
+    Cb.jnz(SkipRegistersDump);
 
     // dump all the ymm register
-    for (int i = 0; i < (int)this->registerCount(); i++) {
-      cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i));
+    for (unsigned I = 0; I < registerCount(); I++) {
+      Cb.vmovapd(ymmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Ymm(I));
     }
 
     // set read flag
-    cb.mov(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait));
+    Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
 
-    cb.bind(SkipRegistersDump);
+    Cb.bind(SkipRegistersDump);
   }
 
-  if (errorDetection) {
-    this->emitErrorDetectionCode<decltype(iter_reg), Ymm>(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
+  if (ErrorDetection) {
+    emitErrorDetectionCode<decltype(IterReg), Ymm>(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2);
   }
 
-  cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH));
-  cb.jnz(Loop);
+  Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH));
+  Cb.jnz(Loop);
 
-  cb.bind(FunctionExit);
+  Cb.bind(FunctionExit);
 
-  cb.mov(rax, iter_reg); // restore iteration counter
+  Cb.mov(rax, IterReg); // restore iteration counter
 
-  cb.emitEpilog(frame);
+  Cb.emitEpilog(Frame);
 
-  cb.finalize();
+  Cb.finalize();
 
   // String sb;
   // cb.dump(sb);
 
-  Error err = this->Rt.add(&this->LoadFunction, &code);
-  if (err) {
+  Error Err = Rt.add(&LoadFunction, &Code);
+  if (Err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
   }
 
   // skip if we could not determine cache size
-  if (l1i_cache_size != 0) {
-    auto loopSize = code.labelOffset(FunctionExit) - code.labelOffset(Loop);
-    auto instructionCachePercentage = 100 * loopSize / l1i_cache_size;
+  if (L1iCacheSize != 0) {
+    auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop);
+    auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize;
 
-    if (loopSize > l1i_cache_size) {
+    if (LoopSize > L1iCacheSize) {
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage
+    workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
-    workerLog::trace() << "Sequence size: " << sequence.size();
-    workerLog::trace() << "Repetition count: " << repetitions;
+    workerLog::trace() << "Sequence size: " << Sequence.size();
+    workerLog::trace() << "Repetition count: " << Repetitions;
   }
 
   return EXIT_SUCCESS;
 }
 
-std::list<std::string> AVXPayload::getAvailableInstructions() const {
-  std::list<std::string> instructions;
+auto AVXPayload::getAvailableInstructions() const -> std::list<std::string> {
+  std::list<std::string> Instructions;
 
-  transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions),
-            [](const auto& item) { return item.first; });
+  transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions),
+            [](const auto& Item) { return Item.first; });
 
-  return instructions;
+  return Instructions;
 }
 
-void AVXPayload::init(uint64_t* memoryAddr, uint64_t bufferSize) {
-  X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10, 1.654738925401e-15);
+void AVXPayload::init(uint64_t* MemoryAddr, uint64_t BufferSize) {
+  X86Payload::init(MemoryAddr, BufferSize, 1.654738925401e-10, 1.654738925401e-15);
 }
+
+} // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
index 9df404e2..7dc06a3f 100644
--- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
@@ -19,412 +19,397 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <cstdint>
 #include <firestarter/Environment/X86/Payload/FMA4Payload.hpp>
-#include <firestarter/Logging/Log.hpp>
 
-#include <iterator>
-#include <utility>
+namespace firestarter::environment::x86::payload {
 
-using namespace firestarter::environment::x86::payload;
-using namespace asmjit;
-using namespace asmjit::x86;
+auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
+                                 unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
+                                 unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+                                 bool ErrorDetection) -> int {
+  using namespace asmjit;
+  using namespace asmjit::x86;
 
-int FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion,
-                                unsigned instructionCacheSize, std::list<unsigned> const& dataCacheBufferSize,
-                                unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-                                bool errorDetection) {
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto sequence = this->generateSequence(proportion);
-  auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
+  auto Sequence = generateSequence(Proportion);
+  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
 
   // compute count of flops and memory access for performance report
-  unsigned flops = 0;
-  unsigned bytes = 0;
+  Flops = 0;
+  Bytes = 0;
 
-  for (const auto& item : sequence) {
-    auto it = this->InstructionFlops.find(item);
+  for (const auto& Item : Sequence) {
+    auto It = InstructionFlops.find(Item);
 
-    if (it == this->InstructionFlops.end()) {
-      workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
+    if (It == InstructionFlops.end()) {
+      workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
-    flops += it->second;
+    Flops += It->second;
 
-    it = this->InstructionMemory.find(item);
+    It = InstructionMemory.find(Item);
 
-    if (it != this->InstructionMemory.end()) {
-      bytes += it->second;
+    if (It != InstructionMemory.end()) {
+      Bytes += It->second;
     }
   }
 
-  this->Flops = repetitions * flops;
-  this->Bytes = repetitions * bytes;
-  this->Instructions = repetitions * sequence.size() * 4 + 6;
+  Flops *= Repetitions;
+  Bytes *= Repetitions;
+  Instructions = Repetitions * Sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
-  auto l1i_cache_size = instructionCacheSize / thread;
-  auto dataCacheBufferSizeIterator = dataCacheBufferSize.begin();
-  auto l1_size = *dataCacheBufferSizeIterator / thread;
-  std::advance(dataCacheBufferSizeIterator, 1);
-  auto l2_size = *dataCacheBufferSizeIterator / thread;
-  std::advance(dataCacheBufferSizeIterator, 1);
-  auto l3_size = *dataCacheBufferSizeIterator / thread;
-  auto ram_size = ramBufferSize / thread;
+  const auto L1iCacheSize = InstructionCacheSize / Thread;
+  auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin();
+  const auto L1Size = *DataCacheBufferSizeIterator / Thread;
+  std::advance(DataCacheBufferSizeIterator, 1);
+  const auto L2Size = *DataCacheBufferSizeIterator / Thread;
+  std::advance(DataCacheBufferSizeIterator, 1);
+  const auto L3Size = *DataCacheBufferSizeIterator / Thread;
+  const auto RamSize = RamBufferSize / Thread;
 
   // calculate the reset counters for the buffers
-  auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
-  auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
-  auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
+  const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread);
+  const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
+  const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
-  CodeHolder code;
-  code.init(this->Rt.environment());
+  CodeHolder Code;
+  Code.init(Rt.environment());
 
-  if (nullptr != this->LoadFunction) {
-    this->Rt.release(&this->LoadFunction);
+  if (nullptr != LoadFunction) {
+    Rt.release(&LoadFunction);
   }
 
-  Builder cb(&code);
-  cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
+  Builder Cb(&Code);
+  Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
                           asmjit::DiagnosticOptions::kValidateIntermediate);
 
-  auto pointer_reg = rax;
-  auto l1_addr = rbx;
-  auto l2_addr = rcx;
-  auto l3_addr = r8;
-  auto ram_addr = r9;
-  auto l2_count_reg = r10;
-  auto l3_count_reg = r11;
-  auto ram_count_reg = r12;
-  auto temp_reg = r13;
-  auto temp_reg2 = rbp;
-  auto offset_reg = r14;
-  auto addrHigh_reg = r15;
-  auto iter_reg = mm0;
-  auto shift_reg = std::vector<Gp>({rdi, rsi, rdx});
-  auto shift_reg32 = std::vector<Gp>({edi, esi, edx});
-  auto nr_shift_regs = 3;
-  auto mul_regs = 2;
-  auto add_regs = 9;
-  auto alt_dst_regs = 3;
-  auto ram_reg = xmm15;
-
-  FuncDetail func;
-  func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl),
-            this->Rt.environment());
-
-  FuncFrame frame;
-  frame.init(func);
+  const auto PointerReg = rax;
+  const auto L1Addr = rbx;
+  const auto L2Addr = rcx;
+  const auto L3Addr = r8;
+  const auto RamAddr = r9;
+  const auto L2CountReg = r10;
+  const auto L3CountReg = r11;
+  const auto RamCountReg = r12;
+  const auto TempReg = r13;
+  const auto TempReg2 = rbp;
+  const auto OffsetReg = r14;
+  const auto AddrHighReg = r15;
+  const auto IterReg = mm0;
+  const auto ShiftReg = std::vector<Gp>({rdi, rsi, rdx});
+  const auto ShiftReg32 = std::vector<Gp>({edi, esi, edx});
+  const auto NbShiftRegs = 3;
+  const auto MulRegs = 2;
+  const auto AddRegs = 9;
+  const auto AltDestRegs = 3;
+  const auto RamReg = xmm15;
+
+  FuncDetail Func;
+  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl), Rt.environment());
+
+  FuncFrame Frame;
+  Frame.init(Func);
 
   // make (x|y)mm registers dirty
-  for (int i = 0; i < 16; i++) {
-    frame.addDirtyRegs(Ymm(i));
+  for (int I = 0; I < 16; I++) {
+    Frame.addDirtyRegs(Ymm(I));
   }
-  for (int i = 0; i < 8; i++) {
-    frame.addDirtyRegs(Mm(i));
+  for (int I = 0; I < 8; I++) {
+    Frame.addDirtyRegs(Mm(I));
   }
   // make all other used registers dirty except RAX
-  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg,
-                     temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr);
-  for (const auto& reg : shift_reg) {
-    frame.addDirtyRegs(reg);
+  Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg,
+                     AddrHighReg, IterReg, RamAddr);
+  for (const auto& Reg : ShiftReg) {
+    Frame.addDirtyRegs(Reg);
   }
 
-  FuncArgsAssignment args(&func);
+  FuncArgsAssignment Args(&Func);
   // FIXME: asmjit assigment to mm0 does not seem to be supported
-  args.assignAll(pointer_reg, addrHigh_reg, temp_reg);
-  args.updateFuncFrame(frame);
-  frame.finalize();
+  Args.assignAll(PointerReg, AddrHighReg, TempReg);
+  Args.updateFuncFrame(Frame);
+  Frame.finalize();
 
-  cb.emitProlog(frame);
-  cb.emitArgsAssignment(frame, args);
+  Cb.emitProlog(Frame);
+  Cb.emitArgsAssignment(Frame, Args);
 
   // FIXME: movq from temp_reg to iter_reg
-  cb.movq(iter_reg, temp_reg);
+  Cb.movq(IterReg, TempReg);
 
   // stop right away if low load is selected
-  auto FunctionExit = cb.newLabel();
+  auto FunctionExit = Cb.newLabel();
 
-  cb.mov(temp_reg, ptr_64(addrHigh_reg));
-  cb.test(temp_reg, temp_reg);
-  cb.jz(FunctionExit);
+  Cb.mov(TempReg, ptr_64(AddrHighReg));
+  Cb.test(TempReg, TempReg);
+  Cb.jz(FunctionExit);
 
-  cb.mov(offset_reg,
+  Cb.mov(OffsetReg,
          Imm(64)); // increment after each cache/memory access
   // Initialize registers for shift operations
-  for (auto const& reg : shift_reg32) {
-    cb.mov(reg, Imm(0xAAAAAAAA));
+  for (auto const& Reg : ShiftReg32) {
+    Cb.mov(Reg, Imm(0xAAAAAAAA));
   }
   // Initialize AVX-Registers for FMA4 Operations
-  cb.vmovapd(ymm0, ymmword_ptr(pointer_reg));
-  cb.vmovapd(ymm1, ymmword_ptr(pointer_reg));
-  auto add_start = mul_regs;
-  auto add_end = mul_regs + add_regs - 1;
-  auto trans_start = add_regs + mul_regs;
-  auto trans_end = add_regs + mul_regs + alt_dst_regs - 1;
-  for (int i = add_start; i <= trans_end; i++) {
-    cb.vmovapd(Ymm(i), ymmword_ptr(pointer_reg, 256 + i * 32));
+  Cb.vmovapd(ymm0, ymmword_ptr(PointerReg));
+  Cb.vmovapd(ymm1, ymmword_ptr(PointerReg));
+  auto AddStart = MulRegs;
+  auto AddEnd = MulRegs + AddRegs - 1;
+  auto TransStart = AddRegs + MulRegs;
+  auto TransEnd = AddRegs + MulRegs + AltDestRegs - 1;
+  for (int I = AddStart; I <= TransEnd; I++) {
+    Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 256 + I * 32));
   }
-  cb.mov(l1_addr, pointer_reg); // address for L1-buffer
-  cb.mov(l2_addr, pointer_reg);
-  cb.add(l2_addr, Imm(l1_size)); // address for L2-buffer
-  cb.mov(l3_addr, pointer_reg);
-  cb.add(l3_addr, Imm(l2_size)); // address for L3-buffer
-  cb.mov(ram_addr, pointer_reg);
-  cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer
-  cb.mov(l2_count_reg, Imm(l2_loop_count));
-  workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop ("
-                     << l2_size / 1024 << ") KiB";
-  cb.mov(l3_count_reg, Imm(l3_loop_count));
-  workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop ("
-                     << l3_size / 1024 << ") KiB";
-  cb.mov(ram_count_reg, Imm(ram_loop_count));
-  workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop ("
-                     << ram_size / 1024 << ") KiB";
-
-  cb.align(AlignMode::kCode, 64);
-
-  auto Loop = cb.newLabel();
-  cb.bind(Loop);
-
-  auto shift_pos = 0;
-  bool left = false;
-  auto add_dest = add_start + 1;
-  auto mov_dst = trans_start;
-  auto mov_src = mov_dst + 1;
-  unsigned l1_offset = 0;
-
-#define L1_INCREMENT()                                                                                                 \
-  l1_offset += 64;                                                                                                     \
-  if (l1_offset < l1_size * 0.5) {                                                                                     \
-    cb.add(l1_addr, offset_reg);                                                                                       \
-  } else {                                                                                                             \
-    l1_offset = 0;                                                                                                     \
-    cb.mov(l1_addr, pointer_reg);                                                                                      \
-  }
-
-#define L2_INCREMENT() cb.add(l2_addr, offset_reg);
-
-#define L3_INCREMENT() cb.add(l3_addr, offset_reg)
-
-#define RAM_INCREMENT() cb.add(ram_addr, offset_reg)
-
-  for (unsigned count = 0; count < repetitions; count++) {
-    for (const auto& item : sequence) {
-      if (item == "REG") {
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
-                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        cb.vfmaddpd(Xmm(mov_dst), Xmm(mov_dst), xmm1,
-                    Xmm(add_start + (add_dest - add_start + add_regs + 2) % add_regs));
-        cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], temp_reg);
-        mov_dst++;
-      } else if (item == "L1_L") {
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
-                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm1, ymmword_ptr(l1_addr, 32));
-        L1_INCREMENT();
-      } else if (item == "L1_S") {
-        cb.vmovapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest));
-        cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm0,
-                    Ymm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        L1_INCREMENT();
-      } else if (item == "L1_LS") {
-        cb.vmovapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest));
-        cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 32));
-        L1_INCREMENT();
-      } else if (item == "L2_L") {
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
-                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm1, xmmword_ptr(l2_addr, 64));
-        L2_INCREMENT();
-      } else if (item == "L2_S") {
-        cb.vmovapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest));
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
-                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        L2_INCREMENT();
-      } else if (item == "L2_LS") {
-        cb.vmovapd(xmmword_ptr(l2_addr, 96), Xmm(add_dest));
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l2_addr, 64));
-        L2_INCREMENT();
-      } else if (item == "L3_L") {
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
-                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm1, xmmword_ptr(l3_addr, 64));
-        L3_INCREMENT();
-      } else if (item == "L3_S") {
-        cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
-                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        L3_INCREMENT();
-      } else if (item == "L3_LS") {
-        cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l3_addr, 64));
-        L3_INCREMENT();
-      } else if (item == "L3_P") {
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l1_addr, 32));
-        cb.prefetcht2(ptr(l3_addr));
-        L3_INCREMENT();
-      } else if (item == "RAM_L") {
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
-                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        cb.vfmaddpd(ram_reg, ram_reg, xmm1, xmmword_ptr(ram_addr, 64));
-        RAM_INCREMENT();
-      } else if (item == "RAM_S") {
-        cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0,
-                    Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        RAM_INCREMENT();
-      } else if (item == "RAM_LS") {
-        cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(ram_addr, 32));
-        RAM_INCREMENT();
-      } else if (item == "RAM_P") {
-        cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l1_addr, 32));
-        cb.prefetcht2(ptr(ram_addr));
-        RAM_INCREMENT();
+  Cb.mov(L1Addr, PointerReg); // address for L1-buffer
+  Cb.mov(L2Addr, PointerReg);
+  Cb.add(L2Addr, Imm(L1Size)); // address for L2-buffer
+  Cb.mov(L3Addr, PointerReg);
+  Cb.add(L3Addr, Imm(L2Size)); // address for L3-buffer
+  Cb.mov(RamAddr, PointerReg);
+  Cb.add(RamAddr, Imm(L3Size)); // address for RAM-buffer
+  Cb.mov(L2CountReg, Imm(L2LoopCount));
+  workerLog::trace() << "reset counter for L2-buffer with " << L2LoopCount << " cache line accesses per loop ("
+                     << L2Size / 1024 << ") KiB";
+  Cb.mov(L3CountReg, Imm(L3LoopCount));
+  workerLog::trace() << "reset counter for L3-buffer with " << L3LoopCount << " cache line accesses per loop ("
+                     << L3Size / 1024 << ") KiB";
+  Cb.mov(RamCountReg, Imm(RamLoopCount));
+  workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop ("
+                     << RamSize / 1024 << ") KiB";
+
+  Cb.align(AlignMode::kCode, 64);
+
+  auto Loop = Cb.newLabel();
+  Cb.bind(Loop);
+
+  auto ShiftPos = 0;
+  bool Left = false;
+  auto AddDest = AddStart + 1;
+  auto MovDest = TransStart;
+  auto MovSrc = MovDest + 1;
+  unsigned L1Offset = 0;
+
+  const auto L1Increment = [&Cb, &L1Offset, &L1Size, &L1Addr, &OffsetReg, &PointerReg]() {
+    L1Offset += 64;
+    if (L1Offset < L1Size * 0.5) {
+      Cb.add(L1Addr, OffsetReg);
+    } else {
+      L1Offset = 0;
+      Cb.mov(L1Addr, PointerReg);
+    }
+  };
+  const auto L2Increment = [&Cb, &L2Addr, &OffsetReg]() { Cb.add(L2Addr, OffsetReg); };
+  const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); };
+  const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); };
+
+  for (unsigned Count = 0; Count < Repetitions; Count++) {
+    for (const auto& Item : Sequence) {
+      if (Item == "REG") {
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
+        Cb.vfmaddpd(Xmm(MovDest), Xmm(MovDest), xmm1, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 2) % AddRegs)));
+        Cb.xor_(ShiftReg[(ShiftPos + NbShiftRegs - 1) % NbShiftRegs], TempReg);
+        MovDest++;
+      } else if (Item == "L1_L") {
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
+        Cb.vfmaddpd(Ymm(AddDest), Ymm(AddDest), ymm1, ymmword_ptr(L1Addr, 32));
+        L1Increment();
+      } else if (Item == "L1_S") {
+        Cb.vmovapd(xmmword_ptr(L1Addr, 32), Xmm(AddDest));
+        Cb.vfmaddpd(Ymm(AddDest), Ymm(AddDest), ymm0, Ymm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
+        L1Increment();
+      } else if (Item == "L1_LS") {
+        Cb.vmovapd(xmmword_ptr(L1Addr, 64), Xmm(AddDest));
+        Cb.vfmaddpd(Ymm(AddDest), Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 32));
+        L1Increment();
+      } else if (Item == "L2_L") {
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm1, xmmword_ptr(L2Addr, 64));
+        L2Increment();
+      } else if (Item == "L2_S") {
+        Cb.vmovapd(xmmword_ptr(L2Addr, 64), Xmm(AddDest));
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
+        L2Increment();
+      } else if (Item == "L2_LS") {
+        Cb.vmovapd(xmmword_ptr(L2Addr, 96), Xmm(AddDest));
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, xmmword_ptr(L2Addr, 64));
+        L2Increment();
+      } else if (Item == "L3_L") {
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm1, xmmword_ptr(L3Addr, 64));
+        L3Increment();
+      } else if (Item == "L3_S") {
+        Cb.vmovapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest));
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
+        L3Increment();
+      } else if (Item == "L3_LS") {
+        Cb.vmovapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest));
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, xmmword_ptr(L3Addr, 64));
+        L3Increment();
+      } else if (Item == "L3_P") {
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, xmmword_ptr(L1Addr, 32));
+        Cb.prefetcht2(ptr(L3Addr));
+        L3Increment();
+      } else if (Item == "RAM_L") {
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
+        Cb.vfmaddpd(RamReg, RamReg, xmm1, xmmword_ptr(RamAddr, 64));
+        RamIncrement();
+      } else if (Item == "RAM_S") {
+        Cb.vmovapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest));
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
+        RamIncrement();
+      } else if (Item == "RAM_LS") {
+        Cb.vmovapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest));
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, xmmword_ptr(RamAddr, 32));
+        RamIncrement();
+      } else if (Item == "RAM_P") {
+        Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, xmmword_ptr(L1Addr, 32));
+        Cb.prefetcht2(ptr(RamAddr));
+        RamIncrement();
       } else {
-        workerLog::error() << "Instruction group " << item << " not found in " << this->name() << ".";
+        workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
         return EXIT_FAILURE;
       }
 
-      if (left) {
-        cb.shr(shift_reg32[shift_pos], Imm(1));
+      if (Left) {
+        Cb.shr(ShiftReg32[ShiftPos], Imm(1));
       } else {
-        cb.shl(shift_reg32[shift_pos], Imm(1));
+        Cb.shl(ShiftReg32[ShiftPos], Imm(1));
       }
-      add_dest++;
-      if (add_dest > add_end) {
-        add_dest = add_start;
+      AddDest++;
+      if (AddDest > AddEnd) {
+        AddDest = AddStart;
       }
-      if (mov_dst > trans_end) {
-        mov_dst = trans_start;
+      if (MovDest > TransEnd) {
+        MovDest = TransStart;
       }
-      mov_src++;
-      if (mov_src > trans_end) {
-        mov_src = trans_start;
+      MovSrc++;
+      if (MovSrc > TransEnd) {
+        MovSrc = TransStart;
       }
-      shift_pos++;
-      if (shift_pos == nr_shift_regs) {
-        shift_pos = 0;
-        left = !left;
+      ShiftPos++;
+      if (ShiftPos == NbShiftRegs) {
+        ShiftPos = 0;
+        Left = !Left;
       }
     }
   }
 
-  cb.movq(temp_reg, iter_reg); // restore iteration counter
-  if (this->getRAMSequenceCount(sequence) > 0) {
+  Cb.movq(TempReg, IterReg); // restore iteration counter
+  if (getRAMSequenceCount(Sequence) > 0) {
     // reset RAM counter
-    auto NoRamReset = cb.newLabel();
-
-    cb.sub(ram_count_reg, Imm(1));
-    cb.jnz(NoRamReset);
-    cb.mov(ram_count_reg, Imm(ram_loop_count));
-    cb.mov(ram_addr, pointer_reg);
-    cb.add(ram_addr, Imm(l3_size));
-    cb.bind(NoRamReset);
+    auto NoRamReset = Cb.newLabel();
+
+    Cb.sub(RamCountReg, Imm(1));
+    Cb.jnz(NoRamReset);
+    Cb.mov(RamCountReg, Imm(RamLoopCount));
+    Cb.mov(RamAddr, PointerReg);
+    Cb.add(RamAddr, Imm(L3Size));
+    Cb.bind(NoRamReset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  cb.inc(temp_reg); // increment iteration counter
-  if (this->getL2SequenceCount(sequence) > 0) {
+  Cb.inc(TempReg); // increment iteration counter
+  if (getL2SequenceCount(Sequence) > 0) {
     // reset L2-Cache counter
-    auto NoL2Reset = cb.newLabel();
-
-    cb.sub(l2_count_reg, Imm(1));
-    cb.jnz(NoL2Reset);
-    cb.mov(l2_count_reg, Imm(l2_loop_count));
-    cb.mov(l2_addr, pointer_reg);
-    cb.add(l2_addr, Imm(l1_size));
-    cb.bind(NoL2Reset);
+    auto NoL2Reset = Cb.newLabel();
+
+    Cb.sub(L2CountReg, Imm(1));
+    Cb.jnz(NoL2Reset);
+    Cb.mov(L2CountReg, Imm(L2LoopCount));
+    Cb.mov(L2Addr, PointerReg);
+    Cb.add(L2Addr, Imm(L1Size));
+    Cb.bind(NoL2Reset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  cb.movq(iter_reg, temp_reg); // store iteration counter
-  if (this->getL3SequenceCount(sequence) > 0) {
+  Cb.movq(IterReg, TempReg); // store iteration counter
+  if (getL3SequenceCount(Sequence) > 0) {
     // reset L3-Cache counter
-    auto NoL3Reset = cb.newLabel();
-
-    cb.sub(l3_count_reg, Imm(1));
-    cb.jnz(NoL3Reset);
-    cb.mov(l3_count_reg, Imm(l3_loop_count));
-    cb.mov(l3_addr, pointer_reg);
-    cb.add(l3_addr, Imm(l2_size));
-    cb.bind(NoL3Reset);
+    auto NoL3Reset = Cb.newLabel();
+
+    Cb.sub(L3CountReg, Imm(1));
+    Cb.jnz(NoL3Reset);
+    Cb.mov(L3CountReg, Imm(L3LoopCount));
+    Cb.mov(L3Addr, PointerReg);
+    Cb.add(L3Addr, Imm(L2Size));
+    Cb.bind(NoL3Reset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  cb.mov(l1_addr, pointer_reg);
+  Cb.mov(L1Addr, PointerReg);
 
-  if (dumpRegisters) {
-    auto SkipRegistersDump = cb.newLabel();
+  if (DumpRegisters) {
+    auto SkipRegistersDump = Cb.newLabel();
 
-    cb.test(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait));
-    cb.jnz(SkipRegistersDump);
+    Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
+    Cb.jnz(SkipRegistersDump);
 
     // dump all the ymm register
-    for (int i = 0; i < (int)this->registerCount(); i++) {
-      cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i));
+    for (unsigned I = 0; I < registerCount(); I++) {
+      Cb.vmovapd(ymmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Ymm(I));
     }
 
     // set read flag
-    cb.mov(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait));
+    Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
 
-    cb.bind(SkipRegistersDump);
+    Cb.bind(SkipRegistersDump);
   }
 
-  if (errorDetection) {
-    this->emitErrorDetectionCode<decltype(iter_reg), Ymm>(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
+  if (ErrorDetection) {
+    emitErrorDetectionCode<decltype(IterReg), Ymm>(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2);
   }
 
-  cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH));
-  cb.jnz(Loop);
+  Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH));
+  Cb.jnz(Loop);
 
-  cb.bind(FunctionExit);
+  Cb.bind(FunctionExit);
 
-  cb.movq(rax, iter_reg);
+  Cb.movq(rax, IterReg);
 
-  cb.emitEpilog(frame);
+  Cb.emitEpilog(Frame);
 
-  cb.finalize();
+  Cb.finalize();
 
   // String sb;
   // cb.dump(sb);
 
-  Error err = this->Rt.add(&this->LoadFunction, &code);
-  if (err) {
+  Error Err = Rt.add(&LoadFunction, &Code);
+  if (Err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
   }
 
   // skip if we could not determine cache size
-  if (l1i_cache_size != 0) {
-    auto loopSize = code.labelOffset(FunctionExit) - code.labelOffset(Loop);
-    auto instructionCachePercentage = 100 * loopSize / l1i_cache_size;
+  if (L1iCacheSize != 0) {
+    auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop);
+    auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize;
 
-    if (loopSize > l1i_cache_size) {
+    if (LoopSize > L1iCacheSize) {
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage
+    workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
-    workerLog::trace() << "Sequence size: " << sequence.size();
-    workerLog::trace() << "Repetition count: " << repetitions;
+    workerLog::trace() << "Sequence size: " << Sequence.size();
+    workerLog::trace() << "Repetition count: " << Repetitions;
   }
 
   return EXIT_SUCCESS;
 }
 
-std::list<std::string> FMA4Payload::getAvailableInstructions() const {
-  std::list<std::string> instructions;
+auto FMA4Payload::getAvailableInstructions() const -> std::list<std::string> {
+  std::list<std::string> Instructions;
 
-  transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions),
-            [](const auto& item) { return item.first; });
+  transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions),
+            [](const auto& Item) { return Item.first; });
 
-  return instructions;
+  return Instructions;
 }
 
-void FMA4Payload::init(uint64_t* memoryAddr, uint64_t bufferSize) {
-  X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4);
+void FMA4Payload::init(uint64_t* MemoryAddr, uint64_t BufferSize) {
+  X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
+
+} // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
index ba6534a9..4ecd24ca 100644
--- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
@@ -20,438 +20,433 @@
  *****************************************************************************/
 
 #include <firestarter/Environment/X86/Payload/FMAPayload.hpp>
-#include <firestarter/Logging/Log.hpp>
 
-#include <iterator>
-#include <utility>
+namespace firestarter::environment::x86::payload {
 
-using namespace firestarter::environment::x86::payload;
-using namespace asmjit;
-using namespace asmjit::x86;
+auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
+                                unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
+                                unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+                                bool ErrorDetection) -> int {
+  using namespace asmjit;
+  using namespace asmjit::x86;
 
-int FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion,
-                               unsigned instructionCacheSize, std::list<unsigned> const& dataCacheBufferSize,
-                               unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-                               bool errorDetection) {
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto sequence = this->generateSequence(proportion);
-  auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
+  auto Sequence = generateSequence(Proportion);
+  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
 
   // compute count of flops and memory access for performance report
-  unsigned flops = 0;
-  unsigned bytes = 0;
+  Flops = 0;
+  Bytes = 0;
 
-  for (const auto& item : sequence) {
-    auto it = this->InstructionFlops.find(item);
+  for (const auto& Item : Sequence) {
+    auto It = InstructionFlops.find(Item);
 
-    if (it == this->InstructionFlops.end()) {
-      workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
+    if (It == InstructionFlops.end()) {
+      workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
-    flops += it->second;
+    Flops += It->second;
 
-    it = this->InstructionMemory.find(item);
+    It = InstructionMemory.find(Item);
 
-    if (it != this->InstructionMemory.end()) {
-      bytes += it->second;
+    if (It != InstructionMemory.end()) {
+      Bytes += It->second;
     }
   }
 
-  this->Flops = repetitions * flops;
-  this->Bytes = repetitions * bytes;
-  this->Instructions = repetitions * sequence.size() * 4 + 6;
+  Flops *= Repetitions;
+  Bytes *= Repetitions;
+  Instructions = Repetitions * Sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
-  auto l1i_cache_size = instructionCacheSize / thread;
-  auto dataCacheBufferSizeIterator = dataCacheBufferSize.begin();
-  auto l1_size = *dataCacheBufferSizeIterator / thread;
-  std::advance(dataCacheBufferSizeIterator, 1);
-  auto l2_size = *dataCacheBufferSizeIterator / thread;
-  std::advance(dataCacheBufferSizeIterator, 1);
-  auto l3_size = *dataCacheBufferSizeIterator / thread;
-  auto ram_size = ramBufferSize / thread;
+  const auto L1iCacheSize = InstructionCacheSize / Thread;
+  auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin();
+  const auto L1Size = *DataCacheBufferSizeIterator / Thread;
+  std::advance(DataCacheBufferSizeIterator, 1);
+  const auto L2Size = *DataCacheBufferSizeIterator / Thread;
+  std::advance(DataCacheBufferSizeIterator, 1);
+  const auto L3Size = *DataCacheBufferSizeIterator / Thread;
+  const auto RamSize = RamBufferSize / Thread;
 
   // calculate the reset counters for the buffers
-  auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
-  auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
-  auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
+  const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread);
+  const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
+  const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
-  CodeHolder code;
-  code.init(this->Rt.environment());
+  CodeHolder Code;
+  Code.init(Rt.environment());
 
-  if (nullptr != this->LoadFunction) {
-    this->Rt.release(&this->LoadFunction);
+  if (nullptr != LoadFunction) {
+    Rt.release(&LoadFunction);
   }
 
-  Builder cb(&code);
-  cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
+  Builder Cb(&Code);
+  Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
                           asmjit::DiagnosticOptions::kValidateIntermediate);
 
-  auto pointer_reg = rax;
-  auto l1_addr = rbx;
-  auto l2_addr = rcx;
-  auto l3_addr = r8;
-  auto ram_addr = r9;
-  auto l2_count_reg = r10;
-  auto l3_count_reg = r11;
-  auto ram_count_reg = r12;
-  auto temp_reg = r13;
-  auto temp_reg2 = rbp;
-  auto offset_reg = r14;
-  auto addrHigh_reg = r15;
-  auto iter_reg = mm0;
-  auto shift_reg = std::vector<Gp>({rdi, rsi, rdx});
-  auto shift_reg32 = std::vector<Gp>({edi, esi, edx});
-  auto nr_shift_regs = 3;
-  auto mul_regs = 3;
-  auto add_regs = 9;
-  auto alt_dst_regs = 3;
-  auto ram_reg = ymm15;
-
-  FuncDetail func;
-  func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl),
-            this->Rt.environment());
-
-  FuncFrame frame;
-  frame.init(func);
+  const auto PointerReg = rax;
+  const auto L1Addr = rbx;
+  const auto L2Addr = rcx;
+  const auto L3Addr = r8;
+  const auto RamAddr = r9;
+  const auto L2CountReg = r10;
+  const auto L3CountReg = r11;
+  const auto RamCountReg = r12;
+  const auto TempReg = r13;
+  const auto TempReg2 = rbp;
+  const auto OffsetReg = r14;
+  const auto AddrHighReg = r15;
+  const auto IterReg = mm0;
+  const auto ShiftRegs = std::vector<Gp>({rdi, rsi, rdx});
+  const auto ShiftRegs32 = std::vector<Gp>({edi, esi, edx});
+  const auto NbShiftRegs = 3;
+  const auto MulRegs = 3;
+  const auto AddRegs = 9;
+  const auto AltDestRegs = 3;
+  const auto RamReg = ymm15;
+
+  FuncDetail Func;
+  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl), Rt.environment());
+
+  FuncFrame Frame;
+  Frame.init(Func);
 
   // make (x|y)mm registers dirty
-  for (int i = 0; i < 16; i++) {
-    frame.addDirtyRegs(Ymm(i));
+  for (int I = 0; I < 16; I++) {
+    Frame.addDirtyRegs(Ymm(I));
   }
-  for (int i = 0; i < 8; i++) {
-    frame.addDirtyRegs(Mm(i));
+  for (int I = 0; I < 8; I++) {
+    Frame.addDirtyRegs(Mm(I));
   }
   // make all other used registers dirty except RAX
-  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg,
-                     temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr);
-  for (const auto& reg : shift_reg) {
-    frame.addDirtyRegs(reg);
+  Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg,
+                     AddrHighReg, IterReg, RamAddr);
+  for (const auto& Reg : ShiftRegs) {
+    Frame.addDirtyRegs(Reg);
   }
 
-  FuncArgsAssignment args(&func);
+  FuncArgsAssignment Args(&Func);
   // FIXME: asmjit assigment to mm0 does not seem to be supported
-  args.assignAll(pointer_reg, addrHigh_reg, temp_reg);
-  args.updateFuncFrame(frame);
-  frame.finalize();
+  Args.assignAll(PointerReg, AddrHighReg, TempReg);
+  Args.updateFuncFrame(Frame);
+  Frame.finalize();
 
-  cb.emitProlog(frame);
-  cb.emitArgsAssignment(frame, args);
+  Cb.emitProlog(Frame);
+  Cb.emitArgsAssignment(Frame, Args);
 
   // FIXME: movq from temp_reg to iter_reg
-  cb.movq(iter_reg, temp_reg);
+  Cb.movq(IterReg, TempReg);
 
   // stop right away if low load is selected
-  auto FunctionExit = cb.newLabel();
+  auto FunctionExit = Cb.newLabel();
 
-  cb.mov(temp_reg, ptr_64(addrHigh_reg));
-  cb.test(temp_reg, temp_reg);
-  cb.jz(FunctionExit);
+  Cb.mov(TempReg, ptr_64(AddrHighReg));
+  Cb.test(TempReg, TempReg);
+  Cb.jz(FunctionExit);
 
-  cb.mov(offset_reg,
+  Cb.mov(OffsetReg,
          Imm(64)); // increment after each cache/memory access
   // Initialize registers for shift operations
-  for (auto const& reg : shift_reg32) {
-    cb.mov(reg, Imm(0xAAAAAAAA));
+  for (auto const& Reg : ShiftRegs32) {
+    Cb.mov(Reg, Imm(0xAAAAAAAA));
   }
   // Initialize AVX-Registers for FMA Operations
-  cb.vmovapd(ymm0, ymmword_ptr(pointer_reg));
-  cb.vmovapd(ymm1, ymmword_ptr(pointer_reg, 32));
-  cb.vmovapd(ymm2, ymmword_ptr(pointer_reg, 64));
-  auto add_start = mul_regs;
-  auto add_end = mul_regs + add_regs - 1;
-  auto trans_start = add_regs + mul_regs;
-  auto trans_end = add_regs + mul_regs + alt_dst_regs - 1;
-  for (int i = add_start; i <= trans_end; i++) {
-    cb.vmovapd(Ymm(i), ymmword_ptr(pointer_reg, 256 + i * 32));
+  Cb.vmovapd(ymm0, ymmword_ptr(PointerReg));
+  Cb.vmovapd(ymm1, ymmword_ptr(PointerReg, 32));
+  Cb.vmovapd(ymm2, ymmword_ptr(PointerReg, 64));
+  auto AddStart = MulRegs;
+  auto AddEnd = MulRegs + AddRegs - 1;
+  auto TransStart = AddRegs + MulRegs;
+  auto TransEnd = AddRegs + MulRegs + AltDestRegs - 1;
+  for (int I = AddStart; I <= TransEnd; I++) {
+    Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 256 + (I * 32)));
   }
-  cb.mov(l1_addr, pointer_reg); // address for L1-buffer
-  cb.mov(l2_addr, pointer_reg);
-  cb.add(l2_addr, Imm(l1_size)); // address for L2-buffer
-  cb.mov(l3_addr, pointer_reg);
-  cb.add(l3_addr, Imm(l2_size)); // address for L3-buffer
-  cb.mov(ram_addr, pointer_reg);
-  cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer
-  cb.mov(l2_count_reg, Imm(l2_loop_count));
-  workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop ("
-                     << l2_size / 1024 << ") KiB";
-  cb.mov(l3_count_reg, Imm(l3_loop_count));
-  workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop ("
-                     << l3_size / 1024 << ") KiB";
-  cb.mov(ram_count_reg, Imm(ram_loop_count));
-  workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop ("
-                     << ram_size / 1024 << ") KiB";
-
-  cb.align(AlignMode::kCode, 64);
-
-  auto Loop = cb.newLabel();
-  cb.bind(Loop);
-
-  auto shift_pos = 0;
-  bool left = false;
-  auto add_dest = add_start + 1;
-  auto mov_dst = trans_start;
-  auto mov_src = mov_dst + 1;
-  unsigned l1_offset = 0;
-
-#define L1_INCREMENT_TIMES(n)                                                                                          \
-  l1_offset += n * 64;                                                                                                 \
-  if (l1_offset < l1_size * 0.5) {                                                                                     \
-    cb.add(l1_addr, offset_reg);                                                                                       \
-  } else {                                                                                                             \
-    l1_offset = 0;                                                                                                     \
-    cb.mov(l1_addr, pointer_reg);                                                                                      \
-  }
-
-#define L1_INCREMENT() L1_INCREMENT_TIMES(1)
-
-#define L2_INCREMENT_TIMES(n)                                                                                          \
-  if (n == 1) {                                                                                                        \
-    cb.add(l2_addr, offset_reg);                                                                                       \
-  } else {                                                                                                             \
-    cb.add(l2_addr, n * 64);                                                                                           \
-  }
-
-#define L2_INCREMENT() L2_INCREMENT_TIMES(1)
-
-#define L3_INCREMENT() cb.add(l3_addr, offset_reg)
-
-#define RAM_INCREMENT() cb.add(ram_addr, offset_reg)
-
-  for (unsigned count = 0; count < repetitions; count++) {
-    for (const auto& item : sequence) {
-      if (item == "REG") {
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2);
-        cb.vfmadd231pd(Ymm(mov_dst), ymm2, ymm1);
-        cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], temp_reg);
-        mov_dst++;
-      } else if (item == "L1_L") {
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2);
-        cb.vfmadd231pd(Ymm(add_dest), ymm1, ymmword_ptr(l1_addr, 32));
-        L1_INCREMENT();
-      } else if (item == "L1_2L") {
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 32));
-        cb.vfmadd231pd(Ymm(mov_dst), ymm1, ymmword_ptr(l1_addr, 64));
-        L1_INCREMENT();
-      } else if (item == "L1_S") {
-        cb.vmovapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest));
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2);
-        L1_INCREMENT();
-      } else if (item == "L1_LS") {
-        cb.vmovapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest));
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 32));
-        L1_INCREMENT();
-      } else if (item == "L1_LS_256") {
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 64));
-        cb.vmovapd(ymmword_ptr(l1_addr, 32), Ymm(add_dest));
-        L1_INCREMENT();
-      } else if (item == "L1_2LS_256") {
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 64));
-        cb.vfmadd231pd(Ymm(mov_dst), ymm1, ymmword_ptr(l1_addr, 96));
-        cb.vmovapd(ymmword_ptr(l1_addr, 32), Ymm(add_dest));
-        L1_INCREMENT_TIMES(2);
-      } else if (item == "L2_L") {
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2);
-        cb.vfmadd231pd(Ymm(add_dest), ymm1, ymmword_ptr(l2_addr, 64));
-        L2_INCREMENT();
-      } else if (item == "L2_S") {
-        cb.vmovapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest));
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2);
-        L2_INCREMENT();
-      } else if (item == "L2_LS") {
-        cb.vmovapd(xmmword_ptr(l2_addr, 96), Xmm(add_dest));
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l2_addr, 64));
-        L2_INCREMENT();
-      } else if (item == "L2_LS_256") {
-        cb.vmovapd(ymmword_ptr(l2_addr, 96), Ymm(add_dest));
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ptr(l2_addr, 64));
-        L2_INCREMENT();
-      } else if (item == "L2_2LS_256") {
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ptr(l2_addr, 64));
-        cb.vfmadd231pd(Ymm(mov_dst), ymm1, ptr(l2_addr, 96));
-        cb.vmovapd(ymmword_ptr(l2_addr, 32), Ymm(add_dest));
-        L2_INCREMENT_TIMES(2);
-      } else if (item == "L3_L") {
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2);
-        cb.vfmadd231pd(Ymm(add_dest), ymm1, ymmword_ptr(l3_addr, 64));
-        L3_INCREMENT();
-      } else if (item == "L3_S") {
-        cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2);
-        L3_INCREMENT();
-      } else if (item == "L3_LS") {
-        cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l3_addr, 64));
-        L3_INCREMENT();
-      } else if (item == "L3_LS_256") {
-        cb.vmovapd(ymmword_ptr(l3_addr, 96), Ymm(add_dest));
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l3_addr, 64));
-        L3_INCREMENT();
-      } else if (item == "L3_P") {
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 32));
-        cb.prefetcht2(ptr(l3_addr));
-        L3_INCREMENT();
-      } else if (item == "RAM_L") {
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2);
-        cb.vfmadd231pd(ram_reg, ymm1, ymmword_ptr(ram_addr, 64));
-        RAM_INCREMENT();
-      } else if (item == "RAM_S") {
-        cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2);
-        RAM_INCREMENT();
-      } else if (item == "RAM_LS") {
-        cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(ram_addr, 32));
-        RAM_INCREMENT();
-      } else if (item == "RAM_P") {
-        cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 32));
-        cb.prefetcht2(ptr(ram_addr));
-        RAM_INCREMENT();
+  Cb.mov(L1Addr, PointerReg); // address for L1-buffer
+  Cb.mov(L2Addr, PointerReg);
+  Cb.add(L2Addr, Imm(L1Size)); // address for L2-buffer
+  Cb.mov(L3Addr, PointerReg);
+  Cb.add(L3Addr, Imm(L2Size)); // address for L3-buffer
+  Cb.mov(RamAddr, PointerReg);
+  Cb.add(RamAddr, Imm(L3Size)); // address for RAM-buffer
+  Cb.mov(L2CountReg, Imm(L2LoopCount));
+  workerLog::trace() << "reset counter for L2-buffer with " << L2LoopCount << " cache line accesses per loop ("
+                     << L2Size / 1024 << ") KiB";
+  Cb.mov(L3CountReg, Imm(L3LoopCount));
+  workerLog::trace() << "reset counter for L3-buffer with " << L3LoopCount << " cache line accesses per loop ("
+                     << L3Size / 1024 << ") KiB";
+  Cb.mov(RamCountReg, Imm(RamLoopCount));
+  workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop ("
+                     << RamSize / 1024 << ") KiB";
+
+  Cb.align(AlignMode::kCode, 64);
+
+  auto Loop = Cb.newLabel();
+  Cb.bind(Loop);
+
+  auto ShiftPos = 0;
+  bool Left = false;
+  auto AddDest = AddStart + 1;
+  auto MovDest = TransStart;
+  auto MovSrc = MovDest + 1;
+  unsigned L1Offset = 0;
+
+  const auto L1IncrementTimes = [&Cb, &L1Offset, &L1Size, &L1Addr, &OffsetReg, &PointerReg](unsigned Times) {
+    L1Offset += Times * 64;
+    if (L1Offset < L1Size * 0.5) {
+      Cb.add(L1Addr, OffsetReg);
+    } else {
+      L1Offset = 0;
+      Cb.mov(L1Addr, PointerReg);
+    }
+  };
+  const auto L1Increment = [&L1IncrementTimes] { L1IncrementTimes(1); };
+  const auto L2IncrementTimes = [&Cb, &L2Addr, &OffsetReg](unsigned Times) {
+    if (Times == 1) {
+      Cb.add(L2Addr, OffsetReg);
+    } else {
+      Cb.add(L2Addr, Times * 64);
+    }
+  };
+  const auto L2Increment = [&L2IncrementTimes] { L2IncrementTimes(1); };
+  const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); };
+  const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); };
+
+  for (unsigned Count = 0; Count < Repetitions; Count++) {
+    for (const auto& Item : Sequence) {
+      if (Item == "REG") {
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2);
+        Cb.vfmadd231pd(Ymm(MovDest), ymm2, ymm1);
+        Cb.xor_(ShiftRegs[(ShiftPos + NbShiftRegs - 1) % NbShiftRegs], TempReg);
+        MovDest++;
+      } else if (Item == "L1_L") {
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2);
+        Cb.vfmadd231pd(Ymm(AddDest), ymm1, ymmword_ptr(L1Addr, 32));
+        L1Increment();
+      } else if (Item == "L1_2L") {
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 32));
+        Cb.vfmadd231pd(Ymm(MovDest), ymm1, ymmword_ptr(L1Addr, 64));
+        L1Increment();
+      } else if (Item == "L1_S") {
+        Cb.vmovapd(xmmword_ptr(L1Addr, 32), Xmm(AddDest));
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2);
+        L1Increment();
+      } else if (Item == "L1_LS") {
+        Cb.vmovapd(xmmword_ptr(L1Addr, 64), Xmm(AddDest));
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 32));
+        L1Increment();
+      } else if (Item == "L1_LS_256") {
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 64));
+        Cb.vmovapd(ymmword_ptr(L1Addr, 32), Ymm(AddDest));
+        L1Increment();
+      } else if (Item == "L1_2LS_256") {
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 64));
+        Cb.vfmadd231pd(Ymm(MovDest), ymm1, ymmword_ptr(L1Addr, 96));
+        Cb.vmovapd(ymmword_ptr(L1Addr, 32), Ymm(AddDest));
+        L1IncrementTimes(2);
+      } else if (Item == "L2_L") {
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2);
+        Cb.vfmadd231pd(Ymm(AddDest), ymm1, ymmword_ptr(L2Addr, 64));
+        L2Increment();
+      } else if (Item == "L2_S") {
+        Cb.vmovapd(xmmword_ptr(L2Addr, 64), Xmm(AddDest));
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2);
+        L2Increment();
+      } else if (Item == "L2_LS") {
+        Cb.vmovapd(xmmword_ptr(L2Addr, 96), Xmm(AddDest));
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L2Addr, 64));
+        L2Increment();
+      } else if (Item == "L2_LS_256") {
+        Cb.vmovapd(ymmword_ptr(L2Addr, 96), Ymm(AddDest));
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ptr(L2Addr, 64));
+        L2Increment();
+      } else if (Item == "L2_2LS_256") {
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ptr(L2Addr, 64));
+        Cb.vfmadd231pd(Ymm(MovDest), ymm1, ptr(L2Addr, 96));
+        Cb.vmovapd(ymmword_ptr(L2Addr, 32), Ymm(AddDest));
+        L2IncrementTimes(2);
+      } else if (Item == "L3_L") {
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2);
+        Cb.vfmadd231pd(Ymm(AddDest), ymm1, ymmword_ptr(L3Addr, 64));
+        L3Increment();
+      } else if (Item == "L3_S") {
+        Cb.vmovapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest));
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2);
+        L3Increment();
+      } else if (Item == "L3_LS") {
+        Cb.vmovapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest));
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L3Addr, 64));
+        L3Increment();
+      } else if (Item == "L3_LS_256") {
+        Cb.vmovapd(ymmword_ptr(L3Addr, 96), Ymm(AddDest));
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L3Addr, 64));
+        L3Increment();
+      } else if (Item == "L3_P") {
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 32));
+        Cb.prefetcht2(ptr(L3Addr));
+        L3Increment();
+      } else if (Item == "RAM_L") {
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2);
+        Cb.vfmadd231pd(RamReg, ymm1, ymmword_ptr(RamAddr, 64));
+        RamIncrement();
+      } else if (Item == "RAM_S") {
+        Cb.vmovapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest));
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2);
+        RamIncrement();
+      } else if (Item == "RAM_LS") {
+        Cb.vmovapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest));
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(RamAddr, 32));
+        RamIncrement();
+      } else if (Item == "RAM_P") {
+        Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 32));
+        Cb.prefetcht2(ptr(RamAddr));
+        RamIncrement();
       } else {
-        workerLog::error() << "Instruction group " << item << " not found in " << this->name() << ".";
+        workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
         return EXIT_FAILURE;
       }
 
-      if (item != "L1_2LS_256" && item != "L2_2LS_256") {
-        if (left) {
-          cb.shr(shift_reg32[shift_pos], Imm(1));
+      if (Item != "L1_2LS_256" && Item != "L2_2LS_256") {
+        if (Left) {
+          Cb.shr(ShiftRegs32[ShiftPos], Imm(1));
         } else {
-          cb.shl(shift_reg32[shift_pos], Imm(1));
+          Cb.shl(ShiftRegs32[ShiftPos], Imm(1));
         }
       }
-      add_dest++;
-      if (add_dest > add_end) {
-        add_dest = add_start;
+      AddDest++;
+      if (AddDest > AddEnd) {
+        AddDest = AddStart;
       }
-      if (mov_dst > trans_end) {
-        mov_dst = trans_start;
+      if (MovDest > TransEnd) {
+        MovDest = TransStart;
       }
-      mov_src++;
-      if (mov_src > trans_end) {
-        mov_src = trans_start;
+      MovSrc++;
+      if (MovSrc > TransEnd) {
+        MovSrc = TransStart;
       }
-      shift_pos++;
-      if (shift_pos == nr_shift_regs) {
-        shift_pos = 0;
-        left = !left;
+      ShiftPos++;
+      if (ShiftPos == NbShiftRegs) {
+        ShiftPos = 0;
+        Left = !Left;
       }
     }
   }
 
-  cb.movq(temp_reg, iter_reg); // restore iteration counter
-  if (this->getRAMSequenceCount(sequence) > 0) {
+  Cb.movq(TempReg, IterReg); // restore iteration counter
+  if (getRAMSequenceCount(Sequence) > 0) {
     // reset RAM counter
-    auto NoRamReset = cb.newLabel();
-
-    cb.sub(ram_count_reg, Imm(1));
-    cb.jnz(NoRamReset);
-    cb.mov(ram_count_reg, Imm(ram_loop_count));
-    cb.mov(ram_addr, pointer_reg);
-    cb.add(ram_addr, Imm(l3_size));
-    cb.bind(NoRamReset);
+    auto NoRamReset = Cb.newLabel();
+
+    Cb.sub(RamCountReg, Imm(1));
+    Cb.jnz(NoRamReset);
+    Cb.mov(RamCountReg, Imm(RamLoopCount));
+    Cb.mov(RamAddr, PointerReg);
+    Cb.add(RamAddr, Imm(L3Size));
+    Cb.bind(NoRamReset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  cb.inc(temp_reg); // increment iteration counter
-  if (this->getL2SequenceCount(sequence) > 0) {
+  Cb.inc(TempReg); // increment iteration counter
+  if (getL2SequenceCount(Sequence) > 0) {
     // reset L2-Cache counter
-    auto NoL2Reset = cb.newLabel();
-
-    cb.sub(l2_count_reg, Imm(1));
-    cb.jnz(NoL2Reset);
-    cb.mov(l2_count_reg, Imm(l2_loop_count));
-    cb.mov(l2_addr, pointer_reg);
-    cb.add(l2_addr, Imm(l1_size));
-    cb.bind(NoL2Reset);
+    auto NoL2Reset = Cb.newLabel();
+
+    Cb.sub(L2CountReg, Imm(1));
+    Cb.jnz(NoL2Reset);
+    Cb.mov(L2CountReg, Imm(L2LoopCount));
+    Cb.mov(L2Addr, PointerReg);
+    Cb.add(L2Addr, Imm(L1Size));
+    Cb.bind(NoL2Reset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  cb.movq(iter_reg, temp_reg); // store iteration counter
-  if (this->getL3SequenceCount(sequence) > 0) {
+  Cb.movq(IterReg, TempReg); // store iteration counter
+  if (getL3SequenceCount(Sequence) > 0) {
     // reset L3-Cache counter
-    auto NoL3Reset = cb.newLabel();
-
-    cb.sub(l3_count_reg, Imm(1));
-    cb.jnz(NoL3Reset);
-    cb.mov(l3_count_reg, Imm(l3_loop_count));
-    cb.mov(l3_addr, pointer_reg);
-    cb.add(l3_addr, Imm(l2_size));
-    cb.bind(NoL3Reset);
+    auto NoL3Reset = Cb.newLabel();
+
+    Cb.sub(L3CountReg, Imm(1));
+    Cb.jnz(NoL3Reset);
+    Cb.mov(L3CountReg, Imm(L3LoopCount));
+    Cb.mov(L3Addr, PointerReg);
+    Cb.add(L3Addr, Imm(L2Size));
+    Cb.bind(NoL3Reset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  cb.mov(l1_addr, pointer_reg);
+  Cb.mov(L1Addr, PointerReg);
 
-  if (dumpRegisters) {
-    auto SkipRegistersDump = cb.newLabel();
+  if (DumpRegisters) {
+    auto SkipRegistersDump = Cb.newLabel();
 
-    cb.test(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait));
-    cb.jnz(SkipRegistersDump);
+    Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
+    Cb.jnz(SkipRegistersDump);
 
     // dump all the ymm register
-    for (int i = 0; i < (int)this->registerCount(); i++) {
-      cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i));
+    for (unsigned I = 0; I < registerCount(); I++) {
+      Cb.vmovapd(ymmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Ymm(I));
     }
 
     // set read flag
-    cb.mov(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait));
+    Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
 
-    cb.bind(SkipRegistersDump);
+    Cb.bind(SkipRegistersDump);
   }
 
-  if (errorDetection) {
-    this->emitErrorDetectionCode<decltype(iter_reg), Ymm>(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
+  if (ErrorDetection) {
+    emitErrorDetectionCode<decltype(IterReg), Ymm>(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2);
   }
 
-  cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH));
-  cb.jnz(Loop);
+  Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH));
+  Cb.jnz(Loop);
 
-  cb.bind(FunctionExit);
+  Cb.bind(FunctionExit);
 
-  cb.movq(rax, iter_reg);
+  Cb.movq(rax, IterReg);
 
-  cb.emitEpilog(frame);
+  Cb.emitEpilog(Frame);
 
-  cb.finalize();
+  Cb.finalize();
 
   // String sb;
   // cb.dump(sb);
 
-  Error err = this->Rt.add(&this->LoadFunction, &code);
-  if (err) {
+  Error Err = Rt.add(&LoadFunction, &Code);
+  if (Err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
   }
 
   // skip if we could not determine cache size
-  if (l1i_cache_size != 0) {
-    auto loopSize = code.labelOffset(FunctionExit) - code.labelOffset(Loop);
-    auto instructionCachePercentage = 100 * loopSize / l1i_cache_size;
+  if (L1iCacheSize != 0) {
+    auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop);
+    auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize;
 
-    if (loopSize > l1i_cache_size) {
+    if (LoopSize > L1iCacheSize) {
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage
+    workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
-    workerLog::trace() << "Sequence size: " << sequence.size();
-    workerLog::trace() << "Repetition count: " << repetitions;
+    workerLog::trace() << "Sequence size: " << Sequence.size();
+    workerLog::trace() << "Repetition count: " << Repetitions;
   }
 
   return EXIT_SUCCESS;
 }
 
-std::list<std::string> FMAPayload::getAvailableInstructions() const {
-  std::list<std::string> instructions;
+auto FMAPayload::getAvailableInstructions() const -> std::list<std::string> {
+  std::list<std::string> Instructions;
 
-  transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions),
-            [](const auto& item) { return item.first; });
+  transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions),
+            [](const auto& Item) { return Item.first; });
 
-  return instructions;
+  return Instructions;
 }
 
-void FMAPayload::init(uint64_t* memoryAddr, uint64_t bufferSize) {
-  X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4);
+void FMAPayload::init(uint64_t* MemoryAddr, uint64_t BufferSize) {
+  X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
+
+} // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index 60a98ef1..202cd423 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -20,423 +20,420 @@
  *****************************************************************************/
 
 #include <firestarter/Environment/X86/Payload/SSE2Payload.hpp>
-#include <firestarter/Logging/Log.hpp>
 
-#include <iterator>
-#include <utility>
+namespace firestarter::environment::x86::payload {
 
-using namespace firestarter::environment::x86::payload;
-using namespace asmjit;
-using namespace asmjit::x86;
+auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
+                                 unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
+                                 unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+                                 bool ErrorDetection) -> int {
+  using namespace asmjit;
+  using namespace asmjit::x86;
 
-int SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion,
-                                unsigned instructionCacheSize, std::list<unsigned> const& dataCacheBufferSize,
-                                unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-                                bool errorDetection) {
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto sequence = this->generateSequence(proportion);
-  auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
+  auto Sequence = generateSequence(Proportion);
+  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
 
   // compute count of flops and memory access for performance report
-  unsigned flops = 0;
-  unsigned bytes = 0;
+  Flops = 0;
+  Bytes = 0;
 
-  for (const auto& item : sequence) {
-    auto it = this->InstructionFlops.find(item);
+  for (const auto& Item : Sequence) {
+    auto It = InstructionFlops.find(Item);
 
-    if (it == this->InstructionFlops.end()) {
-      workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
+    if (It == InstructionFlops.end()) {
+      workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
-    flops += it->second;
+    Flops += It->second;
 
-    it = this->InstructionMemory.find(item);
+    It = InstructionMemory.find(Item);
 
-    if (it != this->InstructionMemory.end()) {
-      bytes += it->second;
+    if (It != InstructionMemory.end()) {
+      Bytes += It->second;
     }
   }
 
-  this->Flops = repetitions * flops;
-  this->Bytes = repetitions * bytes;
-  this->Instructions = repetitions * sequence.size() * 2 + 4;
+  Flops *= Repetitions;
+  Bytes *= Repetitions;
+  Instructions = Repetitions * Sequence.size() * 2 + 4;
 
   // calculate the buffer sizes
-  auto l1i_cache_size = instructionCacheSize / thread;
-  auto dataCacheBufferSizeIterator = dataCacheBufferSize.begin();
-  auto l1_size = *dataCacheBufferSizeIterator / thread;
-  std::advance(dataCacheBufferSizeIterator, 1);
-  auto l2_size = *dataCacheBufferSizeIterator / thread;
-  std::advance(dataCacheBufferSizeIterator, 1);
-  auto l3_size = *dataCacheBufferSizeIterator / thread;
-  auto ram_size = ramBufferSize / thread;
+  const auto L1iCacheSize = InstructionCacheSize / Thread;
+  auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin();
+  const auto L1Size = *DataCacheBufferSizeIterator / Thread;
+  std::advance(DataCacheBufferSizeIterator, 1);
+  const auto L2Size = *DataCacheBufferSizeIterator / Thread;
+  std::advance(DataCacheBufferSizeIterator, 1);
+  const auto L3Size = *DataCacheBufferSizeIterator / Thread;
+  const auto RamSize = RamBufferSize / Thread;
 
   // calculate the reset counters for the buffers
-  auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
-  auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
-  auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
+  const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread);
+  const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
+  const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
-  CodeHolder code;
-  code.init(this->Rt.environment());
+  CodeHolder Code;
+  Code.init(Rt.environment());
 
-  if (nullptr != this->LoadFunction) {
-    this->Rt.release(&this->LoadFunction);
+  if (nullptr != LoadFunction) {
+    Rt.release(&LoadFunction);
   }
 
-  Builder cb(&code);
-  cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
+  Builder Cb(&Code);
+  Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
                           asmjit::DiagnosticOptions::kValidateIntermediate);
 
-  auto pointer_reg = rax;
-  auto l1_addr = rbx;
-  auto l2_addr = rcx;
-  auto l3_addr = rdx;
-  auto ram_addr = rdi;
-  auto l2_count_reg = r8;
-  auto l3_count_reg = r9;
-  auto ram_count_reg = r10;
-  auto temp_reg = r11;
-  auto temp_reg2 = rbp;
-  auto offset_reg = r12;
-  auto addrHigh_reg = r13;
-  auto iter_reg = r14;
-  auto mov_regs = 0;
-  auto add_regs = 14;
-  auto trans_regs = 2;
-
-  FuncDetail func;
-  func.init(FuncSignatureT<uint64_t, uint64_t>(CallConvId::kCDecl), this->Rt.environment());
-
-  FuncFrame frame;
-  frame.init(func);
+  const auto PointerReg = rax;
+  const auto L1Addr = rbx;
+  const auto L2Addr = rcx;
+  const auto L3Addr = rdx;
+  const auto RamAddr = rdi;
+  const auto L2CountReg = r8;
+  const auto L3CountReg = r9;
+  const auto RamCountReg = r10;
+  const auto TempReg = r11;
+  const auto TempReg2 = rbp;
+  const auto OffsetReg = r12;
+  const auto AddrHighReg = r13;
+  const auto IterReg = r14;
+  const auto MovRegs = 0;
+  const auto AddRegs = 14;
+  const auto TransRegs = 2;
+
+  FuncDetail Func;
+  Func.init(FuncSignatureT<uint64_t, uint64_t>(CallConvId::kCDecl), Rt.environment());
+
+  FuncFrame Frame;
+  Frame.init(Func);
 
   // make xmm registers dirty
-  for (int i = 0; i < 16; i++) {
-    frame.addDirtyRegs(Xmm(i));
+  for (int I = 0; I < 16; I++) {
+    Frame.addDirtyRegs(Xmm(I));
   }
   // make mmx registers dirty
-  for (int i = 0; i < 8; i++) {
-    frame.addDirtyRegs(Mm(i));
+  for (int I = 0; I < 8; I++) {
+    Frame.addDirtyRegs(Mm(I));
   }
   // make all other used registers dirty except RAX
-  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg,
-                     temp_reg2, offset_reg, addrHigh_reg, iter_reg);
+  Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg,
+                     AddrHighReg, IterReg);
 
-  FuncArgsAssignment args(&func);
-  args.assignAll(pointer_reg, addrHigh_reg, iter_reg);
-  args.updateFuncFrame(frame);
-  frame.finalize();
+  FuncArgsAssignment Args(&Func);
+  Args.assignAll(PointerReg, AddrHighReg, IterReg);
+  Args.updateFuncFrame(Frame);
+  Frame.finalize();
 
-  cb.emitProlog(frame);
-  cb.emitArgsAssignment(frame, args);
+  Cb.emitProlog(Frame);
+  Cb.emitArgsAssignment(Frame, Args);
 
   // stop right away if low load is selected
-  auto FunctionExit = cb.newLabel();
+  auto FunctionExit = Cb.newLabel();
 
-  cb.mov(temp_reg, ptr_64(addrHigh_reg));
-  cb.test(temp_reg, temp_reg);
-  cb.jz(FunctionExit);
+  Cb.mov(TempReg, ptr_64(AddrHighReg));
+  Cb.test(TempReg, TempReg);
+  Cb.jz(FunctionExit);
 
-  cb.mov(offset_reg,
+  Cb.mov(OffsetReg,
          Imm(64)); // increment after each cache/memory access
 
   // Initialize SSE-Registers for Addition
-  auto add_start = 0;
-  auto add_end = add_regs - 1;
-  auto trans_start = add_regs;
-  auto trans_end = add_regs + trans_regs - 1;
-  if (add_regs > 0) {
-    for (int i = add_start; i <= add_end; i++) {
-      cb.movapd(Xmm(i), xmmword_ptr(pointer_reg, 32 * i));
+  const auto AddStart = 0;
+  const auto AddEnd = AddRegs - 1;
+  const auto TransStart = AddRegs;
+  const auto TransEnd = AddRegs + TransRegs - 1;
+  if (AddRegs > 0) {
+    for (int I = AddStart; I <= AddEnd; I++) {
+      Cb.movapd(Xmm(I), xmmword_ptr(PointerReg, 32 * I));
     }
   }
 
   // Initialize MMX-Registers for shift operations
-  auto mov_start = 0;
-  auto mov_end = mov_regs - 1;
-  if (mov_regs > 0) {
-    cb.mov(temp_reg, Imm(0x5555555555555555));
-    cb.movq(Mm(mov_start), temp_reg);
-    for (int i = mov_start + 1; i <= mov_end; i++) {
-      cb.movq(Mm(i), Mm(mov_start));
+  const auto MovStart = 0;
+  const auto MovEnd = MovRegs - 1;
+  if (MovRegs > 0) {
+    Cb.mov(TempReg, Imm(0x5555555555555555));
+    Cb.movq(Mm(MovStart), TempReg);
+    for (int I = MovStart + 1; I <= MovEnd; I++) {
+      Cb.movq(Mm(I), Mm(MovStart));
     }
   }
 
   // Initialize SSE-Registers for Transfer-Operations
-  if (trans_regs > 0) {
-    if (trans_start % 2 == 0) {
-      cb.mov(temp_reg, Imm(0x0F0F0F0F0F0F0F0F));
+  if (TransRegs > 0) {
+    if (TransStart % 2 == 0) {
+      Cb.mov(TempReg, Imm(0x0F0F0F0F0F0F0F0F));
     } else {
-      cb.mov(temp_reg, Imm(0xF0F0F0F0F0F0F0F0));
+      Cb.mov(TempReg, Imm(0xF0F0F0F0F0F0F0F0));
     }
-    cb.pinsrq(Xmm(trans_start), temp_reg, Imm(0));
-    cb.pinsrq(Xmm(trans_start), temp_reg, Imm(1));
-    for (int i = trans_start + 1; i <= trans_end; i++) {
-      if (i % 2 == 0) {
-        cb.shr(temp_reg, Imm(4));
+    Cb.pinsrq(Xmm(TransStart), TempReg, Imm(0));
+    Cb.pinsrq(Xmm(TransStart), TempReg, Imm(1));
+    for (int I = TransStart + 1; I <= TransEnd; I++) {
+      if (I % 2 == 0) {
+        Cb.shr(TempReg, Imm(4));
       } else {
-        cb.shl(temp_reg, Imm(4));
+        Cb.shl(TempReg, Imm(4));
       }
-      cb.pinsrq(Xmm(i), temp_reg, Imm(0));
-      cb.pinsrq(Xmm(i), temp_reg, Imm(1));
+      Cb.pinsrq(Xmm(I), TempReg, Imm(0));
+      Cb.pinsrq(Xmm(I), TempReg, Imm(1));
     }
   }
 
-  cb.mov(l1_addr, pointer_reg); // address for L1-buffer
-  cb.mov(l2_addr, pointer_reg);
-  cb.add(l2_addr, Imm(l1_size)); // address for L2-buffer
-  cb.mov(l3_addr, pointer_reg);
-  cb.add(l3_addr, Imm(l2_size)); // address for L3-buffer
-  cb.mov(ram_addr, pointer_reg);
-  cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer
-  cb.mov(l2_count_reg, Imm(l2_loop_count));
-  workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop ("
-                     << l2_size / 1024 << ") KiB";
-  cb.mov(l3_count_reg, Imm(l3_loop_count));
-  workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop ("
-                     << l3_size / 1024 << ") KiB";
-  cb.mov(ram_count_reg, Imm(ram_loop_count));
-  workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop ("
-                     << ram_size / 1024 << ") KiB";
-
-  cb.align(AlignMode::kCode, 64);
-
-  auto Loop = cb.newLabel();
-  cb.bind(Loop);
-
-  auto movq_dst = mov_start;
-  auto add_dest = add_start + 1;
-  auto mov_dst = trans_start;
-  auto mov_src = mov_dst + 1;
-  unsigned l1_offset = 0;
-
-#define L1_INCREMENT()                                                                                                 \
-  l1_offset += 64;                                                                                                     \
-  if (l1_offset < l1_size * 0.5) {                                                                                     \
-    cb.add(l1_addr, offset_reg);                                                                                       \
-  } else {                                                                                                             \
-    l1_offset = 0;                                                                                                     \
-    cb.mov(l1_addr, pointer_reg);                                                                                      \
-  }
-
-#define L2_INCREMENT() cb.add(l2_addr, offset_reg);
-
-#define L3_INCREMENT() cb.add(l3_addr, offset_reg)
-
-#define RAM_INCREMENT() cb.add(ram_addr, offset_reg)
-
-  for (unsigned count = 0; count < repetitions; count++) {
-    for (const auto& item : sequence) {
-      if (item == "REG") {
-        cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs));
-        cb.movdqa(Xmm(mov_dst), Xmm(mov_src));
-      } else if (item == "L1_L") {
-        cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32));
-        L1_INCREMENT();
-      } else if (item == "L1_S") {
-        cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
-        cb.movapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest));
-        L1_INCREMENT();
-        this->Instructions++;
-      } else if (item == "L1_LS") {
-        cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32));
-        cb.movapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest));
-        L1_INCREMENT();
-        this->Instructions++;
-      } else if (item == "L2_L") {
-        cb.addpd(Xmm(add_dest), xmmword_ptr(l2_addr, 64));
-        L2_INCREMENT();
-      } else if (item == "L2_S") {
-        cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
-        cb.movapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest));
-        L2_INCREMENT();
-        this->Instructions++;
-      } else if (item == "L2_LS") {
-        cb.addpd(Xmm(add_dest), xmmword_ptr(l2_addr, 64));
-        cb.movapd(xmmword_ptr(l2_addr, 96), Xmm(add_dest));
-        L2_INCREMENT();
-        this->Instructions++;
-      } else if (item == "L3_L") {
-        cb.addpd(Xmm(add_dest), xmmword_ptr(l3_addr, 64));
-        L3_INCREMENT();
-      } else if (item == "L3_S") {
-        cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
-        cb.movapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
-        L3_INCREMENT();
-        this->Instructions++;
-      } else if (item == "L3_LS") {
-        cb.addpd(Xmm(add_dest), xmmword_ptr(l3_addr, 64));
-        cb.movapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest));
-        L3_INCREMENT();
-        this->Instructions++;
-      } else if (item == "L3_P") {
-        cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32));
-        cb.prefetcht0(ptr(l3_addr));
-        L3_INCREMENT();
-        this->Instructions++;
-      } else if (item == "RAM_L") {
-        cb.addpd(Xmm(add_dest), xmmword_ptr(ram_addr, 64));
-        RAM_INCREMENT();
-      } else if (item == "RAM_S") {
-        cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs));
-        cb.movapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
-        RAM_INCREMENT();
-        this->Instructions++;
-      } else if (item == "RAM_LS") {
-        cb.addpd(Xmm(add_dest), xmmword_ptr(l3_addr, 64));
-        cb.movapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest));
-        RAM_INCREMENT();
-        this->Instructions++;
-      } else if (item == "RAM_P") {
-        cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32));
-        cb.prefetcht2(ptr(ram_addr));
-        RAM_INCREMENT();
-        this->Instructions++;
+  Cb.mov(L1Addr, PointerReg); // address for L1-buffer
+  Cb.mov(L2Addr, PointerReg);
+  Cb.add(L2Addr, Imm(L1Size)); // address for L2-buffer
+  Cb.mov(L3Addr, PointerReg);
+  Cb.add(L3Addr, Imm(L2Size)); // address for L3-buffer
+  Cb.mov(RamAddr, PointerReg);
+  Cb.add(RamAddr, Imm(L3Size)); // address for RAM-buffer
+  Cb.mov(L2CountReg, Imm(L2LoopCount));
+  workerLog::trace() << "reset counter for L2-buffer with " << L2LoopCount << " cache line accesses per loop ("
+                     << L2Size / 1024 << ") KiB";
+  Cb.mov(L3CountReg, Imm(L3LoopCount));
+  workerLog::trace() << "reset counter for L3-buffer with " << L3LoopCount << " cache line accesses per loop ("
+                     << L3Size / 1024 << ") KiB";
+  Cb.mov(RamCountReg, Imm(RamLoopCount));
+  workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop ("
+                     << RamSize / 1024 << ") KiB";
+
+  Cb.align(AlignMode::kCode, 64);
+
+  auto Loop = Cb.newLabel();
+  Cb.bind(Loop);
+
+  auto MovqDest = MovStart;
+  auto AddDest = AddStart + 1;
+  auto MovDest = TransStart;
+  auto MovSrc = MovDest + 1;
+  unsigned L1Offset = 0;
+
+  const auto L1Increment = [&Cb, &L1Offset, &L1Size, &L1Addr, &OffsetReg, &PointerReg]() {
+    L1Offset += 64;
+    if (L1Offset < L1Size * 0.5) {
+      Cb.add(L1Addr, OffsetReg);
+    } else {
+      L1Offset = 0;
+      Cb.mov(L1Addr, PointerReg);
+    }
+  };
+  const auto L2Increment = [&Cb, &L2Addr, &OffsetReg]() { Cb.add(L2Addr, OffsetReg); };
+  const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); };
+  const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); };
+
+  for (unsigned Count = 0; Count < Repetitions; Count++) {
+    for (const auto& Item : Sequence) {
+      if (Item == "REG") {
+        Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
+        Cb.movdqa(Xmm(MovDest), Xmm(MovSrc));
+      } else if (Item == "L1_L") {
+        Cb.addpd(Xmm(AddDest), xmmword_ptr(L1Addr, 32));
+        L1Increment();
+      } else if (Item == "L1_S") {
+        Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
+        Cb.movapd(xmmword_ptr(L1Addr, 32), Xmm(AddDest));
+        L1Increment();
+        Instructions++;
+      } else if (Item == "L1_LS") {
+        Cb.addpd(Xmm(AddDest), xmmword_ptr(L1Addr, 32));
+        Cb.movapd(xmmword_ptr(L1Addr, 64), Xmm(AddDest));
+        L1Increment();
+        Instructions++;
+      } else if (Item == "L2_L") {
+        Cb.addpd(Xmm(AddDest), xmmword_ptr(L2Addr, 64));
+        L2Increment();
+      } else if (Item == "L2_S") {
+        Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
+        Cb.movapd(xmmword_ptr(L2Addr, 64), Xmm(AddDest));
+        L2Increment();
+        Instructions++;
+      } else if (Item == "L2_LS") {
+        Cb.addpd(Xmm(AddDest), xmmword_ptr(L2Addr, 64));
+        Cb.movapd(xmmword_ptr(L2Addr, 96), Xmm(AddDest));
+        L2Increment();
+        Instructions++;
+      } else if (Item == "L3_L") {
+        Cb.addpd(Xmm(AddDest), xmmword_ptr(L3Addr, 64));
+        L3Increment();
+      } else if (Item == "L3_S") {
+        Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
+        Cb.movapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest));
+        L3Increment();
+        Instructions++;
+      } else if (Item == "L3_LS") {
+        Cb.addpd(Xmm(AddDest), xmmword_ptr(L3Addr, 64));
+        Cb.movapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest));
+        L3Increment();
+        Instructions++;
+      } else if (Item == "L3_P") {
+        Cb.addpd(Xmm(AddDest), xmmword_ptr(L1Addr, 32));
+        Cb.prefetcht0(ptr(L3Addr));
+        L3Increment();
+        Instructions++;
+      } else if (Item == "RAM_L") {
+        Cb.addpd(Xmm(AddDest), xmmword_ptr(RamAddr, 64));
+        RamIncrement();
+      } else if (Item == "RAM_S") {
+        Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
+        Cb.movapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest));
+        RamIncrement();
+        Instructions++;
+      } else if (Item == "RAM_LS") {
+        Cb.addpd(Xmm(AddDest), xmmword_ptr(L3Addr, 64));
+        Cb.movapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest));
+        RamIncrement();
+        Instructions++;
+      } else if (Item == "RAM_P") {
+        Cb.addpd(Xmm(AddDest), xmmword_ptr(L1Addr, 32));
+        Cb.prefetcht2(ptr(RamAddr));
+        RamIncrement();
+        Instructions++;
       } else {
-        workerLog::error() << "Instruction group " << item << " not found in " << this->name() << ".";
+        workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
         return EXIT_FAILURE;
       }
 
-      if (mov_regs > 0) {
-        this->Instructions++;
-        cb.movq(Mm(mov_start + (movq_dst - mov_start + mov_regs - 1) % mov_regs), Mm(movq_dst));
+      if (MovRegs > 0) {
+        Instructions++;
+        Cb.movq(Mm(MovStart + ((MovqDest - MovStart + MovRegs - 1) % MovRegs)), Mm(MovqDest));
       }
 
-      add_dest++;
-      if (add_dest > add_end) {
+      AddDest++;
+      if (AddDest > AddEnd) {
         // DO NOT REMOVE the + 1. It serves for the good of ymm0. If it was to
         // be overriden, the values in the other registers would rise up to inf.
-        add_dest = add_start + 1;
+        AddDest = AddStart + 1;
       }
-      mov_dst++;
-      if (mov_dst > trans_end) {
-        mov_dst = trans_start;
+      MovDest++;
+      if (MovDest > TransEnd) {
+        MovDest = TransStart;
       }
-      mov_src++;
-      if (mov_src > trans_end) {
-        mov_src = trans_start;
+      MovSrc++;
+      if (MovSrc > TransEnd) {
+        MovSrc = TransStart;
       }
-      if (mov_regs > 0) {
-        movq_dst++;
-        if (movq_dst > mov_end) {
-          movq_dst = mov_start;
+      if (MovRegs > 0) {
+        MovqDest++;
+        if (MovqDest > MovEnd) {
+          MovqDest = MovStart;
         }
       }
     }
   }
 
-  if (this->getRAMSequenceCount(sequence) > 0) {
+  if (getRAMSequenceCount(Sequence) > 0) {
     // reset RAM counter
-    auto NoRamReset = cb.newLabel();
-
-    cb.sub(ram_count_reg, Imm(1));
-    cb.jnz(NoRamReset);
-    cb.mov(ram_count_reg, Imm(ram_loop_count));
-    cb.mov(ram_addr, pointer_reg);
-    cb.add(ram_addr, Imm(l3_size));
-    cb.bind(NoRamReset);
+    auto NoRamReset = Cb.newLabel();
+
+    Cb.sub(RamCountReg, Imm(1));
+    Cb.jnz(NoRamReset);
+    Cb.mov(RamCountReg, Imm(RamLoopCount));
+    Cb.mov(RamAddr, PointerReg);
+    Cb.add(RamAddr, Imm(L3Size));
+    Cb.bind(NoRamReset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  if (this->getL2SequenceCount(sequence) > 0) {
+  if (getL2SequenceCount(Sequence) > 0) {
     // reset L2-Cache counter
-    auto NoL2Reset = cb.newLabel();
-
-    cb.sub(l2_count_reg, Imm(1));
-    cb.jnz(NoL2Reset);
-    cb.mov(l2_count_reg, Imm(l2_loop_count));
-    cb.mov(l2_addr, pointer_reg);
-    cb.add(l2_addr, Imm(l1_size));
-    cb.bind(NoL2Reset);
+    auto NoL2Reset = Cb.newLabel();
+
+    Cb.sub(L2CountReg, Imm(1));
+    Cb.jnz(NoL2Reset);
+    Cb.mov(L2CountReg, Imm(L2LoopCount));
+    Cb.mov(L2Addr, PointerReg);
+    Cb.add(L2Addr, Imm(L1Size));
+    Cb.bind(NoL2Reset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  if (this->getL3SequenceCount(sequence) > 0) {
+  if (getL3SequenceCount(Sequence) > 0) {
     // reset L3-Cache counter
-    auto NoL3Reset = cb.newLabel();
-
-    cb.sub(l3_count_reg, Imm(1));
-    cb.jnz(NoL3Reset);
-    cb.mov(l3_count_reg, Imm(l3_loop_count));
-    cb.mov(l3_addr, pointer_reg);
-    cb.add(l3_addr, Imm(l2_size));
-    cb.bind(NoL3Reset);
+    auto NoL3Reset = Cb.newLabel();
+
+    Cb.sub(L3CountReg, Imm(1));
+    Cb.jnz(NoL3Reset);
+    Cb.mov(L3CountReg, Imm(L3LoopCount));
+    Cb.mov(L3Addr, PointerReg);
+    Cb.add(L3Addr, Imm(L2Size));
+    Cb.bind(NoL3Reset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  cb.inc(iter_reg); // increment iteration counter
-  cb.mov(l1_addr, pointer_reg);
+  Cb.inc(IterReg); // increment iteration counter
+  Cb.mov(L1Addr, PointerReg);
 
-  if (dumpRegisters) {
-    auto SkipRegistersDump = cb.newLabel();
+  if (DumpRegisters) {
+    auto SkipRegistersDump = Cb.newLabel();
 
-    cb.test(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait));
-    cb.jnz(SkipRegistersDump);
+    Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
+    Cb.jnz(SkipRegistersDump);
 
     // dump all the xmm register
-    for (int i = 0; i < (int)this->registerCount(); i++) {
-      cb.movapd(xmmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Xmm(i));
+    for (unsigned I = 0; I < registerCount(); I++) {
+      Cb.movapd(xmmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Xmm(I));
     }
 
     // set read flag
-    cb.mov(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait));
+    Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
 
-    cb.bind(SkipRegistersDump);
+    Cb.bind(SkipRegistersDump);
   }
 
-  if (errorDetection) {
-    this->emitErrorDetectionCode<decltype(iter_reg), Xmm>(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
+  if (ErrorDetection) {
+    emitErrorDetectionCode<decltype(IterReg), Xmm>(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2);
   }
 
-  cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH));
-  cb.jnz(Loop);
+  Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH));
+  Cb.jnz(Loop);
 
-  cb.bind(FunctionExit);
+  Cb.bind(FunctionExit);
 
-  cb.mov(rax, iter_reg); // restore iteration counter
+  Cb.mov(rax, IterReg); // restore iteration counter
 
-  cb.emitEpilog(frame);
+  Cb.emitEpilog(Frame);
 
-  cb.finalize();
+  Cb.finalize();
 
   // String sb;
   // cb.dump(sb);
 
-  Error err = this->Rt.add(&this->LoadFunction, &code);
-  if (err) {
+  Error Err = Rt.add(&LoadFunction, &Code);
+  if (Err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
   }
 
   // skip if we could not determine cache size
-  if (l1i_cache_size != 0) {
-    auto loopSize = code.labelOffset(FunctionExit) - code.labelOffset(Loop);
-    auto instructionCachePercentage = 100 * loopSize / l1i_cache_size;
+  if (L1iCacheSize != 0) {
+    auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop);
+    auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize;
 
-    if (loopSize > l1i_cache_size) {
+    if (LoopSize > L1iCacheSize) {
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage
+    workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
-    workerLog::trace() << "Sequence size: " << sequence.size();
-    workerLog::trace() << "Repetition count: " << repetitions;
+    workerLog::trace() << "Sequence size: " << Sequence.size();
+    workerLog::trace() << "Repetition count: " << Repetitions;
   }
 
   return EXIT_SUCCESS;
 }
 
-std::list<std::string> SSE2Payload::getAvailableInstructions() const {
-  std::list<std::string> instructions;
+auto SSE2Payload::getAvailableInstructions() const -> std::list<std::string> {
+  std::list<std::string> Instructions;
 
-  transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions),
-            [](const auto& item) { return item.first; });
+  transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions),
+            [](const auto& Item) { return Item.first; });
 
-  return instructions;
+  return Instructions;
 }
 
-void SSE2Payload::init(uint64_t* memoryAddr, uint64_t bufferSize) {
-  X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10, 1.654738925401e-15);
+void SSE2Payload::init(uint64_t* MemoryAddr, uint64_t BufferSize) {
+  X86Payload::init(MemoryAddr, BufferSize, 1.654738925401e-10, 1.654738925401e-15);
 }
+
+} // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index 93458d25..d0cedd61 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -22,7 +22,6 @@
 #include <cassert>
 #include <chrono>
 #include <thread>
-#include <type_traits>
 
 #ifdef _MSC_VER
 #include <array>
@@ -31,436 +30,59 @@
 
 #include <firestarter/Environment/X86/Payload/X86Payload.hpp>
 
-using namespace firestarter::environment::x86::payload;
+namespace firestarter::environment::x86::payload {
 
-void X86Payload::lowLoadFunction(volatile uint64_t* addrHigh, uint64_t period) {
-  int nap;
-#ifdef _MSC_VER
-  std::array<int, 4> cpuid;
-#endif
+void X86Payload::lowLoadFunction(volatile uint64_t* AddrHigh, uint64_t Period) {
+  int Nap = Period / 100;
 
-  nap = period / 100;
 #ifndef _MSC_VER
   __asm__ __volatile__("mfence;"
                        "cpuid;" ::
                            : "eax", "ebx", "ecx", "edx");
 #else
+  std::array<int, 4> Cpuid;
   _mm_mfence();
-  __cpuid(cpuid.data(), 0);
+  __cpuid(Cpuid.data(), 0);
 #endif
+
   // while signal low load
-  while (*addrHigh == LOAD_LOW) {
+  while (*AddrHigh == LOAD_LOW) {
 #ifndef _MSC_VER
     __asm__ __volatile__("mfence;"
                          "cpuid;" ::
                              : "eax", "ebx", "ecx", "edx");
 #else
     _mm_mfence();
-    __cpuid(cpuid.data(), 0);
+    __cpuid(Cpuid.data(), 0);
 #endif
-    std::this_thread::sleep_for(std::chrono::microseconds(nap));
+    std::this_thread::sleep_for(std::chrono::microseconds(Nap));
 #ifndef _MSC_VER
     __asm__ __volatile__("mfence;"
                          "cpuid;" ::
                              : "eax", "ebx", "ecx", "edx");
 #else
     _mm_mfence();
-    __cpuid(cpuid.data(), 0);
+    __cpuid(Cpuid.data(), 0);
 #endif
   }
 }
 
-void X86Payload::init(uint64_t* memoryAddr, uint64_t bufferSize, double firstValue, double lastValue) {
+void X86Payload::init(uint64_t* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue) {
   uint64_t i = 0;
 
-  for (; i < INIT_BLOCKSIZE; i++)
-    *((double*)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * firstValue;
-  for (; i <= bufferSize - INIT_BLOCKSIZE; i += INIT_BLOCKSIZE)
-    std::memcpy(memoryAddr + i, memoryAddr + i - INIT_BLOCKSIZE, sizeof(uint64_t) * INIT_BLOCKSIZE);
-  for (; i < bufferSize; i++)
-    *((double*)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * lastValue;
-}
-
-uint64_t X86Payload::highLoadFunction(uint64_t* addrMem, volatile uint64_t* addrHigh, uint64_t iterations) {
-  return this->LoadFunction(addrMem, addrHigh, iterations);
-}
-
-// add MM regs to dirty regs
-// zmm31 is used for backup if VectorReg is of type asmjit::x86::Zmm
-template <class IterRegT, class VectorRegT>
-void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder& Cb, IterRegT IterReg, asmjit::x86::Gpq addrHigh_reg,
-                                        asmjit::x86::Gpq pointer_reg, asmjit::x86::Gpq temp_reg,
-                                        asmjit::x86::Gpq temp_reg2) {
-  // we don't want anything to break... so we use asserts for everything that
-  // could break it
-  static_assert(std::is_base_of<asmjit::x86::Vec, VectorRegT>::value, "VectorReg must be of asmjit::asmjit::x86::Vec");
-  static_assert(std::is_same<asmjit::x86::Xmm, VectorRegT>::value ||
-                    std::is_same<asmjit::x86::Ymm, VectorRegT>::value ||
-                    std::is_same<asmjit::x86::Zmm, VectorRegT>::value,
-                "VectorReg ist not of any supported type");
-  static_assert(std::is_same<asmjit::x86::Mm, IterRegT>::value || std::is_same<asmjit::x86::Gpq, IterRegT>::value,
-                "IterReg is not of any supported type");
-
-  if constexpr (std::is_same<asmjit::x86::Mm, IterRegT>::value) {
-    assert((IterReg == asmjit::x86::mm0, "iter_reg must be mm0"));
+  for (; i < INIT_BLOCKSIZE; i++) {
+    reinterpret_cast<double*>(MemoryAddr)[i] = 0.25 + static_cast<double>(i) * 8.0 * FirstValue;
   }
-
-  assert((IterReg != temp_reg, "iter_reg must be != temp_reg"));
-  assert((temp_reg != temp_reg2, "temp_reg must be != temp_reg2"));
-  assert((temp_reg != addrHigh_reg, "temp_reg must be != addrHigh_reg"));
-  assert((temp_reg != pointer_reg, "temp_reg must be != pointer_reg"));
-
-  assert((IterReg != asmjit::x86::r8, "iter_reg must be != r8"));
-  assert((IterReg != asmjit::x86::r9, "iter_reg must be != r9"));
-  assert((IterReg != asmjit::x86::rax, "iter_reg must be != rax"));
-  assert((IterReg != asmjit::x86::rbx, "iter_reg must be != rbx"));
-  assert((IterReg != asmjit::x86::rcx, "iter_reg must be != rcx"));
-  assert((IterReg != asmjit::x86::rdx, "iter_reg must be != rdx"));
-
-  assert((temp_reg != asmjit::x86::r8, "temp_reg must be != r8"));
-  assert((temp_reg != asmjit::x86::r9, "temp_reg must be != r9"));
-  assert((temp_reg != asmjit::x86::rax, "temp_reg must be != rax"));
-  assert((temp_reg != asmjit::x86::rbx, "temp_reg must be != rbx"));
-  assert((temp_reg != asmjit::x86::rcx, "temp_reg must be != rcx"));
-  assert((temp_reg != asmjit::x86::rdx, "temp_reg must be != rdx"));
-
-  assert((temp_reg2 != asmjit::x86::r8, "temp_reg2 must be != r8"));
-  assert((temp_reg2 != asmjit::x86::r9, "temp_reg2 must be != r9"));
-  assert((temp_reg2 != asmjit::x86::rax, "temp_reg2 must be != rax"));
-  assert((temp_reg2 != asmjit::x86::rbx, "temp_reg2 must be != rbx"));
-  assert((temp_reg2 != asmjit::x86::rcx, "temp_reg2 must be != rcx"));
-  assert((temp_reg2 != asmjit::x86::rdx, "temp_reg2 must be != rdx"));
-
-  assert((addrHigh_reg != asmjit::x86::r8, "addrHigh_reg must be != r8"));
-  assert((addrHigh_reg != asmjit::x86::r9, "addrHigh_reg must be != r9"));
-  assert((addrHigh_reg != asmjit::x86::rax, "addrHigh_reg must be != rax"));
-  assert((addrHigh_reg != asmjit::x86::rbx, "addrHigh_reg must be != rbx"));
-  assert((addrHigh_reg != asmjit::x86::rcx, "addrHigh_reg must be != rcx"));
-  assert((addrHigh_reg != asmjit::x86::rdx, "addrHigh_reg must be != rdx"));
-
-  auto SkipErrorDetection = Cb.newLabel();
-
-  if constexpr (std::is_same<asmjit::x86::Mm, IterRegT>::value) {
-    Cb.movq(temp_reg, IterReg);
-  } else {
-    Cb.mov(temp_reg, IterReg);
+  for (; i <= BufferSize - INIT_BLOCKSIZE; i += INIT_BLOCKSIZE) {
+    std::memcpy(MemoryAddr + i, MemoryAddr + i - INIT_BLOCKSIZE, sizeof(uint64_t) * INIT_BLOCKSIZE);
   }
-  // round about 50-100 Hz
-  // more or less, but this isn't really that relevant
-  Cb.and_(temp_reg, asmjit::Imm(0x3fff));
-  Cb.test(temp_reg, temp_reg);
-  Cb.jnz(SkipErrorDetection);
-
-  Cb.mov(temp_reg, asmjit::Imm(0xffffffff));
-
-  int registerCount = (int)this->registerCount();
-
-  // Create a backup of VectorReg(0)
-  if constexpr (std::is_same<asmjit::x86::Xmm, VectorRegT>::value) {
-    Cb.movq(temp_reg2, asmjit::x86::xmm0);
-    Cb.push(temp_reg2);
-    Cb.crc32(temp_reg, temp_reg2);
-    Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-    Cb.movq(temp_reg2, asmjit::x86::xmm0);
-    Cb.push(temp_reg2);
-    Cb.crc32(temp_reg, temp_reg2);
-
-  } else if constexpr (std::is_same<asmjit::x86::Ymm, VectorRegT>::value &&
-                       std::is_same<asmjit::x86::Mm, IterRegT>::value) {
-    Cb.movq(temp_reg2, asmjit::x86::xmm0);
-    Cb.movq(asmjit::x86::Mm(7), temp_reg2);
-    Cb.crc32(temp_reg, temp_reg2);
-    Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-    Cb.movq(temp_reg2, asmjit::x86::xmm0);
-    Cb.movq(asmjit::x86::Mm(6), temp_reg2);
-    Cb.crc32(temp_reg, temp_reg2);
-
-    Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1));
-
-    Cb.movq(temp_reg2, asmjit::x86::xmm0);
-    Cb.movq(asmjit::x86::Mm(5), temp_reg2);
-    Cb.crc32(temp_reg, temp_reg2);
-    Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-    Cb.movq(temp_reg2, asmjit::x86::xmm0);
-    Cb.movq(asmjit::x86::Mm(4), temp_reg2);
-    Cb.crc32(temp_reg, temp_reg2);
-  } else if constexpr (std::is_same<asmjit::x86::Zmm, VectorRegT>::value &&
-                       std::is_same<asmjit::x86::Mm, IterRegT>::value) {
-    // We use vector registers zmm31 for our backup
-    Cb.vmovapd(asmjit::x86::zmm31, asmjit::x86::zmm0);
-    registerCount--;
+  for (; i < BufferSize; i++) {
+    reinterpret_cast<double*>(MemoryAddr)[i] = 0.25 + static_cast<double>(i) * 8.0 * LastValue;
   }
-
-  // Calculate the hash of the remaining VectorReg
-  // use VectorReg(0) as a temporary place to unpack values
-  for (int i = 1; i < registerCount; i++) {
-    if constexpr (std::is_same<asmjit::x86::Xmm, VectorRegT>::value) {
-      Cb.vmovapd(asmjit::x86::xmm0, asmjit::x86::Xmm(i));
-
-      Cb.movq(temp_reg2, asmjit::x86::xmm0);
-      Cb.crc32(temp_reg, temp_reg2);
-      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-      Cb.movq(temp_reg2, asmjit::x86::xmm0);
-      Cb.crc32(temp_reg, temp_reg2);
-    } else if constexpr (std::is_same<asmjit::x86::Ymm, VectorRegT>::value) {
-      Cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(i));
-
-      Cb.movq(temp_reg2, asmjit::x86::xmm0);
-      Cb.crc32(temp_reg, temp_reg2);
-      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-      Cb.movq(temp_reg2, asmjit::x86::xmm0);
-      Cb.crc32(temp_reg, temp_reg2);
-
-      Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1));
-
-      Cb.movq(temp_reg2, asmjit::x86::xmm0);
-      Cb.crc32(temp_reg, temp_reg2);
-      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-      Cb.movq(temp_reg2, asmjit::x86::xmm0);
-      Cb.crc32(temp_reg, temp_reg2);
-    } else if constexpr (std::is_same<asmjit::x86::Zmm, VectorRegT>::value) {
-      Cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(i));
-
-      Cb.movq(temp_reg2, asmjit::x86::xmm0);
-      Cb.crc32(temp_reg, temp_reg2);
-      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-      Cb.movq(temp_reg2, asmjit::x86::xmm0);
-      Cb.crc32(temp_reg, temp_reg2);
-
-      Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1));
-
-      Cb.movq(temp_reg2, asmjit::x86::xmm0);
-      Cb.crc32(temp_reg, temp_reg2);
-      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-      Cb.movq(temp_reg2, asmjit::x86::xmm0);
-      Cb.crc32(temp_reg, temp_reg2);
-
-      Cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(i), asmjit::Imm(2));
-
-      Cb.movq(temp_reg2, asmjit::x86::xmm0);
-      Cb.crc32(temp_reg, temp_reg2);
-      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-      Cb.movq(temp_reg2, asmjit::x86::xmm0);
-      Cb.crc32(temp_reg, temp_reg2);
-
-      Cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(i), asmjit::Imm(3));
-
-      Cb.movq(temp_reg2, asmjit::x86::xmm0);
-      Cb.crc32(temp_reg, temp_reg2);
-      Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-      Cb.movq(temp_reg2, asmjit::x86::xmm0);
-      Cb.crc32(temp_reg, temp_reg2);
-    }
-  }
-
-  // Restore VectorReg(0) from backup
-  if constexpr (std::is_same<asmjit::x86::Xmm, VectorRegT>::value) {
-    Cb.pop(temp_reg2);
-    Cb.movq(asmjit::x86::xmm0, temp_reg2);
-    Cb.movlhps(asmjit::x86::xmm0, asmjit::x86::xmm0);
-    Cb.pop(temp_reg2);
-    Cb.pinsrw(asmjit::x86::xmm0, temp_reg2.r32(), asmjit::Imm(0));
-    Cb.shr(temp_reg2, asmjit::Imm(32));
-    Cb.movd(temp_reg2.r32(), asmjit::x86::Mm(7));
-    Cb.pinsrw(asmjit::x86::xmm0, temp_reg2.r32(), asmjit::Imm(1));
-  } else if constexpr (std::is_same<asmjit::x86::Ymm, VectorRegT>::value &&
-                       std::is_same<asmjit::x86::Mm, IterRegT>::value) {
-    Cb.movq(temp_reg2, asmjit::x86::Mm(5));
-    Cb.movq(asmjit::x86::xmm0, temp_reg2);
-    Cb.movq(temp_reg2, asmjit::x86::Mm(4));
-    Cb.pinsrq(asmjit::x86::xmm0, temp_reg2, asmjit::Imm(1));
-
-    Cb.vinsertf128(asmjit::x86::ymm0, asmjit::x86::ymm0, asmjit::x86::xmm0, asmjit::Imm(1));
-
-    Cb.movq(temp_reg2, asmjit::x86::Mm(7));
-    Cb.movq(asmjit::x86::xmm0, temp_reg2);
-    Cb.movq(temp_reg2, asmjit::x86::Mm(6));
-    Cb.pinsrq(asmjit::x86::xmm0, temp_reg2, asmjit::Imm(1));
-  } else if constexpr (std::is_same<asmjit::x86::Zmm, VectorRegT>::value &&
-                       std::is_same<asmjit::x86::Mm, IterRegT>::value) {
-    // We use vector registers zmm31 for our backup
-    Cb.vmovapd(asmjit::x86::zmm0, asmjit::x86::zmm31);
-  }
-
-  // before starting the communication, backup r8, r9, rax, rbx, rcx and rdx
-  if constexpr (std::is_same<asmjit::x86::Mm, IterRegT>::value) {
-    Cb.movq(asmjit::x86::Mm(7), asmjit::x86::rax);
-    Cb.movq(asmjit::x86::Mm(6), asmjit::x86::rbx);
-    Cb.movq(asmjit::x86::Mm(5), asmjit::x86::rcx);
-    Cb.movq(asmjit::x86::Mm(4), asmjit::x86::rdx);
-    Cb.movq(asmjit::x86::Mm(3), asmjit::x86::r8);
-    Cb.movq(asmjit::x86::Mm(2), asmjit::x86::r9);
-  } else {
-    Cb.push(asmjit::x86::rax);
-    Cb.push(asmjit::x86::rbx);
-    Cb.push(asmjit::x86::rcx);
-    Cb.push(asmjit::x86::rdx);
-    Cb.push(asmjit::x86::r8);
-    Cb.push(asmjit::x86::r9);
-  }
-
-  // do the actual communication
-  // temp_reg contains our hash
-
-  // save the pointer_reg. it might be any of r8, r9, rax, rbx, rcx or rdx
-  Cb.mov(temp_reg2, pointer_reg);
-
-  // Don't touch me!
-  // This sychronization and communication works even if the threads run at
-  // different (changing) speed, with just one "lock cmpxchg16b" Brought to you
-  // by a few hours of headache for two people.
-  auto communication = [&](auto offset) {
-    // communication
-    Cb.mov(asmjit::x86::r8, asmjit::x86::ptr_64(temp_reg2, offset));
-
-    // temp data
-    Cb.mov(asmjit::x86::r9, temp_reg2);
-    Cb.add(asmjit::x86::r9, asmjit::Imm(offset + 8));
-
-    Cb.mov(asmjit::x86::rdx, asmjit::x86::ptr_64(asmjit::x86::r9, 0));
-    Cb.mov(asmjit::x86::rax, asmjit::x86::ptr_64(asmjit::x86::r9, 8));
-
-    auto L0 = Cb.newLabel();
-    Cb.bind(L0);
-
-    Cb.lock();
-    Cb.cmpxchg16b(asmjit::x86::ptr(asmjit::x86::r8));
-
-    auto L1 = Cb.newLabel();
-    Cb.jnz(L1);
-
-    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx);
-    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx);
-    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
-    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
-
-    Cb.mov(asmjit::x86::rax, asmjit::Imm(2));
-
-    auto L6 = Cb.newLabel();
-    Cb.jmp(L6);
-
-    Cb.bind(L1);
-
-    Cb.cmp(asmjit::x86::rcx, asmjit::x86::rdx);
-
-    auto L2 = Cb.newLabel();
-    Cb.jle(L2);
-
-    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx);
-    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx);
-
-    Cb.jmp(L0);
-
-    Cb.bind(L2);
-
-    auto L3 = Cb.newLabel();
-
-    Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
-    Cb.jne(L3);
-    Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
-    Cb.jne(L3);
-
-    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::x86::rdx);
-    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::x86::rax);
-
-    Cb.bind(L3);
-
-    Cb.cmp(asmjit::x86::rcx, asmjit::x86::ptr_64(asmjit::x86::r9, 16));
-    Cb.mov(asmjit::x86::rax, asmjit::Imm(4));
-    Cb.jne(L6);
-
-    Cb.cmp(asmjit::x86::rbx, asmjit::x86::ptr_64(asmjit::x86::r9, 24));
-    auto L4 = Cb.newLabel();
-    Cb.jne(L4);
-
-    Cb.mov(asmjit::x86::rax, asmjit::Imm(0));
-
-    auto L5 = Cb.newLabel();
-    Cb.jmp(L5);
-
-    Cb.bind(L4);
-
-    Cb.mov(asmjit::x86::rax, asmjit::Imm(1));
-
-    Cb.bind(L5);
-
-    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
-    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
-
-    Cb.bind(L6);
-
-    // if check failed
-    Cb.cmp(asmjit::x86::rax, asmjit::Imm(1));
-    auto L7 = Cb.newLabel();
-    Cb.jne(L7);
-
-    // write the error flag
-    Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 32), asmjit::Imm(1));
-
-    // stop the execution after some time
-    Cb.mov(asmjit::x86::ptr_64(addrHigh_reg), asmjit::Imm(LOAD_STOP));
-    Cb.mfence();
-
-    Cb.bind(L7);
-
-    auto L9 = Cb.newLabel();
-    Cb.jmp(L9);
-  };
-
-  // left communication
-  // move hash
-  Cb.mov(asmjit::x86::rbx, temp_reg);
-  // move iterations counter
-  if constexpr (std::is_same<asmjit::x86::Mm, IterRegT>::value) {
-    Cb.movq(asmjit::x86::rcx, IterReg);
-  } else {
-    Cb.mov(asmjit::x86::rcx, IterReg);
-  }
-
-  communication(-128);
-
-  // right communication
-  // move hash
-  Cb.mov(asmjit::x86::rbx, temp_reg);
-  // move iterations counter
-  if constexpr (std::is_same<asmjit::x86::Mm, IterRegT>::value) {
-    Cb.movq(asmjit::x86::rcx, IterReg);
-  } else {
-    Cb.mov(asmjit::x86::rcx, IterReg);
-  }
-
-  communication(-64);
-
-  // restore r8, r9, rax, rbx, rcx and rdx
-  if constexpr (std::is_same<asmjit::x86::Mm, IterRegT>::value) {
-    Cb.movq(asmjit::x86::rax, asmjit::x86::Mm(7));
-    Cb.movq(asmjit::x86::rbx, asmjit::x86::Mm(6));
-    Cb.movq(asmjit::x86::rcx, asmjit::x86::Mm(5));
-    Cb.movq(asmjit::x86::rdx, asmjit::x86::Mm(4));
-    Cb.movq(asmjit::x86::r8, asmjit::x86::Mm(3));
-    Cb.movq(asmjit::x86::r9, asmjit::x86::Mm(2));
-  } else {
-    Cb.pop(asmjit::x86::r9);
-    Cb.pop(asmjit::x86::r8);
-    Cb.pop(asmjit::x86::rdx);
-    Cb.pop(asmjit::x86::rcx);
-    Cb.pop(asmjit::x86::rbx);
-    Cb.pop(asmjit::x86::rax);
-  }
-
-  Cb.bind(SkipErrorDetection);
 }
 
-template void X86Payload::emitErrorDetectionCode<asmjit::x86::Gpq, asmjit::x86::Xmm>(
-    asmjit::x86::Builder& cb, asmjit::x86::Gpq iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg,
-    asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2);
-template void X86Payload::emitErrorDetectionCode<asmjit::x86::Gpq, asmjit::x86::Ymm>(
-    asmjit::x86::Builder& cb, asmjit::x86::Gpq iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg,
-    asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2);
+auto X86Payload::highLoadFunction(uint64_t* AddrMem, volatile uint64_t* AddrHigh, uint64_t Iterations) -> uint64_t {
+  return this->LoadFunction(AddrMem, AddrHigh, Iterations);
+}
 
-template void X86Payload::emitErrorDetectionCode<asmjit::x86::Mm, asmjit::x86::Ymm>(
-    asmjit::x86::Builder& cb, asmjit::x86::Mm iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg,
-    asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2);
-template void X86Payload::emitErrorDetectionCode<asmjit::x86::Mm, asmjit::x86::Zmm>(
-    asmjit::x86::Builder& cb, asmjit::x86::Mm iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg,
-    asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2);
+}; // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
index ac7550e1..01c62777 100644
--- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
@@ -20,389 +20,385 @@
  *****************************************************************************/
 
 #include <firestarter/Environment/X86/Payload/ZENFMAPayload.hpp>
-#include <firestarter/Logging/Log.hpp>
 
-#include <iterator>
-#include <utility>
+namespace firestarter::environment::x86::payload {
 
-using namespace firestarter::environment::x86::payload;
-using namespace asmjit;
-using namespace asmjit::x86;
+auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
+                                   unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
+                                   unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+                                   bool ErrorDetection) -> int {
+  using namespace asmjit;
+  using namespace asmjit::x86;
 
-int ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& proportion,
-                                  unsigned instructionCacheSize, std::list<unsigned> const& dataCacheBufferSize,
-                                  unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters,
-                                  bool errorDetection) {
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto sequence = this->generateSequence(proportion);
-  auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread);
+  auto Sequence = generateSequence(Proportion);
+  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
 
   // compute count of flops and memory access for performance report
-  unsigned flops = 0;
-  unsigned bytes = 0;
+  Flops = 0;
+  Bytes = 0;
 
-  for (const auto& item : sequence) {
-    auto it = this->InstructionFlops.find(item);
+  for (const auto& Item : Sequence) {
+    auto It = InstructionFlops.find(Item);
 
-    if (it == this->InstructionFlops.end()) {
-      workerLog::error() << "Instruction group " << item << " undefined in " << name() << ".";
+    if (It == InstructionFlops.end()) {
+      workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
       return EXIT_FAILURE;
     }
 
-    flops += it->second;
+    Flops += It->second;
 
-    it = this->InstructionMemory.find(item);
+    It = InstructionMemory.find(Item);
 
-    if (it != this->InstructionMemory.end()) {
-      bytes += it->second;
+    if (It != InstructionMemory.end()) {
+      Bytes += It->second;
     }
   }
 
-  this->Flops = repetitions * flops;
-  this->Bytes = repetitions * bytes;
-  this->Instructions = repetitions * sequence.size() * 4 + 6;
+  Flops *= Repetitions;
+  Bytes *= Repetitions;
+  Instructions = Repetitions * Sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
-  auto l1i_cache_size = instructionCacheSize / thread;
-  auto dataCacheBufferSizeIterator = dataCacheBufferSize.begin();
-  auto l1_size = *dataCacheBufferSizeIterator / thread;
-  std::advance(dataCacheBufferSizeIterator, 1);
-  auto l2_size = *dataCacheBufferSizeIterator / thread;
-  std::advance(dataCacheBufferSizeIterator, 1);
-  auto l3_size = *dataCacheBufferSizeIterator / thread;
-  auto ram_size = ramBufferSize / thread;
+  auto L1iCacheSize = InstructionCacheSize / Thread;
+  auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin();
+  auto L1Size = *DataCacheBufferSizeIterator / Thread;
+  std::advance(DataCacheBufferSizeIterator, 1);
+  auto L2Size = *DataCacheBufferSizeIterator / Thread;
+  std::advance(DataCacheBufferSizeIterator, 1);
+  auto L3Size = *DataCacheBufferSizeIterator / Thread;
+  auto RamSize = RamBufferSize / Thread;
 
   // calculate the reset counters for the buffers
-  auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread);
-  auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread);
-  auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread);
+  auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread);
+  auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
+  auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
-  CodeHolder code;
-  code.init(this->Rt.environment());
+  CodeHolder Code;
+  Code.init(Rt.environment());
 
-  if (nullptr != this->LoadFunction) {
-    this->Rt.release(&this->LoadFunction);
+  if (nullptr != LoadFunction) {
+    Rt.release(&LoadFunction);
   }
 
-  Builder cb(&code);
-  cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
+  Builder Cb(&Code);
+  Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
                           asmjit::DiagnosticOptions::kValidateIntermediate);
 
-  auto pointer_reg = rax;
-  auto l1_addr = rbx;
-  auto l2_addr = rcx;
-  auto l3_addr = r8;
-  auto ram_addr = r9;
-  auto l2_count_reg = r10;
-  auto l3_count_reg = r11;
-  auto ram_count_reg = r12;
-  auto temp_reg = r13;
-  auto temp_reg2 = rbp;
-  auto offset_reg = r14;
-  auto addrHigh_reg = r15;
-  auto iter_reg = mm0;
-  auto shift_reg = std::vector<Gp>({rdi, rsi, rdx});
-  auto nr_shift_regs = 3;
-  auto nr_add_regs = 11;
-  auto ram_reg = ymm15;
-
-  FuncDetail func;
-  func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl),
-            this->Rt.environment());
-
-  FuncFrame frame;
-  frame.init(func);
+  auto PointerReg = rax;
+  auto L1Addr = rbx;
+  auto L2Addr = rcx;
+  auto L3Addr = r8;
+  auto RamAddr = r9;
+  auto L2CountReg = r10;
+  auto L3CountReg = r11;
+  auto RamCountReg = r12;
+  auto TempReg = r13;
+  auto TempReg2 = rbp;
+  auto OffsetReg = r14;
+  auto AddrHighReg = r15;
+  auto IterReg = mm0;
+  auto ShiftRegs = std::vector<Gp>({rdi, rsi, rdx});
+  auto NbShiftRegs = 3;
+  auto NbAddRegs = 11;
+  auto RamReg = ymm15;
+
+  FuncDetail Func;
+  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl), Rt.environment());
+
+  FuncFrame Frame;
+  Frame.init(Func);
 
   // make (x|y)mm registers dirty
-  for (int i = 0; i < 16; i++) {
-    frame.addDirtyRegs(Ymm(i));
+  for (int I = 0; I < 16; I++) {
+    Frame.addDirtyRegs(Ymm(I));
   }
-  for (int i = 0; i < 8; i++) {
-    frame.addDirtyRegs(Mm(i));
+  for (int I = 0; I < 8; I++) {
+    Frame.addDirtyRegs(Mm(I));
   }
   // make all other used registers dirty except RAX
-  frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg,
-                     temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr);
-  for (const auto& reg : shift_reg) {
-    frame.addDirtyRegs(reg);
+  Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg,
+                     AddrHighReg, IterReg, RamAddr);
+  for (const auto& Reg : ShiftRegs) {
+    Frame.addDirtyRegs(Reg);
   }
 
-  FuncArgsAssignment args(&func);
+  FuncArgsAssignment Args(&Func);
   // FIXME: asmjit assigment to mm0 does not seem to be supported
-  args.assignAll(pointer_reg, addrHigh_reg, temp_reg);
-  args.updateFuncFrame(frame);
-  frame.finalize();
+  Args.assignAll(PointerReg, AddrHighReg, TempReg);
+  Args.updateFuncFrame(Frame);
+  Frame.finalize();
 
-  cb.emitProlog(frame);
-  cb.emitArgsAssignment(frame, args);
+  Cb.emitProlog(Frame);
+  Cb.emitArgsAssignment(Frame, Args);
 
   // FIXME: movq from temp_reg to iter_reg
-  cb.movq(iter_reg, temp_reg);
+  Cb.movq(IterReg, TempReg);
 
   // stop right away if low load is selected
-  auto FunctionExit = cb.newLabel();
+  auto FunctionExit = Cb.newLabel();
 
-  cb.mov(temp_reg, ptr_64(addrHigh_reg));
-  cb.test(temp_reg, temp_reg);
-  cb.jz(FunctionExit);
+  Cb.mov(TempReg, ptr_64(AddrHighReg));
+  Cb.test(TempReg, TempReg);
+  Cb.jz(FunctionExit);
 
-  cb.mov(offset_reg,
+  Cb.mov(OffsetReg,
          Imm(64)); // increment after each cache/memory access
   // Initialize registers for shift operations
-  for (auto const& reg : shift_reg) {
-    cb.mov(reg, Imm(0xAAAAAAAAAAAAAAAA));
+  for (auto const& Reg : ShiftRegs) {
+    Cb.mov(Reg, Imm(0xAAAAAAAAAAAAAAAA));
   }
   // Initialize AVX-Registers for FMA Operations
-  cb.vmovapd(ymm0, ymmword_ptr(pointer_reg));
-  cb.vmovapd(ymm1, ymmword_ptr(pointer_reg, 32));
+  Cb.vmovapd(ymm0, ymmword_ptr(PointerReg));
+  Cb.vmovapd(ymm1, ymmword_ptr(PointerReg, 32));
 
-  auto add_regs_start = 2;
-  auto add_regs_end = add_regs_start + nr_add_regs - 1;
-  for (int i = add_regs_start; i <= add_regs_end; i++) {
-    cb.vmovapd(Ymm(i), ymmword_ptr(pointer_reg, 256 + i * 32));
+  auto AddRegsStart = 2;
+  auto AddRegsEnd = AddRegsStart + NbAddRegs - 1;
+  for (int I = AddRegsStart; I <= AddRegsEnd; I++) {
+    Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 256 + I * 32));
   }
 
   // Initialize xmm14 for shift operation
   // cb.mov(temp_reg, Imm(1));
   // cb.movd(temp_reg, Xmm(14));
-  cb.movd(shift_reg[0], Xmm(13));
-  cb.vbroadcastss(Xmm(13), Xmm(13));
-  cb.vmovapd(Xmm(14), Xmm(13));
-  cb.vpsrlq(Xmm(14), Xmm(14), Imm(1));
-
-  cb.mov(l1_addr, pointer_reg); // address for L1-buffer
-  cb.mov(l2_addr, pointer_reg);
-  cb.add(l2_addr, Imm(l1_size)); // address for L2-buffer
-  cb.mov(l3_addr, pointer_reg);
-  cb.add(l3_addr, Imm(l2_size)); // address for L3-buffer
-  cb.mov(ram_addr, pointer_reg);
-  cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer
-  cb.mov(l2_count_reg, Imm(l2_loop_count));
-  workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop ("
-                     << l2_size / 1024 << ") KiB";
-  cb.mov(l3_count_reg, Imm(l3_loop_count));
-  workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop ("
-                     << l3_size / 1024 << ") KiB";
-  cb.mov(ram_count_reg, Imm(ram_loop_count));
-  workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop ("
-                     << ram_size / 1024 << ") KiB";
-
-  cb.align(AlignMode::kCode, 64);
-
-  auto Loop = cb.newLabel();
-  cb.bind(Loop);
-
-  auto shift_pos = 0;
-  bool left = false;
-  auto itemCount = 0;
-  auto add_dest = add_regs_start;
-  unsigned l1_offset = 0;
-
-#define L1_INCREMENT()                                                                                                 \
-  l1_offset += 64;                                                                                                     \
-  if (l1_offset < l1_size * 0.5) {                                                                                     \
-    cb.add(l1_addr, offset_reg);                                                                                       \
-  } else {                                                                                                             \
-    l1_offset = 0;                                                                                                     \
-    cb.mov(l1_addr, pointer_reg);                                                                                      \
-  }
-
-#define L2_INCREMENT() cb.add(l2_addr, offset_reg);
-
-#define L3_INCREMENT() cb.add(l3_addr, offset_reg)
-
-#define RAM_INCREMENT() cb.add(ram_addr, offset_reg)
+  Cb.movd(ShiftRegs[0], Xmm(13));
+  Cb.vbroadcastss(Xmm(13), Xmm(13));
+  Cb.vmovapd(Xmm(14), Xmm(13));
+  Cb.vpsrlq(Xmm(14), Xmm(14), Imm(1));
+
+  Cb.mov(L1Addr, PointerReg); // address for L1-buffer
+  Cb.mov(L2Addr, PointerReg);
+  Cb.add(L2Addr, Imm(L1Size)); // address for L2-buffer
+  Cb.mov(L3Addr, PointerReg);
+  Cb.add(L3Addr, Imm(L2Size)); // address for L3-buffer
+  Cb.mov(RamAddr, PointerReg);
+  Cb.add(RamAddr, Imm(L3Size)); // address for RAM-buffer
+  Cb.mov(L2CountReg, Imm(L2LoopCount));
+  workerLog::trace() << "reset counter for L2-buffer with " << L2LoopCount << " cache line accesses per loop ("
+                     << L2Size / 1024 << ") KiB";
+  Cb.mov(L3CountReg, Imm(L3LoopCount));
+  workerLog::trace() << "reset counter for L3-buffer with " << L3LoopCount << " cache line accesses per loop ("
+                     << L3Size / 1024 << ") KiB";
+  Cb.mov(RamCountReg, Imm(RamLoopCount));
+  workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop ("
+                     << RamSize / 1024 << ") KiB";
+
+  Cb.align(AlignMode::kCode, 64);
+
+  auto Loop = Cb.newLabel();
+  Cb.bind(Loop);
+
+  auto ShiftPos = 0;
+  bool Left = false;
+  unsigned ItemCount = 0;
+  auto AddDest = AddRegsStart;
+  unsigned L1Offset = 0;
+
+  const auto L1Increment = [&Cb, &L1Offset, &L1Size, &L1Addr, &OffsetReg, &PointerReg]() {
+    L1Offset += 64;
+    if (L1Offset < L1Size * 0.5) {
+      Cb.add(L1Addr, OffsetReg);
+    } else {
+      L1Offset = 0;
+      Cb.mov(L1Addr, PointerReg);
+    }
+  };
+  const auto L2Increment = [&Cb, &L2Addr, &OffsetReg]() { Cb.add(L2Addr, OffsetReg); };
+  const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); };
+  const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); };
 
-  for (unsigned count = 0; count < repetitions; count++) {
-    for (const auto& item : sequence) {
+  for (unsigned Count = 0; Count < Repetitions; Count++) {
+    for (const auto& Item : Sequence) {
 
       // swap second and third param of fma instruction to force bitchanges on
       // the pipes to its execution units
-      Ymm secondParam;
-      Ymm thirdParam;
-      if (0 == itemCount % 2) {
-        secondParam = ymm0;
-        thirdParam = ymm1;
+      Ymm SecondParam;
+      Ymm ThirdParam;
+      if (0 == ItemCount % 2) {
+        SecondParam = ymm0;
+        ThirdParam = ymm1;
       } else {
-        secondParam = ymm1;
-        thirdParam = ymm0;
+        SecondParam = ymm1;
+        ThirdParam = ymm0;
       }
 
-      if (item == "REG") {
-        cb.vfmadd231pd(Ymm(add_dest), secondParam, thirdParam);
-        cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]);
-        if (left) {
-          cb.shr(shift_reg[shift_pos], Imm(1));
+      if (Item == "REG") {
+        Cb.vfmadd231pd(Ymm(AddDest), SecondParam, ThirdParam);
+        Cb.xor_(TempReg, ShiftRegs[(ShiftPos + NbShiftRegs - 1) % NbShiftRegs]);
+        if (Left) {
+          Cb.shr(ShiftRegs[ShiftPos], Imm(1));
         } else {
-          cb.shl(shift_reg[shift_pos], Imm(1));
+          Cb.shl(ShiftRegs[ShiftPos], Imm(1));
         }
-      } else if (item == "L1_LS") {
-        cb.vfmadd231pd(Ymm(add_dest), secondParam, ymmword_ptr(l1_addr, 32));
-        cb.vmovapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest));
-        L1_INCREMENT();
-      } else if (item == "L2_L") {
-        cb.vfmadd231pd(Ymm(add_dest), secondParam, ymmword_ptr(l2_addr, 64));
-        cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]);
-        L2_INCREMENT();
-      } else if (item == "L3_L") {
-        cb.vfmadd231pd(Ymm(add_dest), secondParam, ymmword_ptr(l3_addr, 64));
-        cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]);
-        L3_INCREMENT();
-      } else if (item == "RAM_L") {
-        cb.vfmadd231pd(Ymm(ram_reg), secondParam, ymmword_ptr(ram_addr, 32));
-        cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]);
-        RAM_INCREMENT();
+      } else if (Item == "L1_LS") {
+        Cb.vfmadd231pd(Ymm(AddDest), SecondParam, ymmword_ptr(L1Addr, 32));
+        Cb.vmovapd(xmmword_ptr(L1Addr, 64), Xmm(AddDest));
+        L1Increment();
+      } else if (Item == "L2_L") {
+        Cb.vfmadd231pd(Ymm(AddDest), SecondParam, ymmword_ptr(L2Addr, 64));
+        Cb.xor_(TempReg, ShiftRegs[(ShiftPos + NbShiftRegs - 1) % NbShiftRegs]);
+        L2Increment();
+      } else if (Item == "L3_L") {
+        Cb.vfmadd231pd(Ymm(AddDest), SecondParam, ymmword_ptr(L3Addr, 64));
+        Cb.xor_(TempReg, ShiftRegs[(ShiftPos + NbShiftRegs - 1) % NbShiftRegs]);
+        L3Increment();
+      } else if (Item == "RAM_L") {
+        Cb.vfmadd231pd(Ymm(RamReg), SecondParam, ymmword_ptr(RamAddr, 32));
+        Cb.xor_(TempReg, ShiftRegs[(ShiftPos + NbShiftRegs - 1) % NbShiftRegs]);
+        RamIncrement();
       } else {
-        workerLog::error() << "Instruction group " << item << " not found in " << this->name() << ".";
+        workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
         return EXIT_FAILURE;
       }
 
       // make sure the shifts do could end up shifting out the data one end.
-      if (itemCount < (int)(sequence.size() * repetitions - (sequence.size() * repetitions) % 4)) {
-        switch (itemCount % 4) {
+      if (ItemCount < (Sequence.size() * Repetitions) - ((Sequence.size() * Repetitions) % 4)) {
+        switch (ItemCount % 4) {
         case 0:
-          cb.vpsrlq(Xmm(13), Xmm(13), Imm(1));
+          Cb.vpsrlq(Xmm(13), Xmm(13), Imm(1));
           break;
         case 1:
-          cb.vpsllq(Xmm(14), Xmm(14), Imm(1));
+          Cb.vpsllq(Xmm(14), Xmm(14), Imm(1));
           break;
         case 2:
-          cb.vpsllq(Xmm(13), Xmm(13), Imm(1));
+          Cb.vpsllq(Xmm(13), Xmm(13), Imm(1));
           break;
         case 3:
-          cb.vpsrlq(Xmm(14), Xmm(14), Imm(1));
+          Cb.vpsrlq(Xmm(14), Xmm(14), Imm(1));
           break;
         }
       }
 
-      itemCount++;
+      ItemCount++;
 
-      add_dest++;
-      if (add_dest > add_regs_end) {
-        add_dest = add_regs_start;
+      AddDest++;
+      if (AddDest > AddRegsEnd) {
+        AddDest = AddRegsStart;
       }
 
-      shift_pos++;
-      if (shift_pos == nr_shift_regs) {
-        shift_pos = 0;
-        left = !left;
+      ShiftPos++;
+      if (ShiftPos == NbShiftRegs) {
+        ShiftPos = 0;
+        Left = !Left;
       }
     }
   }
 
-  cb.movq(temp_reg, iter_reg); // restore iteration counter
-  if (this->getRAMSequenceCount(sequence) > 0) {
+  Cb.movq(TempReg, IterReg); // restore iteration counter
+  if (getRAMSequenceCount(Sequence) > 0) {
     // reset RAM counter
-    auto NoRamReset = cb.newLabel();
-
-    cb.sub(ram_count_reg, Imm(1));
-    cb.jnz(NoRamReset);
-    cb.mov(ram_count_reg, Imm(ram_loop_count));
-    cb.mov(ram_addr, pointer_reg);
-    cb.add(ram_addr, Imm(l3_size));
-    cb.bind(NoRamReset);
+    auto NoRamReset = Cb.newLabel();
+
+    Cb.sub(RamCountReg, Imm(1));
+    Cb.jnz(NoRamReset);
+    Cb.mov(RamCountReg, Imm(RamLoopCount));
+    Cb.mov(RamAddr, PointerReg);
+    Cb.add(RamAddr, Imm(L3Size));
+    Cb.bind(NoRamReset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  cb.inc(temp_reg); // increment iteration counter
-  if (this->getL2SequenceCount(sequence) > 0) {
+  Cb.inc(TempReg); // increment iteration counter
+  if (getL2SequenceCount(Sequence) > 0) {
     // reset L2-Cache counter
-    auto NoL2Reset = cb.newLabel();
-
-    cb.sub(l2_count_reg, Imm(1));
-    cb.jnz(NoL2Reset);
-    cb.mov(l2_count_reg, Imm(l2_loop_count));
-    cb.mov(l2_addr, pointer_reg);
-    cb.add(l2_addr, Imm(l1_size));
-    cb.bind(NoL2Reset);
+    auto NoL2Reset = Cb.newLabel();
+
+    Cb.sub(L2CountReg, Imm(1));
+    Cb.jnz(NoL2Reset);
+    Cb.mov(L2CountReg, Imm(L2LoopCount));
+    Cb.mov(L2Addr, PointerReg);
+    Cb.add(L2Addr, Imm(L1Size));
+    Cb.bind(NoL2Reset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  cb.movq(iter_reg, temp_reg); // store iteration counter
-  if (this->getL3SequenceCount(sequence) > 0) {
+  Cb.movq(IterReg, TempReg); // store iteration counter
+  if (getL3SequenceCount(Sequence) > 0) {
     // reset L3-Cache counter
-    auto NoL3Reset = cb.newLabel();
-
-    cb.sub(l3_count_reg, Imm(1));
-    cb.jnz(NoL3Reset);
-    cb.mov(l3_count_reg, Imm(l3_loop_count));
-    cb.mov(l3_addr, pointer_reg);
-    cb.add(l3_addr, Imm(l2_size));
-    cb.bind(NoL3Reset);
+    auto NoL3Reset = Cb.newLabel();
+
+    Cb.sub(L3CountReg, Imm(1));
+    Cb.jnz(NoL3Reset);
+    Cb.mov(L3CountReg, Imm(L3LoopCount));
+    Cb.mov(L3Addr, PointerReg);
+    Cb.add(L3Addr, Imm(L2Size));
+    Cb.bind(NoL3Reset);
     // adds always two instruction
-    this->Instructions += 2;
+    Instructions += 2;
   }
-  cb.mov(l1_addr, pointer_reg);
+  Cb.mov(L1Addr, PointerReg);
 
-  if (dumpRegisters) {
-    auto SkipRegistersDump = cb.newLabel();
+  if (DumpRegisters) {
+    auto SkipRegistersDump = Cb.newLabel();
 
-    cb.test(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait));
-    cb.jnz(SkipRegistersDump);
+    Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
+    Cb.jnz(SkipRegistersDump);
 
     // dump all the ymm register
-    for (int i = 0; i < (int)this->registerCount(); i++) {
-      cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i));
+    for (unsigned I = 0; I < registerCount(); I++) {
+      Cb.vmovapd(ymmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Ymm(I));
     }
 
     // set read flag
-    cb.mov(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait));
+    Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
 
-    cb.bind(SkipRegistersDump);
+    Cb.bind(SkipRegistersDump);
   }
 
-  if (errorDetection) {
-    this->emitErrorDetectionCode<decltype(iter_reg), Ymm>(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2);
+  if (ErrorDetection) {
+    emitErrorDetectionCode<decltype(IterReg), Ymm>(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2);
   }
 
-  cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH));
-  cb.jnz(Loop);
+  Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH));
+  Cb.jnz(Loop);
 
-  cb.bind(FunctionExit);
+  Cb.bind(FunctionExit);
 
-  cb.movq(rax, iter_reg);
+  Cb.movq(rax, IterReg);
 
-  cb.emitEpilog(frame);
+  Cb.emitEpilog(Frame);
 
-  cb.finalize();
+  Cb.finalize();
 
   // String sb;
   // cb.dump(sb);
 
-  Error err = this->Rt.add(&this->LoadFunction, &code);
-  if (err) {
+  Error Err = Rt.add(&LoadFunction, &Code);
+  if (Err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
   }
 
   // skip if we could not determine cache size
-  if (l1i_cache_size != 0) {
-    auto loopSize = code.labelOffset(FunctionExit) - code.labelOffset(Loop);
-    auto instructionCachePercentage = 100 * loopSize / l1i_cache_size;
+  if (L1iCacheSize != 0) {
+    auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop);
+    auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize;
 
-    if (loopSize > l1i_cache_size) {
+    if (LoopSize > L1iCacheSize) {
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage
+    workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
-    workerLog::trace() << "Sequence size: " << sequence.size();
-    workerLog::trace() << "Repetition count: " << repetitions;
+    workerLog::trace() << "Sequence size: " << Sequence.size();
+    workerLog::trace() << "Repetition count: " << Repetitions;
   }
 
   return EXIT_SUCCESS;
 }
 
-std::list<std::string> ZENFMAPayload::getAvailableInstructions() const {
-  std::list<std::string> instructions;
+auto ZENFMAPayload::getAvailableInstructions() const -> std::list<std::string> {
+  std::list<std::string> Instructions;
 
-  transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions),
-            [](const auto& item) { return item.first; });
+  transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions),
+            [](const auto& Item) { return Item.first; });
 
-  return instructions;
+  return Instructions;
 }
 
-void ZENFMAPayload::init(uint64_t* memoryAddr, uint64_t bufferSize) {
-  X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4);
+void ZENFMAPayload::init(uint64_t* MemoryAddr, uint64_t BufferSize) {
+  X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
+
+} // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp b/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp
index 0cc5abef..cbf977df 100644
--- a/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp
+++ b/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp
@@ -21,5 +21,5 @@
 
 // This file exists to get an entry in the compile commands database. Clangd will interpolate the include directories
 // for header files based on the source file with the best matching score. This file should be the best score for the
-// included header. Therefore the we should not see any errors in this file for missing includes. For more infomation
+// included header. Therefore we should not see any errors in this file for missing includes. For more information
 // look in the LLVM code base: clang/lib/Tooling/InterpolatingCompilationDatabase.cpp
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/X86CPUTopology.cpp b/src/firestarter/Environment/X86/X86CPUTopology.cpp
index 283e7f61..df579c38 100644
--- a/src/firestarter/Environment/X86/X86CPUTopology.cpp
+++ b/src/firestarter/Environment/X86/X86CPUTopology.cpp
@@ -148,13 +148,13 @@ auto X86CPUTopology::clockrate() const -> uint64_t {
   ClockT::time_point EndTime;
 
 #if not(defined(__APPLE__) || defined(_WIN32))
-  auto governor = scalingGovernor();
-  if (governor.empty()) {
+  auto Governor = scalingGovernor();
+  if (Governor.empty()) {
     return CPUTopology::clockrate();
   }
 
   /* non invariant TSCs can be used if CPUs run at fixed frequency */
-  if (!hasInvariantRdtsc() && governor.compare("performance") && governor.compare("powersave")) {
+  if (!hasInvariantRdtsc() && Governor.compare("performance") && Governor.compare("powersave")) {
     return CPUTopology::clockrate();
   }
 
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index 2c2dabb0..2f24b683 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -42,14 +42,14 @@ void X86Environment::evaluateFunctions() {
 }
 
 auto X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) -> int {
-  unsigned id = 1;
-  std::string defaultPayloadName("");
+  unsigned Id = 1;
+  std::string DefaultPayloadName;
 
   // if functionId is 0 get the default or fallback
   for (const auto& Config : PlatformConfigs) {
     for (auto const& [thread, functionName] : Config->getThreadMap()) {
       // the selected function
-      if (id == FunctionId) {
+      if (Id == FunctionId) {
         if (!Config->isAvailable()) {
           log::error() << "Function " << FunctionId << " (\"" << functionName << "\") requires "
                        << Config->payload().name() << ", which is not supported by the processor.";
@@ -68,21 +68,20 @@ auto X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
           SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(*Config, thread,
                                                                                    topology().instructionCacheSize());
           return EXIT_SUCCESS;
-        } else {
-          defaultPayloadName = Config->payload().name();
         }
+        DefaultPayloadName = Config->payload().name();
       }
-      id++;
+      Id++;
     }
   }
 
   // no default found
   // use fallback
   if (0 == FunctionId) {
-    if (!defaultPayloadName.empty()) {
+    if (!DefaultPayloadName.empty()) {
       // default payload available, but number of threads per core is not
       // supported
-      log::warn() << "No " << defaultPayloadName << " code path for " << topology().numThreadsPerCore()
+      log::warn() << "No " << DefaultPayloadName << " code path for " << topology().numThreadsPerCore()
                   << " threads per core!";
     }
     log::warn() << topology().vendor() << " " << topology().model()
@@ -93,21 +92,21 @@ auto X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
     // fallback
     for (const auto& Config : FallbackPlatformConfigs) {
       if (Config->isAvailable()) {
-        auto selectedThread = 0;
-        auto selectedFunctionName = std::string("");
-        for (auto const& [thread, functionName] : Config->getThreadMap()) {
-          if (thread == topology().numThreadsPerCore()) {
-            selectedThread = thread;
-            selectedFunctionName = functionName;
+        auto SelectedThread = 0;
+        auto SelectedFunctionName = std::string("");
+        for (auto const& [Thread, FunctionName] : Config->getThreadMap()) {
+          if (Thread == topology().numThreadsPerCore()) {
+            SelectedThread = Thread;
+            SelectedFunctionName = FunctionName;
           }
         }
-        if (selectedThread == 0) {
-          selectedThread = Config->getThreadMap().begin()->first;
-          selectedFunctionName = Config->getThreadMap().begin()->second;
+        if (SelectedThread == 0) {
+          SelectedThread = Config->getThreadMap().begin()->first;
+          SelectedFunctionName = Config->getThreadMap().begin()->second;
         }
-        SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(*Config, selectedThread,
+        SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(*Config, SelectedThread,
                                                                                  topology().instructionCacheSize());
-        log::warn() << "Using function " << selectedFunctionName << " as fallback.\n"
+        log::warn() << "Using function " << SelectedFunctionName << " as fallback.\n"
                     << "You can use the parameter --function to try other "
                        "functions.";
         return EXIT_SUCCESS;
@@ -124,68 +123,68 @@ auto X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
   return EXIT_FAILURE;
 }
 
-int X86Environment::selectInstructionGroups(std::string groups) {
-  const std::string delimiter = ",";
-  const std::regex re("^(\\w+):(\\d+)$");
-  const auto availableInstructionGroups = selectedConfig().platformConfig().payload().getAvailableInstructions();
+auto X86Environment::selectInstructionGroups(std::string Groups) -> int {
+  const std::string Delimiter = ",";
+  const std::regex Re("^(\\w+):(\\d+)$");
+  const auto AvailableInstructionGroups = selectedConfig().platformConfig().payload().getAvailableInstructions();
 
-  std::stringstream ss(groups);
-  std::vector<std::pair<std::string, unsigned>> payloadSettings = {};
+  std::stringstream Ss(Groups);
+  std::vector<std::pair<std::string, unsigned>> PayloadSettings = {};
 
-  while (ss.good()) {
-    std::string token;
-    std::smatch m;
-    std::getline(ss, token, ',');
+  while (Ss.good()) {
+    std::string Token;
+    std::smatch M;
+    std::getline(Ss, Token, ',');
 
-    if (std::regex_match(token, m, re)) {
-      if (std::find(availableInstructionGroups.begin(), availableInstructionGroups.end(), m[1].str()) ==
-          availableInstructionGroups.end()) {
-        log::error() << "Invalid instruction-group: " << m[1].str()
+    if (std::regex_match(Token, M, Re)) {
+      if (std::find(AvailableInstructionGroups.begin(), AvailableInstructionGroups.end(), M[1].str()) ==
+          AvailableInstructionGroups.end()) {
+        log::error() << "Invalid instruction-group: " << M[1].str()
                      << "\n       --run-instruction-groups format: multiple INST:VAL "
                         "pairs comma-seperated";
         return EXIT_FAILURE;
       }
-      int num = std::stoul(m[2].str());
-      if (num == 0) {
+      int Num = std::stoul(M[2].str());
+      if (Num == 0) {
         log::error() << "instruction-group VAL may not contain number 0"
                      << "\n       --run-instruction-groups format: multiple INST:VAL "
                         "pairs comma-seperated";
         return EXIT_FAILURE;
       }
-      payloadSettings.push_back(std::make_pair(m[1].str(), num));
+      PayloadSettings.emplace_back(M[1].str(), Num);
     } else {
-      log::error() << "Invalid symbols in instruction-group: " << token
+      log::error() << "Invalid symbols in instruction-group: " << Token
                    << "\n       --run-instruction-groups format: multiple INST:VAL "
                       "pairs comma-seperated";
       return EXIT_FAILURE;
     }
   }
 
-  selectedConfig().setPayloadSettings(payloadSettings);
+  selectedConfig().setPayloadSettings(PayloadSettings);
 
-  log::info() << "  Running custom instruction group: " << groups;
+  log::info() << "  Running custom instruction group: " << Groups;
 
   return EXIT_SUCCESS;
 }
 
 void X86Environment::printAvailableInstructionGroups() {
-  std::stringstream ss;
+  std::stringstream Ss;
 
-  for (auto const& item : selectedConfig().platformConfig().payload().getAvailableInstructions()) {
-    ss << item << ",";
+  for (auto const& Item : selectedConfig().platformConfig().payload().getAvailableInstructions()) {
+    Ss << Item << ",";
   }
 
-  auto s = ss.str();
-  if (s.size() > 0) {
-    s.pop_back();
+  auto S = Ss.str();
+  if (S.size() > 0) {
+    S.pop_back();
   }
 
   log::info() << " available instruction-groups for payload " << selectedConfig().platformConfig().payload().name()
               << ":\n"
-              << "  " << s;
+              << "  " << S;
 }
 
-void X86Environment::setLineCount(unsigned lineCount) { selectedConfig().setLineCount(lineCount); }
+void X86Environment::setLineCount(unsigned LineCount) { selectedConfig().setLineCount(LineCount); }
 
 void X86Environment::printSelectedCodePathSummary() { selectedConfig().printCodePathSummary(); }
 
@@ -198,19 +197,19 @@ void X86Environment::printFunctionSummary() {
                  "-------------------------------------------------------------"
                  "-----------------------------";
 
-  unsigned id = 1;
-
-  for (auto const& config : PlatformConfigs) {
-    for (auto const& [thread, functionName] : config->getThreadMap()) {
-      const char* available = config->isAvailable() ? "yes" : "no";
-      const char* fmt = "  %4u | %-30s | %-24s | %s";
-      int sz = std::snprintf(nullptr, 0, fmt, id, functionName.c_str(), available,
-                             config->getDefaultPayloadSettingsString().c_str());
-      std::vector<char> buf(sz + 1);
-      std::snprintf(&buf[0], buf.size(), fmt, id, functionName.c_str(), available,
-                    config->getDefaultPayloadSettingsString().c_str());
-      log::info() << std::string(&buf[0]);
-      id++;
+  unsigned Id = 1;
+
+  for (auto const& Config : PlatformConfigs) {
+    for (auto const& [thread, functionName] : Config->getThreadMap()) {
+      const char* Available = Config->isAvailable() ? "yes" : "no";
+      const char* Fmt = "  %4u | %-30s | %-24s | %s";
+      int Sz = std::snprintf(nullptr, 0, Fmt, Id, functionName.c_str(), Available,
+                             Config->getDefaultPayloadSettingsString().c_str());
+      std::vector<char> Buf(Sz + 1);
+      std::snprintf(Buf.data(), Buf.size(), Fmt, Id, functionName.c_str(), Available,
+                    Config->getDefaultPayloadSettingsString().c_str());
+      log::info() << std::string(Buf.data());
+      Id++;
     }
   }
 }
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 0df2c6c3..40e2f690 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -19,33 +19,32 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <algorithm>
 #include <firestarter/Firestarter.hpp>
 #include <firestarter/Logging/Log.hpp>
 #if defined(linux) || defined(__linux__)
+#include <firestarter/Measurement/Metric/IPCEstimate.h>
 #include <firestarter/Optimizer/Algorithm/NSGA2.hpp>
 #include <firestarter/Optimizer/History.hpp>
 #include <firestarter/Optimizer/Problem/CLIArgumentProblem.hpp>
-extern "C" {
-#include <firestarter/Measurement/Metric/IPCEstimate.h>
-}
 #endif
 
 #include <csignal>
 #include <functional>
-#include <thread>
+#include <utility>
 
 #ifdef _MSC_VER
 #include <intrin.h>
 #endif
 
-using namespace firestarter;
+namespace firestarter {
 
 Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds const& Timeout, unsigned LoadPercent,
                          std::chrono::microseconds const& Period, unsigned RequestedNumThreads,
                          std::string const& CpuBind, bool PrintFunctionSummary, unsigned FunctionId,
                          bool ListInstructionGroups, std::string const& InstructionGroups, unsigned LineCount,
                          bool AllowUnavailablePayload, bool DumpRegisters,
-                         std::chrono::seconds const& DumpRegistersTimeDelta, std::string const& DumpRegistersOutpath,
+                         std::chrono::seconds const& DumpRegistersTimeDelta, std::string DumpRegistersOutpath,
                          bool ErrorDetection, int Gpus, unsigned GpuMatrixSize, bool GpuUseFloat, bool GpuUseDouble,
                          bool ListMetrics, bool Measurement, std::chrono::milliseconds const& StartDelta,
                          std::chrono::milliseconds const& StopDelta,
@@ -54,7 +53,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
                          bool Optimize, std::chrono::seconds const& Preheat, std::string const& OptimizationAlgorithm,
                          std::vector<std::string> const& OptimizationMetrics,
                          std::chrono::seconds const& EvaluationDuration, unsigned Individuals,
-                         std::string const& OptimizeOutfile, unsigned Generations, double Nsga2Cr, double Nsga2M)
+                         std::string OptimizeOutfile, unsigned Generations, double Nsga2Cr, double Nsga2M)
     : Argc(Argc)
     , Argv(Argv)
     , Timeout(Timeout)
@@ -62,7 +61,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
     , Period(Period)
     , DumpRegisters(DumpRegisters)
     , DumpRegistersTimeDelta(DumpRegistersTimeDelta)
-    , DumpRegistersOutpath(DumpRegistersOutpath)
+    , DumpRegistersOutpath(std::move(DumpRegistersOutpath))
     , ErrorDetection(ErrorDetection)
     , Gpus(Gpus)
     , GpuMatrixSize(GpuMatrixSize)
@@ -77,11 +76,11 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
     , OptimizationMetrics(OptimizationMetrics)
     , EvaluationDuration(EvaluationDuration)
     , Individuals(Individuals)
-    , OptimizeOutfile(OptimizeOutfile)
+    , OptimizeOutfile(std::move(OptimizeOutfile))
     , Generations(Generations)
     , Nsga2Cr(Nsga2Cr)
     , Nsga2M(Nsga2M) {
-  int returnCode;
+  int ReturnCode = 0;
 
   Load = (Period * LoadPercent) / 100;
   if (LoadPercent == 100 || Load == std::chrono::microseconds::zero()) {
@@ -97,11 +96,11 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
 #endif
 
 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
-  this->Environment = new environment::x86::X86Environment();
+  Environment = new environment::x86::X86Environment();
 #endif
 
-  if (EXIT_SUCCESS != (returnCode = this->environment().evaluateCpuAffinity(RequestedNumThreads, CpuBind))) {
-    std::exit(returnCode);
+  if (EXIT_SUCCESS != (ReturnCode = environment().evaluateCpuAffinity(RequestedNumThreads, CpuBind))) {
+    std::exit(ReturnCode);
   }
 
 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
@@ -114,42 +113,42 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
   }
 #endif
 
-  if (ErrorDetection && this->environment().requestedNumThreads() < 2) {
+  if (ErrorDetection && environment().requestedNumThreads() < 2) {
     throw std::invalid_argument("Option --error-detection must run with 2 or more threads. Number of "
                                 "threads is " +
-                                std::to_string(this->environment().requestedNumThreads()) + "\n");
+                                std::to_string(environment().requestedNumThreads()) + "\n");
   }
 
-  this->environment().evaluateFunctions();
+  environment().evaluateFunctions();
 
   if (PrintFunctionSummary) {
-    this->environment().printFunctionSummary();
+    environment().printFunctionSummary();
     std::exit(EXIT_SUCCESS);
   }
 
-  if (EXIT_SUCCESS != (returnCode = this->environment().selectFunction(FunctionId, AllowUnavailablePayload))) {
-    std::exit(returnCode);
+  if (EXIT_SUCCESS != (ReturnCode = environment().selectFunction(FunctionId, AllowUnavailablePayload))) {
+    std::exit(ReturnCode);
   }
 
   if (ListInstructionGroups) {
-    this->environment().printAvailableInstructionGroups();
+    environment().printAvailableInstructionGroups();
     std::exit(EXIT_SUCCESS);
   }
 
   if (!InstructionGroups.empty()) {
-    if (EXIT_SUCCESS != (returnCode = this->environment().selectInstructionGroups(InstructionGroups))) {
-      std::exit(returnCode);
+    if (EXIT_SUCCESS != (ReturnCode = environment().selectInstructionGroups(InstructionGroups))) {
+      std::exit(ReturnCode);
     }
   }
 
   if (LineCount != 0) {
-    this->environment().setLineCount(LineCount);
+    environment().setLineCount(LineCount);
   }
 
 #if defined(linux) || defined(__linux__)
   if (Measurement || ListMetrics || Optimize) {
     MeasurementWorker = std::make_shared<measurement::MeasurementWorker>(
-        MeasurementInterval, this->environment().requestedNumThreads(), MetricPaths, StdinMetrics);
+        MeasurementInterval, environment().requestedNumThreads(), MetricPaths, StdinMetrics);
 
     if (ListMetrics) {
       log::info() << MeasurementWorker->availableMetrics();
@@ -157,112 +156,108 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
     }
 
     // init all metrics
-    auto all = MeasurementWorker->metricNames();
-    auto initialized = MeasurementWorker->initMetrics(all);
+    auto All = MeasurementWorker->metricNames();
+    auto Initialized = MeasurementWorker->initMetrics(All);
 
-    if (initialized.size() == 0) {
+    if (Initialized.size() == 0) {
       log::error() << "No metrics initialized";
       std::exit(EXIT_FAILURE);
     }
 
     // check if selected metrics are initialized
-    for (auto const& optimizationMetric : OptimizationMetrics) {
-      auto nameEqual = [optimizationMetric](auto const& name) {
-        auto invertedName = "-" + name;
-        return name.compare(optimizationMetric) == 0 || invertedName.compare(optimizationMetric) == 0;
+    for (auto const& OptimizationMetric : OptimizationMetrics) {
+      auto NameEqual = [OptimizationMetric](auto const& Name) {
+        auto InvertedName = "-" + Name;
+        return Name.compare(OptimizationMetric) == 0 || InvertedName.compare(OptimizationMetric) == 0;
       };
       // metric name is not found
-      if (std::find_if(all.begin(), all.end(), nameEqual) == all.end()) {
-        log::error() << "Metric \"" << optimizationMetric << "\" does not exist.";
+      if (std::find_if(All.begin(), All.end(), NameEqual) == All.end()) {
+        log::error() << "Metric \"" << OptimizationMetric << "\" does not exist.";
         std::exit(EXIT_FAILURE);
       }
       // metric has not initialized properly
-      if (std::find_if(initialized.begin(), initialized.end(), nameEqual) == initialized.end()) {
-        log::error() << "Metric \"" << optimizationMetric << "\" failed to initialize.";
+      if (std::find_if(Initialized.begin(), Initialized.end(), NameEqual) == Initialized.end()) {
+        log::error() << "Metric \"" << OptimizationMetric << "\" failed to initialize.";
         std::exit(EXIT_FAILURE);
       }
     }
   }
 
   if (Optimize) {
-    auto applySettings = std::bind(
-        [this](std::vector<std::pair<std::string, unsigned>> const& setting) {
+    auto ApplySettings = std::bind(
+        [this](std::vector<std::pair<std::string, unsigned>> const& Setting) {
           using Clock = std::chrono::high_resolution_clock;
-          auto start = Clock::now();
+          auto Start = Clock::now();
 
-          for (auto& thread : this->LoadThreads) {
-            auto td = thread.second;
+          for (auto& Thread : LoadThreads) {
+            auto Td = Thread.second;
 
-            td->config().setPayloadSettings(setting);
+            Td->config().setPayloadSettings(Setting);
           }
 
-          for (auto const& thread : this->LoadThreads) {
-            auto td = thread.second;
+          for (auto const& Thread : LoadThreads) {
+            auto Td = Thread.second;
 
-            td->Mutex.lock();
+            Td->Mutex.lock();
           }
 
-          for (auto const& thread : this->LoadThreads) {
-            auto td = thread.second;
+          for (auto const& Thread : LoadThreads) {
+            auto Td = Thread.second;
 
-            td->Comm = THREAD_SWITCH;
-            td->Mutex.unlock();
+            Td->Comm = THREAD_SWITCH;
+            Td->Mutex.unlock();
           }
 
-          this->LoadVar = LOAD_SWITCH;
+          LoadVar = LOAD_SWITCH;
 
-          for (auto const& thread : this->LoadThreads) {
-            auto td = thread.second;
-            bool ack;
+          for (auto const& Thread : LoadThreads) {
+            auto Td = Thread.second;
+            bool Ack = false;
 
             do {
-              td->Mutex.lock();
-              ack = td->Ack;
-              td->Mutex.unlock();
-            } while (!ack);
-
-            td->Mutex.lock();
-            td->Ack = false;
-            td->Mutex.unlock();
+              Td->Mutex.lock();
+              Ack = Td->Ack;
+              Td->Mutex.unlock();
+            } while (!Ack);
+
+            Td->Mutex.lock();
+            Td->Ack = false;
+            Td->Mutex.unlock();
           }
 
-          this->LoadVar = LOAD_HIGH;
+          LoadVar = LOAD_HIGH;
 
-          this->signalWork();
+          signalWork();
 
-          uint64_t startTimestamp = 0xffffffffffffffff;
-          uint64_t stopTimestamp = 0;
+          uint64_t StartTimestamp = 0xffffffffffffffff;
+          uint64_t StopTimestamp = 0;
 
-          for (auto const& thread : this->LoadThreads) {
-            auto td = thread.second;
+          for (auto const& Thread : LoadThreads) {
+            auto Td = Thread.second;
 
-            if (startTimestamp > td->LastStartTsc) {
-              startTimestamp = td->LastStartTsc;
-            }
-            if (stopTimestamp < td->LastStopTsc) {
-              stopTimestamp = td->LastStopTsc;
-            }
+            StartTimestamp = std::min<uint64_t>(StartTimestamp, Td->LastStartTsc);
+            StopTimestamp = std::max<uint64_t>(StopTimestamp, Td->LastStopTsc);
           }
 
-          for (auto const& thread : this->LoadThreads) {
-            auto td = thread.second;
-            ipcEstimateMetricInsert((double)td->LastIterations *
-                                    (double)this->LoadThreads.front().second->config().payload().instructions() /
-                                    (double)(stopTimestamp - startTimestamp));
+          for (auto const& Thread : LoadThreads) {
+            auto Td = Thread.second;
+            ipcEstimateMetricInsert((double)Td->LastIterations *
+                                    static_cast<double>(LoadThreads.front().second->config().payload().instructions()) /
+                                    static_cast<double>(StopTimestamp - StartTimestamp));
           }
 
-          auto end = Clock::now();
+          auto End = Clock::now();
 
           log::trace() << "Switching payload took "
-                       << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms";
+                       << std::chrono::duration_cast<std::chrono::milliseconds>(End - Start).count() << "ms";
         },
         std::placeholders::_1);
 
-    auto prob = std::make_shared<firestarter::optimizer::problem::CLIArgumentProblem>(
-        std::move(applySettings), MeasurementWorker, OptimizationMetrics, EvaluationDuration, StartDelta, StopDelta,
-        this->environment().selectedConfig().payloadItems());
+    auto Prob = std::make_shared<firestarter::optimizer::problem::CLIArgumentProblem>(
+        std::move(ApplySettings), MeasurementWorker, OptimizationMetrics, EvaluationDuration, StartDelta, StopDelta,
+        environment().selectedConfig().payloadItems());
 
-    Population = firestarter::optimizer::Population(std::move(prob));
+    Population = firestarter::optimizer::Population(std::move(Prob));
 
     if (OptimizationAlgorithm == "NSGA2") {
       Algorithm = std::make_unique<firestarter::optimizer::algorithm::NSGA2>(Generations, Nsga2Cr, Nsga2M);
@@ -274,14 +269,14 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
   }
 #endif
 
-  this->environment().printSelectedCodePathSummary();
+  environment().printSelectedCodePathSummary();
 
-  log::info() << this->environment().topology();
+  log::info() << environment().topology();
 
   // setup thread with either high or low load configured at the start
   // low loads has to know the length of the period
-  if (EXIT_SUCCESS != (returnCode = this->initLoadWorkers((LoadPercent == 0), Period.count()))) {
-    std::exit(returnCode);
+  if (EXIT_SUCCESS != (ReturnCode = initLoadWorkers((LoadPercent == 0), Period.count()))) {
+    std::exit(ReturnCode);
   }
 
   // add some signal handler for aborting FIRESTARTER
@@ -305,14 +300,14 @@ Firestarter::~Firestarter() {
 }
 
 void Firestarter::mainThread() {
-  this->environment().printThreadSummary();
+  environment().printThreadSummary();
 
 #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
-  _cuda = std::make_unique<cuda::Cuda>(&this->loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus);
+  _cuda = std::make_unique<cuda::Cuda>(&loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus);
 #endif
 
 #ifdef FIRESTARTER_BUILD_ONEAPI
-  _oneapi = std::make_unique<oneapi::OneAPI>(&this->loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus);
+  _oneapi = std::make_unique<oneapi::OneAPI>(&loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus);
 #endif
 
 #if defined(linux) || defined(__linux__)
@@ -322,24 +317,24 @@ void Firestarter::mainThread() {
   }
 #endif
 
-  this->signalWork();
+  signalWork();
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
   if (DumpRegisters) {
-    int returnCode;
-    if (EXIT_SUCCESS != (returnCode = this->initDumpRegisterWorker(DumpRegistersTimeDelta, DumpRegistersOutpath))) {
-      std::exit(returnCode);
+    int ReturnCode = 0;
+    if (EXIT_SUCCESS != (ReturnCode = initDumpRegisterWorker(DumpRegistersTimeDelta, DumpRegistersOutpath))) {
+      std::exit(ReturnCode);
     }
   }
 #endif
 
   // worker thread for load control
-  this->watchdogWorker(Period, Load, Timeout);
+  watchdogWorker(Period, Load, Timeout);
 
 #if defined(linux) || defined(__linux__)
   // check if optimization is selected
   if (Optimize) {
-    auto startTime = optimizer::History::getTime();
+    auto StartTime = optimizer::History::getTime();
 
     Firestarter::Optimizer = std::make_unique<optimizer::OptimizerWorker>(std::move(Algorithm), Population,
                                                                           OptimizationAlgorithm, Individuals, Preheat);
@@ -347,12 +342,12 @@ void Firestarter::mainThread() {
     // wait here until optimizer thread terminates
     Firestarter::Optimizer->join();
 
-    auto payloadItems = this->environment().selectedConfig().payloadItems();
+    auto PayloadItems = environment().selectedConfig().payloadItems();
 
-    firestarter::optimizer::History::save(OptimizeOutfile, startTime, payloadItems, Argc, Argv);
+    firestarter::optimizer::History::save(OptimizeOutfile, StartTime, PayloadItems, Argc, Argv);
 
     // print the best 20 according to each metric
-    firestarter::optimizer::History::printBest(OptimizationMetrics, payloadItems);
+    firestarter::optimizer::History::printBest(OptimizationMetrics, PayloadItems);
 
     // stop all the load threads
     std::raise(SIGTERM);
@@ -360,15 +355,15 @@ void Firestarter::mainThread() {
 #endif
 
   // wait for watchdog to timeout or until user terminates
-  this->joinLoadWorkers();
+  joinLoadWorkers();
 #ifdef FIRESTARTER_DEBUG_FEATURES
   if (DumpRegisters) {
-    this->joinDumpRegisterWorker();
+    joinDumpRegisterWorker();
   }
 #endif
 
   if (!Optimize) {
-    this->printPerformanceReport();
+    printPerformanceReport();
   }
 
 #if defined(linux) || defined(__linux__)
@@ -384,13 +379,13 @@ void Firestarter::mainThread() {
 #endif
 
   if (ErrorDetection) {
-    this->printThreadErrorReport();
+    printThreadErrorReport();
   }
 }
 
-void Firestarter::setLoad(uint64_t value) {
+void Firestarter::setLoad(uint64_t Value) {
   // signal load change to workers
-  Firestarter::LoadVar = value;
+  Firestarter::LoadVar = Value;
 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
 #ifndef _MSC_VER
   __asm__ __volatile__("mfence;");
@@ -402,17 +397,17 @@ void Firestarter::setLoad(uint64_t value) {
 #endif
 }
 
-void Firestarter::sigalrmHandler(int signum) { (void)signum; }
+void Firestarter::sigalrmHandler(int Signum) { (void)Signum; }
 
-void Firestarter::sigtermHandler(int signum) {
-  (void)signum;
+void Firestarter::sigtermHandler(int Signum) {
+  (void)Signum;
 
   Firestarter::setLoad(LOAD_STOP);
   // exit loop
   // used in case of 0 < load < 100
   // or interrupt sleep for timeout
   {
-    std::lock_guard<std::mutex> lk(Firestarter::WatchdogTerminateMutex);
+    std::lock_guard<std::mutex> Lk(Firestarter::WatchdogTerminateMutex);
     Firestarter::WatchdogTerminate = true;
   }
   Firestarter::WatchdogTerminateAlert.notify_all();
@@ -424,3 +419,5 @@ void Firestarter::sigtermHandler(int signum) {
   }
 #endif
 }
+
+} // namespace firestarter
\ No newline at end of file
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index c5a998c5..09a68464 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -19,15 +19,14 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <algorithm>
 #include <firestarter/ErrorDetectionStruct.hpp>
 #include <firestarter/Firestarter.hpp>
 #include <firestarter/Logging/Log.hpp>
 #include <iomanip>
 
 #if defined(linux) || defined(__linux__)
-extern "C" {
 #include <firestarter/Measurement/Metric/IPCEstimate.h>
-}
 #endif
 
 #ifdef ENABLE_VTRACING
@@ -39,133 +38,135 @@ extern "C" {
 
 #include <cmath>
 #include <cstdlib>
-#include <functional>
 #include <thread>
 
-using namespace firestarter;
+namespace {
+auto AlignedFreeDeleter = [](void* P) { ALIGNED_FREE(P); };
+
+}
 
-auto aligned_free_deleter = [](void* p) { ALIGNED_FREE(p); };
+namespace firestarter {
 
-int Firestarter::initLoadWorkers(bool lowLoad, uint64_t period) {
-  int returnCode;
+auto Firestarter::initLoadWorkers(bool LowLoad, uint64_t Period) -> int {
+  auto ReturnCode = environment().setCpuAffinity(0);
 
-  if (EXIT_SUCCESS != (returnCode = this->environment().setCpuAffinity(0))) {
+  if (EXIT_SUCCESS != ReturnCode) {
     return EXIT_FAILURE;
   }
 
   // setup load variable to execute low or high load once the threads switch to
   // work.
-  this->LoadVar = lowLoad ? LOAD_LOW : LOAD_HIGH;
+  LoadVar = LowLoad ? LOAD_LOW : LOAD_HIGH;
 
-  auto numThreads = this->environment().requestedNumThreads();
+  auto NumThreads = environment().requestedNumThreads();
 
   // create a std::vector<std::shared_ptr<>> of requestenNumThreads()
   // communication pointers and add these to the threaddata
   if (ErrorDetection) {
-    for (uint64_t i = 0; i < numThreads; i++) {
-      auto commPtr = reinterpret_cast<uint64_t*>(ALIGNED_MALLOC(2 * sizeof(uint64_t), 64));
-      assert(commPtr);
-      this->ErrorCommunication.push_back(std::shared_ptr<uint64_t>(commPtr, aligned_free_deleter));
-      log::debug() << "Threads " << (i + numThreads - 1) % numThreads << " and " << i << " commPtr = 0x"
-                   << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex << (uint64_t)commPtr;
+    for (uint64_t I = 0; I < NumThreads; I++) {
+      auto* CommPtr = reinterpret_cast<uint64_t*>(ALIGNED_MALLOC(2 * sizeof(uint64_t), 64));
+      assert(CommPtr);
+      ErrorCommunication.push_back(std::shared_ptr<uint64_t>(CommPtr, AlignedFreeDeleter));
+      log::debug() << "Threads " << (I + NumThreads - 1) % NumThreads << " and " << I << " commPtr = 0x"
+                   << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex
+                   << reinterpret_cast<uint64_t>(CommPtr);
     }
   }
 
-  for (uint64_t i = 0; i < numThreads; i++) {
-    auto td =
-        std::make_shared<LoadWorkerData>(i, this->environment(), &this->LoadVar, period, DumpRegisters, ErrorDetection);
+  for (uint64_t I = 0; I < NumThreads; I++) {
+    auto Td = std::make_shared<LoadWorkerData>(I, environment(), &LoadVar, Period, DumpRegisters, ErrorDetection);
 
     if (ErrorDetection) {
       // distribute pointers for error deteciton. (set threads in a ring)
       // give this thread the left pointer i and right pointer (i+1) %
       // requestedNumThreads().
-      td->setErrorCommunication(this->ErrorCommunication[i], this->ErrorCommunication[(i + 1) % numThreads]);
+      Td->setErrorCommunication(ErrorCommunication[I], ErrorCommunication[(I + 1) % NumThreads]);
     }
 
-    auto dataCacheSizeIt = td->config().platformConfig().dataCacheBufferSize().begin();
-    auto ramBufferSize = td->config().platformConfig().ramBufferSize();
+    auto DataCacheSizeIt = Td->config().platformConfig().dataCacheBufferSize().begin();
+    auto RamBufferSize = Td->config().platformConfig().ramBufferSize();
 
-    td->BuffersizeMem =
-        (*dataCacheSizeIt + *std::next(dataCacheSizeIt, 1) + *std::next(dataCacheSizeIt, 2) + ramBufferSize) /
-        td->config().thread() / sizeof(uint64_t);
+    Td->BuffersizeMem =
+        (*DataCacheSizeIt + *std::next(DataCacheSizeIt, 1) + *std::next(DataCacheSizeIt, 2) + RamBufferSize) /
+        Td->config().thread() / sizeof(uint64_t);
 
     // create the thread
-    std::thread t(Firestarter::loadThreadWorker, td);
+    std::thread T(Firestarter::loadThreadWorker, Td);
 
-    log::trace() << "Created thread #" << i << " with ID: " << t.get_id();
+    log::trace() << "Created thread #" << I << " with ID: " << T.get_id();
 
-    if (i == 0) {
+    if (I == 0) {
       // only show error for all worker threads except first.
-      firestarter::logging::FirstWorkerThreadFilter<firestarter::logging::Record>::setFirstThread(t.get_id());
+      firestarter::logging::FirstWorkerThreadFilter<firestarter::logging::Record>::setFirstThread(T.get_id());
     }
 
-    this->LoadThreads.push_back(std::make_pair(std::move(t), td));
+    LoadThreads.emplace_back(std::move(T), Td);
   }
 
-  this->signalLoadWorkers(THREAD_INIT);
+  signalLoadWorkers(THREAD_INIT);
 
   return EXIT_SUCCESS;
 }
 
-void Firestarter::signalLoadWorkers(int comm) {
-  bool ack;
+void Firestarter::signalLoadWorkers(int Comm) {
+  bool Ack = false;
 
   // start the work
-  for (auto const& thread : this->LoadThreads) {
-    auto td = thread.second;
+  for (auto const& Thread : LoadThreads) {
+    auto Td = Thread.second;
 
-    td->Mutex.lock();
+    Td->Mutex.lock();
   }
 
-  for (auto const& thread : this->LoadThreads) {
-    auto td = thread.second;
+  for (auto const& Thread : LoadThreads) {
+    auto Td = Thread.second;
 
-    td->Comm = comm;
-    td->Mutex.unlock();
+    Td->Comm = Comm;
+    Td->Mutex.unlock();
   }
 
-  for (auto const& thread : this->LoadThreads) {
-    auto td = thread.second;
+  for (auto const& Thread : LoadThreads) {
+    auto Td = Thread.second;
 
     do {
-      td->Mutex.lock();
-      ack = td->Ack;
-      td->Mutex.unlock();
-    } while (!ack);
-
-    td->Mutex.lock();
-    td->Ack = false;
-    td->Mutex.unlock();
+      Td->Mutex.lock();
+      Ack = Td->Ack;
+      Td->Mutex.unlock();
+    } while (!Ack);
+
+    Td->Mutex.lock();
+    Td->Ack = false;
+    Td->Mutex.unlock();
   }
 }
 
 void Firestarter::joinLoadWorkers() {
   // wait for threads after watchdog has requested termination
-  for (auto& thread : this->LoadThreads) {
-    thread.first.join();
+  for (auto& Thread : LoadThreads) {
+    Thread.first.join();
   }
 }
 
 void Firestarter::printThreadErrorReport() {
   if (ErrorDetection) {
-    auto maxSize = this->LoadThreads.size();
+    auto MaxSize = LoadThreads.size();
 
-    std::vector<bool> errors(maxSize, false);
+    std::vector<bool> Errors(MaxSize, false);
 
-    for (decltype(maxSize) i = 0; i < maxSize; i++) {
-      auto errorDetectionStruct = this->LoadThreads[i].second->errorDetectionStruct();
+    for (decltype(MaxSize) I = 0; I < MaxSize; I++) {
+      const auto* ErrorDetectionStructPtr = LoadThreads[I].second->errorDetectionStruct();
 
-      if (errorDetectionStruct->ErrorLeft) {
-        errors[(i + maxSize - 1) % maxSize] = true;
+      if (ErrorDetectionStructPtr->ErrorLeft) {
+        Errors[(I + MaxSize - 1) % MaxSize] = true;
       }
-      if (errorDetectionStruct->ErrorRight) {
-        errors[i] = true;
+      if (ErrorDetectionStructPtr->ErrorRight) {
+        Errors[I] = true;
       }
     }
 
-    for (decltype(maxSize) i = 0; i < maxSize; i++) {
-      if (errors[i]) {
-        log::fatal() << "Data mismatch between Threads " << i << " and " << (i + 1) % maxSize
+    for (decltype(MaxSize) I = 0; I < MaxSize; I++) {
+      if (Errors[I]) {
+        log::fatal() << "Data mismatch between Threads " << I << " and " << (I + 1) % MaxSize
                      << ".\n       This may be caused by bit-flips in the hardware.";
       }
     }
@@ -174,70 +175,61 @@ void Firestarter::printThreadErrorReport() {
 
 void Firestarter::printPerformanceReport() {
   // performance report
-  uint64_t startTimestamp = 0xffffffffffffffff;
-  uint64_t stopTimestamp = 0;
+  uint64_t StartTimestamp = 0xffffffffffffffff;
+  uint64_t StopTimestamp = 0;
 
-  uint64_t iterations = 0;
+  uint64_t Iterations = 0;
 
   log::debug() << "\nperformance report:\n";
 
-  for (auto const& thread : this->LoadThreads) {
-    auto td = thread.second;
+  for (auto const& Thread : LoadThreads) {
+    auto Td = Thread.second;
 
-    log::debug() << "Thread " << td->id() << ": " << td->Iterations
-                 << " iterations, tsc_delta: " << td->StopTsc - td->StartTsc;
+    log::debug() << "Thread " << Td->id() << ": " << Td->Iterations
+                 << " iterations, tsc_delta: " << Td->StopTsc - Td->StartTsc;
 
-    if (startTimestamp > td->StartTsc) {
-      startTimestamp = td->StartTsc;
-    }
-    if (stopTimestamp < td->StopTsc) {
-      stopTimestamp = td->StopTsc;
-    }
+    StartTimestamp = std::min(StartTimestamp, Td->StartTsc);
+    StopTimestamp = std::max(StopTimestamp, Td->StopTsc);
 
-    iterations += td->Iterations;
+    Iterations += Td->Iterations;
   }
 
-  double runtime = (double)(stopTimestamp - startTimestamp) / (double)this->environment().topology().clockrate();
-  double gFlops =
-      (double)this->LoadThreads.front().second->config().payload().flops() * 0.000000001 * (double)iterations / runtime;
-  double bandwidth =
-      (double)this->LoadThreads.front().second->config().payload().bytes() * 0.000000001 * (double)iterations / runtime;
+  double Runtime =
+      static_cast<double>(StopTimestamp - StartTimestamp) / static_cast<double>(environment().topology().clockrate());
+  double GFlops = static_cast<double>(LoadThreads.front().second->config().payload().flops()) * 0.000000001 *
+                  static_cast<double>(Iterations) / Runtime;
+  double Bandwidth = static_cast<double>(LoadThreads.front().second->config().payload().bytes()) * 0.000000001 *
+                     static_cast<double>(Iterations) / Runtime;
 
   // insert values for ipc-estimate metric
   // if we are on linux
 #if defined(linux) || defined(__linux__)
   if (Measurement) {
-    for (auto const& thread : this->LoadThreads) {
-      auto td = thread.second;
-      ipcEstimateMetricInsert((double)td->Iterations *
-                              (double)this->LoadThreads.front().second->config().payload().instructions() /
-                              (double)(stopTimestamp - startTimestamp));
+    for (auto const& Thread : LoadThreads) {
+      auto Td = Thread.second;
+      ipcEstimateMetricInsert(static_cast<double>(Td->Iterations) *
+                              static_cast<double>(LoadThreads.front().second->config().payload().instructions()) /
+                              static_cast<double>(StopTimestamp - StartTimestamp));
     }
   }
 #endif
 
   // format runtime, gflops and bandwidth %.2f
-  const char* fmt = "%.2f";
-  int size;
-
-#define FORMAT(input)                                                                                                  \
-  size = std::snprintf(nullptr, 0, fmt, input);                                                                        \
-  std::vector<char> input##Vector(size + 1);                                                                           \
-  std::snprintf(&input##Vector[0], input##Vector.size(), fmt, input);                                                  \
-  auto input##String = std::string(&input##Vector[0])
+  const auto FormatString = [](double Value) -> std::string {
+    const char* Fmt = "%.2f";
 
-  FORMAT(runtime);
-  FORMAT(gFlops);
-  FORMAT(bandwidth);
-
-#undef FORMAT
+    auto Size = std::snprintf(nullptr, 0, Fmt, Value);
+    std::vector<char> CharVec(Size + 1);
+    std::snprintf(CharVec.data(), CharVec.size(), Fmt, Value);
+    return {std::string(CharVec.data())};
+  };
 
   log::debug() << "\n"
-               << "total iterations: " << iterations << "\n"
-               << "runtime: " << runtimeString << " seconds (" << stopTimestamp - startTimestamp << " cycles)\n"
+               << "total iterations: " << Iterations << "\n"
+               << "runtime: " << FormatString(Runtime) << " seconds (" << StopTimestamp - StartTimestamp << " cycles)\n"
                << "\n"
-               << "estimated floating point performance: " << gFlopsString << " GFLOPS\n"
-               << "estimated memory bandwidth*: " << bandwidthString << " GB/s\n"
+               << "estimated floating point performance: " << FormatString(GFlops) << " GFLOPS\n"
+               << "estimated memory bandwidth*: " << FormatString(Bandwidth) << " GB/s\n"
                << "\n"
                << "* this estimate is highly unreliable if --function is used in order "
                   "to "
@@ -247,80 +239,80 @@ void Firestarter::printPerformanceReport() {
                << "  executed on an unsupported architecture!";
 }
 
-void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
+void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
 
-  int old = THREAD_WAIT;
+  int Old = THREAD_WAIT;
 
 #if defined(linux) || defined(__linux__)
   pthread_setname_np(pthread_self(), "LoadWorker");
 #endif
 
   for (;;) {
-    td->Mutex.lock();
-    int comm = td->Comm;
-    td->Mutex.unlock();
+    Td->Mutex.lock();
+    int Comm = Td->Comm;
+    Td->Mutex.unlock();
 
-    if (comm != old) {
-      old = comm;
+    if (Comm != Old) {
+      Old = Comm;
 
-      td->Mutex.lock();
-      td->Ack = true;
-      td->Mutex.unlock();
+      Td->Mutex.lock();
+      Td->Ack = true;
+      Td->Mutex.unlock();
     } else {
       std::this_thread::sleep_for(std::chrono::microseconds(1));
       continue;
     }
 
-    switch (comm) {
+    switch (Comm) {
     // allocate and initialize memory
     case THREAD_INIT:
       // set affinity
-      td->environment().setCpuAffinity(td->id());
+      Td->environment().setCpuAffinity(Td->id());
 
       // compile payload
-      td->config().payload().compilePayload(td->config().payloadSettings(), td->config().instructionCacheSize(),
-                                            td->config().dataCacheBufferSize(), td->config().ramBufferSize(),
-                                            td->config().thread(), td->config().lines(), td->DumpRegisters,
-                                            td->ErrorDetection);
+      Td->config().payload().compilePayload(Td->config().payloadSettings(), Td->config().instructionCacheSize(),
+                                            Td->config().dataCacheBufferSize(), Td->config().ramBufferSize(),
+                                            Td->config().thread(), Td->config().lines(), Td->DumpRegisters,
+                                            Td->ErrorDetection);
 
       // allocate memory
       // if we should dump some registers, we use the first part of the memory
       // for them.
-      td->AddrMem =
-          reinterpret_cast<uint64_t*>(ALIGNED_MALLOC((td->BuffersizeMem + td->AddrOffset) * sizeof(uint64_t), 64)) +
-          td->AddrOffset;
+      Td->AddrMem =
+          reinterpret_cast<uint64_t*>(ALIGNED_MALLOC((Td->BuffersizeMem + Td->AddrOffset) * sizeof(uint64_t), 64)) +
+          Td->AddrOffset;
 
       // exit application on error
-      if (td->AddrMem - td->AddrOffset == nullptr) {
-        workerLog::error() << "Could not allocate memory for CPU load thread " << td->id() << "\n";
+      if (Td->AddrMem - Td->AddrOffset == nullptr) {
+        workerLog::error() << "Could not allocate memory for CPU load thread " << Td->id() << "\n";
         exit(ENOMEM);
       }
 
-      if (td->DumpRegisters) {
-        reinterpret_cast<DumpRegisterStruct*>(td->AddrMem - td->AddrOffset)->DumpVar = DumpVariable::Wait;
+      if (Td->DumpRegisters) {
+        reinterpret_cast<DumpRegisterStruct*>(Td->AddrMem - Td->AddrOffset)->DumpVar = DumpVariable::Wait;
       }
 
-      if (td->ErrorDetection) {
-        auto errorDetectionStruct = reinterpret_cast<ErrorDetectionStruct*>(td->AddrMem - td->AddrOffset);
+      if (Td->ErrorDetection) {
+        auto* ErrorDetectionStructPtr = reinterpret_cast<ErrorDetectionStruct*>(Td->AddrMem - Td->AddrOffset);
 
-        std::memset(errorDetectionStruct, 0, sizeof(ErrorDetectionStruct));
+        std::memset(ErrorDetectionStructPtr, 0, sizeof(ErrorDetectionStruct));
 
         // distribute left and right communication pointers
-        errorDetectionStruct->CommunicationLeft = td->CommunicationLeft.get();
-        errorDetectionStruct->CommunicationRight = td->CommunicationRight.get();
+        ErrorDetectionStructPtr->CommunicationLeft = Td->CommunicationLeft.get();
+        ErrorDetectionStructPtr->CommunicationRight = Td->CommunicationRight.get();
 
         // do first touch memset 0 for the communication pointers
-        std::memset((void*)errorDetectionStruct->CommunicationLeft, 0, sizeof(uint64_t) * 2);
+        std::memset((void*)ErrorDetectionStructPtr->CommunicationLeft, 0, sizeof(uint64_t) * 2);
       }
 
       // call init function
-      td->config().payload().init(td->AddrMem, td->BuffersizeMem);
+      Td->config().payload().init(Td->AddrMem, Td->BuffersizeMem);
 
       break;
     // perform stress test
     case THREAD_WORK:
       // record threads start timestamp
-      td->StartTsc = td->environment().topology().timestamp();
+      Td->StartTsc = Td->environment().topology().timestamp();
 
       // will be terminated by watchdog
       for (;;) {
@@ -331,7 +323,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
 #ifdef ENABLE_SCOREP
         SCOREP_USER_REGION_BY_NAME_BEGIN("HIGH", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
-        td->Iterations = td->config().payload().highLoadFunction(td->AddrMem, td->AddrHigh, td->Iterations);
+        Td->Iterations = Td->config().payload().highLoadFunction(Td->AddrMem, Td->AddrHigh, Td->Iterations);
 
         // call low load function
 #ifdef ENABLE_VTRACING
@@ -342,7 +334,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
         SCOREP_USER_REGION_BY_NAME_END("HIGH");
         SCOREP_USER_REGION_BY_NAME_BEGIN("LOW", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
-        td->config().payload().lowLoadFunction(td->AddrHigh, td->Period);
+        Td->config().payload().lowLoadFunction(Td->AddrHigh, Td->Period);
 #ifdef ENABLE_VTRACING
         VT_USER_END("LOW_LOAD_FUNC");
 #endif
@@ -351,14 +343,14 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
 #endif
 
         // terminate if master signals end of run and record stop timestamp
-        if (*td->AddrHigh == LOAD_STOP) {
-          td->StopTsc = td->environment().topology().timestamp();
+        if (*Td->AddrHigh == LOAD_STOP) {
+          Td->StopTsc = Td->environment().topology().timestamp();
 
           return;
         }
 
-        if (*td->AddrHigh == LOAD_SWITCH) {
-          td->StopTsc = td->environment().topology().timestamp();
+        if (*Td->AddrHigh == LOAD_SWITCH) {
+          Td->StopTsc = Td->environment().topology().timestamp();
 
           break;
         }
@@ -366,19 +358,19 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
       break;
     case THREAD_SWITCH:
       // compile payload
-      td->config().payload().compilePayload(td->config().payloadSettings(), td->config().instructionCacheSize(),
-                                            td->config().dataCacheBufferSize(), td->config().ramBufferSize(),
-                                            td->config().thread(), td->config().lines(), td->DumpRegisters,
-                                            td->ErrorDetection);
+      Td->config().payload().compilePayload(Td->config().payloadSettings(), Td->config().instructionCacheSize(),
+                                            Td->config().dataCacheBufferSize(), Td->config().ramBufferSize(),
+                                            Td->config().thread(), Td->config().lines(), Td->DumpRegisters,
+                                            Td->ErrorDetection);
 
       // call init function
-      td->config().payload().init(td->AddrMem, td->BuffersizeMem);
+      Td->config().payload().init(Td->AddrMem, Td->BuffersizeMem);
 
       // save old iteration count
-      td->LastIterations = td->Iterations;
-      td->LastStartTsc = td->StartTsc;
-      td->LastStopTsc = td->StopTsc;
-      td->Iterations = 0;
+      Td->LastIterations = Td->Iterations;
+      Td->LastStartTsc = Td->StartTsc;
+      Td->LastStopTsc = Td->StopTsc;
+      Td->Iterations = 0;
       break;
     case THREAD_WAIT:
       break;
@@ -389,3 +381,5 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> td) {
     }
   }
 }
+
+} // namespace firestarter
\ No newline at end of file
diff --git a/src/firestarter/Main.cpp b/src/firestarter/Main.cpp
index 51b53177..9627adb2 100644
--- a/src/firestarter/Main.cpp
+++ b/src/firestarter/Main.cpp
@@ -27,7 +27,7 @@
 #include <string>
 
 struct Config {
-  inline static const std::vector<std::pair<std::string, std::string>> optionsMap = {
+  inline static const std::vector<std::pair<std::string, std::string>> OptionsMap = {
       {"information", "Information Options:\n"},
       {"general", "General Options:\n"},
       {"specialized-workloads", "Specialized workloads:\n"},
@@ -41,53 +41,55 @@ struct Config {
   };
 
   // default parameters
-  std::chrono::seconds timeout;
-  unsigned loadPercent;
-  std::chrono::microseconds period;
-  unsigned requestedNumThreads;
-  std::string cpuBind = "";
-  bool printFunctionSummary;
-  unsigned functionId;
-  bool listInstructionGroups;
-  std::string instructionGroups;
-  unsigned lineCount = 0;
+  std::chrono::seconds Timeout{};
+  unsigned LoadPercent;
+  std::chrono::microseconds Period{};
+  unsigned RequestedNumThreads;
+  std::string CpuBind;
+  bool PrintFunctionSummary;
+  unsigned FunctionId;
+  bool ListInstructionGroups;
+  std::string InstructionGroups;
+  unsigned LineCount = 0;
   // debug features
-  bool allowUnavailablePayload = false;
-  bool dumpRegisters = false;
-  std::chrono::seconds dumpRegistersTimeDelta = std::chrono::seconds(0);
-  std::string dumpRegistersOutpath = "";
-  bool errorDetection = false;
+  bool AllowUnavailablePayload = false;
+  bool DumpRegisters = false;
+  std::chrono::seconds DumpRegistersTimeDelta = std::chrono::seconds(0);
+  std::string DumpRegistersOutpath;
+  bool ErrorDetection = false;
   // CUDA parameters
-  int gpus = 0;
-  unsigned gpuMatrixSize = 0;
-  bool gpuUseFloat = false;
-  bool gpuUseDouble = false;
+  int Gpus = 0;
+  unsigned GpuMatrixSize = 0;
+  bool GpuUseFloat = false;
+  bool GpuUseDouble = false;
   // linux features
-  bool listMetrics = false;
-  bool measurement = false;
-  std::chrono::milliseconds startDelta = std::chrono::milliseconds(0);
-  std::chrono::milliseconds stopDelta = std::chrono::milliseconds(0);
-  std::chrono::milliseconds measurementInterval = std::chrono::milliseconds(0);
-  std::vector<std::string> stdinMetrics;
+  bool ListMetrics = false;
+  bool Measurement = false;
+  std::chrono::milliseconds StartDelta = std::chrono::milliseconds(0);
+  std::chrono::milliseconds StopDelta = std::chrono::milliseconds(0);
+  std::chrono::milliseconds MeasurementInterval = std::chrono::milliseconds(0);
+  std::vector<std::string> StdinMetrics;
   // linux and dynamic linked binary
-  std::vector<std::string> metricPaths;
+  std::vector<std::string> MetricPaths;
 
   // optimization
-  bool optimize = false;
-  std::chrono::seconds preheat;
-  std::string optimizationAlgorithm;
-  std::vector<std::string> optimizationMetrics;
-  std::chrono::seconds evaluationDuration;
-  unsigned individuals;
-  std::string optimizeOutfile = "";
-  unsigned generations;
-  double nsga2_cr;
-  double nsga2_m;
-
-  Config(int argc, const char** argv);
+  bool Optimize = false;
+  std::chrono::seconds Preheat{};
+  std::string OptimizationAlgorithm;
+  std::vector<std::string> OptimizationMetrics;
+  std::chrono::seconds EvaluationDuration{};
+  unsigned Individuals;
+  std::string OptimizeOutfile;
+  unsigned Generations;
+  double Nsga2Cr;
+  double Nsga2M;
+
+  Config(int Argc, const char** Argv);
 };
 
-void print_copyright() {
+namespace {
+
+void printCopyright() {
   firestarter::log::info() << "This program is free software: you can redistribute it and/or "
                               "modify\n"
                            << "it under the terms of the GNU General Public License as published "
@@ -100,7 +102,7 @@ void print_copyright() {
                               "<http://www.gnu.org/licenses/>.\n";
 }
 
-void print_warranty() {
+void printWarranty() {
   firestarter::log::info() << "This program is distributed in the hope that it will be useful,\n"
                            << "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
                            << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
@@ -111,20 +113,20 @@ void print_warranty() {
                               "<http://www.gnu.org/licenses/>.\n";
 }
 
-void print_help(cxxopts::Options const& parser, std::string const& section) {
-  std::vector<std::pair<std::string, std::string>> options(Config::optionsMap.size());
+void printHelp(cxxopts::Options const& Parser, std::string const& Section) {
+  std::vector<std::pair<std::string, std::string>> Options(Config::OptionsMap.size());
 
-  if (section.size() == 0) {
-    std::copy(Config::optionsMap.begin(), Config::optionsMap.end(), options.begin());
+  if (Section.size() == 0) {
+    std::copy(Config::OptionsMap.begin(), Config::OptionsMap.end(), Options.begin());
   } else {
-    auto findSection = [&](std::pair<std::string, std::string> const& pair) { return pair.first == section; };
-    auto it = std::copy_if(Config::optionsMap.begin(), Config::optionsMap.end(), options.begin(), findSection);
-    options.resize(std::distance(options.begin(), it));
+    auto FindSection = [&](std::pair<std::string, std::string> const& Pair) { return Pair.first == Section; };
+    auto It = std::copy_if(Config::OptionsMap.begin(), Config::OptionsMap.end(), Options.begin(), FindSection);
+    Options.resize(std::distance(Options.begin(), It));
   }
 
   // clang-format off
   firestarter::log::info()
-    << parser.help(options)
+    << Parser.help(Options)
     << "Examples:\n"
     << "  ./FIRESTARTER                 starts FIRESTARTER without timeout\n"
     << "  ./FIRESTARTER -t 300          starts a 5 minute run of FIRESTARTER\n"
@@ -155,12 +157,15 @@ void print_help(cxxopts::Options const& parser, std::string const& section) {
   // clang-format on
 }
 
-Config::Config(int argc, const char** argv) {
+} // namespace
+
+Config::Config(int Argc, const char** Argv) {
+  const auto* ExecutableName = *Argv;
 
-  cxxopts::Options parser(argv[0]);
+  cxxopts::Options Parser(ExecutableName);
 
   // clang-format off
-  parser.add_options("information")
+  Parser.add_options("information")
     ("h,help", "Display usage information. SECTION can be any of: information | general | specialized-workloads"
 #ifdef FIRESTARTER_DEBUG_FEATURES
      " | debug"
@@ -178,7 +183,7 @@ Config::Config(int argc, const char** argv) {
     ("debug", "Print debug output")
     ("a,avail", "List available functions");
 
-  parser.add_options("general")
+  Parser.add_options("general")
     ("i,function", "Specify integer ID of the load-function to be\nused (as listed by --avail)",
       cxxopts::value<unsigned>()->default_value("0"), "ID")
 #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_ONEAPI) || defined(FIRESTARTER_BUILD_HIP)
@@ -206,7 +211,7 @@ Config::Config(int argc, const char** argv) {
 #endif
     ("error-detection", "Enable error detection. This aborts execution when the calculated data is corruped by errors. FIRESTARTER must run with 2 or more threads for this feature. Cannot be used with -l | --load and --optimize.");
 
-  parser.add_options("specialized-workloads")
+  Parser.add_options("specialized-workloads")
     ("list-instruction-groups", "List the available instruction groups for the\npayload of the current platform.")
     ("run-instruction-groups", "Run the payload with the specified\ninstruction groups. GROUPS format: multiple INST:VAL\npairs comma-seperated.",
       cxxopts::value<std::string>()->default_value(""), "GROUPS")
@@ -214,7 +219,7 @@ Config::Config(int argc, const char** argv) {
       cxxopts::value<unsigned>());
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
-  parser.add_options("debug")
+  Parser.add_options("debug")
     ("allow-unavailable-payload", "")
     ("dump-registers", "Dump the working registers on the first\nthread. Depending on the payload these are mm, xmm,\nymm or zmm. Only use it without a timeout and\n100 percent load. DELAY between dumps in secs. Cannot be used with --error-detection.",
       cxxopts::value<unsigned>()->implicit_value("10"), "DELAY")
@@ -223,7 +228,7 @@ Config::Config(int argc, const char** argv) {
 #endif
 
 #if defined(linux) || defined(__linux__)
-  parser.add_options("measurement")
+  Parser.add_options("measurement")
     ("list-metrics", "List the available metrics.")
 #ifndef FIRESTARTER_LINK_STATIC
     ("metric-path", "Add a path to a shared library representing an interface for a metric. This option can be specified multiple times.",
@@ -241,7 +246,7 @@ Config::Config(int argc, const char** argv) {
     ("preheat", "Preheat for N seconds, default: 240",
       cxxopts::value<unsigned>()->default_value("240"), "N");
 
-  parser.add_options("optimization")
+  Parser.add_options("optimization")
     ("optimize", "Run the optimization with one of these algorithms: NSGA2.\nCannot be combined with --measurement.",
       cxxopts::value<std::string>())
     ("optimize-outfile", "Dump the output of the optimization into this\nfile, default: $PWD/$HOSTNAME_$DATE.json",
@@ -260,176 +265,176 @@ Config::Config(int argc, const char** argv) {
   // clang-format on
 
   try {
-    auto options = parser.parse(argc, argv);
+    auto Options = Parser.parse(Argc, Argv);
 
-    if (options.count("quiet")) {
+    if (Options.count("quiet")) {
       firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::warn);
-    } else if (options.count("report")) {
+    } else if (Options.count("report")) {
       firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::debug);
-    } else if (options.count("debug")) {
+    } else if (Options.count("debug")) {
       firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::trace);
     } else {
       firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::info);
     }
 
-    if (options.count("version")) {
+    if (Options.count("version")) {
       std::exit(EXIT_SUCCESS);
     }
 
-    if (options.count("copyright")) {
-      print_copyright();
+    if (Options.count("copyright")) {
+      printCopyright();
       std::exit(EXIT_SUCCESS);
     }
 
-    if (options.count("warranty")) {
-      print_warranty();
+    if (Options.count("warranty")) {
+      printWarranty();
       std::exit(EXIT_SUCCESS);
     }
 
-    firestarter::log::info() << "This program comes with ABSOLUTELY NO WARRANTY; for details run `" << argv[0]
+    firestarter::log::info() << "This program comes with ABSOLUTELY NO WARRANTY; for details run `" << ExecutableName
                              << " -w`.\n"
                              << "This is free software, and you are welcome to redistribute it\n"
-                             << "under certain conditions; run `" << argv[0] << " -c` for details.\n";
+                             << "under certain conditions; run `" << ExecutableName << " -c` for details.\n";
 
-    if (options.count("help")) {
-      auto section = options["help"].as<std::string>();
+    if (Options.count("help")) {
+      auto Section = Options["help"].as<std::string>();
 
       // section not found
-      auto findSection = [&](std::pair<std::string, std::string> const& pair) { return pair.first == section; };
-      if (std::find_if(optionsMap.begin(), optionsMap.end(), findSection) == optionsMap.end() && section.size() != 0) {
-        throw std::invalid_argument("Section \"" + section + "\" not found in help.");
+      auto FindSection = [&](std::pair<std::string, std::string> const& Pair) { return Pair.first == Section; };
+      if (std::find_if(OptionsMap.begin(), OptionsMap.end(), FindSection) == OptionsMap.end() && Section.size() != 0) {
+        throw std::invalid_argument("Section \"" + Section + "\" not found in help.");
       }
 
-      print_help(parser, section);
+      printHelp(Parser, Section);
       std::exit(EXIT_SUCCESS);
     }
 
-    timeout = std::chrono::seconds(options["timeout"].as<unsigned>());
-    loadPercent = options["load"].as<unsigned>();
-    period = std::chrono::microseconds(options["period"].as<unsigned>());
+    Timeout = std::chrono::seconds(Options["timeout"].as<unsigned>());
+    LoadPercent = Options["load"].as<unsigned>();
+    Period = std::chrono::microseconds(Options["period"].as<unsigned>());
 
-    if (loadPercent > 100) {
+    if (LoadPercent > 100) {
       throw std::invalid_argument("Option -l/--load may not be above 100.");
     }
 
-    errorDetection = options.count("error-detection");
-    if (errorDetection && loadPercent != 100) {
+    ErrorDetection = Options.count("error-detection");
+    if (ErrorDetection && LoadPercent != 100) {
       throw std::invalid_argument("Option --error-detection may only be used "
                                   "with -l/--load equal 100.");
     }
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
-    allowUnavailablePayload = options.count("allow-unavailable-payload");
-    dumpRegisters = options.count("dump-registers");
-    if (dumpRegisters) {
-      dumpRegistersTimeDelta = std::chrono::seconds(options["dump-registers"].as<unsigned>());
-      if (timeout != std::chrono::microseconds::zero() && loadPercent != 100) {
+    AllowUnavailablePayload = Options.count("allow-unavailable-payload");
+    DumpRegisters = Options.count("dump-registers");
+    if (DumpRegisters) {
+      DumpRegistersTimeDelta = std::chrono::seconds(Options["dump-registers"].as<unsigned>());
+      if (Timeout != std::chrono::microseconds::zero() && LoadPercent != 100) {
         throw std::invalid_argument("Option --dump-registers may only be used "
                                     "without a timeout and full load.");
       }
-      if (errorDetection) {
+      if (ErrorDetection) {
         throw std::invalid_argument("Options --dump-registers and --error-detection cannot be used "
                                     "together.");
       }
     }
 #endif
 
-    requestedNumThreads = options["threads"].as<unsigned>();
+    RequestedNumThreads = Options["threads"].as<unsigned>();
 
 #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
-    cpuBind = options["bind"].as<std::string>();
-    if (!cpuBind.empty()) {
-      if (requestedNumThreads != 0) {
+    CpuBind = Options["bind"].as<std::string>();
+    if (!CpuBind.empty()) {
+      if (RequestedNumThreads != 0) {
         throw std::invalid_argument("Options -b/--bind and -n/--threads cannot be used together.");
       }
     }
 #endif
 
 #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_ONEAPI) || defined(FIRESTARTER_BUILD_HIP)
-    gpuUseFloat = options.count("usegpufloat");
-    gpuUseDouble = options.count("usegpudouble");
+    GpuUseFloat = Options.count("usegpufloat");
+    GpuUseDouble = Options.count("usegpudouble");
 
-    if (gpuUseFloat && gpuUseDouble) {
+    if (GpuUseFloat && GpuUseDouble) {
       throw std::invalid_argument("Options -f/--usegpufloat and "
                                   "-d/--usegpudouble cannot be used together.");
     }
 
-    gpuMatrixSize = options["matrixsize"].as<unsigned>();
-    if (gpuMatrixSize > 0 && gpuMatrixSize < 64) {
+    GpuMatrixSize = Options["matrixsize"].as<unsigned>();
+    if (GpuMatrixSize > 0 && GpuMatrixSize < 64) {
       throw std::invalid_argument("Option -m/--matrixsize may not be below 64.");
     }
 
-    gpus = options["gpus"].as<int>();
+    Gpus = Options["gpus"].as<int>();
 #endif
 
-    printFunctionSummary = options.count("avail");
+    PrintFunctionSummary = Options.count("avail");
 
-    functionId = options["function"].as<unsigned>();
+    FunctionId = Options["function"].as<unsigned>();
 
-    listInstructionGroups = options.count("list-instruction-groups");
-    instructionGroups = options["run-instruction-groups"].as<std::string>();
-    if (options.count("set-line-count")) {
-      lineCount = options["set-line-count"].as<unsigned>();
+    ListInstructionGroups = Options.count("list-instruction-groups");
+    InstructionGroups = Options["run-instruction-groups"].as<std::string>();
+    if (Options.count("set-line-count")) {
+      LineCount = Options["set-line-count"].as<unsigned>();
     }
 
 #if defined(linux) || defined(__linux__)
-    startDelta = std::chrono::milliseconds(options["start-delta"].as<unsigned>());
-    stopDelta = std::chrono::milliseconds(options["stop-delta"].as<unsigned>());
-    measurementInterval = std::chrono::milliseconds(options["measurement-interval"].as<unsigned>());
+    StartDelta = std::chrono::milliseconds(Options["start-delta"].as<unsigned>());
+    StopDelta = std::chrono::milliseconds(Options["stop-delta"].as<unsigned>());
+    MeasurementInterval = std::chrono::milliseconds(Options["measurement-interval"].as<unsigned>());
 #ifndef FIRESTARTER_LINK_STATIC
-    metricPaths = options["metric-path"].as<std::vector<std::string>>();
+    MetricPaths = Options["metric-path"].as<std::vector<std::string>>();
 #endif
-    if (options.count("metric-from-stdin")) {
-      stdinMetrics = options["metric-from-stdin"].as<std::vector<std::string>>();
+    if (Options.count("metric-from-stdin")) {
+      StdinMetrics = Options["metric-from-stdin"].as<std::vector<std::string>>();
     }
-    measurement = options.count("measurement");
-    listMetrics = options.count("list-metrics");
+    Measurement = Options.count("measurement");
+    ListMetrics = Options.count("list-metrics");
 
-    if ((optimize = options.count("optimize"))) {
-      if (errorDetection) {
+    if ((Optimize = Options.count("optimize"))) {
+      if (ErrorDetection) {
         throw std::invalid_argument("Options --error-detection and --optimize "
                                     "cannot be used together.");
       }
-      if (measurement) {
+      if (Measurement) {
         throw std::invalid_argument("Options --measurement and --optimize cannot be used together.");
       }
-      preheat = std::chrono::seconds(options["preheat"].as<unsigned>());
-      optimizationAlgorithm = options["optimize"].as<std::string>();
-      if (options.count("optimization-metric")) {
-        optimizationMetrics = options["optimization-metric"].as<std::vector<std::string>>();
+      Preheat = std::chrono::seconds(Options["preheat"].as<unsigned>());
+      OptimizationAlgorithm = Options["optimize"].as<std::string>();
+      if (Options.count("optimization-metric")) {
+        OptimizationMetrics = Options["optimization-metric"].as<std::vector<std::string>>();
       }
-      if (loadPercent != 100) {
+      if (LoadPercent != 100) {
         throw std::invalid_argument("Options -p | --period and -l | --load are "
                                     "not compatible with --optimize.");
       }
-      if (timeout == std::chrono::seconds::zero()) {
+      if (Timeout == std::chrono::seconds::zero()) {
         throw std::invalid_argument("Option -t | --timeout must be specified for optimization.");
       }
-      evaluationDuration = timeout;
+      EvaluationDuration = Timeout;
       // this will deactivate the watchdog worker
-      timeout = std::chrono::seconds::zero();
-      individuals = options["individuals"].as<unsigned>();
-      if (options.count("optimize-outfile")) {
-        optimizeOutfile = options["optimize-outfile"].as<std::string>();
+      Timeout = std::chrono::seconds::zero();
+      Individuals = Options["individuals"].as<unsigned>();
+      if (Options.count("optimize-outfile")) {
+        OptimizeOutfile = Options["optimize-outfile"].as<std::string>();
       }
-      generations = options["generations"].as<unsigned>();
-      nsga2_cr = options["nsga2-cr"].as<double>();
-      nsga2_m = options["nsga2-m"].as<double>();
+      Generations = Options["generations"].as<unsigned>();
+      Nsga2Cr = Options["nsga2-cr"].as<double>();
+      Nsga2M = Options["nsga2-m"].as<double>();
 
-      if (optimizationAlgorithm != "NSGA2") {
+      if (OptimizationAlgorithm != "NSGA2") {
         throw std::invalid_argument("Option --optimize must be any of: NSGA2");
       }
     }
 #endif
 
-  } catch (std::exception& e) {
-    firestarter::log::error() << e.what() << "\n";
-    print_help(parser, "");
+  } catch (std::exception& E) {
+    firestarter::log::error() << E.what() << "\n";
+    printHelp(Parser, "");
     std::exit(EXIT_FAILURE);
   }
 }
 
-int main(int argc, const char** argv) {
+auto main(int argc, const char** argv) -> int {
 
   firestarter::log::info() << "FIRESTARTER - A Processor Stress Test Utility, Version " << _FIRESTARTER_VERSION_STRING
                            << "\n"
@@ -444,22 +449,22 @@ int main(int argc, const char** argv) {
                            << "\n";
 #endif
 
-  Config cfg{argc, argv};
+  Config Cfg{argc, argv};
 
   try {
-    firestarter::Firestarter firestarter(
-        argc, argv, cfg.timeout, cfg.loadPercent, cfg.period, cfg.requestedNumThreads, cfg.cpuBind,
-        cfg.printFunctionSummary, cfg.functionId, cfg.listInstructionGroups, cfg.instructionGroups, cfg.lineCount,
-        cfg.allowUnavailablePayload, cfg.dumpRegisters, cfg.dumpRegistersTimeDelta, cfg.dumpRegistersOutpath,
-        cfg.errorDetection, cfg.gpus, cfg.gpuMatrixSize, cfg.gpuUseFloat, cfg.gpuUseDouble, cfg.listMetrics,
-        cfg.measurement, cfg.startDelta, cfg.stopDelta, cfg.measurementInterval, cfg.metricPaths, cfg.stdinMetrics,
-        cfg.optimize, cfg.preheat, cfg.optimizationAlgorithm, cfg.optimizationMetrics, cfg.evaluationDuration,
-        cfg.individuals, cfg.optimizeOutfile, cfg.generations, cfg.nsga2_cr, cfg.nsga2_m);
-
-    firestarter.mainThread();
-
-  } catch (std::exception const& e) {
-    firestarter::log::error() << e.what();
+    firestarter::Firestarter Firestarter(
+        argc, argv, Cfg.Timeout, Cfg.LoadPercent, Cfg.Period, Cfg.RequestedNumThreads, Cfg.CpuBind,
+        Cfg.PrintFunctionSummary, Cfg.FunctionId, Cfg.ListInstructionGroups, Cfg.InstructionGroups, Cfg.LineCount,
+        Cfg.AllowUnavailablePayload, Cfg.DumpRegisters, Cfg.DumpRegistersTimeDelta, Cfg.DumpRegistersOutpath,
+        Cfg.ErrorDetection, Cfg.Gpus, Cfg.GpuMatrixSize, Cfg.GpuUseFloat, Cfg.GpuUseDouble, Cfg.ListMetrics,
+        Cfg.Measurement, Cfg.StartDelta, Cfg.StopDelta, Cfg.MeasurementInterval, Cfg.MetricPaths, Cfg.StdinMetrics,
+        Cfg.Optimize, Cfg.Preheat, Cfg.OptimizationAlgorithm, Cfg.OptimizationMetrics, Cfg.EvaluationDuration,
+        Cfg.Individuals, Cfg.OptimizeOutfile, Cfg.Generations, Cfg.Nsga2Cr, Cfg.Nsga2M);
+
+    Firestarter.mainThread();
+
+  } catch (std::exception const& E) {
+    firestarter::log::error() << E.what();
     return EXIT_FAILURE;
   }
 
diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp
index 0c880bbb..e6d3305b 100644
--- a/src/firestarter/Measurement/MeasurementWorker.cpp
+++ b/src/firestarter/Measurement/MeasurementWorker.cpp
@@ -21,10 +21,7 @@
 
 #include <firestarter/Logging/Log.hpp>
 #include <firestarter/Measurement/MeasurementWorker.hpp>
-
-#include <iostream>
 #include <queue>
-#include <thread>
 
 #ifndef FIRESTARTER_LINK_STATIC
 extern "C" {
@@ -32,99 +29,99 @@ extern "C" {
 }
 #endif
 
-void insertCallback(void* cls, const char* metricName, int64_t timeSinceEpoch, double value) {
-  static_cast<firestarter::measurement::MeasurementWorker*>(cls)->insertCallback(metricName, timeSinceEpoch, value);
+void insertCallback(void* Cls, const char* MetricName, int64_t TimeSinceEpoch, double Value) {
+  static_cast<firestarter::measurement::MeasurementWorker*>(Cls)->insertCallback(MetricName, TimeSinceEpoch, Value);
 }
 
-using namespace firestarter::measurement;
+namespace firestarter::measurement {
 
-MeasurementWorker::MeasurementWorker(std::chrono::milliseconds updateInterval, uint64_t numThreads,
-                                     std::vector<std::string> const& metricDylibs,
-                                     std::vector<std::string> const& stdinMetrics)
-    : UpdateInterval(updateInterval)
-    , NumThreads(numThreads) {
+MeasurementWorker::MeasurementWorker(std::chrono::milliseconds UpdateInterval, uint64_t NumThreads,
+                                     std::vector<std::string> const& MetricDylibs,
+                                     std::vector<std::string> const& StdinMetrics)
+    : UpdateInterval(UpdateInterval)
+    , NumThreads(NumThreads) {
 
 #ifndef FIRESTARTER_LINK_STATIC
   // open dylibs and find metric symbol.
   // create an entry in _metricDylibs with handle from dlopen and
   // metric_interface_t structure. add this structe as a pointer to metrics.
-  for (auto const& dylib : metricDylibs) {
-    void* handle;
-    const char* filename = dylib.c_str();
+  for (auto const& Dylib : MetricDylibs) {
+    void* Handle = nullptr;
+    const char* Filename = Dylib.c_str();
 
-    handle = dlopen(dylib.c_str(), RTLD_NOW | RTLD_LOCAL);
+    Handle = dlopen(Dylib.c_str(), RTLD_NOW | RTLD_LOCAL);
 
-    if (!handle) {
-      firestarter::log::error() << filename << ": " << dlerror();
+    if (!Handle) {
+      firestarter::log::error() << Filename << ": " << dlerror();
       continue;
     }
 
     // clear existing error
     dlerror();
 
-    metric_interface_t* metric = nullptr;
+    MetricInterface* Metric = nullptr;
 
-    metric = (metric_interface_t*)dlsym(handle, "metric");
+    Metric = static_cast<MetricInterface*>(dlsym(Handle, "metric"));
 
-    char* error;
-    if ((error = dlerror()) != NULL) {
-      firestarter::log::error() << filename << ": " << error;
-      dlclose(handle);
+    char* Error = nullptr;
+    if ((Error = dlerror()) != nullptr) {
+      firestarter::log::error() << Filename << ": " << Error;
+      dlclose(Handle);
       continue;
     }
 
-    if (this->findMetricByName(metric->name) != nullptr) {
-      firestarter::log::error() << "A metric named \"" << metric->name << "\" is already loaded.";
-      dlclose(handle);
+    if (this->findMetricByName(Metric->Name) != nullptr) {
+      firestarter::log::error() << "A metric named \"" << Metric->Name << "\" is already loaded.";
+      dlclose(Handle);
       continue;
     }
 
     // lets push our metric object and the handle
-    this->_metricDylibs.push_back(handle);
-    this->metrics.push_back(metric);
+    this->MetricDylibs.push_back(Handle);
+    this->Metrics.push_back(Metric);
   }
 #else
-  (void)metricDylibs;
+  (void)MetricDylibs;
 #endif
 
   // setup metric objects for metric names passed from stdin.
-  for (auto const& name : stdinMetrics) {
-    if (this->findMetricByName(name) != nullptr) {
-      firestarter::log::error() << "A metric named \"" << name << "\" is already loaded.";
+  for (auto const& Name : StdinMetrics) {
+    if (this->findMetricByName(Name) != nullptr) {
+      firestarter::log::error() << "A metric named \"" << Name << "\" is already loaded.";
       continue;
     }
 
-    this->StdinMetrics.push_back(name);
+    this->StdinMetrics.push_back(Name);
   }
 
-  std::stringstream ss;
-  unsigned maxLength = 0;
-  std::map<std::string, bool> available;
+  std::stringstream Ss;
+  unsigned MaxLength = 0;
+  std::map<std::string, bool> Available;
 
-  for (auto const& metric : this->Metrics) {
-    std::string name(metric->Name);
-    maxLength = maxLength < name.size() ? name.size() : maxLength;
-    int returnCode = metric->Init();
-    metric->Fini();
-    available[name] = returnCode == EXIT_SUCCESS ? true : false;
+  for (auto const& Metric : this->Metrics) {
+    std::string Name(Metric->Name);
+    MaxLength = MaxLength < Name.size() ? Name.size() : MaxLength;
+    auto ReturnCode = Metric->Init();
+    Metric->Fini();
+    Available[Name] = ReturnCode == EXIT_SUCCESS;
   }
 
-  unsigned padding = maxLength > 6 ? maxLength - 6 : 0;
-  ss << "  METRIC" << std::string(padding + 1, ' ') << "| available\n";
-  ss << "  " << std::string(padding + 7, '-') << "-----------\n";
-  for (auto const& [key, value] : available) {
-    ss << "  " << key << std::string(padding + 7 - key.size(), ' ') << "| ";
-    ss << (value ? "yes" : "no") << "\n";
+  unsigned Padding = MaxLength > 6 ? MaxLength - 6 : 0;
+  Ss << "  METRIC" << std::string(Padding + 1, ' ') << "| available\n";
+  Ss << "  " << std::string(Padding + 7, '-') << "-----------\n";
+  for (auto const& [key, value] : Available) {
+    Ss << "  " << key << std::string(Padding + 7 - key.size(), ' ') << "| ";
+    Ss << (value ? "yes" : "no") << "\n";
   }
 
-  this->AvailableMetricsString = ss.str();
+  this->AvailableMetricsString = Ss.str();
 
-  pthread_create(&this->WorkerThread, NULL,
+  pthread_create(&this->WorkerThread, nullptr,
                  reinterpret_cast<void* (*)(void*)>(MeasurementWorker::dataAcquisitionWorker), this);
 
   // create a worker for getting metric values from stdin
   if (this->StdinMetrics.size() > 0) {
-    pthread_create(&this->StdinThread, NULL,
+    pthread_create(&this->StdinThread, nullptr,
                    reinterpret_cast<void* (*)(void*)>(MeasurementWorker::stdinDataAcquisitionWorker), this);
   }
 }
@@ -132,39 +129,39 @@ MeasurementWorker::MeasurementWorker(std::chrono::milliseconds updateInterval, u
 MeasurementWorker::~MeasurementWorker() {
   pthread_cancel(this->WorkerThread);
 
-  pthread_join(this->WorkerThread, NULL);
+  pthread_join(this->WorkerThread, nullptr);
 
   if (this->StdinMetrics.size() > 0) {
     pthread_cancel(this->StdinThread);
 
-    pthread_join(this->StdinThread, NULL);
+    pthread_join(this->StdinThread, nullptr);
   }
 
   for (auto const& [key, value] : this->Values) {
-    auto metric = this->findMetricByName(key);
-    if (metric == nullptr) {
+    const auto* Metric = this->findMetricByName(key);
+    if (Metric == nullptr) {
       continue;
     }
 
-    metric->Fini();
+    Metric->Fini();
   }
 
 #ifndef FIRESTARTER_LINK_STATIC
-  for (auto handle : this->_metricDylibs) {
-    dlclose(handle);
+  for (auto* Handle : this->MetricDylibs) {
+    dlclose(Handle);
   }
 #endif
 }
 
-std::vector<std::string> MeasurementWorker::metricNames() {
-  std::vector<std::string> metrics;
-  std::transform(this->Metrics.begin(), this->Metrics.end(), std::back_inserter(metrics),
-                 [](auto& metric) -> std::string { return std::string(metric->Name); });
-  for (auto const& name : this->StdinMetrics) {
-    metrics.push_back(name);
+auto MeasurementWorker::metricNames() -> std::vector<std::string> {
+  std::vector<std::string> Metrics;
+  std::transform(this->Metrics.begin(), this->Metrics.end(), std::back_inserter(Metrics),
+                 [](auto& Metric) -> std::string { return std::string(Metric->Name); });
+  for (auto const& Name : this->StdinMetrics) {
+    Metrics.push_back(Name);
   }
 
-  return metrics;
+  return Metrics;
 }
 
 auto MeasurementWorker::findMetricByName(std::string MetricName) -> const MetricInterface* {
@@ -184,49 +181,49 @@ auto MeasurementWorker::findMetricByName(std::string MetricName) -> const Metric
 auto MeasurementWorker::initMetrics(std::vector<std::string> const& MetricNames) -> std::vector<std::string> {
   this->ValuesMutex.lock();
 
-  std::vector<std::string> initialized = {};
+  std::vector<std::string> Initialized = {};
 
   // try to find each metric and initialize it
-  for (auto const& metricName : MetricNames) {
+  for (auto const& MetricName : MetricNames) {
     // init values map with empty vector
-    auto name_equal = [metricName](auto const& pair) { return metricName.compare(pair.first) == 0; };
-    auto pair = std::find_if(this->Values.begin(), this->Values.end(), name_equal);
-    if (pair != this->Values.end()) {
-      pair->second.clear();
+    auto NameEqual = [MetricName](auto const& Pair) { return MetricName.compare(Pair.first) == 0; };
+    auto Pair = std::find_if(this->Values.begin(), this->Values.end(), NameEqual);
+    if (Pair != this->Values.end()) {
+      Pair->second.clear();
     } else {
-      auto metric = this->findMetricByName(metricName);
-      if (metric != nullptr) {
-        int returnValue = metric->Init();
-        if (returnValue != EXIT_SUCCESS) {
-          log::error() << "Metric " << metric->Name << ": " << metric->GetError();
+      const auto* Metric = this->findMetricByName(MetricName);
+      if (Metric != nullptr) {
+        int ReturnValue = Metric->Init();
+        if (ReturnValue != EXIT_SUCCESS) {
+          log::error() << "Metric " << Metric->Name << ": " << Metric->GetError();
           continue;
         }
       }
-      this->Values[metricName] = std::vector<TimeValue>();
-      if (metric != nullptr) {
-        if (metric->Type.InsertCallback) {
-          metric->RegisterInsertCallback(::insertCallback, this);
+      this->Values[MetricName] = std::vector<TimeValue>();
+      if (Metric != nullptr) {
+        if (Metric->Type.InsertCallback) {
+          Metric->RegisterInsertCallback(::insertCallback, this);
         }
       }
-      initialized.push_back(metricName);
+      Initialized.push_back(MetricName);
     }
   }
 
   this->ValuesMutex.unlock();
 
-  return initialized;
+  return Initialized;
 }
 
-void MeasurementWorker::insertCallback(const char* metricName, int64_t timeSinceEpoch, double value) {
+void MeasurementWorker::insertCallback(const char* MetricName, int64_t TimeSinceEpoch, double Value) {
   this->ValuesMutex.lock();
 
   using Duration = std::chrono::duration<int64_t, std::nano>;
-  auto time = std::chrono::time_point<std::chrono::high_resolution_clock, Duration>(Duration(timeSinceEpoch));
-  auto name_equal = [metricName](auto const& pair) { return std::string(metricName).compare(pair.first) == 0; };
-  auto pair = std::find_if(this->Values.begin(), this->Values.end(), name_equal);
+  auto Time = std::chrono::time_point<std::chrono::high_resolution_clock, Duration>(Duration(TimeSinceEpoch));
+  auto NameEqual = [MetricName](auto const& Pair) { return std::string(MetricName).compare(Pair.first) == 0; };
+  auto Pair = std::find_if(this->Values.begin(), this->Values.end(), NameEqual);
 
-  if (pair != this->Values.end()) {
-    pair->second.push_back(TimeValue(time, value));
+  if (Pair != this->Values.end()) {
+    Pair->second.emplace_back(Time, Value);
   }
 
   this->ValuesMutex.unlock();
@@ -234,171 +231,173 @@ void MeasurementWorker::insertCallback(const char* metricName, int64_t timeSince
 
 void MeasurementWorker::startMeasurement() { this->StartTime = std::chrono::high_resolution_clock::now(); }
 
-std::map<std::string, Summary> MeasurementWorker::getValues(std::chrono::milliseconds startDelta,
-                                                            std::chrono::milliseconds stopDelta) {
-  std::map<std::string, Summary> measurment = {};
+auto MeasurementWorker::getValues(std::chrono::milliseconds StartDelta, std::chrono::milliseconds StopDelta)
+    -> std::map<std::string, Summary> {
+  std::map<std::string, Summary> Measurment = {};
 
   this->ValuesMutex.lock();
 
   for (auto& [key, values] : this->Values) {
-    auto startTime = this->StartTime;
-    auto endTime = std::chrono::high_resolution_clock::now();
-    auto metric = this->findMetricByName(key);
+    auto StartTime = this->StartTime;
+    auto EndTime = std::chrono::high_resolution_clock::now();
+    const auto* Metric = this->findMetricByName(key);
 
-    MetricType type;
-    std::memset(&type, 0, sizeof(type));
-    if (metric == nullptr) {
-      type.Absolute = 1;
+    MetricType Type;
+    std::memset(&Type, 0, sizeof(Type));
+    if (Metric == nullptr) {
+      Type.Absolute = 1;
 
-      startTime += startDelta;
-      endTime -= stopDelta;
+      StartTime += StartDelta;
+      EndTime -= StopDelta;
     } else {
-      std::memcpy(&type, &metric->Type, sizeof(type));
+      std::memcpy(&Type, &Metric->Type, sizeof(Type));
 
-      if (metric->Type.IgnoreStartStopDelta == 0) {
-        startTime += startDelta;
-        endTime -= stopDelta;
+      if (Metric->Type.IgnoreStartStopDelta == 0) {
+        StartTime += StartDelta;
+        EndTime -= StopDelta;
       }
     }
 
-    decltype(values) croppedValues(values.size());
+    decltype(values) CroppedValues(values.size());
 
-    auto findAll = [startTime, endTime](auto const& tv) { return startTime <= tv.Time && tv.Time <= endTime; };
-    auto it = std::copy_if(values.begin(), values.end(), croppedValues.begin(), findAll);
-    croppedValues.resize(std::distance(croppedValues.begin(), it));
+    auto FindAll = [StartTime, EndTime](auto const& Tv) { return StartTime <= Tv.Time && Tv.Time <= EndTime; };
+    auto It = std::copy_if(values.begin(), values.end(), CroppedValues.begin(), FindAll);
+    CroppedValues.resize(std::distance(CroppedValues.begin(), It));
 
-    Summary sum = Summary::calculate(croppedValues.begin(), croppedValues.end(), type, this->NumThreads);
+    Summary Sum = Summary::calculate(CroppedValues.begin(), CroppedValues.end(), Type, this->NumThreads);
 
-    measurment[key] = sum;
+    Measurment[key] = Sum;
   }
 
   this->ValuesMutex.unlock();
 
-  return measurment;
+  return Measurment;
 }
 
-int* MeasurementWorker::dataAcquisitionWorker(void* measurementWorker) {
+auto MeasurementWorker::dataAcquisitionWorker(void* MeasurementWorker) -> int* {
 
-  pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
+  pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, nullptr);
 
-  auto _this = reinterpret_cast<MeasurementWorker*>(measurementWorker);
+  auto* This = reinterpret_cast<class MeasurementWorker*>(MeasurementWorker);
 
 #ifndef __APPLE__
   pthread_setname_np(pthread_self(), "DataAcquisition");
 #endif
 
-  using clock = std::chrono::high_resolution_clock;
+  using Clock = std::chrono::high_resolution_clock;
 
-  using callbackTuple =
+  using CallbackTuple =
       std::tuple<void (*)(void), std::chrono::microseconds, std::chrono::high_resolution_clock::time_point>;
-  auto callbackTupleComparator = [](callbackTuple left, callbackTuple right) {
-    return std::get<2>(left) > std::get<2>(right);
+  auto CallbackTupleComparator = [](CallbackTuple Left, CallbackTuple Right) {
+    return std::get<2>(Left) > std::get<2>(Right);
   };
 
   // this datastructure holds a tuple of our callback, the callback frequency
   // and the next timepoint. it will be sorted, so the pop function will give
   // back the next callback
-  std::priority_queue<callbackTuple, std::vector<callbackTuple>, decltype(callbackTupleComparator)> callbackQueue(
-      callbackTupleComparator);
+  std::priority_queue<CallbackTuple, std::vector<CallbackTuple>, decltype(CallbackTupleComparator)> CallbackQueue(
+      CallbackTupleComparator);
 
-  _this->ValuesMutex.lock();
+  This->ValuesMutex.lock();
 
-  for (auto const& [key, value] : _this->Values) {
-    auto metric_interface = _this->findMetricByName(key);
+  for (auto const& [key, value] : This->Values) {
+    const auto* MetricInterface = This->findMetricByName(key);
 
-    if (metric_interface == nullptr) {
+    if (MetricInterface == nullptr) {
       continue;
     }
 
-    auto callbackTime = std::chrono::microseconds(metric_interface->CallbackTime);
-    if (callbackTime.count() == 0) {
+    auto CallbackTime = std::chrono::microseconds(MetricInterface->CallbackTime);
+    if (CallbackTime.count() == 0) {
       continue;
     }
 
-    auto currentTime = clock::now();
+    auto CurrentTime = Clock::now();
 
-    callbackQueue.push(std::make_tuple(metric_interface->Callback, callbackTime, currentTime));
+    CallbackQueue.emplace(MetricInterface->Callback, CallbackTime, CurrentTime);
   }
 
-  _this->ValuesMutex.unlock();
+  This->ValuesMutex.unlock();
 
-  auto nextFetch = clock::now() + _this->UpdateInterval;
+  auto NextFetch = Clock::now() + This->UpdateInterval;
 
   for (;;) {
-    auto now = clock::now();
+    auto Now = Clock::now();
 
-    if (nextFetch <= now) {
-      _this->ValuesMutex.lock();
+    if (NextFetch <= Now) {
+      This->ValuesMutex.lock();
 
-      for (auto& [metricName, values] : _this->Values) {
-        auto metric_interface = _this->findMetricByName(metricName);
+      for (auto& [metricName, values] : This->Values) {
+        const auto* MetricInterface = This->findMetricByName(metricName);
 
-        if (metric_interface == nullptr) {
+        if (MetricInterface == nullptr) {
           continue;
         }
 
-        double value;
+        double Value = NAN;
 
-        if (!metric_interface->Type.InsertCallback && metric_interface->GetReading != nullptr) {
-          if (EXIT_SUCCESS == metric_interface->GetReading(&value)) {
-            auto tv = TimeValue(std::chrono::high_resolution_clock::now(), value);
-            values.push_back(tv);
+        if (!MetricInterface->Type.InsertCallback && MetricInterface->GetReading != nullptr) {
+          if (EXIT_SUCCESS == MetricInterface->GetReading(&Value)) {
+            auto Tv = TimeValue(std::chrono::high_resolution_clock::now(), Value);
+            values.push_back(Tv);
           }
         }
       }
 
-      _this->ValuesMutex.unlock();
+      This->ValuesMutex.unlock();
 
-      nextFetch = now + _this->UpdateInterval;
+      NextFetch = Now + This->UpdateInterval;
     }
 
-    auto nextWake = nextFetch;
+    auto NextWake = NextFetch;
 
-    if (!callbackQueue.empty()) {
-      auto [callbackFunction, callbackTime, nextCallback] = callbackQueue.top();
+    if (!CallbackQueue.empty()) {
+      auto [callbackFunction, callbackTime, nextCallback] = CallbackQueue.top();
 
-      if (nextCallback <= now) {
+      if (nextCallback <= Now) {
         // remove the elment from the queue
-        callbackQueue.pop();
+        CallbackQueue.pop();
 
         // call our callback
         callbackFunction();
 
         // add it with the updated callback time to the queue again
-        nextCallback = now + callbackTime;
-        callbackQueue.push(std::make_tuple(callbackFunction, callbackTime, nextCallback));
+        nextCallback = Now + callbackTime;
+        CallbackQueue.emplace(callbackFunction, callbackTime, nextCallback);
       }
 
-      nextWake = nextCallback < nextWake ? nextCallback : nextWake;
+      NextWake = nextCallback < NextWake ? nextCallback : NextWake;
     }
 
-    std::this_thread::sleep_for(nextWake - clock::now());
+    std::this_thread::sleep_for(NextWake - Clock::now());
   }
 }
 
-int* MeasurementWorker::stdinDataAcquisitionWorker(void* measurementWorker) {
+auto MeasurementWorker::stdinDataAcquisitionWorker(void* MeasurementWorker) -> int* {
 
-  pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
+  pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, nullptr);
 
-  auto _this = reinterpret_cast<MeasurementWorker*>(measurementWorker);
+  auto* This = reinterpret_cast<class MeasurementWorker*>(MeasurementWorker);
 
 #ifndef __APPLE__
   pthread_setname_np(pthread_self(), "StdinDataAcquis");
 #endif
 
-  for (std::string line; std::getline(std::cin, line);) {
-    int64_t time;
-    double value;
-    char name[128];
-    if (std::sscanf(line.c_str(), "%127s %ld %lf", name, &time, &value) == 3) {
-      auto name_equal = [name](auto const& allowedName) { return allowedName.compare(std::string(name)) == 0; };
-      auto item = std::find_if(_this->stdinMetrics().begin(), _this->stdinMetrics().end(), name_equal);
+  for (std::string Line; std::getline(std::cin, Line);) {
+    int64_t Time = 0;
+    double Value = NAN;
+    char Name[128];
+    if (std::sscanf(Line.c_str(), "%127s %ld %lf", Name, &Time, &Value) == 3) {
+      auto NameEqual = [Name](auto const& AllowedName) { return AllowedName.compare(std::string(Name)) == 0; };
+      auto Item = std::find_if(This->stdinMetrics().begin(), This->stdinMetrics().end(), NameEqual);
       // metric name is allowed
-      if (item != _this->stdinMetrics().end()) {
-        _this->insertCallback(name, time, value);
+      if (Item != This->stdinMetrics().end()) {
+        This->insertCallback(Name, Time, Value);
       }
     }
   }
 
-  return NULL;
+  return nullptr;
 }
+
+} // namespace firestarter::measurement
\ No newline at end of file
diff --git a/src/firestarter/Measurement/Metric/IPCEstimate.cpp b/src/firestarter/Measurement/Metric/IPCEstimate.cpp
index 9e18a6be..6bd5f7d9 100644
--- a/src/firestarter/Measurement/Metric/IPCEstimate.cpp
+++ b/src/firestarter/Measurement/Metric/IPCEstimate.cpp
@@ -28,48 +28,48 @@ extern "C" {
 #include <firestarter/Measurement/MetricInterface.h>
 }
 
-static std::string errorString = "";
+static std::string ErrorString;
 
-static void (*callback)(void*, const char*, int64_t, double) = nullptr;
-static void* callback_arg = nullptr;
+static void (*Callback)(void*, const char*, int64_t, double) = nullptr;
+static void* CallbackArg = nullptr;
 
-static int32_t fini(void) {
-  callback = nullptr;
-  callback_arg = nullptr;
+static auto fini() -> int32_t {
+  Callback = nullptr;
+  CallbackArg = nullptr;
 
   return EXIT_SUCCESS;
 }
 
-static int32_t init(void) {
-  errorString = "";
+static auto init() -> int32_t {
+  ErrorString = "";
 
   return EXIT_SUCCESS;
 }
 
-static const char* get_error(void) {
-  const char* errorCString = errorString.c_str();
-  return errorCString;
+static auto getError() -> const char* {
+  const char* ErrorCString = ErrorString.c_str();
+  return ErrorCString;
 }
 
-static int32_t register_insert_callback(void (*c)(void*, const char*, int64_t, double), void* arg) {
-  callback = c;
-  callback_arg = arg;
+static auto registerInsertCallback(void (*C)(void*, const char*, int64_t, double), void* Arg) -> int32_t {
+  Callback = C;
+  CallbackArg = Arg;
   return EXIT_SUCCESS;
 }
 
 void ipcEstimateMetricInsert(double Value) {
-  if (callback == nullptr || callback_arg == nullptr) {
+  if (Callback == nullptr || CallbackArg == nullptr) {
     return;
   }
 
-  int64_t t =
+  int64_t T =
       std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch())
           .count();
 
-  callback(callback_arg, "ipc-estimate", t, Value);
+  Callback(CallbackArg, "ipc-estimate", T, Value);
 }
 
-MetricInterface IpcEstimateMetric = {
+const MetricInterface IpcEstimateMetric = {
     .Name = "ipc-estimate",
     .Type = {.Absolute = 1,
              .Accumalative = 0,
@@ -83,6 +83,6 @@ MetricInterface IpcEstimateMetric = {
     .Init = init,
     .Fini = fini,
     .GetReading = nullptr,
-    .GetError = get_error,
-    .RegisterInsertCallback = register_insert_callback,
+    .GetError = getError,
+    .RegisterInsertCallback = registerInsertCallback,
 };
diff --git a/src/firestarter/Measurement/Metric/Perf.cpp b/src/firestarter/Measurement/Metric/Perf.cpp
index 0d7a0225..d49cc0a0 100644
--- a/src/firestarter/Measurement/Metric/Perf.cpp
+++ b/src/firestarter/Measurement/Metric/Perf.cpp
@@ -31,66 +31,67 @@ extern "C" {
 #include <sys/syscall.h>
 #include <unistd.h>
 
-#define PERF_EVENT_PARANOID "/proc/sys/kernel/perf_event_paranoid"
+static const std::string PerfEventParanoidFile = "/proc/sys/kernel/perf_event_paranoid";
 
-struct read_format {
-  uint64_t nr;
+struct ReadFormat {
+  uint64_t Nr;
   struct {
-    uint64_t value;
-    uint64_t id;
-  } values[2];
+    uint64_t Value;
+    uint64_t Id;
+  } Values[2];
 };
 
-static std::string errorString = "";
+static std::string ErrorString;
 
-static int cpu_cycles_fd = -1;
-static int instructions_fd = -1;
-static uint64_t cpu_cycles_id;
-static uint64_t instructions_id;
-static bool init_done = false;
-static int32_t init_value;
+static int CpuCyclesFd = -1;
+static int InstructionsFd = -1;
+static uint64_t CpuCyclesId;
+static uint64_t InstructionsId;
+static bool InitDone = false;
+static int32_t InitValue;
 
-static struct read_format last;
+static struct ReadFormat Last;
 
-static long perf_event_open(struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) {
-  return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
+static auto perfEventOpen(struct perf_event_attr* HwEvent, pid_t Pid, int Cpu, int GroupFd, unsigned long Flags)
+    -> long {
+  return syscall(__NR_perf_event_open, HwEvent, Pid, Cpu, GroupFd, Flags);
 }
 
-static int32_t fini(void) {
-  if (!(cpu_cycles_fd < 0)) {
-    close(cpu_cycles_fd);
-    cpu_cycles_fd = -1;
+static auto fini() -> int32_t {
+  if (!(CpuCyclesFd < 0)) {
+    close(CpuCyclesFd);
+    CpuCyclesFd = -1;
   }
-  if (!(instructions_fd < 0)) {
-    close(instructions_fd);
-    instructions_fd = -1;
+  if (!(InstructionsFd < 0)) {
+    close(InstructionsFd);
+    InstructionsFd = -1;
   }
-  init_done = false;
+  InitDone = false;
   return EXIT_SUCCESS;
 }
 
-static int32_t init(void) {
-  if (init_done) {
-    return init_value;
+static auto init() -> int32_t {
+  if (InitDone) {
+    return InitValue;
   }
 
-  if (access(PERF_EVENT_PARANOID, F_OK) == -1) {
+  if (access(PerfEventParanoidFile.c_str(), F_OK) == -1) {
     // https://man7.org/linux/man-pages/man2/perf_event_open.2.html
     // The official way of knowing if perf_event_open() support is enabled
     // is checking for the existence of the file
     // /proc/sys/kernel/perf_event_paranoid.
-    errorString = "syscall perf_event_open not supported or file " PERF_EVENT_PARANOID " does not exist";
-    init_value = EXIT_FAILURE;
-    init_done = true;
+    ErrorString = "syscall perf_event_open not supported or file " + PerfEventParanoidFile + " does not exist";
+    InitValue = EXIT_FAILURE;
+    InitDone = true;
     return EXIT_FAILURE;
   }
 
-  struct perf_event_attr cpu_cycles_attr;
-  std::memset(&cpu_cycles_attr, 0, sizeof(struct perf_event_attr));
-  cpu_cycles_attr.type = PERF_TYPE_HARDWARE;
-  cpu_cycles_attr.size = sizeof(struct perf_event_attr);
-  cpu_cycles_attr.config = PERF_COUNT_HW_CPU_CYCLES;
-  cpu_cycles_attr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
+  struct perf_event_attr CpuCyclesAttr {};
+  std::memset(&CpuCyclesAttr, 0, sizeof(struct perf_event_attr));
+  CpuCyclesAttr.type = PERF_TYPE_HARDWARE;
+  CpuCyclesAttr.size = sizeof(struct perf_event_attr);
+  CpuCyclesAttr.config = PERF_COUNT_HW_CPU_CYCLES;
+  CpuCyclesAttr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
   // https://man7.org/linux/man-pages/man2/perf_event_open.2.html
   //     inherit
   // The inherit bit specifies that this counter should count
@@ -110,28 +111,28 @@ static int32_t init(void) {
   // changed the check
   // - if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
   // + if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ))
-  cpu_cycles_attr.inherit = 1;
-  cpu_cycles_attr.exclude_kernel = 1;
-  cpu_cycles_attr.exclude_hv = 1;
-
-  if ((cpu_cycles_fd = perf_event_open(&cpu_cycles_attr,
-                                       // pid == 0 and cpu == -1
-                                       // This measures the calling process/thread on any CPU.
-                                       0, -1,
-                                       // The group_fd argument allows event groups to be created.  An event
-                                       // group has one event which is the group leader.  The leader is
-                                       // created first, with group_fd = -1.  The rest of the group members
-                                       // are created with subsequent perf_event_open() calls with group_fd
-                                       // being set to the file descriptor of the group leader.
-                                       -1, 0)) < 0) {
+  CpuCyclesAttr.inherit = 1;
+  CpuCyclesAttr.exclude_kernel = 1;
+  CpuCyclesAttr.exclude_hv = 1;
+
+  if ((CpuCyclesFd = perfEventOpen(&CpuCyclesAttr,
+                                   // pid == 0 and cpu == -1
+                                   // This measures the calling process/thread on any CPU.
+                                   0, -1,
+                                   // The group_fd argument allows event groups to be created.  An event
+                                   // group has one event which is the group leader.  The leader is
+                                   // created first, with group_fd = -1.  The rest of the group members
+                                   // are created with subsequent perf_event_open() calls with group_fd
+                                   // being set to the file descriptor of the group leader.
+                                   -1, 0)) < 0) {
     fini();
-    errorString = "perf_event_open failed for PERF_COUNT_HW_CPU_CYCLES";
-    init_value = EXIT_FAILURE;
-    init_done = true;
+    ErrorString = "perf_event_open failed for PERF_COUNT_HW_CPU_CYCLES";
+    InitValue = EXIT_FAILURE;
+    InitDone = true;
     return EXIT_FAILURE;
   }
 
-  ioctl(cpu_cycles_fd, PERF_EVENT_IOC_ID, &cpu_cycles_id);
+  ioctl(CpuCyclesFd, PERF_EVENT_IOC_ID, &CpuCyclesId);
 
   struct perf_event_attr instructions_attr;
   std::memset(&instructions_attr, 0, sizeof(struct perf_event_attr));
@@ -143,94 +144,94 @@ static int32_t init(void) {
   instructions_attr.exclude_kernel = 1;
   instructions_attr.exclude_hv = 1;
 
-  if ((instructions_fd = perf_event_open(&instructions_attr,
-                                         // pid == 0 and cpu == -1
-                                         // This measures the calling process/thread on any CPU.
-                                         0, -1,
-                                         // The group_fd argument allows event groups to be created.  An event
-                                         // group has one event which is the group leader.  The leader is
-                                         // created first, with group_fd = -1.  The rest of the group members
-                                         // are created with subsequent perf_event_open() calls with group_fd
-                                         // being set to the file descriptor of the group leader.
-                                         cpu_cycles_fd, 0)) < 0) {
+  if ((InstructionsFd = perfEventOpen(&instructions_attr,
+                                      // pid == 0 and cpu == -1
+                                      // This measures the calling process/thread on any CPU.
+                                      0, -1,
+                                      // The group_fd argument allows event groups to be created.  An event
+                                      // group has one event which is the group leader.  The leader is
+                                      // created first, with group_fd = -1.  The rest of the group members
+                                      // are created with subsequent perf_event_open() calls with group_fd
+                                      // being set to the file descriptor of the group leader.
+                                      CpuCyclesFd, 0)) < 0) {
     fini();
-    errorString = "perf_event_open failed for PERF_COUNT_HW_INSTRUCTIONS";
-    init_value = EXIT_FAILURE;
-    init_done = true;
+    ErrorString = "perf_event_open failed for PERF_COUNT_HW_INSTRUCTIONS";
+    InitValue = EXIT_FAILURE;
+    InitDone = true;
     return EXIT_FAILURE;
   }
 
-  ioctl(instructions_fd, PERF_EVENT_IOC_ID, &instructions_id);
+  ioctl(InstructionsFd, PERF_EVENT_IOC_ID, &InstructionsId);
 
-  ioctl(cpu_cycles_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
-  ioctl(cpu_cycles_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
+  ioctl(CpuCyclesFd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
+  ioctl(CpuCyclesFd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
 
-  if (0 == read(cpu_cycles_fd, &last, sizeof(last))) {
+  if (0 == read(CpuCyclesFd, &Last, sizeof(Last))) {
     fini();
-    errorString = "group read failed in init";
-    init_value = EXIT_FAILURE;
-    init_done = true;
+    ErrorString = "group read failed in init";
+    InitValue = EXIT_FAILURE;
+    InitDone = true;
     return EXIT_FAILURE;
   }
 
-  init_value = EXIT_SUCCESS;
-  init_done = true;
+  InitValue = EXIT_SUCCESS;
+  InitDone = true;
   return EXIT_SUCCESS;
 }
 
-static uint64_t value_from_id(struct read_format* values, uint64_t id) {
-  for (decltype(values->nr) i = 0; i < values->nr; ++i) {
-    if (id == values->values[i].id) {
-      return values->values[i].value;
+static auto valueFromId(struct ReadFormat* Values, uint64_t Id) -> uint64_t {
+  for (decltype(Values->Nr) I = 0; I < Values->Nr; ++I) {
+    if (Id == Values->Values[I].Id) {
+      return Values->Values[I].Value;
     }
   }
 
   return 0;
 }
 
-static int32_t get_reading(double* ipc_value, double* freq_value) {
+static auto getReading(double* IpcValue, double* FreqValue) -> int32_t {
 
-  if (cpu_cycles_fd < 0 || instructions_fd < 0) {
+  if (CpuCyclesFd < 0 || InstructionsFd < 0) {
     fini();
     return EXIT_FAILURE;
   }
 
-  struct read_format read_values;
+  struct ReadFormat ReadValues {};
 
-  if (0 == read(cpu_cycles_fd, &read_values, sizeof(read_values))) {
+  if (0 == read(CpuCyclesFd, &ReadValues, sizeof(ReadValues))) {
     fini();
-    errorString = "group read failed";
+    ErrorString = "group read failed";
     return EXIT_FAILURE;
   }
 
-  if (ipc_value != nullptr) {
-    uint64_t diff[2];
-    diff[0] = value_from_id(&read_values, instructions_id) - value_from_id(&last, instructions_id);
-    diff[1] = value_from_id(&read_values, cpu_cycles_id) - value_from_id(&last, cpu_cycles_id);
+  if (IpcValue != nullptr) {
+    uint64_t Diff[2];
+    Diff[0] = valueFromId(&ReadValues, InstructionsId) - valueFromId(&Last, InstructionsId);
+    Diff[1] = valueFromId(&ReadValues, CpuCyclesId) - valueFromId(&Last, CpuCyclesId);
 
-    std::memcpy(&last, &read_values, sizeof(last));
+    std::memcpy(&Last, &ReadValues, sizeof(Last));
 
-    *ipc_value = (double)diff[0] / (double)diff[1];
+    *IpcValue = (double)Diff[0] / (double)Diff[1];
   }
 
-  if (freq_value != nullptr) {
-    *freq_value = (double)value_from_id(&read_values, cpu_cycles_id) / 1e9;
+  if (FreqValue != nullptr) {
+    *FreqValue = (double)valueFromId(&ReadValues, CpuCyclesId) / 1e9;
   }
 
   return EXIT_SUCCESS;
 }
 
-static int32_t get_reading_ipc(double* value) { return get_reading(value, nullptr); }
+static auto getReadingIpc(double* Value) -> int32_t { return getReading(Value, nullptr); }
 
-static int32_t get_reading_freq(double* value) { return get_reading(nullptr, value); }
+static auto getReadingFreq(double* Value) -> int32_t { return getReading(nullptr, Value); }
 
-static const char* get_error(void) {
-  const char* errorCString = errorString.c_str();
-  return errorCString;
+static auto getError() -> const char* {
+  const char* ErrorCString = ErrorString.c_str();
+  return ErrorCString;
 }
 }
 
-MetricInterface PerfIpcMetric = {
+const MetricInterface PerfIpcMetric = {
     .Name = "perf-ipc",
     .Type = {.Absolute = 1,
              .Accumalative = 0,
@@ -243,12 +244,12 @@ MetricInterface PerfIpcMetric = {
     .Callback = nullptr,
     .Init = init,
     .Fini = fini,
-    .GetReading = get_reading_ipc,
-    .GetError = get_error,
+    .GetReading = getReadingIpc,
+    .GetError = getError,
     .RegisterInsertCallback = nullptr,
 };
 
-MetricInterface PerfFreqMetric = {
+const MetricInterface PerfFreqMetric = {
     .Name = "perf-freq",
     .Type = {.Absolute = 0,
              .Accumalative = 1,
@@ -261,7 +262,7 @@ MetricInterface PerfFreqMetric = {
     .Callback = nullptr,
     .Init = init,
     .Fini = fini,
-    .GetReading = get_reading_freq,
-    .GetError = get_error,
+    .GetReading = getReadingFreq,
+    .GetError = getError,
     .RegisterInsertCallback = nullptr,
 };
diff --git a/src/firestarter/Measurement/Metric/RAPL.cpp b/src/firestarter/Measurement/Metric/RAPL.cpp
index c73ef004..e9910fe7 100644
--- a/src/firestarter/Measurement/Metric/RAPL.cpp
+++ b/src/firestarter/Measurement/Metric/RAPL.cpp
@@ -32,42 +32,42 @@ extern "C" {
 
 #include <dirent.h>
 
-#define RAPL_PATH "/sys/class/powercap"
+static const std::string RaplPath = "/sys/class/powercap";
 
-static std::string errorString = "";
+static std::string errorString;
 
-struct reader_def {
-  char* path;
-  long long int last_reading;
-  long long int overflow;
-  long long int max;
+struct ReaderDef {
+  char* Path;
+  long long int LastReading;
+  long long int Overflow;
+  long long int Max;
 };
 
-struct reader_def_free {
-  void operator()(struct reader_def* def) {
-    if (def != nullptr) {
-      if (((void*)def->path) != nullptr) {
-        free((void*)def->path);
+struct ReaderDefFree {
+  void operator()(struct ReaderDef* Def) {
+    if (Def != nullptr) {
+      if (((void*)Def->Path) != nullptr) {
+        free((void*)Def->Path);
       }
-      free((void*)def);
+      free((void*)Def);
     }
   }
 };
 
-static std::vector<std::shared_ptr<struct reader_def>> readers = {};
+static std::vector<std::shared_ptr<struct ReaderDef>> Readers = {};
 
-static int32_t fini(void) {
-  readers.clear();
+static auto fini() -> int32_t {
+  Readers.clear();
 
   return EXIT_SUCCESS;
 }
 
-static int32_t init(void) {
+static auto init() -> int32_t {
   errorString = "";
 
-  DIR* raplDir = opendir(RAPL_PATH);
-  if (raplDir == NULL) {
-    errorString = "Could not open " RAPL_PATH;
+  DIR* RaplDir = opendir(RaplPath.c_str());
+  if (RaplDir == nullptr) {
+    errorString = "Could not open " + RaplPath;
     return EXIT_FAILURE;
   }
 
@@ -76,104 +76,104 @@ static int32_t init(void) {
   // and finally package only.
 
   // contains an empty path if it is not found
-  std::string psysPath = "";
+  std::string PsysPath;
 
   // a vector of all paths to package and dram
-  std::vector<std::string> paths = {};
+  std::vector<std::string> Paths = {};
 
-  struct dirent* dir;
-  while ((dir = readdir(raplDir)) != NULL) {
-    std::stringstream path;
-    std::stringstream namePath;
-    path << RAPL_PATH << "/" << dir->d_name;
-    namePath << path.str() << "/name";
+  struct dirent* Dir = nullptr;
+  while ((Dir = readdir(RaplDir)) != nullptr) {
+    std::stringstream Path;
+    std::stringstream NamePath;
+    Path << RaplPath << "/" << Dir->d_name;
+    NamePath << Path.str() << "/name";
 
-    std::ifstream nameStream(namePath.str());
-    if (!nameStream.good()) {
+    std::ifstream NameStream(NamePath.str());
+    if (!NameStream.good()) {
       // an error opening the file occured
       continue;
     }
 
-    std::string name;
-    std::getline(nameStream, name);
+    std::string Name;
+    std::getline(NameStream, Name);
 
-    if (name == "psys") {
+    if (Name == "psys") {
       // found psys
-      psysPath = path.str();
-    } else if (0 == name.rfind("package", 0) || name == "dram") {
+      PsysPath = Path.str();
+    } else if (0 == Name.rfind("package", 0) || Name == "dram") {
       // find all package and dram
-      paths.push_back(path.str());
+      Paths.push_back(Path.str());
     }
   }
-  closedir(raplDir);
+  closedir(RaplDir);
 
   // make psys the only value if available
-  if (!psysPath.empty()) {
-    paths.clear();
-    paths.push_back(psysPath);
+  if (!PsysPath.empty()) {
+    Paths.clear();
+    Paths.push_back(PsysPath);
   }
 
   // paths now contains all interesting nodes
 
-  if (paths.size() == 0) {
-    errorString = "No valid entries in " RAPL_PATH;
+  if (Paths.size() == 0) {
+    errorString = "No valid entries in " + RaplPath;
     return EXIT_FAILURE;
   }
 
-  for (auto const& path : paths) {
-    std::stringstream energyUjPath;
-    energyUjPath << path << "/energy_uj";
-    std::ifstream energyReadingStream(energyUjPath.str());
-    if (!energyReadingStream.good()) {
+  for (auto const& Path : Paths) {
+    std::stringstream EnergyUjPath;
+    EnergyUjPath << Path << "/energy_uj";
+    std::ifstream EnergyReadingStream(EnergyUjPath.str());
+    if (!EnergyReadingStream.good()) {
       errorString = "Could not read energy_uj";
       break;
     }
 
-    std::stringstream maxEnergyUjRangePath;
-    maxEnergyUjRangePath << path << "/max_energy_range_uj";
-    std::ifstream maxEnergyReadingStream(maxEnergyUjRangePath.str());
-    if (!maxEnergyReadingStream.good()) {
+    std::stringstream MaxEnergyUjRangePath;
+    MaxEnergyUjRangePath << Path << "/max_energy_range_uj";
+    std::ifstream MaxEnergyReadingStream(MaxEnergyUjRangePath.str());
+    if (!MaxEnergyReadingStream.good()) {
       errorString = "Could not read max_energy_range_uj";
       break;
     }
 
-    uint64_t reading;
-    uint64_t max;
-    std::string buffer;
-    int read;
+    uint64_t Reading = 0;
+    uint64_t Max = 0;
+    std::string Buffer;
+    int Read = 0;
 
-    std::getline(energyReadingStream, buffer);
-    read = std::sscanf(buffer.c_str(), "%lu", &reading);
+    std::getline(EnergyReadingStream, Buffer);
+    Read = std::sscanf(Buffer.c_str(), "%lu", &Reading);
 
-    if (read == 0) {
-      std::stringstream ss;
-      ss << "Contents in file " << energyUjPath.str() << " do not conform to mask (uint64_t)";
-      errorString = ss.str();
+    if (Read == 0) {
+      std::stringstream Ss;
+      Ss << "Contents in file " << EnergyUjPath.str() << " do not conform to mask (uint64_t)";
+      errorString = Ss.str();
       break;
     }
 
-    std::getline(maxEnergyReadingStream, buffer);
-    read = std::sscanf(buffer.c_str(), "%lu", &max);
+    std::getline(MaxEnergyReadingStream, Buffer);
+    Read = std::sscanf(Buffer.c_str(), "%lu", &Max);
 
-    if (read == 0) {
+    if (Read == 0) {
       std::stringstream ss;
-      ss << "Contents in file " << maxEnergyUjRangePath.str() << " do not conform to mask (uint64_t)";
+      ss << "Contents in file " << MaxEnergyUjRangePath.str() << " do not conform to mask (uint64_t)";
       errorString = ss.str();
       break;
     }
 
-    std::shared_ptr<struct reader_def> def(reinterpret_cast<struct reader_def*>(malloc(sizeof(struct reader_def))),
-                                           reader_def_free());
-    auto pathName = path.c_str();
-    size_t size = (strlen(pathName) + 1) * sizeof(char);
-    void* name = malloc(size);
-    memcpy(name, pathName, size);
-    def->path = (char*)name;
-    def->max = max;
-    def->last_reading = reading;
-    def->overflow = 0;
-
-    readers.push_back(def);
+    std::shared_ptr<struct ReaderDef> Def(reinterpret_cast<struct ReaderDef*>(malloc(sizeof(struct ReaderDef))),
+                                          ReaderDefFree());
+    const auto* PathName = Path.c_str();
+    size_t Size = (strlen(PathName) + 1) * sizeof(char);
+    void* Name = malloc(Size);
+    memcpy(Name, PathName, Size);
+    Def->Path = (char*)Name;
+    Def->Max = Max;
+    Def->LastReading = Reading;
+    Def->Overflow = 0;
+
+    Readers.push_back(Def);
   }
 
   if (errorString.size() != 0) {
@@ -184,46 +184,46 @@ static int32_t init(void) {
   return EXIT_SUCCESS;
 }
 
-static int32_t get_reading(double* value) {
-  double finalReading = 0.0;
+static auto getReading(double* Value) -> int32_t {
+  double FinalReading = 0.0;
 
-  for (auto& def : readers) {
-    long long int reading;
-    std::string buffer;
+  for (auto& Def : Readers) {
+    long long int Reading = 0;
+    std::string Buffer;
 
-    std::stringstream energyUjPath;
-    energyUjPath << def->path << "/energy_uj";
-    std::ifstream energyReadingStream(energyUjPath.str());
-    std::getline(energyReadingStream, buffer);
-    std::sscanf(buffer.c_str(), "%llu", &reading);
+    std::stringstream EnergyUjPath;
+    EnergyUjPath << Def->Path << "/energy_uj";
+    std::ifstream EnergyReadingStream(EnergyUjPath.str());
+    std::getline(EnergyReadingStream, Buffer);
+    std::sscanf(Buffer.c_str(), "%llu", &Reading);
 
-    if (reading < def->last_reading) {
-      def->overflow += 1;
+    if (Reading < Def->LastReading) {
+      Def->Overflow += 1;
     }
 
-    def->last_reading = reading;
+    Def->LastReading = Reading;
 
-    finalReading += 1.0E-6 * (double)(def->overflow * def->max + def->last_reading);
+    FinalReading += 1.0E-6 * (double)((Def->Overflow * Def->Max) + Def->LastReading);
   }
 
-  if (value != nullptr) {
-    *value = finalReading;
+  if (Value != nullptr) {
+    *Value = FinalReading;
   }
 
   return EXIT_SUCCESS;
 }
 
-static const char* get_error(void) {
-  const char* errorCString = errorString.c_str();
-  return errorCString;
+static auto getError() -> const char* {
+  const char* ErrorCString = errorString.c_str();
+  return ErrorCString;
 }
 
 // this function will be called periodically to make sure we do not miss an
 // overflow of the counter
-static void callback() { get_reading(nullptr); }
+static void callback() { getReading(nullptr); }
 }
 
-MetricInterface RaplMetric = {
+const MetricInterface RaplMetric = {
     .Name = "sysfs-powercap-rapl",
     .Type = {.Absolute = 0,
              .Accumalative = 1,
@@ -236,7 +236,7 @@ MetricInterface RaplMetric = {
     .Callback = callback,
     .Init = init,
     .Fini = fini,
-    .GetReading = get_reading,
-    .GetError = get_error,
+    .GetReading = getReading,
+    .GetError = getError,
     .RegisterInsertCallback = nullptr,
 };
diff --git a/src/firestarter/Measurement/Summary.cpp b/src/firestarter/Measurement/Summary.cpp
index 730775be..da626e9e 100644
--- a/src/firestarter/Measurement/Summary.cpp
+++ b/src/firestarter/Measurement/Summary.cpp
@@ -24,7 +24,7 @@
 #include <cassert>
 #include <cmath>
 
-using namespace firestarter::measurement;
+namespace firestarter::measurement {
 
 // this functions borows a lot of code from
 // https://github.com/metricq/metricq-cpp/blob/master/tools/metricq-summary/src/summary.cpp
@@ -35,34 +35,34 @@ auto Summary::calculate(std::vector<TimeValue>::iterator Begin, std::vector<Time
   // TODO: i would really like to make this code a bit more readable, but i
   // could not find a way yet.
   if (MetricType.Accumalative) {
-    TimeValue prev;
+    TimeValue Prev;
 
     if (Begin != End) {
-      prev = *Begin++;
-      for (auto it = Begin; it != End; ++it) {
-        auto time_diff =
-            1e-6 * (double)std::chrono::duration_cast<std::chrono::microseconds>(it->Time - prev.Time).count();
-        auto value_diff = it->Value - prev.Value;
+      Prev = *Begin++;
+      for (auto It = Begin; It != End; ++It) {
+        auto TimeDiff = 1e-6 * static_cast<double>(
+                                   std::chrono::duration_cast<std::chrono::microseconds>(It->Time - Prev.Time).count());
+        auto ValueDiff = It->Value - Prev.Value;
 
-        double value = value_diff / time_diff;
+        double Value = ValueDiff / TimeDiff;
 
         if (MetricType.DivideByThreadCount) {
-          value /= NumThreads;
+          Value /= NumThreads;
         }
 
-        Values.emplace_back(prev.Time, value);
-        prev = *it;
+        Values.emplace_back(Prev.Time, Value);
+        Prev = *It;
       }
     }
   } else if (MetricType.Absolute) {
-    for (auto it = Begin; it != End; ++it) {
-      double value = it->Value;
+    for (auto It = Begin; It != End; ++It) {
+      double Value = It->Value;
 
       if (MetricType.DivideByThreadCount) {
-        value /= NumThreads;
+        Value /= NumThreads;
       }
 
-      Values.emplace_back(it->Time, value);
+      Values.emplace_back(It->Time, Value);
     }
   } else {
     assert(false);
@@ -77,24 +77,26 @@ auto Summary::calculate(std::vector<TimeValue>::iterator Begin, std::vector<Time
 
   if (SummaryVal.NumTimepoints > 0) {
 
-    auto last = Begin;
-    std::advance(last, SummaryVal.NumTimepoints - 1);
-    SummaryVal.Duration = std::chrono::duration_cast<std::chrono::milliseconds>(last->Time - Begin->Time);
+    auto Last = Begin;
+    std::advance(Last, SummaryVal.NumTimepoints - 1);
+    SummaryVal.Duration = std::chrono::duration_cast<std::chrono::milliseconds>(Last->Time - Begin->Time);
 
-    auto sum_over_nths = [&Begin, End, SummaryVal](auto fn) {
-      double acc = 0.0;
-      for (auto it = Begin; it != End; ++it) {
-        acc += fn(it->Value);
+    auto SumOverNths = [&Begin, End, SummaryVal](auto Fn) {
+      double Acc = 0.0;
+      for (auto It = Begin; It != End; ++It) {
+        Acc += Fn(It->Value);
       }
-      return acc / SummaryVal.NumTimepoints;
+      return Acc / SummaryVal.NumTimepoints;
     };
 
-    SummaryVal.Average = sum_over_nths([](double v) { return v; });
-    SummaryVal.Stddev = std::sqrt(sum_over_nths([&SummaryVal](double v) {
-      double centered = v - SummaryVal.Average;
-      return centered * centered;
+    SummaryVal.Average = SumOverNths([](double V) { return V; });
+    SummaryVal.Stddev = std::sqrt(SumOverNths([&SummaryVal](double V) {
+      double Centered = V - SummaryVal.Average;
+      return Centered * Centered;
     }));
   }
 
   return SummaryVal;
 }
+
+} // namespace firestarter::measurement
\ No newline at end of file
diff --git a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
index e6a703bb..1da46011 100644
--- a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
+++ b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
@@ -28,9 +28,10 @@
 
 #include <algorithm>
 #include <iomanip>
+#include <random>
 #include <stdexcept>
 
-using namespace firestarter::optimizer::algorithm;
+namespace firestarter::optimizer::algorithm {
 
 NSGA2::NSGA2(unsigned Gen, double Cr, double M)
     : Gen(Gen)
@@ -56,7 +57,7 @@ void NSGA2::checkPopulation(firestarter::optimizer::Population const& Pop, std::
                                 std::to_string(Prob.getNobjs()));
   }
 
-  if (PopulationSize < 5u || (PopulationSize % 4 != 0u)) {
+  if (PopulationSize < 5U || (PopulationSize % 4 != 0U)) {
     throw std::invalid_argument("for NSGA-II at least 5 individuals in the "
                                 "population are needed and the "
                                 "population size must be a multiple of 4. "
@@ -66,110 +67,116 @@ void NSGA2::checkPopulation(firestarter::optimizer::Population const& Pop, std::
 }
 
 auto NSGA2::evolve(firestarter::optimizer::Population& Pop) -> firestarter::optimizer::Population {
-  const auto& prob = Pop.problem();
-  const auto bounds = prob.getBounds();
+  const auto& Prob = Pop.problem();
+  const auto Bounds = Prob.getBounds();
   auto NP = Pop.size();
-  auto fevals0 = prob.getFevals();
+  auto Fevals0 = Prob.getFevals();
 
   this->checkPopulation(const_cast<firestarter::optimizer::Population const&>(Pop), NP);
 
-  std::random_device rd;
-  std::mt19937 rng(rd());
+  std::random_device Rd;
+  std::mt19937 Rng(Rd());
 
-  std::vector<Individual::size_type> best_idx(NP), shuffle1(NP), shuffle2(NP);
-  Individual::size_type parent1_idx, parent2_idx;
-  std::pair<Individual, Individual> children;
+  std::vector<Individual::size_type> BestIdx(NP);
+  std::vector<Individual::size_type> Shuffle1(NP);
+  std::vector<Individual::size_type> Shuffle2(NP);
+  Individual::size_type Parent1Idx = 0;
+  Individual::size_type Parent2Idx = 0;
+  std::pair<Individual, Individual> Children;
 
-  std::iota(shuffle1.begin(), shuffle1.end(), Individual::size_type(0));
-  std::iota(shuffle2.begin(), shuffle2.end(), Individual::size_type(0));
+  std::iota(Shuffle1.begin(), Shuffle1.end(), static_cast<Individual::size_type>(0));
+  std::iota(Shuffle2.begin(), Shuffle2.end(), static_cast<Individual::size_type>(0));
 
   {
-    std::stringstream ss;
+    std::stringstream Ss;
 
-    ss << std::endl << std::setw(7) << "Gen:" << std::setw(15) << "Fevals:";
-    for (decltype(prob.getNobjs()) i = 0; i < prob.getNobjs(); ++i) {
-      ss << std::setw(15) << "ideal" << std::to_string(i + 1u) << ":";
+    Ss << '\n' << std::setw(7) << "Gen:" << std::setw(15) << "Fevals:";
+    for (decltype(Prob.getNobjs()) I = 0; I < Prob.getNobjs(); ++I) {
+      Ss << std::setw(15) << "ideal" << std::to_string(I + 1U) << ":";
     }
-    firestarter::log::info() << ss.str();
+    firestarter::log::info() << Ss.str();
   }
 
-  for (decltype(Gen) gen = 1u; gen <= Gen; ++gen) {
+  for (auto I = 1U; I <= Gen; ++I) {
     {
       // Print the logs
-      std::vector<double> idealPoint = util::ideal(Pop.f());
-      std::stringstream ss;
+      std::vector<double> IdealPoint = util::ideal(Pop.f());
+      std::stringstream Ss;
 
-      ss << std::setw(7) << gen << std::setw(15) << prob.getFevals() - fevals0;
-      for (decltype(idealPoint.size()) i = 0; i < idealPoint.size(); ++i) {
-        ss << std::setw(15) << idealPoint[i];
+      Ss << std::setw(7) << I << std::setw(15) << Prob.getFevals() - Fevals0;
+      for (double I : IdealPoint) {
+        Ss << std::setw(15) << I;
       }
 
-      firestarter::log::info() << ss.str();
+      firestarter::log::info() << Ss.str();
     }
 
     // At each generation we make a copy of the population into popnew
-    firestarter::optimizer::Population popnew(Pop);
+    firestarter::optimizer::Population Popnew(Pop);
 
     // We create some pseudo-random permutation of the poulation indexes
-    std::random_shuffle(shuffle1.begin(), shuffle1.end());
-    std::random_shuffle(shuffle2.begin(), shuffle2.end());
+    std::shuffle(Shuffle1.begin(), Shuffle1.end(), std::mt19937(std::random_device()()));
+    std::shuffle(Shuffle2.begin(), Shuffle2.end(), std::mt19937(std::random_device()()));
 
     // We compute crowding distance and non dominated rank for the current
     // population
-    auto fnds_res = util::fastNonDominatedSorting(Pop.f());
-    auto ndf = std::get<0>(fnds_res); // non dominated fronts [[0,3,2],[1,5,6],[4],...]
-    std::vector<double> pop_cd(NP);   // crowding distances of the whole population
-    auto ndr = std::get<3>(fnds_res); // non domination rank [0,1,0,0,2,1,1, ... ]
-    for (const auto& front_idxs : ndf) {
-      if (front_idxs.size() == 1u) { // handles the case where the front has collapsed to one point
-        pop_cd[front_idxs[0]] = std::numeric_limits<double>::infinity();
-      } else if (front_idxs.size() == 2u) { // handles the case where the front
+    auto FndsRes = util::fastNonDominatedSorting(Pop.f());
+    auto Ndf = std::get<0>(FndsRes); // non dominated fronts [[0,3,2],[1,5,6],[4],...]
+    std::vector<double> PopCd(NP);   // crowding distances of the whole population
+    auto Ndr = std::get<3>(FndsRes); // non domination rank [0,1,0,0,2,1,1, ... ]
+    for (const auto& FrontIdxs : Ndf) {
+      if (FrontIdxs.size() == 1U) { // handles the case where the front has collapsed to one point
+        PopCd[FrontIdxs[0]] = std::numeric_limits<double>::infinity();
+      } else if (FrontIdxs.size() == 2U) { // handles the case where the front
         // has collapsed to one point
-        pop_cd[front_idxs[0]] = std::numeric_limits<double>::infinity();
-        pop_cd[front_idxs[1]] = std::numeric_limits<double>::infinity();
+        PopCd[FrontIdxs[0]] = std::numeric_limits<double>::infinity();
+        PopCd[FrontIdxs[1]] = std::numeric_limits<double>::infinity();
       } else {
-        std::vector<std::vector<double>> front;
-        for (auto idx : front_idxs) {
-          front.push_back(Pop.f()[idx]);
+        std::vector<std::vector<double>> Front;
+        Front.reserve(FrontIdxs.size());
+        for (auto Idx : FrontIdxs) {
+          Front.push_back(Pop.f()[Idx]);
         }
-        auto cd = util::crowdingDistance(front);
-        for (decltype(cd.size()) i = 0u; i < cd.size(); ++i) {
-          pop_cd[front_idxs[i]] = cd[i];
+        auto Cd = util::crowdingDistance(Front);
+        for (decltype(Cd.size()) I = 0U; I < Cd.size(); ++I) {
+          PopCd[FrontIdxs[I]] = Cd[I];
         }
       }
     }
 
     // We then loop thorugh all individuals with increment 4 to select two pairs
     // of parents that will each create 2 new offspring
-    for (decltype(NP) i = 0u; i < NP; i += 4) {
+    for (decltype(NP) I = 0U; I < NP; I += 4) {
       // We create two offsprings using the shuffled list 1
-      parent1_idx = util::moTournamentSelection(shuffle1[i], shuffle1[i + 1], ndr, pop_cd, rng);
-      parent2_idx = util::moTournamentSelection(shuffle1[i + 2], shuffle1[i + 3], ndr, pop_cd, rng);
-      children = util::sbxCrossover(Pop.x()[parent1_idx], Pop.x()[parent2_idx], Cr, rng);
-      util::polynomialMutation(children.first, bounds, M, rng);
-      util::polynomialMutation(children.second, bounds, M, rng);
+      Parent1Idx = util::moTournamentSelection(Shuffle1[I], Shuffle1[I + 1], Ndr, PopCd, Rng);
+      Parent2Idx = util::moTournamentSelection(Shuffle1[I + 2], Shuffle1[I + 3], Ndr, PopCd, Rng);
+      Children = util::sbxCrossover(Pop.x()[Parent1Idx], Pop.x()[Parent2Idx], Cr, Rng);
+      util::polynomialMutation(Children.first, Bounds, M, Rng);
+      util::polynomialMutation(Children.second, Bounds, M, Rng);
 
-      popnew.append(children.first);
-      popnew.append(children.second);
+      Popnew.append(Children.first);
+      Popnew.append(Children.second);
 
       // We repeat with the shuffled list 2
-      parent1_idx = util::moTournamentSelection(shuffle2[i], shuffle2[i + 1], ndr, pop_cd, rng);
-      parent2_idx = util::moTournamentSelection(shuffle2[i + 2], shuffle2[i + 3], ndr, pop_cd, rng);
-      children = util::sbxCrossover(Pop.x()[parent1_idx], Pop.x()[parent2_idx], Cr, rng);
-      util::polynomialMutation(children.first, bounds, M, rng);
-      util::polynomialMutation(children.second, bounds, M, rng);
-
-      popnew.append(children.first);
-      popnew.append(children.second);
+      Parent1Idx = util::moTournamentSelection(Shuffle2[I], Shuffle2[I + 1], Ndr, PopCd, Rng);
+      Parent2Idx = util::moTournamentSelection(Shuffle2[I + 2], Shuffle2[I + 3], Ndr, PopCd, Rng);
+      Children = util::sbxCrossover(Pop.x()[Parent1Idx], Pop.x()[Parent2Idx], Cr, Rng);
+      util::polynomialMutation(Children.first, Bounds, M, Rng);
+      util::polynomialMutation(Children.second, Bounds, M, Rng);
+
+      Popnew.append(Children.first);
+      Popnew.append(Children.second);
     } // popnew now contains 2NP individuals
     // This method returns the sorted N best individuals in the population
     // according to the crowded comparison operator
-    best_idx = util::selectBestNMo(popnew.f(), NP);
+    BestIdx = util::selectBestNMo(Popnew.f(), NP);
     // We insert into the population
-    for (decltype(NP) i = 0; i < NP; ++i) {
-      Pop.insert(i, popnew.x()[best_idx[i]], popnew.f()[best_idx[i]]);
+    for (decltype(NP) I = 0; I < NP; ++I) {
+      Pop.insert(I, Popnew.x()[BestIdx[I]], Popnew.f()[BestIdx[I]]);
     }
   }
 
   return Pop;
 }
+
+} // namespace firestarter::optimizer::algorithm
\ No newline at end of file
diff --git a/src/firestarter/Optimizer/OptimizerWorker.cpp b/src/firestarter/Optimizer/OptimizerWorker.cpp
index 610b8cbd..0e7f235a 100644
--- a/src/firestarter/Optimizer/OptimizerWorker.cpp
+++ b/src/firestarter/Optimizer/OptimizerWorker.cpp
@@ -22,49 +22,51 @@
 #include <firestarter/Optimizer/OptimizerWorker.hpp>
 
 #include <thread>
+#include <utility>
 
-using namespace firestarter::optimizer;
+namespace firestarter::optimizer {
 
-OptimizerWorker::OptimizerWorker(std::unique_ptr<firestarter::optimizer::Algorithm>&& algorithm,
-                                 firestarter::optimizer::Population& population,
-                                 std::string const& optimizationAlgorithm, unsigned individuals,
-                                 std::chrono::seconds const& preheat)
-    : Algorithm(std::move(algorithm))
-    , Population(population)
-    , OptimizationAlgorithm(optimizationAlgorithm)
-    , Individuals(individuals)
-    , Preheat(preheat) {
-  pthread_create(&this->WorkerThread, NULL, reinterpret_cast<void* (*)(void*)>(OptimizerWorker::optimizerThread), this);
+OptimizerWorker::OptimizerWorker(std::unique_ptr<firestarter::optimizer::Algorithm>&& Algorithm,
+                                 firestarter::optimizer::Population& Population, std::string OptimizationAlgorithm,
+                                 unsigned Individuals, std::chrono::seconds const& Preheat)
+    : Algorithm(std::move(Algorithm))
+    , Population(Population)
+    , OptimizationAlgorithm(std::move(OptimizationAlgorithm))
+    , Individuals(Individuals)
+    , Preheat(Preheat) {
+  pthread_create(&this->WorkerThread, nullptr, OptimizerWorker::optimizerThread, this);
 }
 
 void OptimizerWorker::kill() {
   // we ignore ESRCH errno if thread already exited
-  pthread_cancel(this->WorkerThread);
+  pthread_cancel(WorkerThread);
 }
 
 void OptimizerWorker::join() {
   // we ignore ESRCH errno if thread already exited
-  pthread_join(this->WorkerThread, NULL);
+  pthread_join(WorkerThread, nullptr);
 }
 
-void* OptimizerWorker::optimizerThread(void* optimizerWorker) {
-  pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
+auto OptimizerWorker::optimizerThread(void* OptimizerWorker) -> void* {
+  pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, nullptr);
 
-  auto _this = reinterpret_cast<OptimizerWorker*>(optimizerWorker);
+  auto* This = reinterpret_cast<class OptimizerWorker*>(OptimizerWorker);
 
 #ifndef __APPLE__
   pthread_setname_np(pthread_self(), "Optimizer");
 #endif
 
   // heat the cpu before attempting to optimize
-  std::this_thread::sleep_for(_this->Preheat);
+  std::this_thread::sleep_for(This->Preheat);
 
   // For NSGA2 we start with a initial population
-  if (_this->OptimizationAlgorithm == "NSGA2") {
-    _this->Population.generateInitialPopulation(_this->Individuals);
+  if (This->OptimizationAlgorithm == "NSGA2") {
+    This->Population.generateInitialPopulation(This->Individuals);
   }
 
-  _this->Algorithm->evolve(_this->Population);
+  This->Algorithm->evolve(This->Population);
 
-  return NULL;
+  return nullptr;
 }
+
+} // namespace firestarter::optimizer
\ No newline at end of file
diff --git a/src/firestarter/Optimizer/Population.cpp b/src/firestarter/Optimizer/Population.cpp
index e136fda6..901ad5a0 100644
--- a/src/firestarter/Optimizer/Population.cpp
+++ b/src/firestarter/Optimizer/Population.cpp
@@ -26,100 +26,100 @@
 #include <algorithm>
 #include <cassert>
 
-using namespace firestarter::optimizer;
+namespace firestarter::optimizer {
 
-void Population::generateInitialPopulation(std::size_t populationSize) {
-  firestarter::log::trace() << "Generating " << populationSize << " random individuals for initial population.";
+void Population::generateInitialPopulation(std::size_t PopulationSize) {
+  firestarter::log::trace() << "Generating " << PopulationSize << " random individuals for initial population.";
 
-  auto dims = this->problem().getDims();
-  auto remaining = populationSize;
+  auto Dims = this->problem().getDims();
+  auto Remaining = PopulationSize;
 
-  if (!(populationSize < dims)) {
-    for (decltype(dims) i = 0; i < dims; i++) {
-      Individual vec(dims, 0);
-      vec[i] = 1;
-      this->append(vec);
+  if (!(PopulationSize < Dims)) {
+    for (decltype(Dims) I = 0; I < Dims; I++) {
+      Individual Vec(Dims, 0);
+      Vec[I] = 1;
+      this->append(Vec);
     }
 
-    remaining -= dims;
+    Remaining -= Dims;
   } else {
-    firestarter::log::trace() << "Population size (" << std::to_string(populationSize)
-                              << ") is less than size of problem dimension (" << std::to_string(dims) << ")";
+    firestarter::log::trace() << "Population size (" << std::to_string(PopulationSize)
+                              << ") is less than size of problem dimension (" << std::to_string(Dims) << ")";
   }
 
-  for (decltype(remaining) i = 0; i < remaining; i++) {
+  for (decltype(Remaining) I = 0; I < Remaining; I++) {
     this->append(this->getRandomIndividual());
   }
 }
 
-std::size_t Population::size() const { return X.size(); }
+auto Population::size() const -> std::size_t { return X.size(); }
 
-void Population::append(Individual const& ind) {
-  assert(this->problem().getDims() == ind.size());
+void Population::append(Individual const& Ind) {
+  assert(this->problem().getDims() == Ind.size());
 
-  std::map<std::string, firestarter::measurement::Summary> metrics;
+  std::map<std::string, firestarter::measurement::Summary> Metrics;
 
   // check if we already evaluated this individual
-  auto optional_metric = History::find(ind);
-  if (optional_metric.has_value()) {
-    metrics = optional_metric.value();
+  auto OptionalMetric = History::find(Ind);
+  if (OptionalMetric.has_value()) {
+    Metrics = OptionalMetric.value();
   } else {
-    metrics = this->ProblemPtr->metrics(ind);
+    Metrics = this->ProblemPtr->metrics(Ind);
   }
 
-  auto fitness = this->ProblemPtr->fitness(metrics);
+  auto Fitness = this->ProblemPtr->fitness(Metrics);
 
-  this->append(ind, fitness);
+  this->append(Ind, Fitness);
 
-  if (!optional_metric.has_value()) {
-    History::append(ind, metrics);
+  if (!OptionalMetric.has_value()) {
+    History::append(Ind, Metrics);
   }
 }
 
-void Population::append(Individual const& ind, std::vector<double> const& fit) {
-  std::stringstream ss;
-  ss << "  - Fitness: ";
-  for (auto const& v : fit) {
-    ss << v << " ";
+void Population::append(Individual const& Ind, std::vector<double> const& Fit) {
+  std::stringstream Ss;
+  Ss << "  - Fitness: ";
+  for (auto const& V : Fit) {
+    Ss << V << " ";
   }
-  firestarter::log::trace() << ss.str();
+  firestarter::log::trace() << Ss.str();
 
-  assert(this->problem().getNobjs() == fit.size());
-  assert(this->problem().getDims() == ind.size());
+  assert(this->problem().getNobjs() == Fit.size());
+  assert(this->problem().getDims() == Ind.size());
 
-  this->X.push_back(ind);
-  this->F.push_back(fit);
+  this->X.push_back(Ind);
+  this->F.push_back(Fit);
 }
 
-void Population::insert(std::size_t idx, Individual const& ind, std::vector<double> const& fit) {
+void Population::insert(std::size_t Idx, Individual const& Ind, std::vector<double> const& Fit) {
   // assert that population is big enough
-  assert(X.size() > idx);
+  assert(X.size() > Idx);
 
-  X[idx] = ind;
-  F[idx] = fit;
+  X[Idx] = Ind;
+  F[Idx] = Fit;
 }
 
-Individual Population::getRandomIndividual() {
-  auto dims = this->problem().getDims();
-  auto const bounds = this->problem().getBounds();
+auto Population::getRandomIndividual() -> Individual {
+  auto Dims = this->problem().getDims();
+  auto const Bounds = this->problem().getBounds();
 
-  firestarter::log::trace() << "Generating random individual of size: " << dims;
+  firestarter::log::trace() << "Generating random individual of size: " << Dims;
 
-  Individual out(dims);
+  Individual Out(Dims);
 
-  for (decltype(dims) i = 0; i < dims; i++) {
-    auto const lb = std::get<0>(bounds[i]);
-    auto const ub = std::get<1>(bounds[i]);
+  for (decltype(Dims) I = 0; I < Dims; I++) {
+    auto const Lb = std::get<0>(Bounds[I]);
+    auto const Ub = std::get<1>(Bounds[I]);
 
-    out[i] = std::uniform_int_distribution<unsigned>(lb, ub)(this->Gen);
+    Out[I] = std::uniform_int_distribution<unsigned>(Lb, Ub)(this->Gen);
 
-    firestarter::log::trace() << "  - " << i << ": [" << lb << "," << ub << "]: " << out[i];
+    firestarter::log::trace() << "  - " << I << ": [" << Lb << "," << Ub << "]: " << Out[I];
   }
 
-  return out;
+  return Out;
 }
 
-std::optional<Individual> Population::bestIndividual() const {
+auto Population::bestIndividual() const -> std::optional<Individual> {
   // return an empty vector if the problem is multi objective, as there is no
   // single best individual
   if (this->problem().isMO()) {
@@ -129,9 +129,11 @@ std::optional<Individual> Population::bestIndividual() const {
   // assert that we have individuals
   assert(this->X.size() > 0);
 
-  auto best = std::max_element(this->X.begin(), this->X.end(), [](auto a, auto b) { return a < b; });
+  auto Best = std::max_element(this->X.begin(), this->X.end(), [](const auto& A, const auto& B) { return A < B; });
 
-  assert(best != this->X.end());
+  assert(Best != this->X.end());
 
-  return *best;
+  return *Best;
 }
+
+} // namespace firestarter::optimizer
\ No newline at end of file
diff --git a/src/firestarter/Optimizer/Util/MultiObjective.cpp b/src/firestarter/Optimizer/Util/MultiObjective.cpp
index 78092234..9a757b11 100644
--- a/src/firestarter/Optimizer/Util/MultiObjective.cpp
+++ b/src/firestarter/Optimizer/Util/MultiObjective.cpp
@@ -32,35 +32,33 @@ namespace firestarter::optimizer::util {
 // Less than compares floating point types placing nans after inf or before -inf
 // It is a useful function when calling e.g. std::sort to guarantee a weak
 // strict ordering and avoid an undefined behaviour
-bool lessThanF(double a, double b) {
-  if (!std::isnan(a)) {
-    if (!std::isnan(b))
-      return a < b; // a < b
-    else
-      return true; // a < nan
-  } else {
-    if (!std::isnan(b))
-      return false; // nan < b
-    else
-      return false; // nan < nan
+auto lessThanF(double A, double B) -> bool {
+  if (!std::isnan(A)) {
+    if (!std::isnan(B)) {
+      return A < B; // a < b
+    }
+    return true; // a < nan
+  }
+  if (!std::isnan(B)) {
+    return false; // nan < b
   }
+  return false; // nan < nan
 }
 
 // Greater than compares floating point types placing nans after inf or before
 // -inf It is a useful function when calling e.g. std::sort to guarantee a weak
 // strict ordering and avoid an undefined behaviour
-bool greaterThanF(double a, double b) {
-  if (!std::isnan(a)) {
-    if (!std::isnan(b))
-      return a > b; // a > b
-    else
-      return false; // a > nan
-  } else {
-    if (!std::isnan(b))
-      return true; // nan > b
-    else
-      return false; // nan > nan
+auto greaterThanF(double A, double B) -> bool {
+  if (!std::isnan(A)) {
+    if (!std::isnan(B)) {
+      return A > B; // a > b
+    }
+    return false; // a > nan
   }
+  if (!std::isnan(B)) {
+    return true; // nan > b
+  }
+  return false; // nan > nan
 }
 
 /// Pareto-dominance
@@ -81,21 +79,22 @@ bool greaterThanF(double a, double b) {
  * @throws std::invalid_argument if the dimensions of the two objectives are
  * different
  */
-bool paretoDominance(const std::vector<double>& obj1, const std::vector<double>& obj2) {
-  if (obj1.size() != obj2.size()) {
+auto paretoDominance(const std::vector<double>& Obj1, const std::vector<double>& Obj2) -> bool {
+  if (Obj1.size() != Obj2.size()) {
     throw std::invalid_argument(
-        "Different number of objectives found in input fitnesses: " + std::to_string(obj1.size()) + " and " +
-        std::to_string(obj2.size()) + ". I cannot define dominance");
+        "Different number of objectives found in input fitnesses: " + std::to_string(Obj1.size()) + " and " +
+        std::to_string(Obj2.size()) + ". I cannot define dominance");
   }
-  bool found_strictly_dominating_dimension = false;
-  for (decltype(obj1.size()) i = 0u; i < obj1.size(); ++i) {
-    if (greaterThanF(obj2[i], obj1[i])) {
+  bool FoundStrictlyDominatingDimension = false;
+  for (decltype(Obj1.size()) I = 0U; I < Obj1.size(); ++I) {
+    if (greaterThanF(Obj2[I], Obj1[I])) {
       return false;
-    } else if (lessThanF(obj2[i], obj1[i])) {
-      found_strictly_dominating_dimension = true;
+    }
+    if (lessThanF(Obj2[I], Obj1[I])) {
+      FoundStrictlyDominatingDimension = true;
     }
   }
-  return found_strictly_dominating_dimension;
+  return FoundStrictlyDominatingDimension;
 }
 
 /// Fast non dominated sorting
@@ -128,63 +127,63 @@ bool paretoDominance(const std::vector<double>& obj1, const std::vector<double>&
  *
  * @throws std::invalid_argument If the size of \p points is not at least 2
  */
-std::tuple<std::vector<std::vector<std::size_t>>, std::vector<std::vector<std::size_t>>, std::vector<std::size_t>,
-           std::vector<std::size_t>>
-fastNonDominatedSorting(const std::vector<std::vector<double>>& points) {
-  auto N = points.size();
+auto fastNonDominatedSorting(const std::vector<std::vector<double>>& Points)
+    -> std::tuple<std::vector<std::vector<std::size_t>>, std::vector<std::vector<std::size_t>>,
+                  std::vector<std::size_t>, std::vector<std::size_t>> {
+  auto N = Points.size();
   // We make sure to have two points at least (one could also be allowed)
-  if (N < 2u) {
+  if (N < 2U) {
     throw std::invalid_argument("At least two points are needed for fast_non_dominated_sorting: " + std::to_string(N) +
                                 " detected.");
   }
   // Initialize the return values
-  std::vector<std::vector<std::size_t>> non_dom_fronts(1u);
-  std::vector<std::vector<std::size_t>> dom_list(N);
-  std::vector<std::size_t> dom_count(N);
-  std::vector<std::size_t> non_dom_rank(N);
+  std::vector<std::vector<std::size_t>> NonDomFronts(1U);
+  std::vector<std::vector<std::size_t>> DomList(N);
+  std::vector<std::size_t> DomCount(N);
+  std::vector<std::size_t> NonDomRank(N);
 
   // Start the fast non dominated sort algorithm
-  for (decltype(N) i = 0u; i < N; ++i) {
-    dom_list[i].clear();
-    dom_count[i] = 0u;
-    for (decltype(N) j = 0u; j < i; ++j) {
-      if (paretoDominance(points[i], points[j])) {
-        dom_list[i].push_back(j);
-        ++dom_count[j];
-      } else if (paretoDominance(points[j], points[i])) {
-        dom_list[j].push_back(i);
-        ++dom_count[i];
+  for (decltype(N) I = 0U; I < N; ++I) {
+    DomList[I].clear();
+    DomCount[I] = 0U;
+    for (decltype(N) J = 0U; J < I; ++J) {
+      if (paretoDominance(Points[I], Points[J])) {
+        DomList[I].push_back(J);
+        ++DomCount[J];
+      } else if (paretoDominance(Points[J], Points[I])) {
+        DomList[J].push_back(I);
+        ++DomCount[I];
       }
     }
   }
-  for (decltype(N) i = 0u; i < N; ++i) {
-    if (dom_count[i] == 0u) {
-      non_dom_rank[i] = 0u;
-      non_dom_fronts[0].push_back(i);
+  for (decltype(N) I = 0U; I < N; ++I) {
+    if (DomCount[I] == 0U) {
+      NonDomRank[I] = 0U;
+      NonDomFronts[0].push_back(I);
     }
   }
   // we copy dom_count as we want to output its value at this point
-  auto dom_count_copy(dom_count);
-  auto current_front = non_dom_fronts[0];
-  std::vector<std::vector<std::size_t>>::size_type front_counter(0u);
-  while (current_front.size() != 0u) {
-    std::vector<std::size_t> next_front;
-    for (decltype(current_front.size()) p = 0u; p < current_front.size(); ++p) {
-      for (decltype(dom_list[current_front[p]].size()) q = 0u; q < dom_list[current_front[p]].size(); ++q) {
-        --dom_count_copy[dom_list[current_front[p]][q]];
-        if (dom_count_copy[dom_list[current_front[p]][q]] == 0u) {
-          non_dom_rank[dom_list[current_front[p]][q]] = front_counter + 1u;
-          next_front.push_back(dom_list[current_front[p]][q]);
+  auto DomCountCopy(DomCount);
+  auto CurrentFront = NonDomFronts[0];
+  std::vector<std::vector<std::size_t>>::size_type FrontCounter(0U);
+  while (CurrentFront.size() != 0U) {
+    std::vector<std::size_t> NextFront;
+    for (const auto& P : CurrentFront) {
+      for (const auto& Q : DomList[P]) {
+        --DomCountCopy[Q];
+        if (DomCountCopy[Q] == 0U) {
+          NonDomRank[Q] = FrontCounter + 1U;
+          NextFront.push_back(Q);
         }
       }
     }
-    ++front_counter;
-    current_front = next_front;
-    if (current_front.size() != 0u) {
-      non_dom_fronts.push_back(current_front);
+    ++FrontCounter;
+    CurrentFront = NextFront;
+    if (CurrentFront.size() != 0U) {
+      NonDomFronts.push_back(CurrentFront);
     }
   }
-  return std::make_tuple(std::move(non_dom_fronts), std::move(dom_list), std::move(dom_count), std::move(non_dom_rank));
+  return std::make_tuple(std::move(NonDomFronts), std::move(DomList), std::move(DomCount), std::move(NonDomRank));
 }
 
 /// Crowding distance
@@ -212,61 +211,64 @@ fastNonDominatedSorting(const std::vector<std::vector<double>>& points) {
  * @throws std::invalid_argument If points in \p non_dom_front do not all have
  * the same dimensionality
  */
-std::vector<double> crowdingDistance(const std::vector<std::vector<double>>& non_dom_front) {
-  auto N = non_dom_front.size();
+auto crowdingDistance(const std::vector<std::vector<double>>& NonDomFront) -> std::vector<double> {
+  auto N = NonDomFront.size();
   // We make sure to have two points at least
-  if (N < 2u) {
+  if (N < 2U) {
     throw std::invalid_argument("A non dominated front must contain at least two points: " + std::to_string(N) +
                                 " detected.");
   }
-  auto M = non_dom_front[0].size();
+  auto M = NonDomFront[0].size();
   // We make sure the first point of the input non dominated front contains at
   // least two objectives
-  if (M < 2u) {
+  if (M < 2U) {
     throw std::invalid_argument("Points in the non dominated front must "
                                 "contain at least two objectives: " +
                                 std::to_string(M) + " detected.");
   }
   // We make sure all points contain the same number of objectives
-  if (!std::all_of(non_dom_front.begin(), non_dom_front.end(),
-                   [M](const std::vector<double>& item) { return item.size() == M; })) {
+  if (!std::all_of(NonDomFront.begin(), NonDomFront.end(),
+                   [M](const std::vector<double>& Item) { return Item.size() == M; })) {
     throw std::invalid_argument("A non dominated front must contain points of "
                                 "uniform dimensionality. Some "
                                 "different sizes were instead detected.");
   }
-  std::vector<std::size_t> indexes(N);
-  std::iota(indexes.begin(), indexes.end(), std::size_t(0u));
-  std::vector<double> retval(N, 0.);
-  for (decltype(M) i = 0u; i < M; ++i) {
-    std::sort(indexes.begin(), indexes.end(), [i, &non_dom_front](std::size_t idx1, std::size_t idx2) {
-      return lessThanF(non_dom_front[idx1][i], non_dom_front[idx2][i]);
+  std::vector<std::size_t> Indexes(N);
+  std::iota(Indexes.begin(), Indexes.end(), static_cast<std::size_t>(0U));
+  std::vector<double> Retval(N, 0.);
+  for (decltype(M) I = 0U; I < M; ++I) {
+    std::sort(Indexes.begin(), Indexes.end(), [I, &NonDomFront](std::size_t Idx1, std::size_t Idx2) {
+      return lessThanF(NonDomFront[Idx1][I], NonDomFront[Idx2][I]);
     });
-    retval[indexes[0]] = std::numeric_limits<double>::infinity();
-    retval[indexes[N - 1u]] = std::numeric_limits<double>::infinity();
-    double df = non_dom_front[indexes[N - 1u]][i] - non_dom_front[indexes[0]][i];
-    for (decltype(N - 2u) j = 1u; j < N - 1u; ++j) {
-      retval[indexes[j]] += (non_dom_front[indexes[j + 1u]][i] - non_dom_front[indexes[j - 1u]][i]) / df;
+    Retval[Indexes[0]] = std::numeric_limits<double>::infinity();
+    Retval[Indexes[N - 1U]] = std::numeric_limits<double>::infinity();
+    double Df = NonDomFront[Indexes[N - 1U]][I] - NonDomFront[Indexes[0]][I];
+    for (decltype(N - 2U) J = 1U; J < N - 1U; ++J) {
+      Retval[Indexes[J]] += (NonDomFront[Indexes[J + 1U]][I] - NonDomFront[Indexes[J - 1U]][I]) / Df;
     }
   }
-  return retval;
+  return Retval;
 }
 
 // Multi-objective tournament selection. Requires all sizes to be consistent.
 // Does not check if input is well formed.
-std::vector<double>::size_type
-moTournamentSelection(std::vector<double>::size_type idx1, std::vector<double>::size_type idx2,
-                      const std::vector<std::vector<double>::size_type>& non_domination_rank,
-                      const std::vector<double>& crowding_d, std::mt19937& mt) {
-  if (non_domination_rank[idx1] < non_domination_rank[idx2])
-    return idx1;
-  if (non_domination_rank[idx1] > non_domination_rank[idx2])
-    return idx2;
-  if (crowding_d[idx1] > crowding_d[idx2])
-    return idx1;
-  if (crowding_d[idx1] < crowding_d[idx2])
-    return idx2;
-  std::uniform_real_distribution<> drng(0., 1.);
-  return ((drng(mt) < 0.5) ? idx1 : idx2);
+auto moTournamentSelection(std::vector<double>::size_type Idx1, std::vector<double>::size_type Idx2,
+                           const std::vector<std::vector<double>::size_type>& NonDominationRank,
+                           const std::vector<double>& CrowdingD, std::mt19937& Mt) -> std::vector<double>::size_type {
+  if (NonDominationRank[Idx1] < NonDominationRank[Idx2]) {
+    return Idx1;
+  }
+  if (NonDominationRank[Idx1] > NonDominationRank[Idx2]) {
+    return Idx2;
+  }
+  if (CrowdingD[Idx1] > CrowdingD[Idx2]) {
+    return Idx1;
+  }
+  if (CrowdingD[Idx1] < CrowdingD[Idx2]) {
+    return Idx2;
+  }
+  std::uniform_real_distribution<> Drng(0., 1.);
+  return ((Drng(Mt) < 0.5) ? Idx1 : Idx2);
 }
 
 // Implementation of the binary crossover.
@@ -274,57 +276,56 @@ moTournamentSelection(std::vector<double>::size_type idx1, std::vector<double>::
 // otherwise Requires dimensions of the parent and bounds to be equal -> out of
 // bound reads. nix is the integer dimension (integer alleles assumed at the end
 // of the chromosome)
-std::pair<firestarter::optimizer::Individual, firestarter::optimizer::Individual>
-sbxCrossover(const firestarter::optimizer::Individual& parent1, const firestarter::optimizer::Individual& parent2,
-             const double p_cr, std::mt19937& mt) {
+auto sbxCrossover(const firestarter::optimizer::Individual& Parent1, const firestarter::optimizer::Individual& Parent2,
+                  const double PCr, std::mt19937& Mt)
+    -> std::pair<firestarter::optimizer::Individual, firestarter::optimizer::Individual> {
   // Decision vector dimensions
-  auto nix = parent1.size();
-  firestarter::optimizer::Individual::size_type site1, site2;
+  auto Nix = Parent1.size();
   // Initialize the child decision vectors
-  firestarter::optimizer::Individual child1 = parent1;
-  firestarter::optimizer::Individual child2 = parent2;
+  firestarter::optimizer::Individual Child1 = Parent1;
+  firestarter::optimizer::Individual Child2 = Parent2;
   // Random distributions
-  std::uniform_real_distribution<> drng(0.,
+  std::uniform_real_distribution<> Drng(0.,
                                         1.); // to generate a number in [0, 1)
 
   // This implements a Simulated Binary Crossover SBX
-  if (drng(mt) < p_cr) { // No crossever at all will happen with probability p_cr
+  if (Drng(Mt) < PCr) { // No crossever at all will happen with probability p_cr
     // This implements two-points crossover and applies it to the integer part
     // of the chromosome.
-    if (nix > 0u) {
-      std::uniform_int_distribution<firestarter::optimizer::Individual::size_type> ra_num(0, nix - 1u);
-      site1 = ra_num(mt);
-      site2 = ra_num(mt);
-      if (site1 > site2) {
-        std::swap(site1, site2);
+    if (Nix > 0U) {
+      std::uniform_int_distribution<firestarter::optimizer::Individual::size_type> RaNum(0, Nix - 1U);
+      auto Site1 = RaNum(Mt);
+      auto Site2 = RaNum(Mt);
+      if (Site1 > Site2) {
+        std::swap(Site1, Site2);
       }
-      for (decltype(site2) j = site1; j <= site2; ++j) {
-        child1[j] = parent2[j];
-        child2[j] = parent1[j];
+      for (decltype(Site2) J = Site1; J <= Site2; ++J) {
+        Child1[J] = Parent2[J];
+        Child2[J] = Parent1[J];
       }
     }
   }
-  return std::make_pair(std::move(child1), std::move(child2));
+  return std::make_pair(std::move(Child1), std::move(Child2));
 }
 
 // Performs polynomial mutation. Requires all sizes to be consistent. Does not
 // check if input is well formed. p_m is the mutation probability
-void polynomialMutation(firestarter::optimizer::Individual& child,
-                        const std::vector<std::tuple<unsigned, unsigned>>& bounds, const double p_m, std::mt19937& mt) {
+void polynomialMutation(firestarter::optimizer::Individual& Child,
+                        const std::vector<std::tuple<unsigned, unsigned>>& Bounds, const double PM, std::mt19937& Mt) {
   // Decision vector dimensions
-  auto nix = child.size();
+  auto Nix = Child.size();
   // Random distributions
-  std::uniform_real_distribution<> drng(0.,
+  std::uniform_real_distribution<> Drng(0.,
                                         1.); // to generate a number in [0, 1)
   // This implements the integer mutation for an individual
-  for (decltype(nix) j = 0; j < nix; ++j) {
-    if (drng(mt) < p_m) {
+  for (decltype(Nix) J = 0; J < Nix; ++J) {
+    if (Drng(Mt) < PM) {
       // We need to draw a random integer in [lb, ub].
-      auto lb = std::get<0>(bounds[j]);
-      auto ub = std::get<1>(bounds[j]);
-      std::uniform_int_distribution<firestarter::optimizer::Individual::size_type> dist(lb, ub);
-      auto mutated = dist(mt);
-      child[j] = mutated;
+      auto Lb = std::get<0>(Bounds[J]);
+      auto Ub = std::get<1>(Bounds[J]);
+      std::uniform_int_distribution<firestarter::optimizer::Individual::size_type> Dist(Lb, Ub);
+      auto Mutated = Dist(Mt);
+      Child[J] = Mutated;
     }
   }
 }
@@ -361,58 +362,58 @@ void polynomialMutation(firestarter::optimizer::Individual& child,
  * @throws unspecified all exceptions thrown by
  * pagmo::fast_non_dominated_sorting and pagmo::crowding_distance
  */
-std::vector<std::size_t> selectBestNMo(const std::vector<std::vector<double>>& input_f, std::size_t N) {
-  if (N == 0u) { // corner case
+auto selectBestNMo(const std::vector<std::vector<double>>& InputF, std::size_t N) -> std::vector<std::size_t> {
+  if (N == 0U) { // corner case
     return {};
   }
-  if (input_f.size() == 0u) { // corner case
+  if (InputF.size() == 0U) { // corner case
     return {};
   }
-  if (input_f.size() == 1u) { // corner case
-    return {0u};
+  if (InputF.size() == 1U) { // corner case
+    return {0U};
   }
-  if (N >= input_f.size()) { // corner case
-    std::vector<std::size_t> retval(input_f.size());
-    std::iota(retval.begin(), retval.end(), std::size_t(0u));
-    return retval;
+  if (N >= InputF.size()) { // corner case
+    std::vector<std::size_t> Retval(InputF.size());
+    std::iota(Retval.begin(), Retval.end(), static_cast<std::size_t>(0U));
+    return Retval;
   }
-  std::vector<std::size_t> retval;
-  std::vector<std::size_t>::size_type front_id(0u);
+  std::vector<std::size_t> Retval;
+  std::vector<std::size_t>::size_type FrontId(0U);
   // Run fast-non-dominated sorting
-  auto tuple = fastNonDominatedSorting(input_f);
+  auto Tuple = fastNonDominatedSorting(InputF);
   // Insert all non dominated fronts if not more than N
-  for (const auto& front : std::get<0>(tuple)) {
-    if (retval.size() + front.size() <= N) {
-      for (auto i : front) {
-        retval.push_back(i);
+  for (const auto& Front : std::get<0>(Tuple)) {
+    if (Retval.size() + Front.size() <= N) {
+      for (auto I : Front) {
+        Retval.push_back(I);
       }
-      if (retval.size() == N) {
-        return retval;
+      if (Retval.size() == N) {
+        return Retval;
       }
-      ++front_id;
+      ++FrontId;
     } else {
       break;
     }
   }
-  auto front = std::get<0>(tuple)[front_id];
-  std::vector<std::vector<double>> non_dom_fits(front.size());
+  auto Front = std::get<0>(Tuple)[FrontId];
+  std::vector<std::vector<double>> NonDomFits(Front.size());
   // Run crowding distance for the front
-  for (decltype(front.size()) i = 0u; i < front.size(); ++i) {
-    non_dom_fits[i] = input_f[front[i]];
+  for (decltype(Front.size()) I = 0U; I < Front.size(); ++I) {
+    NonDomFits[I] = InputF[Front[I]];
   }
-  std::vector<double> cds(crowdingDistance(non_dom_fits));
+  std::vector<double> Cds(crowdingDistance(NonDomFits));
   // We now have front and crowding distance, we sort the front w.r.t. the
   // crowding
-  std::vector<std::size_t> idxs(front.size());
-  std::iota(idxs.begin(), idxs.end(), std::size_t(0u));
-  std::sort(idxs.begin(), idxs.end(), [&cds](std::size_t idx1, std::size_t idx2) {
-    return greaterThanF(cds[idx1], cds[idx2]);
+  std::vector<std::size_t> Idxs(Front.size());
+  std::iota(Idxs.begin(), Idxs.end(), static_cast<std::size_t>(0U));
+  std::sort(Idxs.begin(), Idxs.end(), [&Cds](std::size_t Idx1, std::size_t Idx2) {
+    return greaterThanF(Cds[Idx1], Cds[Idx2]);
   }); // Descending order1
-  auto remaining = N - retval.size();
-  for (decltype(remaining) i = 0u; i < remaining; ++i) {
-    retval.push_back(front[idxs[i]]);
+  auto Remaining = N - Retval.size();
+  for (decltype(Remaining) I = 0U; I < Remaining; ++I) {
+    Retval.push_back(Front[Idxs[I]]);
   }
-  return retval;
+  return Retval;
 }
 
 /// Ideal point
@@ -432,30 +433,30 @@ std::vector<std::size_t> selectBestNMo(const std::vector<std::vector<double>>& i
  * @throws std::invalid_argument if the input objective vectors are not all of
  * the same size
  */
-std::vector<double> ideal(const std::vector<std::vector<double>>& points) {
+auto ideal(const std::vector<std::vector<double>>& Points) -> std::vector<double> {
   // Corner case
-  if (points.size() == 0u) {
+  if (Points.size() == 0U) {
     return {};
   }
 
   // Sanity checks
-  auto M = points[0].size();
-  for (const auto& f : points) {
-    if (f.size() != M) {
+  auto M = Points[0].size();
+  for (const auto& F : Points) {
+    if (F.size() != M) {
       throw std::invalid_argument("Input vector of objectives must contain "
                                   "fitness vector of equal dimension " +
                                   std::to_string(M));
     }
   }
   // Actual algorithm
-  std::vector<double> retval(M);
-  for (decltype(M) i = 0u; i < M; ++i) {
-    retval[i] = (*std::min_element(points.begin(), points.end(),
-                                   [i](const std::vector<double>& f1, const std::vector<double>& f2) {
-                                     return util::greaterThanF(f1[i], f2[i]);
-                                   }))[i];
+  std::vector<double> Retval(M);
+  for (decltype(M) I = 0U; I < M; ++I) {
+    Retval[I] = (*std::min_element(Points.begin(), Points.end(),
+                                   [I](const std::vector<double>& F1, const std::vector<double>& F2) {
+                                     return util::greaterThanF(F1[I], F2[I]);
+                                   }))[I];
   }
-  return retval;
+  return Retval;
 }
 
 } // namespace firestarter::optimizer::util
diff --git a/src/firestarter/WatchdogWorker.cpp b/src/firestarter/WatchdogWorker.cpp
index 8d8218eb..54f6af28 100644
--- a/src/firestarter/WatchdogWorker.cpp
+++ b/src/firestarter/WatchdogWorker.cpp
@@ -28,10 +28,10 @@
 #include <SCOREP_User.h>
 #endif
 
-using namespace firestarter;
+namespace firestarter {
 
-int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::microseconds load,
-                                std::chrono::seconds timeout) {
+auto Firestarter::watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load,
+                                 std::chrono::seconds Timeout) -> int {
 
   using clock = std::chrono::high_resolution_clock;
   using nsec = std::chrono::nanoseconds;
@@ -39,38 +39,38 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::m
   using sec = std::chrono::seconds;
 
   // calculate idle time to be the rest of the period
-  auto idle = period - load;
+  auto Idle = Period - Load;
 
   // elapsed time
-  nsec time(0);
+  nsec Time(0);
 
   // do no enter the loop if we do not have to set the load level periodically,
   // at 0 or 100 load.
-  if (period > usec::zero()) {
+  if (Period > usec::zero()) {
     // this first time is critical as the period will be alligend from this
     // point
-    std::chrono::time_point<clock> startTime = clock::now();
+    std::chrono::time_point<clock> StartTime = clock::now();
 
     // this loop will set the load level periodically.
     for (;;) {
-      std::chrono::time_point<clock> currentTime = clock::now();
+      std::chrono::time_point<clock> CurrentTime = clock::now();
 
       // get the time already advanced in the current timeslice
       // this can happen if a load function does not terminates just on time
-      nsec advance =
-          std::chrono::duration_cast<nsec>(currentTime - startTime) % std::chrono::duration_cast<nsec>(period);
+      nsec Advance =
+          std::chrono::duration_cast<nsec>(CurrentTime - StartTime) % std::chrono::duration_cast<nsec>(Period);
 
       // subtract the advaned time from our timeslice by spilting it based on
       // the load level
-      nsec load_reduction =
-          (std::chrono::duration_cast<nsec>(load).count() * advance) / std::chrono::duration_cast<nsec>(period).count();
-      nsec idle_reduction = advance - load_reduction;
+      nsec LoadReduction =
+          (std::chrono::duration_cast<nsec>(Load).count() * Advance) / std::chrono::duration_cast<nsec>(Period).count();
+      nsec IdleReduction = Advance - LoadReduction;
 
       // signal high load level
-      this->setLoad(LOAD_HIGH);
+      setLoad(LOAD_HIGH);
 
       // calculate values for nanosleep
-      nsec load_nsec = load - load_reduction;
+      nsec LoadNsec = Load - LoadReduction;
 
       // wait for time to be ellapsed with high load
 #ifdef ENABLE_VTRACING
@@ -80,11 +80,11 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::m
       SCOREP_USER_REGION_BY_NAME_BEGIN("WD_HIGH", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
       {
-        std::unique_lock<std::mutex> lk(this->WatchdogTerminateMutex);
+        std::unique_lock<std::mutex> Lk(WatchdogTerminateMutex);
         // abort waiting if we get the interrupt signal
-        this->WatchdogTerminateAlert.wait_for(lk, load_nsec, [this]() { return this->WatchdogTerminate; });
+        WatchdogTerminateAlert.wait_for(Lk, LoadNsec, []() { return WatchdogTerminate; });
         // terminate on interrupt
-        if (this->WatchdogTerminate) {
+        if (WatchdogTerminate) {
           return EXIT_SUCCESS;
         }
       }
@@ -96,10 +96,10 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::m
 #endif
 
       // signal low load
-      this->setLoad(LOAD_LOW);
+      setLoad(LOAD_LOW);
 
       // calculate values for nanosleep
-      nsec idle_nsec = idle - idle_reduction;
+      nsec IdleNsec = Idle - IdleReduction;
 
       // wait for time to be ellapsed with low load
 #ifdef ENABLE_VTRACING
@@ -109,11 +109,11 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::m
       SCOREP_USER_REGION_BY_NAME_BEGIN("WD_LOW", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
       {
-        std::unique_lock<std::mutex> lk(this->WatchdogTerminateMutex);
+        std::unique_lock<std::mutex> Lk(WatchdogTerminateMutex);
         // abort waiting if we get the interrupt signal
-        this->WatchdogTerminateAlert.wait_for(lk, idle_nsec, [this]() { return this->WatchdogTerminate; });
+        WatchdogTerminateAlert.wait_for(Lk, IdleNsec, []() { return WatchdogTerminate; });
         // terminate on interrupt
-        if (this->WatchdogTerminate) {
+        if (WatchdogTerminate) {
           return EXIT_SUCCESS;
         }
       }
@@ -125,13 +125,13 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::m
 #endif
 
       // increment elapsed time
-      time += period;
+      Time += Period;
 
       // exit when termination signal is received or timeout is reached
       {
-        std::lock_guard<std::mutex> lk(this->WatchdogTerminateMutex);
-        if (this->WatchdogTerminate || (timeout > sec::zero() && (time > timeout))) {
-          this->setLoad(LOAD_STOP);
+        std::lock_guard<std::mutex> Lk(WatchdogTerminateMutex);
+        if (WatchdogTerminate || (Timeout > sec::zero() && (Time > Timeout))) {
+          setLoad(LOAD_STOP);
 
           return EXIT_SUCCESS;
         }
@@ -141,17 +141,19 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::m
 
   // if timeout is set, sleep for this time and stop execution.
   // else return and wait for sigterm handler to request threads to stop.
-  if (timeout > sec::zero()) {
+  if (Timeout > sec::zero()) {
     {
-      std::unique_lock<std::mutex> lk(Firestarter::WatchdogTerminateMutex);
+      std::unique_lock<std::mutex> Lk(Firestarter::WatchdogTerminateMutex);
       // abort waiting if we get the interrupt signal
-      Firestarter::WatchdogTerminateAlert.wait_for(lk, timeout, []() { return Firestarter::WatchdogTerminate; });
+      Firestarter::WatchdogTerminateAlert.wait_for(Lk, Timeout, []() { return WatchdogTerminate; });
     }
 
-    this->setLoad(LOAD_STOP);
+    setLoad(LOAD_STOP);
 
     return EXIT_SUCCESS;
   }
 
   return EXIT_SUCCESS;
 }
+
+} // namespace firestarter
\ No newline at end of file

From 1d98664f13094f2a3b9e8cb079207b880daff104 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 3 Oct 2024 14:37:11 +0200
Subject: [PATCH 020/167] clang-tidy: allow big functions

---
 .clang-tidy | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.clang-tidy b/.clang-tidy
index f61bbcf4..02c27d04 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -13,6 +13,8 @@
 
 #  -bugprone-easily-swappable-parameters we are not using strong typedefs
 
+#  -readability-function-cognitive-complexity allow big functions
+
 Checks: >
   -*,
   boost-*,
@@ -33,7 +35,8 @@ Checks: >
   -readability-identifier-length,
   -cppcoreguidelines-avoid-magic-numbers,
   -readability-magic-numbers,
-  -bugprone-easily-swappable-parameters
+  -bugprone-easily-swappable-parameters,
+  -readability-function-cognitive-complexity
   
 # Turn all the warnings from the checks above into errors.
 WarningsAsErrors: "*"

From 5aa731a558bbdaf74d396c279ca6bb367a7d5ad5 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 3 Oct 2024 14:39:02 +0200
Subject: [PATCH 021/167] use enum class for constants

---
 include/firestarter/Constants.hpp             | 29 +++++++------
 include/firestarter/DumpRegisterStruct.hpp    | 10 +++--
 .../Environment/Payload/Payload.hpp           |  7 ++--
 .../Environment/X86/Payload/X86Payload.hpp    |  9 ++--
 include/firestarter/Firestarter.hpp           | 10 +++--
 include/firestarter/LoadWorkerData.hpp        |  8 ++--
 .../Optimizer/Problem/CLIArgumentProblem.hpp  |  2 +-
 src/firestarter/DumpRegisterWorker.cpp        |  9 ++--
 .../Environment/X86/Payload/AVX512Payload.cpp |  2 +-
 .../Environment/X86/Payload/AVXPayload.cpp    |  5 ++-
 .../Environment/X86/Payload/FMA4Payload.cpp   |  5 ++-
 .../Environment/X86/Payload/FMAPayload.cpp    |  5 ++-
 .../Environment/X86/Payload/SSE2Payload.cpp   |  6 ++-
 .../Environment/X86/Payload/X86Payload.cpp    | 12 +++---
 .../Environment/X86/Payload/ZENFMAPayload.cpp |  5 ++-
 src/firestarter/Firestarter.cpp               | 12 +++---
 src/firestarter/LoadWorker.cpp                | 41 +++++++++----------
 src/firestarter/WatchdogWorker.cpp            |  8 ++--
 18 files changed, 102 insertions(+), 83 deletions(-)

diff --git a/include/firestarter/Constants.hpp b/include/firestarter/Constants.hpp
index 419d8b6a..32178f9a 100644
--- a/include/firestarter/Constants.hpp
+++ b/include/firestarter/Constants.hpp
@@ -21,16 +21,21 @@
 
 #pragma once
 
-#define THREAD_WAIT 1
-#define THREAD_WORK 2
-#define THREAD_INIT 3
-#define THREAD_STOP 4
-#define THREAD_SWITCH 5
-#define THREAD_INIT_FAILURE 0xffffffff
+#include <cstdint>
 
-/* DO NOT CHANGE! the asm load-loop tests if load-variable is == 0 */
-#define LOAD_LOW 0
-/* DO NOT CHANGE! the asm load-loop continues until the load-variable is != 1 */
-#define LOAD_HIGH 1
-#define LOAD_STOP 2
-#define LOAD_SWITCH 4
+using CacheLineType = uint64_t;
+
+// We want the type to be the size of a cache line. Disable warnings for bigger enum size than needed.
+// NOLINTBEGIN(performance-enum-size)
+
+enum class LoadThreadState : CacheLineType { ThreadWait = 1, ThreadWork = 2, ThreadInit = 3, ThreadSwitch = 4 };
+
+enum class LoadThreadWorkType : CacheLineType {
+  /* DO NOT CHANGE! the asm load-loop tests if load-variable is == 0 */
+  LoadLow = 0,
+  /* DO NOT CHANGE! the asm load-loop continues until the load-variable is != 1 */
+  LoadHigh = 1,
+  LoadStop = 2,
+  LoadSwitch = 4
+};
+// NOLINTEND(performance-enum-size)
\ No newline at end of file
diff --git a/include/firestarter/DumpRegisterStruct.hpp b/include/firestarter/DumpRegisterStruct.hpp
index d5f162d3..6d8972a4 100644
--- a/include/firestarter/DumpRegisterStruct.hpp
+++ b/include/firestarter/DumpRegisterStruct.hpp
@@ -21,12 +21,16 @@
 
 #pragma once
 
-#include <cstdint>
+#include "firestarter/Constants.hpp"
+
 namespace firestarter {
 
 /* DO NOT CHANGE! the asm load-loop tests if it should dump the current register
  * content */
-enum DumpVariable : uint64_t { Start = 0, Wait = 1 };
+// NOLINTBEGIN(performance-enum-size)
+// Define the variable with the size of a cache line
+enum class DumpVariable : CacheLineType { Start = 0, Wait = 1 };
+// NOLINTEND(performance-enum-size)
 
 #define REGISTER_MAX_NUM 32
 
@@ -34,7 +38,7 @@ struct DumpRegisterStruct {
   // REGISTER_MAX_NUM cachelines
   volatile double RegisterValues[REGISTER_MAX_NUM * 8];
   // pad to use a whole cacheline
-  volatile uint64_t Padding[7];
+  volatile CacheLineType Padding[7];
   volatile DumpVariable DumpVar;
 };
 
diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index 9c37bdfc..12fad147 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -21,6 +21,7 @@
 
 #pragma once
 
+#include "firestarter/Constants.hpp"
 #include <list>
 #include <memory>
 #include <string>
@@ -90,7 +91,7 @@ class Payload {
 
   [[nodiscard]] virtual auto isAvailable() const -> bool = 0;
 
-  virtual void lowLoadFunction(volatile uint64_t* AddrHigh, uint64_t Period) = 0;
+  virtual void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, uint64_t Period) = 0;
 
   [[nodiscard]] virtual auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
                                             unsigned InstructionCacheSize,
@@ -99,8 +100,8 @@ class Payload {
                                             bool ErrorDetection) -> int = 0;
   [[nodiscard]] virtual auto getAvailableInstructions() const -> std::list<std::string> = 0;
   virtual void init(uint64_t* MemoryAddr, uint64_t BufferSize) = 0;
-  [[nodiscard]] virtual auto highLoadFunction(uint64_t* AddrMem, volatile uint64_t* AddrHigh, uint64_t Iterations)
-      -> uint64_t = 0;
+  [[nodiscard]] virtual auto highLoadFunction(uint64_t* AddrMem, volatile LoadThreadWorkType& LoadVar,
+                                              uint64_t Iterations) -> uint64_t = 0;
 
   [[nodiscard]] virtual auto clone() const -> std::unique_ptr<Payload> = 0;
 };
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index 33839135..2c1b0aa5 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -46,7 +46,7 @@ class X86Payload : public environment::payload::Payload {
   //  asmjit::CodeHolder code;
   asmjit::JitRuntime Rt;
   // typedef int (*LoadFunction)(firestarter::ThreadData *);
-  using LoadFunctionType = uint64_t (*)(uint64_t*, volatile uint64_t*, uint64_t);
+  using LoadFunctionType = uint64_t (*)(uint64_t*, volatile LoadThreadWorkType*, uint64_t);
   LoadFunctionType LoadFunction = nullptr;
 
   [[nodiscard]] auto supportedFeatures() const -> asmjit::CpuFeatures const& { return this->SupportedFeatures; }
@@ -362,7 +362,7 @@ class X86Payload : public environment::payload::Payload {
       Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 32), asmjit::Imm(1));
 
       // stop the execution after some time
-      Cb.mov(asmjit::x86::ptr_64(AddrHighReg), asmjit::Imm(LOAD_STOP));
+      Cb.mov(asmjit::x86::ptr_64(AddrHighReg), asmjit::Imm(LoadThreadWorkType::LoadStop));
       Cb.mfence();
 
       Cb.bind(L7);
@@ -446,9 +446,10 @@ class X86Payload : public environment::payload::Payload {
 #pragma clang diagnostic pop
 #endif
   // use cpuid and usleep as low load
-  void lowLoadFunction(volatile uint64_t* AddrHigh, uint64_t Period) override;
+  void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, uint64_t Period) override;
 
-  auto highLoadFunction(uint64_t* AddrMem, volatile uint64_t* AddrHigh, uint64_t Iterations) -> uint64_t override;
+  auto highLoadFunction(uint64_t* AddrMem, volatile LoadThreadWorkType& LoadVar, uint64_t Iterations)
+      -> uint64_t override;
 };
 
 } // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 6e9ad166..18f353ff 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -62,6 +62,8 @@ namespace firestarter {
 
 class Firestarter {
 public:
+  Firestarter() = delete;
+
   Firestarter(int Argc, const char** Argv, std::chrono::seconds const& Timeout, unsigned LoadPercent,
               std::chrono::microseconds const& Period, unsigned RequestedNumThreads, std::string const& CpuBind,
               bool PrintFunctionSummary, unsigned FunctionId, bool ListInstructionGroups,
@@ -137,7 +139,7 @@ class Firestarter {
   void printThreadErrorReport();
   void printPerformanceReport();
 
-  void signalWork() { signalLoadWorkers(THREAD_WORK); };
+  void signalWork() { signalLoadWorkers(LoadThreadState::ThreadWork); };
 
   // WatchdogWorker.cpp
   static auto watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load,
@@ -150,7 +152,7 @@ class Firestarter {
 #endif
 
   // LoadThreadWorker.cpp
-  void signalLoadWorkers(int Comm);
+  void signalLoadWorkers(LoadThreadState State);
   static void loadThreadWorker(std::shared_ptr<LoadWorkerData> Td);
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
@@ -158,7 +160,7 @@ class Firestarter {
   static void dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Data);
 #endif
 
-  static void setLoad(uint64_t Value);
+  static void setLoad(LoadThreadWorkType Value);
 
   static void sigalrmHandler(int Signum);
   static void sigtermHandler(int Signum);
@@ -169,7 +171,7 @@ class Firestarter {
   inline static std::mutex WatchdogTerminateMutex;
 
   // variable to control the load of the threads
-  inline static volatile uint64_t LoadVar = LOAD_LOW;
+  inline static volatile LoadThreadWorkType LoadVar = LoadThreadWorkType::LoadLow;
 
   std::vector<std::pair<std::thread, std::shared_ptr<LoadWorkerData>>> LoadThreads;
 
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index 6aa5b40f..7ca5a48f 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -50,9 +50,9 @@ namespace firestarter {
 
 class LoadWorkerData {
 public:
-  LoadWorkerData(int Id, environment::Environment& Environment, volatile uint64_t* LoadVar, uint64_t Period,
+  LoadWorkerData(int Id, environment::Environment& Environment, volatile LoadThreadWorkType& LoadVar, uint64_t Period,
                  bool DumpRegisters, bool ErrorDetection)
-      : AddrHigh(LoadVar)
+      : LoadVar(LoadVar)
       , Period(Period)
       , DumpRegisters(DumpRegisters)
       , ErrorDetection(ErrorDetection)
@@ -89,12 +89,12 @@ class LoadWorkerData {
     return reinterpret_cast<ErrorDetectionStruct*>(AddrMem - AddrOffset);
   }
 
-  int Comm = THREAD_WAIT;
+  LoadThreadState State = LoadThreadState::ThreadWait;
   bool Ack = false;
   std::mutex Mutex;
   uint64_t* AddrMem = nullptr;
   uint64_t AddrOffset = 0;
-  volatile uint64_t* AddrHigh;
+  volatile LoadThreadWorkType& LoadVar;
   uint64_t BuffersizeMem{};
   uint64_t Iterations = 0;
   // save the last iteration count when switching payloads
diff --git a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
index 9d3c4ed7..64373716 100644
--- a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
+++ b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
@@ -38,7 +38,7 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem {
                      std::vector<std::string> const& Metrics, std::chrono::seconds Timeout,
                      std::chrono::milliseconds StartDelta, std::chrono::milliseconds StopDelta,
                      std::vector<std::string> const& InstructionGroups)
-      : ChangePayloadFunction(ChangePayloadFunction)
+      : ChangePayloadFunction(std::move(ChangePayloadFunction))
       , MeasurementWorker(MeasurementWorker)
       , Metrics(Metrics)
       , Timeout(Timeout)
diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp
index 5dee113e..511f89e5 100644
--- a/src/firestarter/DumpRegisterWorker.cpp
+++ b/src/firestarter/DumpRegisterWorker.cpp
@@ -19,6 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Constants.hpp"
 #ifdef FIRESTARTER_DEBUG_FEATURES
 
 #include <firestarter/Firestarter.hpp>
@@ -77,7 +78,7 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Dat
 
   auto* DumpRegisterStruct = reinterpret_cast<struct DumpRegisterStruct*>(Data->LoadWorkerDataPtr->AddrMem - Offset);
 
-  auto* DumpVar = reinterpret_cast<volatile uint64_t*>(&DumpRegisterStruct->DumpVar);
+  auto& DumpVar = DumpRegisterStruct->DumpVar;
   // memory of simd variables is before the padding
   auto* DumpMemAddr = static_cast<volatile uint64_t*>(DumpRegisterStruct->Padding) -
                       (static_cast<size_t>(RegisterCount * RegisterSize));
@@ -124,11 +125,11 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Dat
 
   // continue until stop and dump the registers every data->dumpTimeDelta
   // seconds
-  for (; *Data->LoadWorkerDataPtr->AddrHigh != LOAD_STOP;) {
+  for (; Data->LoadWorkerDataPtr->LoadVar != LoadThreadWorkType::LoadStop;) {
     // signal the thread to dump its largest SIMD registers
-    *DumpVar = DumpVariable::Start;
+    DumpVar = DumpVariable::Start;
     __asm__ __volatile__("mfence;");
-    while (*DumpVar == DumpVariable::Start) {
+    while (DumpVar == DumpVariable::Start) {
       std::this_thread::sleep_for(std::chrono::milliseconds(10));
     }
 
diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index 3f866f70..730b0e3b 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -361,7 +361,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     emitErrorDetectionCode<decltype(IterReg), Zmm>(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2);
   }
 
-  Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH));
+  Cb.test(ptr_64(AddrHighReg), Imm(LoadThreadWorkType::LoadHigh));
   Cb.jnz(Loop);
 
   Cb.bind(FunctionExit);
diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
index 8ce30a9f..b5a80b4c 100644
--- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
@@ -104,7 +104,8 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   const auto TransRegs = 6;
 
   FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl), Rt.environment());
+  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+            Rt.environment());
 
   FuncFrame Frame;
   Frame.init(Func);
@@ -393,7 +394,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     emitErrorDetectionCode<decltype(IterReg), Ymm>(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2);
   }
 
-  Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH));
+  Cb.test(ptr_64(AddrHighReg), Imm(LoadThreadWorkType::LoadHigh));
   Cb.jnz(Loop);
 
   Cb.bind(FunctionExit);
diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
index 7dc06a3f..25bdd69a 100644
--- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
@@ -108,7 +108,8 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   const auto RamReg = xmm15;
 
   FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl), Rt.environment());
+  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+            Rt.environment());
 
   FuncFrame Frame;
   Frame.init(Func);
@@ -361,7 +362,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
     emitErrorDetectionCode<decltype(IterReg), Ymm>(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2);
   }
 
-  Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH));
+  Cb.test(ptr_64(AddrHighReg), Imm(LoadThreadWorkType::LoadHigh));
   Cb.jnz(Loop);
 
   Cb.bind(FunctionExit);
diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
index 4ecd24ca..c629c302 100644
--- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
@@ -108,7 +108,8 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   const auto RamReg = ymm15;
 
   FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl), Rt.environment());
+  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+            Rt.environment());
 
   FuncFrame Frame;
   Frame.init(Func);
@@ -398,7 +399,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     emitErrorDetectionCode<decltype(IterReg), Ymm>(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2);
   }
 
-  Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH));
+  Cb.test(ptr_64(AddrHighReg), Imm(LoadThreadWorkType::LoadHigh));
   Cb.jnz(Loop);
 
   Cb.bind(FunctionExit);
diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index 202cd423..6c79dce2 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -19,6 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Constants.hpp"
 #include <firestarter/Environment/X86/Payload/SSE2Payload.hpp>
 
 namespace firestarter::environment::x86::payload {
@@ -104,7 +105,8 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   const auto TransRegs = 2;
 
   FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, uint64_t>(CallConvId::kCDecl), Rt.environment());
+  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+            Rt.environment());
 
   FuncFrame Frame;
   Frame.init(Func);
@@ -385,7 +387,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
     emitErrorDetectionCode<decltype(IterReg), Xmm>(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2);
   }
 
-  Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH));
+  Cb.test(ptr_64(AddrHighReg), Imm(LoadThreadWorkType::LoadHigh));
   Cb.jnz(Loop);
 
   Cb.bind(FunctionExit);
diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index d0cedd61..cc44e148 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -19,6 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Constants.hpp"
 #include <cassert>
 #include <chrono>
 #include <thread>
@@ -32,8 +33,8 @@
 
 namespace firestarter::environment::x86::payload {
 
-void X86Payload::lowLoadFunction(volatile uint64_t* AddrHigh, uint64_t Period) {
-  int Nap = Period / 100;
+void X86Payload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, uint64_t Period) {
+  auto Nap = Period / 100;
 
 #ifndef _MSC_VER
   __asm__ __volatile__("mfence;"
@@ -46,7 +47,7 @@ void X86Payload::lowLoadFunction(volatile uint64_t* AddrHigh, uint64_t Period) {
 #endif
 
   // while signal low load
-  while (*AddrHigh == LOAD_LOW) {
+  while (LoadVar == LoadThreadWorkType::LoadLow) {
 #ifndef _MSC_VER
     __asm__ __volatile__("mfence;"
                          "cpuid;" ::
@@ -81,8 +82,9 @@ void X86Payload::init(uint64_t* MemoryAddr, uint64_t BufferSize, double FirstVal
   }
 }
 
-auto X86Payload::highLoadFunction(uint64_t* AddrMem, volatile uint64_t* AddrHigh, uint64_t Iterations) -> uint64_t {
-  return this->LoadFunction(AddrMem, AddrHigh, Iterations);
+auto X86Payload::highLoadFunction(uint64_t* AddrMem, volatile LoadThreadWorkType& AddrHigh, uint64_t Iterations)
+    -> uint64_t {
+  return this->LoadFunction(AddrMem, &AddrHigh, Iterations);
 }
 
 }; // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
index 01c62777..b0371b34 100644
--- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
@@ -105,7 +105,8 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   auto RamReg = ymm15;
 
   FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl), Rt.environment());
+  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+            Rt.environment());
 
   FuncFrame Frame;
   Frame.init(Func);
@@ -350,7 +351,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     emitErrorDetectionCode<decltype(IterReg), Ymm>(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2);
   }
 
-  Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH));
+  Cb.test(ptr_64(AddrHighReg), Imm(LoadThreadWorkType::LoadHigh));
   Cb.jnz(Loop);
 
   Cb.bind(FunctionExit);
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 40e2f690..25cdc374 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -204,11 +204,11 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
           for (auto const& Thread : LoadThreads) {
             auto Td = Thread.second;
 
-            Td->Comm = THREAD_SWITCH;
+            Td->State = LoadThreadState::ThreadSwitch;
             Td->Mutex.unlock();
           }
 
-          LoadVar = LOAD_SWITCH;
+          LoadVar = LoadThreadWorkType::LoadSwitch;
 
           for (auto const& Thread : LoadThreads) {
             auto Td = Thread.second;
@@ -225,7 +225,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
             Td->Mutex.unlock();
           }
 
-          LoadVar = LOAD_HIGH;
+          LoadVar = LoadThreadWorkType::LoadHigh;
 
           signalWork();
 
@@ -241,7 +241,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
 
           for (auto const& Thread : LoadThreads) {
             auto Td = Thread.second;
-            ipcEstimateMetricInsert((double)Td->LastIterations *
+            ipcEstimateMetricInsert(static_cast<double>(Td->LastIterations) *
                                     static_cast<double>(LoadThreads.front().second->config().payload().instructions()) /
                                     static_cast<double>(StopTimestamp - StartTimestamp));
           }
@@ -383,7 +383,7 @@ void Firestarter::mainThread() {
   }
 }
 
-void Firestarter::setLoad(uint64_t Value) {
+void Firestarter::setLoad(LoadThreadWorkType Value) {
   // signal load change to workers
   Firestarter::LoadVar = Value;
 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
@@ -402,7 +402,7 @@ void Firestarter::sigalrmHandler(int Signum) { (void)Signum; }
 void Firestarter::sigtermHandler(int Signum) {
   (void)Signum;
 
-  Firestarter::setLoad(LOAD_STOP);
+  Firestarter::setLoad(LoadThreadWorkType::LoadStop);
   // exit loop
   // used in case of 0 < load < 100
   // or interrupt sleep for timeout
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index 09a68464..f7cfac19 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -19,6 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Constants.hpp"
 #include <algorithm>
 #include <firestarter/ErrorDetectionStruct.hpp>
 #include <firestarter/Firestarter.hpp>
@@ -56,7 +57,7 @@ auto Firestarter::initLoadWorkers(bool LowLoad, uint64_t Period) -> int {
 
   // setup load variable to execute low or high load once the threads switch to
   // work.
-  LoadVar = LowLoad ? LOAD_LOW : LOAD_HIGH;
+  LoadVar = LowLoad ? LoadThreadWorkType::LoadLow : LoadThreadWorkType::LoadHigh;
 
   auto NumThreads = environment().requestedNumThreads();
 
@@ -74,7 +75,7 @@ auto Firestarter::initLoadWorkers(bool LowLoad, uint64_t Period) -> int {
   }
 
   for (uint64_t I = 0; I < NumThreads; I++) {
-    auto Td = std::make_shared<LoadWorkerData>(I, environment(), &LoadVar, Period, DumpRegisters, ErrorDetection);
+    auto Td = std::make_shared<LoadWorkerData>(I, environment(), LoadVar, Period, DumpRegisters, ErrorDetection);
 
     if (ErrorDetection) {
       // distribute pointers for error deteciton. (set threads in a ring)
@@ -103,12 +104,12 @@ auto Firestarter::initLoadWorkers(bool LowLoad, uint64_t Period) -> int {
     LoadThreads.emplace_back(std::move(T), Td);
   }
 
-  signalLoadWorkers(THREAD_INIT);
+  signalLoadWorkers(LoadThreadState::ThreadInit);
 
   return EXIT_SUCCESS;
 }
 
-void Firestarter::signalLoadWorkers(int Comm) {
+void Firestarter::signalLoadWorkers(LoadThreadState State) {
   bool Ack = false;
 
   // start the work
@@ -121,7 +122,7 @@ void Firestarter::signalLoadWorkers(int Comm) {
   for (auto const& Thread : LoadThreads) {
     auto Td = Thread.second;
 
-    Td->Comm = Comm;
+    Td->State = State;
     Td->Mutex.unlock();
   }
 
@@ -241,7 +242,7 @@ void Firestarter::printPerformanceReport() {
 
 void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
 
-  int Old = THREAD_WAIT;
+  auto OldState = LoadThreadState::ThreadWait;
 
 #if defined(linux) || defined(__linux__)
   pthread_setname_np(pthread_self(), "LoadWorker");
@@ -249,11 +250,11 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
 
   for (;;) {
     Td->Mutex.lock();
-    int Comm = Td->Comm;
+    auto CurState = Td->State;
     Td->Mutex.unlock();
 
-    if (Comm != Old) {
-      Old = Comm;
+    if (CurState != OldState) {
+      OldState = CurState;
 
       Td->Mutex.lock();
       Td->Ack = true;
@@ -263,9 +264,9 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
       continue;
     }
 
-    switch (Comm) {
+    switch (CurState) {
     // allocate and initialize memory
-    case THREAD_INIT:
+    case LoadThreadState::ThreadInit:
       // set affinity
       Td->environment().setCpuAffinity(Td->id());
 
@@ -310,7 +311,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
 
       break;
     // perform stress test
-    case THREAD_WORK:
+    case LoadThreadState::ThreadWork:
       // record threads start timestamp
       Td->StartTsc = Td->environment().topology().timestamp();
 
@@ -323,7 +324,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
 #ifdef ENABLE_SCOREP
         SCOREP_USER_REGION_BY_NAME_BEGIN("HIGH", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
-        Td->Iterations = Td->config().payload().highLoadFunction(Td->AddrMem, Td->AddrHigh, Td->Iterations);
+        Td->Iterations = Td->config().payload().highLoadFunction(Td->AddrMem, Td->LoadVar, Td->Iterations);
 
         // call low load function
 #ifdef ENABLE_VTRACING
@@ -334,7 +335,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
         SCOREP_USER_REGION_BY_NAME_END("HIGH");
         SCOREP_USER_REGION_BY_NAME_BEGIN("LOW", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
-        Td->config().payload().lowLoadFunction(Td->AddrHigh, Td->Period);
+        Td->config().payload().lowLoadFunction(Td->LoadVar, Td->Period);
 #ifdef ENABLE_VTRACING
         VT_USER_END("LOW_LOAD_FUNC");
 #endif
@@ -343,20 +344,20 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
 #endif
 
         // terminate if master signals end of run and record stop timestamp
-        if (*Td->AddrHigh == LOAD_STOP) {
+        if (Td->LoadVar == LoadThreadWorkType::LoadStop) {
           Td->StopTsc = Td->environment().topology().timestamp();
 
           return;
         }
 
-        if (*Td->AddrHigh == LOAD_SWITCH) {
+        if (Td->LoadVar == LoadThreadWorkType::LoadSwitch) {
           Td->StopTsc = Td->environment().topology().timestamp();
 
           break;
         }
       }
       break;
-    case THREAD_SWITCH:
+    case LoadThreadState::ThreadSwitch:
       // compile payload
       Td->config().payload().compilePayload(Td->config().payloadSettings(), Td->config().instructionCacheSize(),
                                             Td->config().dataCacheBufferSize(), Td->config().ramBufferSize(),
@@ -372,12 +373,8 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
       Td->LastStopTsc = Td->StopTsc;
       Td->Iterations = 0;
       break;
-    case THREAD_WAIT:
+    case LoadThreadState::ThreadWait:
       break;
-    case THREAD_STOP:
-    default:
-      firestarter::log::debug() << "ERR" << '\n';
-      return;
     }
   }
 }
diff --git a/src/firestarter/WatchdogWorker.cpp b/src/firestarter/WatchdogWorker.cpp
index 54f6af28..f45a7f7d 100644
--- a/src/firestarter/WatchdogWorker.cpp
+++ b/src/firestarter/WatchdogWorker.cpp
@@ -67,7 +67,7 @@ auto Firestarter::watchdogWorker(std::chrono::microseconds Period, std::chrono::
       nsec IdleReduction = Advance - LoadReduction;
 
       // signal high load level
-      setLoad(LOAD_HIGH);
+      setLoad(LoadThreadWorkType::LoadHigh);
 
       // calculate values for nanosleep
       nsec LoadNsec = Load - LoadReduction;
@@ -96,7 +96,7 @@ auto Firestarter::watchdogWorker(std::chrono::microseconds Period, std::chrono::
 #endif
 
       // signal low load
-      setLoad(LOAD_LOW);
+      setLoad(LoadThreadWorkType::LoadLow);
 
       // calculate values for nanosleep
       nsec IdleNsec = Idle - IdleReduction;
@@ -131,7 +131,7 @@ auto Firestarter::watchdogWorker(std::chrono::microseconds Period, std::chrono::
       {
         std::lock_guard<std::mutex> Lk(WatchdogTerminateMutex);
         if (WatchdogTerminate || (Timeout > sec::zero() && (Time > Timeout))) {
-          setLoad(LOAD_STOP);
+          setLoad(LoadThreadWorkType::LoadStop);
 
           return EXIT_SUCCESS;
         }
@@ -148,7 +148,7 @@ auto Firestarter::watchdogWorker(std::chrono::microseconds Period, std::chrono::
       Firestarter::WatchdogTerminateAlert.wait_for(Lk, Timeout, []() { return WatchdogTerminate; });
     }
 
-    setLoad(LOAD_STOP);
+    setLoad(LoadThreadWorkType::LoadStop);
 
     return EXIT_SUCCESS;
   }

From 334b872db0786f6a95e2af7d984c252894464605 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 3 Oct 2024 14:39:28 +0200
Subject: [PATCH 022/167] clang-tidy: disable checks in lib folder

---
 lib/.clang-tidy | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 lib/.clang-tidy

diff --git a/lib/.clang-tidy b/lib/.clang-tidy
new file mode 100644
index 00000000..a49ef83c
--- /dev/null
+++ b/lib/.clang-tidy
@@ -0,0 +1,5 @@
+---
+# Disable all clang-tidy checks for the lib folder
+
+Checks: >
+  -*,

From 6b388f64e9e4bcb77df0c00aa1d106aa9491668f Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 4 Oct 2024 18:30:40 +0200
Subject: [PATCH 023/167] refactor cpp side memory management of the high-load
 function. remove lots of reinterpret_cast and pointer magic. refactor dump
 register code.

---
 include/firestarter/Constants.hpp             |   8 +-
 include/firestarter/DumpRegisterStruct.hpp    |  13 +-
 .../Environment/Payload/Payload.hpp           |  10 +-
 .../Environment/X86/Payload/AVX512Payload.hpp |   2 +-
 .../Environment/X86/Payload/AVXPayload.hpp    |   2 +-
 .../Environment/X86/Payload/FMA4Payload.hpp   |   2 +-
 .../Environment/X86/Payload/FMAPayload.hpp    |   2 +-
 .../Environment/X86/Payload/SSE2Payload.hpp   |   2 +-
 .../Environment/X86/Payload/X86Payload.hpp    | 168 ++++++++++++------
 .../Environment/X86/Payload/ZENFMAPayload.hpp |   2 +-
 include/firestarter/ErrorDetectionStruct.hpp  |  29 ++-
 include/firestarter/LoadWorkerData.hpp        |  66 +++++--
 src/firestarter/DumpRegisterWorker.cpp        |  42 ++---
 .../Environment/X86/Payload/AVX512Payload.cpp |  20 +--
 .../Environment/X86/Payload/AVXPayload.cpp    |  19 +-
 .../Environment/X86/Payload/FMA4Payload.cpp   |  19 +-
 .../Environment/X86/Payload/FMAPayload.cpp    |  19 +-
 .../Environment/X86/Payload/SSE2Payload.cpp   |  20 +--
 .../Environment/X86/Payload/X86Payload.cpp    |  10 +-
 .../Environment/X86/Payload/ZENFMAPayload.cpp |  19 +-
 src/firestarter/LoadWorker.cpp                |  36 ++--
 21 files changed, 257 insertions(+), 253 deletions(-)

diff --git a/include/firestarter/Constants.hpp b/include/firestarter/Constants.hpp
index 32178f9a..958fd035 100644
--- a/include/firestarter/Constants.hpp
+++ b/include/firestarter/Constants.hpp
@@ -23,14 +23,14 @@
 
 #include <cstdint>
 
-using CacheLineType = uint64_t;
+using EightBytesType = uint64_t;
 
-// We want the type to be the size of a cache line. Disable warnings for bigger enum size than needed.
+// We want the enums below to have a size of 8B. Disable the warning about enums being larger than needed.
 // NOLINTBEGIN(performance-enum-size)
 
-enum class LoadThreadState : CacheLineType { ThreadWait = 1, ThreadWork = 2, ThreadInit = 3, ThreadSwitch = 4 };
+enum class LoadThreadState : EightBytesType { ThreadWait = 1, ThreadWork = 2, ThreadInit = 3, ThreadSwitch = 4 };
 
-enum class LoadThreadWorkType : CacheLineType {
+enum class LoadThreadWorkType : EightBytesType {
   /* DO NOT CHANGE! the asm load-loop tests if load-variable is == 0 */
   LoadLow = 0,
   /* DO NOT CHANGE! the asm load-loop continues until the load-variable is != 1 */
diff --git a/include/firestarter/DumpRegisterStruct.hpp b/include/firestarter/DumpRegisterStruct.hpp
index 6d8972a4..bddc538a 100644
--- a/include/firestarter/DumpRegisterStruct.hpp
+++ b/include/firestarter/DumpRegisterStruct.hpp
@@ -29,16 +29,19 @@ namespace firestarter {
  * content */
 // NOLINTBEGIN(performance-enum-size)
 // Define the variable with the size of a cache line
-enum class DumpVariable : CacheLineType { Start = 0, Wait = 1 };
+enum class DumpVariable : EightBytesType { Start = 0, Wait = 1 };
 // NOLINTEND(performance-enum-size)
 
-#define REGISTER_MAX_NUM 32
+/// The maximal number of SIMD registers. This is currently 32 for zmm registers.
+constexpr const auto RegisterMaxNum = 32;
+/// The maximal number of doubles in SIMD registers. This is currently 8 for zmm registers.
+constexpr const auto RegisterMaxSize = 8;
 
+// RegisterValues occupies RegisterMaxNum cachelines; Padding and DumpVar together fill one more.
 struct DumpRegisterStruct {
-  // REGISTER_MAX_NUM cachelines
-  volatile double RegisterValues[REGISTER_MAX_NUM * 8];
+  volatile double RegisterValues[RegisterMaxNum * RegisterMaxSize];
   // pad to use a whole cacheline
-  volatile CacheLineType Padding[7];
+  volatile EightBytesType Padding[7];
   volatile DumpVariable DumpVar;
 };
 
diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index 12fad147..90e6dd6f 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -41,9 +41,9 @@ class Payload {
   unsigned Bytes = 0;
   // number of instructions in load loop
   unsigned Instructions = 0;
-  // size of used simd registers in bytes
+  /// The size of the SIMD registers in units of doubles (8B)
   unsigned RegisterSize = 0;
-  // number of used simd registers
+  /// The number of SIMD registers used by the payload
   unsigned RegisterCount = 0;
 
   [[nodiscard]] static auto generateSequence(const std::vector<std::pair<std::string, unsigned>>& Proportion)
@@ -86,7 +86,9 @@ class Payload {
   [[nodiscard]] auto flops() const -> unsigned { return Flops; }
   [[nodiscard]] auto bytes() const -> unsigned { return Bytes; }
   [[nodiscard]] auto instructions() const -> unsigned { return Instructions; }
+  /// The size of the SIMD registers in units of doubles (8B)
   [[nodiscard]] auto registerSize() const -> unsigned { return RegisterSize; }
+  /// The number of SIMD registers used by the payload
   [[nodiscard]] auto registerCount() const -> unsigned { return RegisterCount; }
 
   [[nodiscard]] virtual auto isAvailable() const -> bool = 0;
@@ -99,8 +101,8 @@ class Payload {
                                             unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
                                             bool ErrorDetection) -> int = 0;
   [[nodiscard]] virtual auto getAvailableInstructions() const -> std::list<std::string> = 0;
-  virtual void init(uint64_t* MemoryAddr, uint64_t BufferSize) = 0;
-  [[nodiscard]] virtual auto highLoadFunction(uint64_t* AddrMem, volatile LoadThreadWorkType& LoadVar,
+  virtual void init(double* MemoryAddr, uint64_t BufferSize) = 0;
+  [[nodiscard]] virtual auto highLoadFunction(double* AddrMem, volatile LoadThreadWorkType& LoadVar,
                                               uint64_t Iterations) -> uint64_t = 0;
 
   [[nodiscard]] virtual auto clone() const -> std::unique_ptr<Payload> = 0;
diff --git a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
index 9478353e..fef49a12 100644
--- a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
@@ -35,7 +35,7 @@ class AVX512Payload final : public X86Payload {
                       std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
                       unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-  void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
+  void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
     return std::make_unique<AVX512Payload>(this->supportedFeatures());
diff --git a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
index ca6cac6d..c249fd36 100644
--- a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
@@ -35,7 +35,7 @@ class AVXPayload final : public X86Payload {
                       std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
                       unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-  void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
+  void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
     return std::make_unique<AVXPayload>(this->supportedFeatures());
diff --git a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
index ccc43d00..1dee4a63 100644
--- a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
@@ -37,7 +37,7 @@ class FMA4Payload final : public X86Payload {
                       std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
                       unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-  void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
+  void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
     return std::make_unique<FMA4Payload>(this->supportedFeatures());
diff --git a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
index a8443b97..f4181dd8 100644
--- a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
@@ -35,7 +35,7 @@ class FMAPayload final : public X86Payload {
                       std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
                       unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-  void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
+  void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
     return std::make_unique<FMAPayload>(this->supportedFeatures());
diff --git a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
index 5e363432..13df4abf 100644
--- a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
@@ -35,7 +35,7 @@ class SSE2Payload final : public X86Payload {
                       std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
                       unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-  void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
+  void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
     return std::make_unique<SSE2Payload>(this->supportedFeatures());
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index 2c1b0aa5..1b6c193d 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -23,7 +23,8 @@
 
 #include "../../../Constants.hpp"          // IWYU pragma: keep
 #include "../../../DumpRegisterStruct.hpp" // IWYU pragma: keep
-#include "../../../Logging/Log.hpp"        // IWYU pragma: keep
+#include "../../../LoadWorkerData.hpp"
+#include "../../../Logging/Log.hpp" // IWYU pragma: keep
 #include "../../Payload/Payload.hpp"
 #include <asmjit/x86.h>
 #include <cassert>
@@ -46,11 +47,45 @@ class X86Payload : public environment::payload::Payload {
   //  asmjit::CodeHolder code;
   asmjit::JitRuntime Rt;
   // typedef int (*LoadFunction)(firestarter::ThreadData *);
-  using LoadFunctionType = uint64_t (*)(uint64_t*, volatile LoadThreadWorkType*, uint64_t);
+  using LoadFunctionType = uint64_t (*)(double*, volatile LoadThreadWorkType*, uint64_t);
   LoadFunctionType LoadFunction = nullptr;
 
   [[nodiscard]] auto supportedFeatures() const -> asmjit::CpuFeatures const& { return this->SupportedFeatures; }
 
+  /// Emit the code that dumps the vector registers (xmm, ymm or zmm) into the DumpRegisterStruct of LoadWorkerMemory.
+  /// \tparam Vec The asmjit type of the vector registers that are dumped.
+  /// \arg Cb The asmjit code builder that is used to emit the assembler code.
+  /// \arg PointerReg The register holding the address of the memory used by the high-load routine, i.e. the address
+  /// at offset LoadWorkerMemory::getMemoryOffset() of LoadWorkerMemory. The offsets used here are relative to it.
+  /// \arg VecPtr The function that creates the memory operand for a vector register from a base register and offset.
+  template <class Vec>
+  void emitDumpRegisterCode(asmjit::x86::Builder& Cb, const asmjit::x86::Gpq& PointerReg,
+                            asmjit::x86::Mem (*VecPtr)(const asmjit::x86::Gp&, int32_t)) {
+    constexpr const auto DumpRegisterStructRegisterValuesTopOffset =
+        -static_cast<int32_t>(LoadWorkerMemory::getMemoryOffset()) +
+        static_cast<int32_t>(offsetof(LoadWorkerMemory, ExtraVars.Drs.Padding));
+    constexpr const auto DumpRegisterStructDumpVariableOffset =
+        -static_cast<int32_t>(LoadWorkerMemory::getMemoryOffset()) +
+        static_cast<int32_t>(offsetof(LoadWorkerMemory, ExtraVars.Drs.DumpVar));
+
+    auto SkipRegistersDump = Cb.newLabel();
+    // Skip the dump if the bitwise AND of the dump variable and DumpVariable::Wait is non-zero.
+    Cb.test(ptr_64(PointerReg, DumpRegisterStructDumpVariableOffset), asmjit::Imm(firestarter::DumpVariable::Wait));
+    Cb.jnz(SkipRegistersDump);
+
+    // Dump the vector registers; register I is stored registerSize() * 8 * (I + 1) bytes below the Padding member.
+    for (unsigned I = 0; I < registerCount(); I++) {
+      Cb.vmovapd(VecPtr(PointerReg,
+                        DumpRegisterStructRegisterValuesTopOffset - static_cast<int32_t>(registerSize() * 8 * (I + 1))),
+                 Vec(I));
+    }
+
+    // Set the read flag: write DumpVariable::Wait to the dump variable.
+    Cb.mov(ptr_64(PointerReg, DumpRegisterStructDumpVariableOffset), asmjit::Imm(firestarter::DumpVariable::Wait));
+
+    Cb.bind(SkipRegistersDump);
+  }
+
   // add MM regs to dirty regs
   // zmm31 is used for backup if VectorReg is of type asmjit::x86::Zmm
   template <class MaybeConstIterRegT, class MaybeConstVectorRegT>
@@ -73,42 +108,42 @@ class X86Payload : public environment::payload::Payload {
       assert((IterReg == asmjit::x86::mm0, "iter_reg must be mm0"));
     }
 
-    assert((IterReg != TempReg, "iter_reg must be != temp_reg"));
-    assert((TempReg != TempReg2, "temp_reg must be != temp_reg2"));
-    assert((TempReg != AddrHighReg, "temp_reg must be != addrHigh_reg"));
-    assert((TempReg != PointerReg, "temp_reg must be != pointer_reg"));
-
-    assert((IterReg != asmjit::x86::r8, "iter_reg must be != r8"));
-    assert((IterReg != asmjit::x86::r9, "iter_reg must be != r9"));
-    assert((IterReg != asmjit::x86::rax, "iter_reg must be != rax"));
-    assert((IterReg != asmjit::x86::rbx, "iter_reg must be != rbx"));
-    assert((IterReg != asmjit::x86::rcx, "iter_reg must be != rcx"));
-    assert((IterReg != asmjit::x86::rdx, "iter_reg must be != rdx"));
-
-    assert((TempReg != asmjit::x86::r8, "temp_reg must be != r8"));
-    assert((TempReg != asmjit::x86::r9, "temp_reg must be != r9"));
-    assert((TempReg != asmjit::x86::rax, "temp_reg must be != rax"));
-    assert((TempReg != asmjit::x86::rbx, "temp_reg must be != rbx"));
-    assert((TempReg != asmjit::x86::rcx, "temp_reg must be != rcx"));
-    assert((TempReg != asmjit::x86::rdx, "temp_reg must be != rdx"));
-
-    assert((TempReg2 != asmjit::x86::r8, "temp_reg2 must be != r8"));
-    assert((TempReg2 != asmjit::x86::r9, "temp_reg2 must be != r9"));
-    assert((TempReg2 != asmjit::x86::rax, "temp_reg2 must be != rax"));
-    assert((TempReg2 != asmjit::x86::rbx, "temp_reg2 must be != rbx"));
-    assert((TempReg2 != asmjit::x86::rcx, "temp_reg2 must be != rcx"));
-    assert((TempReg2 != asmjit::x86::rdx, "temp_reg2 must be != rdx"));
-
-    assert((AddrHighReg != asmjit::x86::r8, "addrHigh_reg must be != r8"));
-    assert((AddrHighReg != asmjit::x86::r9, "addrHigh_reg must be != r9"));
-    assert((AddrHighReg != asmjit::x86::rax, "addrHigh_reg must be != rax"));
-    assert((AddrHighReg != asmjit::x86::rbx, "addrHigh_reg must be != rbx"));
-    assert((AddrHighReg != asmjit::x86::rcx, "addrHigh_reg must be != rcx"));
-    assert((AddrHighReg != asmjit::x86::rdx, "addrHigh_reg must be != rdx"));
+    assert(IterReg != TempReg && "iter_reg must be != temp_reg");
+    assert(TempReg != TempReg2 && "temp_reg must be != temp_reg2");
+    assert(TempReg != AddrHighReg && "temp_reg must be != addrHigh_reg");
+    assert(TempReg != PointerReg && "temp_reg must be != pointer_reg");
+
+    assert(IterReg != asmjit::x86::r8 && "iter_reg must be != r8");
+    assert(IterReg != asmjit::x86::r9 && "iter_reg must be != r9");
+    assert(IterReg != asmjit::x86::rax && "iter_reg must be != rax");
+    assert(IterReg != asmjit::x86::rbx && "iter_reg must be != rbx");
+    assert(IterReg != asmjit::x86::rcx && "iter_reg must be != rcx");
+    assert(IterReg != asmjit::x86::rdx && "iter_reg must be != rdx");
+
+    assert(TempReg != asmjit::x86::r8 && "temp_reg must be != r8");
+    assert(TempReg != asmjit::x86::r9 && "temp_reg must be != r9");
+    assert(TempReg != asmjit::x86::rax && "temp_reg must be != rax");
+    assert(TempReg != asmjit::x86::rbx && "temp_reg must be != rbx");
+    assert(TempReg != asmjit::x86::rcx && "temp_reg must be != rcx");
+    assert(TempReg != asmjit::x86::rdx && "temp_reg must be != rdx");
+
+    assert(TempReg2 != asmjit::x86::r8 && "temp_reg2 must be != r8");
+    assert(TempReg2 != asmjit::x86::r9 && "temp_reg2 must be != r9");
+    assert(TempReg2 != asmjit::x86::rax && "temp_reg2 must be != rax");
+    assert(TempReg2 != asmjit::x86::rbx && "temp_reg2 must be != rbx");
+    assert(TempReg2 != asmjit::x86::rcx && "temp_reg2 must be != rcx");
+    assert(TempReg2 != asmjit::x86::rdx && "temp_reg2 must be != rdx");
+
+    assert(AddrHighReg != asmjit::x86::r8 && "addrHigh_reg must be != r8");
+    assert(AddrHighReg != asmjit::x86::r9 && "addrHigh_reg must be != r9");
+    assert(AddrHighReg != asmjit::x86::rax && "addrHigh_reg must be != rax");
+    assert(AddrHighReg != asmjit::x86::rbx && "addrHigh_reg must be != rbx");
+    assert(AddrHighReg != asmjit::x86::rcx && "addrHigh_reg must be != rcx");
+    assert(AddrHighReg != asmjit::x86::rdx && "addrHigh_reg must be != rdx");
 
     auto SkipErrorDetection = Cb.newLabel();
 
-    if constexpr (std::is_same<asmjit::x86::Mm, IterRegT>::value) {
+    if constexpr (std::is_same_v<asmjit::x86::Mm, IterRegT>) {
       Cb.movq(TempReg, IterReg);
     } else {
       Cb.mov(TempReg, IterReg);
@@ -273,16 +308,28 @@ class X86Payload : public environment::payload::Payload {
     // This sychronization and communication works even if the threads run at
     // different (changing) speed, with just one "lock cmpxchg16b" Brought to you
     // by a few hours of headache for two people.
-    auto Communication = [&](auto Offset) {
+    auto Communication = [&](const int32_t ErrorDetetectionStructOffset) {
+      const auto CommunicationOffset =
+          ErrorDetetectionStructOffset + static_cast<int32_t>(offsetof(ErrorDetectionStruct::OneSide, Communication));
+      const auto Local0Offset =
+          ErrorDetetectionStructOffset + static_cast<int32_t>(offsetof(ErrorDetectionStruct::OneSide, Locals[0]));
+      const auto Local1Offset =
+          ErrorDetetectionStructOffset + static_cast<int32_t>(offsetof(ErrorDetectionStruct::OneSide, Locals[1]));
+      const auto Local2Offset =
+          ErrorDetetectionStructOffset + static_cast<int32_t>(offsetof(ErrorDetectionStruct::OneSide, Locals[2]));
+      const auto Local3Offset =
+          ErrorDetetectionStructOffset + static_cast<int32_t>(offsetof(ErrorDetectionStruct::OneSide, Locals[3]));
+      const auto ErrorOffset =
+          ErrorDetetectionStructOffset + static_cast<int32_t>(offsetof(ErrorDetectionStruct::OneSide, Error));
+
       // communication
-      Cb.mov(asmjit::x86::r8, asmjit::x86::ptr_64(TempReg2, Offset));
+      Cb.mov(asmjit::x86::r8, asmjit::x86::ptr_64(TempReg2, CommunicationOffset));
 
       // temp data
       Cb.mov(asmjit::x86::r9, TempReg2);
-      Cb.add(asmjit::x86::r9, asmjit::Imm(Offset + 8));
 
-      Cb.mov(asmjit::x86::rdx, asmjit::x86::ptr_64(asmjit::x86::r9, 0));
-      Cb.mov(asmjit::x86::rax, asmjit::x86::ptr_64(asmjit::x86::r9, 8));
+      Cb.mov(asmjit::x86::rdx, asmjit::x86::ptr_64(asmjit::x86::r9, Local0Offset));
+      Cb.mov(asmjit::x86::rax, asmjit::x86::ptr_64(asmjit::x86::r9, Local1Offset));
 
       auto L0 = Cb.newLabel();
       Cb.bind(L0);
@@ -293,10 +340,10 @@ class X86Payload : public environment::payload::Payload {
       auto L1 = Cb.newLabel();
       Cb.jnz(L1);
 
-      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx);
-      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx);
-      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
-      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local0Offset), asmjit::x86::rcx);
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local1Offset), asmjit::x86::rbx);
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local2Offset), asmjit::Imm(0));
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local3Offset), asmjit::Imm(0));
 
       Cb.mov(asmjit::x86::rax, asmjit::Imm(2));
 
@@ -310,8 +357,8 @@ class X86Payload : public environment::payload::Payload {
       auto L2 = Cb.newLabel();
       Cb.jle(L2);
 
-      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx);
-      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx);
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local0Offset), asmjit::x86::rcx);
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local1Offset), asmjit::x86::rbx);
 
       Cb.jmp(L0);
 
@@ -319,13 +366,13 @@ class X86Payload : public environment::payload::Payload {
 
       auto L3 = Cb.newLabel();
 
-      Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
+      Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, Local2Offset), asmjit::Imm(0));
       Cb.jne(L3);
-      Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
+      Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, Local3Offset), asmjit::Imm(0));
       Cb.jne(L3);
 
-      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::x86::rdx);
-      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::x86::rax);
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local2Offset), asmjit::x86::rdx);
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local3Offset), asmjit::x86::rax);
 
       Cb.bind(L3);
 
@@ -348,8 +395,8 @@ class X86Payload : public environment::payload::Payload {
 
       Cb.bind(L5);
 
-      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0));
-      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0));
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local2Offset), asmjit::Imm(0));
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local3Offset), asmjit::Imm(0));
 
       Cb.bind(L6);
 
@@ -359,7 +406,7 @@ class X86Payload : public environment::payload::Payload {
       Cb.jne(L7);
 
       // write the error flag
-      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 32), asmjit::Imm(1));
+      Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, ErrorOffset), asmjit::Imm(1));
 
       // stop the execution after some time
       Cb.mov(asmjit::x86::ptr_64(AddrHighReg), asmjit::Imm(LoadThreadWorkType::LoadStop));
@@ -371,6 +418,13 @@ class X86Payload : public environment::payload::Payload {
       Cb.jmp(L9);
     };
 
+    constexpr const auto ErrorDetectionStructCommunicationLeftOffset =
+        -static_cast<int32_t>(LoadWorkerMemory::getMemoryOffset()) +
+        static_cast<int32_t>(offsetof(LoadWorkerMemory, ExtraVars.Eds.Left.Communication));
+    constexpr const auto ErrorDetectionStructCommunicationRightOffset =
+        -static_cast<int32_t>(LoadWorkerMemory::getMemoryOffset()) +
+        static_cast<int32_t>(offsetof(LoadWorkerMemory, ExtraVars.Eds.Right.Communication));
+
     // left communication
     // move hash
     Cb.mov(asmjit::x86::rbx, TempReg);
@@ -381,7 +435,7 @@ class X86Payload : public environment::payload::Payload {
       Cb.mov(asmjit::x86::rcx, IterReg);
     }
 
-    Communication(-128);
+    Communication(ErrorDetectionStructCommunicationLeftOffset);
 
     // right communication
     // move hash
@@ -393,7 +447,7 @@ class X86Payload : public environment::payload::Payload {
       Cb.mov(asmjit::x86::rcx, IterReg);
     }
 
-    Communication(-64);
+    Communication(ErrorDetectionStructCommunicationRightOffset);
 
     // restore r8, r9, rax, rbx, rcx and rdx
     if constexpr (std::is_same_v<asmjit::x86::Mm, IterRegT>) {
@@ -440,7 +494,7 @@ class X86Payload : public environment::payload::Payload {
 #endif
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Woverloaded-virtual"
-  void init(uint64_t* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue);
+  void init(double* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue);
 #pragma GCC diagnostic pop
 #if defined(__clang__)
 #pragma clang diagnostic pop
@@ -448,7 +502,7 @@ class X86Payload : public environment::payload::Payload {
   // use cpuid and usleep as low load
   void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, uint64_t Period) override;
 
-  auto highLoadFunction(uint64_t* AddrMem, volatile LoadThreadWorkType& LoadVar, uint64_t Iterations)
+  auto highLoadFunction(double* AddrMem, volatile LoadThreadWorkType& LoadVar, uint64_t Iterations)
       -> uint64_t override;
 };
 
diff --git a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
index 35746efc..0b02742f 100644
--- a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
@@ -36,7 +36,7 @@ class ZENFMAPayload final : public X86Payload {
                       std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
                       unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-  void init(uint64_t* MemoryAddr, uint64_t BufferSize) override;
+  void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
     return std::make_unique<ZENFMAPayload>(this->supportedFeatures());
diff --git a/include/firestarter/ErrorDetectionStruct.hpp b/include/firestarter/ErrorDetectionStruct.hpp
index 598cc4ed..dc4d43ce 100644
--- a/include/firestarter/ErrorDetectionStruct.hpp
+++ b/include/firestarter/ErrorDetectionStruct.hpp
@@ -25,23 +25,20 @@
 namespace firestarter {
 
 struct ErrorDetectionStruct {
+  struct OneSide {
+    // the pointer to the 16B of communication
+    volatile uint64_t* Communication;
+    volatile uint64_t Locals[4];
+    // if this variable is not 0, an error occurred in the comparison with the
+    // thread on this side (left or right).
+    volatile uint64_t Error;
+    volatile uint64_t Padding[2];
+  };
+
   // we have two cache lines (64B) containing each two 16B local variable and
   // one ptr (8B)
-
-  // the pointer to 16B of communication
-  volatile uint64_t* CommunicationLeft;
-  volatile uint64_t LocalsLeft[4];
-  // if this variable is not 0, an error occured in the comparison with the
-  // left thread.
-  volatile uint64_t ErrorLeft;
-  volatile uint64_t PaddingLeft[2];
-
-  volatile uint64_t* CommunicationRight;
-  volatile uint64_t LocalsRight[4];
-  // if this variable is not 0, an error occured in the comparison with the
-  // right thread.
-  volatile uint64_t ErrorRight;
-  volatile uint64_t PaddingRight[2];
+  OneSide Left;
+  OneSide Right;
 };
 
-} // namespace firestarter
+} // namespace firestarter
\ No newline at end of file
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index 7ca5a48f..63a292ae 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -48,6 +48,39 @@
 
 namespace firestarter {
 
+/// This struct holds the data for optional FIRESTARTER functionalities.
+struct ExtraLoadWorkerVariables {
+  /// The data for the dump registers functionality.
+  DumpRegisterStruct Drs;
+  /// The data for the error detection functionality.
+  ErrorDetectionStruct Eds;
+};
+
+/// This struct is used to allocate the memory for the high-load routine.
+struct LoadWorkerMemory {
+  /// The extra variables that are located before the memory used for the calculation in the high-load routine. They
+  /// are used for features where further communication with the high-load routine is needed, e.g., for error detection
+  /// or dumping registers.
+  ExtraLoadWorkerVariables ExtraVars;
+
+  /// A placeholder to extract the address of the memory region with dynamic size which is used for the calculation in
+  /// the high-load routine. Do not read from or write to this member directly.
+  EightBytesType DoNotUseAddrMem;
+
+  /// This padding makes sure that we are aligned to a cache line. The allocated memory will most probably reach beyond
+  /// this array.
+  EightBytesType DoNotUsePadding[7];
+
+public:
+  /// Get the pointer to the start of the memory used for computations, i.e. the address of DoNotUseAddrMem.
+  /// \returns the pointer to the memory.
+  [[nodiscard]] auto getMemoryAddress() -> auto{ return reinterpret_cast<double*>(&DoNotUseAddrMem); }
+
+  /// Get the offset of the memory which is used by the high-load functions, relative to the start of this struct.
+  /// \returns the offset to the memory
+  [[nodiscard]] constexpr static auto getMemoryOffset() -> auto{ return offsetof(LoadWorkerMemory, DoNotUseAddrMem); }
+};
+
 class LoadWorkerData {
 public:
   LoadWorkerData(int Id, environment::Environment& Environment, volatile LoadThreadWorkType& LoadVar, uint64_t Period,
@@ -58,20 +91,12 @@ class LoadWorkerData {
       , ErrorDetection(ErrorDetection)
       , Id(Id)
       , Environment(Environment)
-      , Config(new environment::platform::RuntimeConfig(Environment.selectedConfig())) {
-    // use REGISTER_MAX_NUM cache lines for the dumped registers
-    // and another cache line for the control variable.
-    // as we are doing aligned moves we only have the option to waste a
-    // whole cacheline
-    AddrOffset += DumpRegisters ? sizeof(DumpRegisterStruct) / sizeof(uint64_t) : 0;
-
-    AddrOffset += ErrorDetection ? sizeof(ErrorDetectionStruct) / sizeof(uint64_t) : 0;
-  }
+      , Config(new environment::platform::RuntimeConfig(Environment.selectedConfig())) {}
 
   ~LoadWorkerData() {
     delete Config;
-    if (AddrMem - AddrOffset != nullptr) {
-      ALIGNED_FREE(AddrMem - AddrOffset);
+    if (Memory != nullptr) {
+      ALIGNED_FREE(Memory);
     }
   }
 
@@ -85,15 +110,26 @@ class LoadWorkerData {
   [[nodiscard]] auto environment() const -> environment::Environment& { return Environment; }
   [[nodiscard]] auto config() const -> environment::platform::RuntimeConfig& { return *Config; }
 
-  [[nodiscard]] auto errorDetectionStruct() const -> const ErrorDetectionStruct* {
-    return reinterpret_cast<ErrorDetectionStruct*>(AddrMem - AddrOffset);
+  /// Access the DumpRegisterStruct. Asserts when dumping registers is not enabled. Memory must not be nullptr.
+  /// \returns a reference to the DumpRegisterStruct
+  [[nodiscard]] auto dumpRegisterStruct() const -> DumpRegisterStruct& {
+    assert(DumpRegisters && "Tried to access DumpRegisterStruct, but dumping registers is not enabled.");
+    return Memory->ExtraVars.Drs;
+  }
+
+  /// Access the ErrorDetectionStruct. Asserts when error detection is not enabled. Memory must not be nullptr.
+  /// \returns a reference to the ErrorDetectionStruct
+  [[nodiscard]] auto errorDetectionStruct() const -> ErrorDetectionStruct& {
+    assert(ErrorDetection && "Tried to access ErrorDetectionStruct, but error detection is not enabled.");
+    return Memory->ExtraVars.Eds;
   }
 
   LoadThreadState State = LoadThreadState::ThreadWait;
   bool Ack = false;
   std::mutex Mutex;
-  uint64_t* AddrMem = nullptr;
-  uint64_t AddrOffset = 0;
+
+  LoadWorkerMemory* Memory = nullptr;
+
   volatile LoadThreadWorkType& LoadVar;
   uint64_t BuffersizeMem{};
   uint64_t Iterations = 0;
diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp
index 511f89e5..2e5e08e0 100644
--- a/src/firestarter/DumpRegisterWorker.cpp
+++ b/src/firestarter/DumpRegisterWorker.cpp
@@ -19,7 +19,6 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include "firestarter/Constants.hpp"
 #ifdef FIRESTARTER_DEBUG_FEATURES
 
 #include <firestarter/Firestarter.hpp>
@@ -57,10 +56,12 @@ auto registerNameBySize(unsigned RegisterSize) -> std::string {
 namespace firestarter {
 
 auto Firestarter::initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, const std::string& DumpFilePath) -> int {
-
+  // Create the data for the worker thread. The thread will dump the register contents periodically and calculate the
+  // hamming distance between dumps.
   auto Data = std::make_unique<DumpRegisterWorkerData>(this->LoadThreads.begin()->second, DumpTimeDelta, DumpFilePath);
 
-  this->DumpRegisterWorkerThread = std::thread(Firestarter::dumpRegisterWorker, std::move(Data));
+  // Spawn the thread.
+  DumpRegisterWorkerThread = std::thread(Firestarter::dumpRegisterWorker, std::move(Data));
 
   return EXIT_SUCCESS;
 }
@@ -71,27 +72,18 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Dat
 
   pthread_setname_np(pthread_self(), "DumpRegWorker");
 
-  auto RegisterCount = Data->LoadWorkerDataPtr->config().payload().registerCount();
-  auto RegisterSize = Data->LoadWorkerDataPtr->config().payload().registerSize();
-  std::string RegisterPrefix = registerNameBySize(RegisterSize);
-  auto Offset = sizeof(DumpRegisterStruct) / sizeof(uint64_t);
-
-  auto* DumpRegisterStruct = reinterpret_cast<struct DumpRegisterStruct*>(Data->LoadWorkerDataPtr->AddrMem - Offset);
+  const auto RegisterCount = Data->LoadWorkerDataPtr->config().payload().registerCount();
+  const auto RegisterSize = Data->LoadWorkerDataPtr->config().payload().registerSize();
+  const auto Offset = RegisterCount * RegisterSize;
+  const std::string RegisterPrefix = registerNameBySize(RegisterSize);
 
-  auto& DumpVar = DumpRegisterStruct->DumpVar;
+  auto& DumpRegisterStructRef = Data->LoadWorkerDataPtr->Memory->ExtraVars.Drs;
+  auto& DumpVar = DumpRegisterStructRef.DumpVar;
   // memory of simd variables is before the padding
-  auto* DumpMemAddr = static_cast<volatile uint64_t*>(DumpRegisterStruct->Padding) -
-                      (static_cast<size_t>(RegisterCount * RegisterSize));
+  const auto* DumpMemAddr = static_cast<volatile uint64_t*>(DumpRegisterStructRef.Padding) - Offset;
 
-  // TODO(marenz): maybe use aligned_malloc to make memcpy more efficient and don't
-  // interrupt the workload as much?
-  auto* Last = reinterpret_cast<uint64_t*>(malloc(sizeof(uint64_t) * Offset));
-  auto* Current = reinterpret_cast<uint64_t*>(malloc(sizeof(uint64_t) * Offset));
-
-  if (Last == nullptr || Current == nullptr) {
-    log::error() << "Malloc failed in Firestarter::dumpRegisterWorker";
-    exit(ENOMEM);
-  }
+  // allocate contiguous memory that fits the register contents
+  auto Last = std::vector<uint64_t>(Offset);
 
   std::stringstream DumpFilePath;
   DumpFilePath << Data->DumpFilePath;
@@ -133,8 +125,9 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Dat
       std::this_thread::sleep_for(std::chrono::milliseconds(10));
     }
 
+    auto Current = std::vector<uint64_t>(Offset);
     // copy the register content to minimize the interruption of the load worker
-    std::memcpy(Current, (void*)DumpMemAddr, sizeof(uint64_t) * Offset);
+    std::memcpy(Current.data(), (void*)DumpMemAddr, Current.size() * sizeof(decltype(Current)::value_type));
 
     // skip the first output, as we first have to get some valid values for last
     if (!SkipFirst) {
@@ -170,15 +163,12 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Dat
       SkipFirst = false;
     }
 
-    std::memcpy(Last, Current, sizeof(uint64_t) * Offset);
+    Last = std::move(Current);
 
     std::this_thread::sleep_for(std::chrono::seconds(Data->DumpTimeDelta));
   }
 
   DumpFile.close();
-
-  free(Last);
-  free(Current);
 }
 
 } // namespace firestarter
diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index 730b0e3b..ff53e7c5 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -108,7 +108,8 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   const auto RamReg = zmm30;
 
   FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile uint64_t*, uint64_t>(CallConvId::kCDecl), Rt.environment());
+  Func.init(FuncSignatureT<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+            Rt.environment());
 
   FuncFrame Frame;
   Frame.init(Func);
@@ -341,20 +342,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   Cb.mov(L1Addr, PointerReg);
 
   if (DumpRegisters) {
-    auto SkipRegistersDump = Cb.newLabel();
-
-    Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
-    Cb.jnz(SkipRegistersDump);
-
-    // dump all the ymm register
-    for (unsigned I = 0; I < registerCount(); I++) {
-      Cb.vmovapd(zmmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Zmm(I));
-    }
-
-    // set read flag
-    Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
-
-    Cb.bind(SkipRegistersDump);
+    emitDumpRegisterCode<Zmm>(Cb, PointerReg, zmmword_ptr);
   }
 
   if (ErrorDetection) {
@@ -408,7 +396,7 @@ auto AVX512Payload::getAvailableInstructions() const -> std::list<std::string> {
   return Instructions;
 }
 
-void AVX512Payload::init(uint64_t* MemoryAddr, uint64_t BufferSize) {
+void AVX512Payload::init(double* MemoryAddr, uint64_t BufferSize) {
   X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
 
diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
index b5a80b4c..4a31c3f2 100644
--- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
@@ -104,7 +104,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   const auto TransRegs = 6;
 
   FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+  Func.init(FuncSignatureT<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
             Rt.environment());
 
   FuncFrame Frame;
@@ -374,20 +374,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   Cb.mov(L1Addr, PointerReg);
 
   if (DumpRegisters) {
-    auto SkipRegistersDump = Cb.newLabel();
-
-    Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
-    Cb.jnz(SkipRegistersDump);
-
-    // dump all the ymm register
-    for (unsigned I = 0; I < registerCount(); I++) {
-      Cb.vmovapd(ymmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Ymm(I));
-    }
-
-    // set read flag
-    Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
-
-    Cb.bind(SkipRegistersDump);
+    emitDumpRegisterCode<Ymm>(Cb, PointerReg, ymmword_ptr);
   }
 
   if (ErrorDetection) {
@@ -441,7 +428,7 @@ auto AVXPayload::getAvailableInstructions() const -> std::list<std::string> {
   return Instructions;
 }
 
-void AVXPayload::init(uint64_t* MemoryAddr, uint64_t BufferSize) {
+void AVXPayload::init(double* MemoryAddr, uint64_t BufferSize) {
   X86Payload::init(MemoryAddr, BufferSize, 1.654738925401e-10, 1.654738925401e-15);
 }
 
diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
index 25bdd69a..40697855 100644
--- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
@@ -108,7 +108,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   const auto RamReg = xmm15;
 
   FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+  Func.init(FuncSignatureT<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
             Rt.environment());
 
   FuncFrame Frame;
@@ -342,20 +342,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   Cb.mov(L1Addr, PointerReg);
 
   if (DumpRegisters) {
-    auto SkipRegistersDump = Cb.newLabel();
-
-    Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
-    Cb.jnz(SkipRegistersDump);
-
-    // dump all the ymm register
-    for (unsigned I = 0; I < registerCount(); I++) {
-      Cb.vmovapd(ymmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Ymm(I));
-    }
-
-    // set read flag
-    Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
-
-    Cb.bind(SkipRegistersDump);
+    emitDumpRegisterCode<Ymm>(Cb, PointerReg, ymmword_ptr);
   }
 
   if (ErrorDetection) {
@@ -409,7 +396,7 @@ auto FMA4Payload::getAvailableInstructions() const -> std::list<std::string> {
   return Instructions;
 }
 
-void FMA4Payload::init(uint64_t* MemoryAddr, uint64_t BufferSize) {
+void FMA4Payload::init(double* MemoryAddr, uint64_t BufferSize) {
   X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
 
diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
index c629c302..3dae4736 100644
--- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
@@ -108,7 +108,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   const auto RamReg = ymm15;
 
   FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+  Func.init(FuncSignatureT<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
             Rt.environment());
 
   FuncFrame Frame;
@@ -379,20 +379,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   Cb.mov(L1Addr, PointerReg);
 
   if (DumpRegisters) {
-    auto SkipRegistersDump = Cb.newLabel();
-
-    Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
-    Cb.jnz(SkipRegistersDump);
-
-    // dump all the ymm register
-    for (unsigned I = 0; I < registerCount(); I++) {
-      Cb.vmovapd(ymmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Ymm(I));
-    }
-
-    // set read flag
-    Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
-
-    Cb.bind(SkipRegistersDump);
+    emitDumpRegisterCode<Ymm>(Cb, PointerReg, ymmword_ptr);
   }
 
   if (ErrorDetection) {
@@ -446,7 +433,7 @@ auto FMAPayload::getAvailableInstructions() const -> std::list<std::string> {
   return Instructions;
 }
 
-void FMAPayload::init(uint64_t* MemoryAddr, uint64_t BufferSize) {
+void FMAPayload::init(double* MemoryAddr, uint64_t BufferSize) {
   X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
 
diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index 6c79dce2..bb50a89a 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -19,7 +19,6 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include "firestarter/Constants.hpp"
 #include <firestarter/Environment/X86/Payload/SSE2Payload.hpp>
 
 namespace firestarter::environment::x86::payload {
@@ -105,7 +104,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   const auto TransRegs = 2;
 
   FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+  Func.init(FuncSignatureT<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
             Rt.environment());
 
   FuncFrame Frame;
@@ -367,20 +366,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   Cb.mov(L1Addr, PointerReg);
 
   if (DumpRegisters) {
-    auto SkipRegistersDump = Cb.newLabel();
-
-    Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
-    Cb.jnz(SkipRegistersDump);
-
-    // dump all the xmm register
-    for (unsigned I = 0; I < registerCount(); I++) {
-      Cb.movapd(xmmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Xmm(I));
-    }
-
-    // set read flag
-    Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
-
-    Cb.bind(SkipRegistersDump);
+    emitDumpRegisterCode<Xmm>(Cb, PointerReg, xmmword_ptr);
   }
 
   if (ErrorDetection) {
@@ -434,7 +420,7 @@ auto SSE2Payload::getAvailableInstructions() const -> std::list<std::string> {
   return Instructions;
 }
 
-void SSE2Payload::init(uint64_t* MemoryAddr, uint64_t BufferSize) {
+void SSE2Payload::init(double* MemoryAddr, uint64_t BufferSize) {
   X86Payload::init(MemoryAddr, BufferSize, 1.654738925401e-10, 1.654738925401e-15);
 }
 
diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index cc44e148..4946883d 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -68,23 +68,23 @@ void X86Payload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, uint64_t
   }
 }
 
-void X86Payload::init(uint64_t* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue) {
+void X86Payload::init(double* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue) {
   uint64_t i = 0;
 
   for (; i < INIT_BLOCKSIZE; i++) {
-    reinterpret_cast<double*>(MemoryAddr)[i] = 0.25 + static_cast<double>(i) * 8.0 * FirstValue;
+    MemoryAddr[i] = 0.25 + static_cast<double>(i) * 8.0 * FirstValue;
   }
   for (; i <= BufferSize - INIT_BLOCKSIZE; i += INIT_BLOCKSIZE) {
     std::memcpy(MemoryAddr + i, MemoryAddr + i - INIT_BLOCKSIZE, sizeof(uint64_t) * INIT_BLOCKSIZE);
   }
   for (; i < BufferSize; i++) {
-    reinterpret_cast<double*>(MemoryAddr)[i] = 0.25 + static_cast<double>(i) * 8.0 * LastValue;
+    MemoryAddr[i] = 0.25 + static_cast<double>(i) * 8.0 * LastValue;
   }
 }
 
-auto X86Payload::highLoadFunction(uint64_t* AddrMem, volatile LoadThreadWorkType& AddrHigh, uint64_t Iterations)
+auto X86Payload::highLoadFunction(double* AddrMem, volatile LoadThreadWorkType& LoadVar, uint64_t Iterations)
     -> uint64_t {
-  return this->LoadFunction(AddrMem, &AddrHigh, Iterations);
+  return this->LoadFunction(AddrMem, &LoadVar, Iterations);
 }
 
 }; // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
index b0371b34..c23f637d 100644
--- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
@@ -105,7 +105,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   auto RamReg = ymm15;
 
   FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, uint64_t*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+  Func.init(FuncSignatureT<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
             Rt.environment());
 
   FuncFrame Frame;
@@ -331,20 +331,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   Cb.mov(L1Addr, PointerReg);
 
   if (DumpRegisters) {
-    auto SkipRegistersDump = Cb.newLabel();
-
-    Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
-    Cb.jnz(SkipRegistersDump);
-
-    // dump all the ymm register
-    for (unsigned I = 0; I < registerCount(); I++) {
-      Cb.vmovapd(ymmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Ymm(I));
-    }
-
-    // set read flag
-    Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait));
-
-    Cb.bind(SkipRegistersDump);
+    emitDumpRegisterCode<Ymm>(Cb, PointerReg, ymmword_ptr);
   }
 
   if (ErrorDetection) {
@@ -398,7 +385,7 @@ auto ZENFMAPayload::getAvailableInstructions() const -> std::list<std::string> {
   return Instructions;
 }
 
-void ZENFMAPayload::init(uint64_t* MemoryAddr, uint64_t BufferSize) {
+void ZENFMAPayload::init(double* MemoryAddr, uint64_t BufferSize) {
   X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
 
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index f7cfac19..e7cb7c92 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -20,6 +20,7 @@
  *****************************************************************************/
 
 #include "firestarter/Constants.hpp"
+#include "firestarter/LoadWorkerData.hpp"
 #include <algorithm>
 #include <firestarter/ErrorDetectionStruct.hpp>
 #include <firestarter/Firestarter.hpp>
@@ -42,8 +43,7 @@
 #include <thread>
 
 namespace {
-auto AlignedFreeDeleter = [](void* P) { ALIGNED_FREE(P); };
-
+const auto AlignedFreeDeleter = [](void* P) { ALIGNED_FREE(P); };
 }
 
 namespace firestarter {
@@ -155,12 +155,12 @@ void Firestarter::printThreadErrorReport() {
     std::vector<bool> Errors(MaxSize, false);
 
     for (decltype(MaxSize) I = 0; I < MaxSize; I++) {
-      const auto* ErrorDetectionStructPtr = LoadThreads[I].second->errorDetectionStruct();
+      const auto& ErrorDetectionStructPtr = LoadThreads[I].second->errorDetectionStruct();
 
-      if (ErrorDetectionStructPtr->ErrorLeft) {
+      if (ErrorDetectionStructPtr.Left.Error) {
         Errors[(I + MaxSize - 1) % MaxSize] = true;
       }
-      if (ErrorDetectionStructPtr->ErrorRight) {
+      if (ErrorDetectionStructPtr.Right.Error) {
         Errors[I] = true;
       }
     }
@@ -279,35 +279,34 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
       // allocate memory
       // if we should dump some registers, we use the first part of the memory
       // for them.
-      Td->AddrMem =
-          reinterpret_cast<uint64_t*>(ALIGNED_MALLOC((Td->BuffersizeMem + Td->AddrOffset) * sizeof(uint64_t), 64)) +
-          Td->AddrOffset;
+      Td->Memory = reinterpret_cast<LoadWorkerMemory*>(
+          ALIGNED_MALLOC((Td->BuffersizeMem * sizeof(uint64_t) + sizeof(ExtraLoadWorkerVariables)), 64));
 
       // exit application on error
-      if (Td->AddrMem - Td->AddrOffset == nullptr) {
+      if (Td->Memory == nullptr) {
         workerLog::error() << "Could not allocate memory for CPU load thread " << Td->id() << "\n";
         exit(ENOMEM);
       }
 
       if (Td->DumpRegisters) {
-        reinterpret_cast<DumpRegisterStruct*>(Td->AddrMem - Td->AddrOffset)->DumpVar = DumpVariable::Wait;
+        Td->dumpRegisterStruct().DumpVar = DumpVariable::Wait;
       }
 
       if (Td->ErrorDetection) {
-        auto* ErrorDetectionStructPtr = reinterpret_cast<ErrorDetectionStruct*>(Td->AddrMem - Td->AddrOffset);
+        auto& ErrorDetectionStructRef = Td->errorDetectionStruct();
 
-        std::memset(ErrorDetectionStructPtr, 0, sizeof(ErrorDetectionStruct));
+        std::memset(&ErrorDetectionStructRef, 0, sizeof(ErrorDetectionStruct));
 
         // distribute left and right communication pointers
-        ErrorDetectionStructPtr->CommunicationLeft = Td->CommunicationLeft.get();
-        ErrorDetectionStructPtr->CommunicationRight = Td->CommunicationRight.get();
+        ErrorDetectionStructRef.Left.Communication = Td->CommunicationLeft.get();
+        ErrorDetectionStructRef.Right.Communication = Td->CommunicationRight.get();
 
         // do first touch memset 0 for the communication pointers
-        std::memset((void*)ErrorDetectionStructPtr->CommunicationLeft, 0, sizeof(uint64_t) * 2);
+        std::memset((void*)ErrorDetectionStructRef.Left.Communication, 0, sizeof(uint64_t) * 2);
       }
 
       // call init function
-      Td->config().payload().init(Td->AddrMem, Td->BuffersizeMem);
+      Td->config().payload().init(Td->Memory->getMemoryAddress(), Td->BuffersizeMem);
 
       break;
     // perform stress test
@@ -324,7 +323,8 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
 #ifdef ENABLE_SCOREP
         SCOREP_USER_REGION_BY_NAME_BEGIN("HIGH", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
-        Td->Iterations = Td->config().payload().highLoadFunction(Td->AddrMem, Td->LoadVar, Td->Iterations);
+        Td->Iterations =
+            Td->config().payload().highLoadFunction(Td->Memory->getMemoryAddress(), Td->LoadVar, Td->Iterations);
 
         // call low load function
 #ifdef ENABLE_VTRACING
@@ -365,7 +365,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
                                             Td->ErrorDetection);
 
       // call init function
-      Td->config().payload().init(Td->AddrMem, Td->BuffersizeMem);
+      Td->config().payload().init(Td->Memory->getMemoryAddress(), Td->BuffersizeMem);
 
       // save old iteration count
       Td->LastIterations = Td->Iterations;

From 38764119e7966b5bd6f4eae14d8c7fd86905de80 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 4 Oct 2024 18:48:49 +0200
Subject: [PATCH 024/167] fix division by zero warning

---
 src/firestarter/Environment/X86/Payload/SSE2Payload.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index bb50a89a..1c416ae6 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -99,7 +99,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   const auto OffsetReg = r12;
   const auto AddrHighReg = r13;
   const auto IterReg = r14;
-  const auto MovRegs = 0;
+  constexpr const auto MovRegs = 0;
   const auto AddRegs = 14;
   const auto TransRegs = 2;
 
@@ -295,7 +295,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
         return EXIT_FAILURE;
       }
 
-      if (MovRegs > 0) {
+      if constexpr (MovRegs > 0) {
         Instructions++;
         Cb.movq(Mm(MovStart + ((MovqDest - MovStart + MovRegs - 1) % MovRegs)), Mm(MovqDest));
       }

From ba43956b00846ad777abbbf3e17e2a740d140934 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 5 Oct 2024 17:58:15 +0200
Subject: [PATCH 025/167] clang-tidy fixes. create struct for allocating whole
 cache lines. clean up memory management for the high-load routine.

---
 .clang-tidy                                   |  4 +-
 include/firestarter/AlignedAlloc.hpp          | 78 +++++++++++++++++++
 .../Environment/Payload/Payload.hpp           |  2 +-
 .../Environment/Platform/PlatformConfig.hpp   |  2 +-
 .../Environment/X86/Payload/X86Payload.hpp    |  2 +-
 include/firestarter/LoadWorkerData.hpp        | 75 +++++++++---------
 lib/.clang-tidy                               |  3 +-
 .../Environment/X86/Payload/X86Payload.cpp    |  6 +-
 src/firestarter/LoadWorker.cpp                | 13 +---
 src/firestarter/Main.cpp                      | 43 +++++-----
 10 files changed, 153 insertions(+), 75 deletions(-)
 create mode 100644 include/firestarter/AlignedAlloc.hpp

diff --git a/.clang-tidy b/.clang-tidy
index 02c27d04..7c7a6449 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -41,10 +41,10 @@ Checks: >
 # Turn all the warnings from the checks above into errors.
 WarningsAsErrors: "*"
 
-HeaderFilterRegex: "(include/).*\\.(h|hpp)$"
+HeaderFilterRegex: "include/firestarter/.*\\.(h|hpp)$"
 
 CheckOptions:
-  - { key: readability-identifier-naming.NamespaceCase,          value: lower_cases }
+  - { key: readability-identifier-naming.NamespaceCase,          value: lower_case }
   - { key: readability-identifier-naming.ClassCase,              value: CamelCase  }
   - { key: readability-identifier-naming.StructCase,             value: CamelCase  }
   - { key: readability-identifier-naming.FunctionCase,           value: camelBack  }
diff --git a/include/firestarter/AlignedAlloc.hpp b/include/firestarter/AlignedAlloc.hpp
new file mode 100644
index 00000000..3c8605e9
--- /dev/null
+++ b/include/firestarter/AlignedAlloc.hpp
@@ -0,0 +1,78 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2024 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+#pragma once
+
+#include <cmath>
+#include <cstddef>
+#include <cstdlib>
+
+namespace firestarter {
+
+struct AlignedAlloc {
+private:
+  /// Round the size up to the nearest multiple of the alignment using exact integer arithmetic.
+  /// \arg Size The number to be rounded up.
+  /// \arg Alignment The number to whose multiple Size is rounded up. Must not be zero.
+  /// \returns Size rounded up to the nearest multiple of the Alignment
+  static auto padSize(const std::size_t Size, const std::size_t Alignment) -> std::size_t {
+    return Size % Alignment == 0 ? Size : Size + (Alignment - Size % Alignment);
+  }
+
+public:
+  /// Allocate memory with a given alignment. The size will automatically be increased to the nearest multiple of the
+  /// alignment.
+  /// \arg Size The minimum required memory.
+  /// \arg Alignment describes to which boundary the memory should be aligned. The default is 64B which will account to
+  /// the size of a cache line on most systems.
+  /// \returns The pointer to the allocated memory.
+  static auto malloc(const std::size_t Size, const std::size_t Alignment = 64) -> void* {
+    // NOLINTBEGIN(cppcoreguidelines-owning-memory)
+#if defined(__APPLE__)
+    return aligned_alloc(Alignment, padSize(Size, Alignment));
+#elif defined(__MINGW64__)
+    return _mm_malloc(padSize(Size, Alignment), Alignment);
+#elif defined(_MSC_VER)
+    return _aligned_malloc(padSize(Size, Alignment), Alignment);
+#else
+    return aligned_alloc(Alignment, padSize(Size, Alignment));
+#endif
+    // NOLINTEND(cppcoreguidelines-owning-memory)
+  }
+
+  /// Deallocate memory which has been allocated by the AlignedAlloc::malloc function.
+  /// \arg Ptr The pointer to the allocated memory.
+  static void free(void* Ptr) {
+    // NOLINTBEGIN(cppcoreguidelines-owning-memory,cppcoreguidelines-no-malloc)
+#if defined(__APPLE__)
+    std::free(Ptr); // qualified: a bare free(Ptr) would resolve to AlignedAlloc::free and recurse forever
+#elif defined(__MINGW64__)
+    _mm_free(Ptr);
+#elif defined(_MSC_VER)
+    _aligned_free(Ptr);
+#else
+    std::free(Ptr);
+#endif
+    // NOLINTEND(cppcoreguidelines-owning-memory,cppcoreguidelines-no-malloc)
+  }
+};
+
+} // namespace firestarter
diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index 90e6dd6f..cedf1041 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -60,7 +60,7 @@ class Payload {
 
   [[nodiscard]] static auto getNumberOfSequenceRepetitions(const std::vector<std::string>& Sequence,
                                                            const unsigned NumberOfLines) -> unsigned {
-    if (Sequence.size() == 0) {
+    if (Sequence.empty()) {
       return 0;
     }
     return NumberOfLines / Sequence.size();
diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp
index 954b6682..88450cd8 100644
--- a/include/firestarter/Environment/Platform/PlatformConfig.hpp
+++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp
@@ -90,7 +90,7 @@ class PlatformConfig {
     }
 
     auto Str = Ss.str();
-    if (Str.size() > 0) {
+    if (!Str.empty()) {
       Str.pop_back();
     }
 
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index 1b6c193d..951aad2f 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -33,7 +33,7 @@
 #include <type_traits>
 #include <utility>
 
-#define INIT_BLOCKSIZE 1024
+constexpr const auto InitBlocksize = 1024;
 
 namespace firestarter::environment::x86::payload {
 
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index 63a292ae..ddd181b4 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -21,47 +21,45 @@
 
 #pragma once
 
+#include "AlignedAlloc.hpp"
 #include "Constants.hpp"
 #include "DumpRegisterStruct.hpp"
 #include "Environment/Environment.hpp"
 #include "ErrorDetectionStruct.hpp"
 #include <atomic>
+#include <cmath>
+#include <cstddef>
 #include <memory>
 #include <mutex>
 #include <utility>
 
-#define PAD_SIZE(size, align) align*(int)std::ceil((double)size / (double)align)
-
-#if defined(__APPLE__)
-#define ALIGNED_MALLOC(size, align) aligned_alloc(align, PAD_SIZE(size, align))
-#define ALIGNED_FREE free
-#elif defined(__MINGW64__)
-#define ALIGNED_MALLOC(size, align) _mm_malloc(PAD_SIZE(size, align), align)
-#define ALIGNED_FREE _mm_free
-#elif defined(_MSC_VER)
-#define ALIGNED_MALLOC(size, align) _aligned_malloc(PAD_SIZE(size, align), align)
-#define ALIGNED_FREE _aligned_free
-#else
-#define ALIGNED_MALLOC(size, align) std::aligned_alloc(align, PAD_SIZE(size, align))
-#define ALIGNED_FREE std::free
-#endif
-
 namespace firestarter {
 
-/// This struct holds the data for optional FIRESTARTER functionalities.
-struct ExtraLoadWorkerVariables {
-  /// The data for the dump registers functionality.
-  DumpRegisterStruct Drs;
-  /// The data for the error detections functionality.
-  ErrorDetectionStruct Eds;
-};
-
 /// This struct is used to allocate the memory for the high-load routine.
 struct LoadWorkerMemory {
+private:
+  LoadWorkerMemory() = default;
+  ~LoadWorkerMemory() = default;
+
+  /// Function to deallocate the memory for this struct to be used with unique_ptr.
+  /// \arg Ptr The pointer to the memory
+  static void deallocate(void* Ptr) {
+    static_cast<LoadWorkerMemory*>(Ptr)->~LoadWorkerMemory();
+    AlignedAlloc::free(Ptr);
+  }
+
+public:
+  using UniquePtr = std::unique_ptr<LoadWorkerMemory, void (*)(void*)>;
+
   /// The extra variables that are before the memory used for the calculation in the high-load routine. They are used
-  /// for features where further communication between the high-load routine is needed e.g., for error detection or
-  /// dumping registers.
-  ExtraLoadWorkerVariables ExtraVars;
+  /// for optional FIRESTARTER features where further communication between the high-load routine is needed e.g., for
+  /// error detection or dumping registers.
+  struct ExtraLoadWorkerVariables {
+    /// The data for the dump registers functionality.
+    DumpRegisterStruct Drs;
+    /// The data for the error detections functionality.
+    ErrorDetectionStruct Eds;
+  } ExtraVars;
 
   /// A placeholder to extract the address of the memory region with dynamic size which is used for the calculation in
   /// the high-load routine. Do not write or read to this type directly.
@@ -71,7 +69,6 @@ struct LoadWorkerMemory {
   /// this array.
   EightBytesType DoNotUsePadding[7];
 
-public:
   /// Get the pointer to the start of the memory use for computations.
   /// \returns the pointer to the memory.
   [[nodiscard]] auto getMemoryAddress() -> auto{ return reinterpret_cast<double*>(&DoNotUseAddrMem); }
@@ -79,6 +76,19 @@ struct LoadWorkerMemory {
   /// Get the offset to the memory which is used by the high-load functions
   /// \returns the offset to the memory
   [[nodiscard]] constexpr static auto getMemoryOffset() -> auto{ return offsetof(LoadWorkerMemory, DoNotUseAddrMem); }
+
+  /// Allocate the memory for the high-load thread on 64B cache line boundaries and return a unique_ptr.
+  /// \arg Bytes The number of bytes allocated for the array whose start address is returned by the getMemoryAddress
+  /// function.
+  /// \returns A unique_ptr to the memory for the high-load thread.
+  [[nodiscard]] static auto allocate(const std::size_t Bytes) -> UniquePtr {
+    // Allocate the memory for the ExtraLoadWorkerVariables (which are 64B aligned) and the data for the high-load
+    // routine which may not be 64B aligned.
+    static_assert(sizeof(ExtraLoadWorkerVariables) % 64 == 0,
+                  "ExtraLoadWorkerVariables is not a size of 64B i.e., a cacheline.");
+    auto* Ptr = AlignedAlloc::malloc(Bytes + sizeof(ExtraLoadWorkerVariables));
+    return {static_cast<LoadWorkerMemory*>(Ptr), deallocate};
+  }
 };
 
 class LoadWorkerData {
@@ -93,12 +103,7 @@ class LoadWorkerData {
       , Environment(Environment)
       , Config(new environment::platform::RuntimeConfig(Environment.selectedConfig())) {}
 
-  ~LoadWorkerData() {
-    delete Config;
-    if (Memory != nullptr) {
-      ALIGNED_FREE(Memory);
-    }
-  }
+  ~LoadWorkerData() { delete Config; }
 
   void setErrorCommunication(std::shared_ptr<uint64_t> CommunicationLeft,
                              std::shared_ptr<uint64_t> CommunicationRight) {
@@ -128,7 +133,7 @@ class LoadWorkerData {
   bool Ack = false;
   std::mutex Mutex;
 
-  LoadWorkerMemory* Memory = nullptr;
+  LoadWorkerMemory::UniquePtr Memory = {nullptr, nullptr};
 
   volatile LoadThreadWorkType& LoadVar;
   uint64_t BuffersizeMem{};
diff --git a/lib/.clang-tidy b/lib/.clang-tidy
index a49ef83c..cf4dd00b 100644
--- a/lib/.clang-tidy
+++ b/lib/.clang-tidy
@@ -1,5 +1,4 @@
 ---
 # Disable all clangd checks for the lib folder
 
-Checks: >
-  -*,
+Checks: '-*'
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index 4946883d..473041c5 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -71,11 +71,11 @@ void X86Payload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, uint64_t
 void X86Payload::init(double* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue) {
   uint64_t i = 0;
 
-  for (; i < INIT_BLOCKSIZE; i++) {
+  for (; i < InitBlocksize; i++) {
     MemoryAddr[i] = 0.25 + static_cast<double>(i) * 8.0 * FirstValue;
   }
-  for (; i <= BufferSize - INIT_BLOCKSIZE; i += INIT_BLOCKSIZE) {
-    std::memcpy(MemoryAddr + i, MemoryAddr + i - INIT_BLOCKSIZE, sizeof(uint64_t) * INIT_BLOCKSIZE);
+  for (; i <= BufferSize - InitBlocksize; i += InitBlocksize) {
+    std::memcpy(MemoryAddr + i, MemoryAddr + i - InitBlocksize, sizeof(uint64_t) * InitBlocksize);
   }
   for (; i < BufferSize; i++) {
     MemoryAddr[i] = 0.25 + static_cast<double>(i) * 8.0 * LastValue;
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index e7cb7c92..d55a07b6 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -19,9 +19,9 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/AlignedAlloc.hpp"
 #include "firestarter/Constants.hpp"
 #include "firestarter/LoadWorkerData.hpp"
-#include <algorithm>
 #include <firestarter/ErrorDetectionStruct.hpp>
 #include <firestarter/Firestarter.hpp>
 #include <firestarter/Logging/Log.hpp>
@@ -42,10 +42,6 @@
 #include <cstdlib>
 #include <thread>
 
-namespace {
-const auto AlignedFreeDeleter = [](void* P) { ALIGNED_FREE(P); };
-}
-
 namespace firestarter {
 
 auto Firestarter::initLoadWorkers(bool LowLoad, uint64_t Period) -> int {
@@ -65,9 +61,9 @@ auto Firestarter::initLoadWorkers(bool LowLoad, uint64_t Period) -> int {
   // communication pointers and add these to the threaddata
   if (ErrorDetection) {
     for (uint64_t I = 0; I < NumThreads; I++) {
-      auto* CommPtr = reinterpret_cast<uint64_t*>(ALIGNED_MALLOC(2 * sizeof(uint64_t), 64));
+      auto* CommPtr = static_cast<uint64_t*>(AlignedAlloc::malloc(2 * sizeof(uint64_t)));
       assert(CommPtr);
-      ErrorCommunication.push_back(std::shared_ptr<uint64_t>(CommPtr, AlignedFreeDeleter));
+      ErrorCommunication.emplace_back(std::shared_ptr<uint64_t>(CommPtr, AlignedAlloc::free));
       log::debug() << "Threads " << (I + NumThreads - 1) % NumThreads << " and " << I << " commPtr = 0x"
                    << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex
                    << reinterpret_cast<uint64_t>(CommPtr);
@@ -279,8 +275,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
       // allocate memory
       // if we should dump some registers, we use the first part of the memory
       // for them.
-      Td->Memory = reinterpret_cast<LoadWorkerMemory*>(
-          ALIGNED_MALLOC((Td->BuffersizeMem * sizeof(uint64_t) + sizeof(ExtraLoadWorkerVariables)), 64));
+      Td->Memory = LoadWorkerMemory::allocate(Td->BuffersizeMem * sizeof(uint64_t));
 
       // exit application on error
       if (Td->Memory == nullptr) {
diff --git a/src/firestarter/Main.cpp b/src/firestarter/Main.cpp
index 9627adb2..318d246d 100644
--- a/src/firestarter/Main.cpp
+++ b/src/firestarter/Main.cpp
@@ -116,7 +116,7 @@ void printWarranty() {
 void printHelp(cxxopts::Options const& Parser, std::string const& Section) {
   std::vector<std::pair<std::string, std::string>> Options(Config::OptionsMap.size());
 
-  if (Section.size() == 0) {
+  if (Section.empty()) {
     std::copy(Config::OptionsMap.begin(), Config::OptionsMap.end(), Options.begin());
   } else {
     auto FindSection = [&](std::pair<std::string, std::string> const& Pair) { return Pair.first == Section; };
@@ -267,26 +267,26 @@ Config::Config(int Argc, const char** Argv) {
   try {
     auto Options = Parser.parse(Argc, Argv);
 
-    if (Options.count("quiet")) {
+    if (static_cast<bool>(Options.count("quiet"))) {
       firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::warn);
-    } else if (Options.count("report")) {
+    } else if (static_cast<bool>(Options.count("report"))) {
       firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::debug);
-    } else if (Options.count("debug")) {
+    } else if (static_cast<bool>(Options.count("debug"))) {
       firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::trace);
     } else {
       firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::info);
     }
 
-    if (Options.count("version")) {
+    if (static_cast<bool>(Options.count("version"))) {
       std::exit(EXIT_SUCCESS);
     }
 
-    if (Options.count("copyright")) {
+    if (static_cast<bool>(Options.count("copyright"))) {
       printCopyright();
       std::exit(EXIT_SUCCESS);
     }
 
-    if (Options.count("warranty")) {
+    if (static_cast<bool>(Options.count("warranty"))) {
       printWarranty();
       std::exit(EXIT_SUCCESS);
     }
@@ -296,12 +296,12 @@ Config::Config(int Argc, const char** Argv) {
                              << "This is free software, and you are welcome to redistribute it\n"
                              << "under certain conditions; run `" << ExecutableName << " -c` for details.\n";
 
-    if (Options.count("help")) {
+    if (static_cast<bool>(Options.count("help"))) {
       auto Section = Options["help"].as<std::string>();
 
       // section not found
       auto FindSection = [&](std::pair<std::string, std::string> const& Pair) { return Pair.first == Section; };
-      if (std::find_if(OptionsMap.begin(), OptionsMap.end(), FindSection) == OptionsMap.end() && Section.size() != 0) {
+      if (std::find_if(OptionsMap.begin(), OptionsMap.end(), FindSection) == OptionsMap.end() && !Section.empty()) {
         throw std::invalid_argument("Section \"" + Section + "\" not found in help.");
       }
 
@@ -317,15 +317,15 @@ Config::Config(int Argc, const char** Argv) {
       throw std::invalid_argument("Option -l/--load may not be above 100.");
     }
 
-    ErrorDetection = Options.count("error-detection");
+    ErrorDetection = static_cast<bool>(Options.count("error-detection"));
     if (ErrorDetection && LoadPercent != 100) {
       throw std::invalid_argument("Option --error-detection may only be used "
                                   "with -l/--load equal 100.");
     }
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
-    AllowUnavailablePayload = Options.count("allow-unavailable-payload");
-    DumpRegisters = Options.count("dump-registers");
+    AllowUnavailablePayload = static_cast<bool>(Options.count("allow-unavailable-payload"));
+    DumpRegisters = static_cast<bool>(Options.count("dump-registers"));
     if (DumpRegisters) {
       DumpRegistersTimeDelta = std::chrono::seconds(Options["dump-registers"].as<unsigned>());
       if (Timeout != std::chrono::microseconds::zero() && LoadPercent != 100) {
@@ -367,13 +367,13 @@ Config::Config(int Argc, const char** Argv) {
     Gpus = Options["gpus"].as<int>();
 #endif
 
-    PrintFunctionSummary = Options.count("avail");
+    PrintFunctionSummary = static_cast<bool>(Options.count("avail"));
 
     FunctionId = Options["function"].as<unsigned>();
 
-    ListInstructionGroups = Options.count("list-instruction-groups");
+    ListInstructionGroups = static_cast<bool>(Options.count("list-instruction-groups"));
     InstructionGroups = Options["run-instruction-groups"].as<std::string>();
-    if (Options.count("set-line-count")) {
+    if (static_cast<bool>(Options.count("set-line-count"))) {
       LineCount = Options["set-line-count"].as<unsigned>();
     }
 
@@ -384,13 +384,14 @@ Config::Config(int Argc, const char** Argv) {
 #ifndef FIRESTARTER_LINK_STATIC
     MetricPaths = Options["metric-path"].as<std::vector<std::string>>();
 #endif
-    if (Options.count("metric-from-stdin")) {
+    if (static_cast<bool>(Options.count("metric-from-stdin"))) {
       StdinMetrics = Options["metric-from-stdin"].as<std::vector<std::string>>();
     }
-    Measurement = Options.count("measurement");
-    ListMetrics = Options.count("list-metrics");
+    Measurement = static_cast<bool>(Options.count("measurement"));
+    ListMetrics = static_cast<bool>(Options.count("list-metrics"));
+    Optimize = static_cast<bool>(Options.count("optimize"));
 
-    if ((Optimize = Options.count("optimize"))) {
+    if (Optimize) {
       if (ErrorDetection) {
         throw std::invalid_argument("Options --error-detection and --optimize "
                                     "cannot be used together.");
@@ -400,7 +401,7 @@ Config::Config(int Argc, const char** Argv) {
       }
       Preheat = std::chrono::seconds(Options["preheat"].as<unsigned>());
       OptimizationAlgorithm = Options["optimize"].as<std::string>();
-      if (Options.count("optimization-metric")) {
+      if (static_cast<bool>(Options.count("optimization-metric"))) {
         OptimizationMetrics = Options["optimization-metric"].as<std::vector<std::string>>();
       }
       if (LoadPercent != 100) {
@@ -414,7 +415,7 @@ Config::Config(int Argc, const char** Argv) {
       // this will deactivate the watchdog worker
       Timeout = std::chrono::seconds::zero();
       Individuals = Options["individuals"].as<unsigned>();
-      if (Options.count("optimize-outfile")) {
+      if (static_cast<bool>(Options.count("optimize-outfile"))) {
         OptimizeOutfile = Options["optimize-outfile"].as<std::string>();
       }
       Generations = Options["generations"].as<unsigned>();

From 962c0da81a12f24c4d0628eba776e2f7f09fc592 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 5 Oct 2024 21:13:31 +0200
Subject: [PATCH 026/167] clang-tidy fixes

---
 .../firestarter/Environment/CPUTopology.hpp   |   2 +-
 .../firestarter/Environment/Environment.hpp   |   9 +-
 .../Environment/X86/Payload/X86Payload.hpp    |   2 +-
 include/firestarter/Firestarter.hpp           |   2 +-
 .../Measurement/MeasurementWorker.hpp         |   7 +-
 .../firestarter/Measurement/MetricInterface.h |   3 +-
 include/firestarter/OneAPI/OneAPI.hpp         |   2 +-
 include/firestarter/Optimizer/History.hpp     |   4 +-
 .../firestarter/Optimizer/OptimizerWorker.hpp |   4 +-
 .../Optimizer/Problem/CLIArgumentProblem.hpp  |  12 +-
 src/firestarter/Environment/Environment.cpp   | 108 +++++++++---------
 .../Environment/Payload/Payload.cpp           |   2 +-
 .../Environment/X86/Payload/X86Payload.cpp    |  14 +--
 .../Environment/X86/X86Environment.cpp        |   2 +-
 src/firestarter/Firestarter.cpp               |   4 +-
 .../Measurement/MeasurementWorker.cpp         | 102 ++++++++---------
 src/firestarter/Measurement/Metric/Perf.cpp   |  22 ++--
 src/firestarter/Measurement/Metric/RAPL.cpp   |  10 +-
 src/firestarter/Optimizer/OptimizerWorker.cpp |   4 +-
 src/firestarter/Optimizer/Population.cpp      |   2 +-
 .../Optimizer/Util/MultiObjective.cpp         |   8 +-
 21 files changed, 161 insertions(+), 164 deletions(-)

diff --git a/include/firestarter/Environment/CPUTopology.hpp b/include/firestarter/Environment/CPUTopology.hpp
index c58933db..a7ac4681 100644
--- a/include/firestarter/Environment/CPUTopology.hpp
+++ b/include/firestarter/Environment/CPUTopology.hpp
@@ -79,7 +79,7 @@ class CPUTopology {
   std::string ProcessorName;
   unsigned InstructionCacheSize = 0;
   uint64_t Clockrate = 0;
-  hwloc_topology_t Topology;
+  hwloc_topology_t Topology{};
 };
 
 } // namespace firestarter::environment
diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index 57e60094..9be6c374 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -36,7 +36,7 @@ class Environment {
       : Topology(std::move(Topology)) {}
   virtual ~Environment() { delete SelectedConfig; }
 
-  auto evaluateCpuAffinity(unsigned RequestedNumThreads, std::string CpuBind) -> int;
+  auto evaluateCpuAffinity(unsigned RequestedNumThreads, const std::string& CpuBind) -> int;
   auto setCpuAffinity(unsigned Thread) -> int;
   void printThreadSummary();
 
@@ -77,9 +77,10 @@ class Environment {
 private:
   uint64_t RequestedNumThreads = 0;
 
-  // TODO: replace these functions with the builtins one from hwloc
-  auto cpuAllowed(unsigned Id) -> int;
-  auto cpuSet(unsigned Id) -> int;
+  // TODO: replace these functions with the builtins one from hwloc
+  static auto cpuAllowed(unsigned Id) -> int;
+  static auto cpuSet(unsigned Id) -> int;
+  auto addCpuSet(unsigned Cpu, cpu_set_t& Mask) const -> bool;
 
   std::vector<unsigned> CpuBind;
 };
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index 951aad2f..924a3861 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -494,7 +494,7 @@ class X86Payload : public environment::payload::Payload {
 #endif
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Woverloaded-virtual"
-  void init(double* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue);
+  static void init(double* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue);
 #pragma GCC diagnostic pop
 #if defined(__clang__)
 #pragma clang diagnostic pop
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 18f353ff..5ebb885f 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -86,7 +86,7 @@ class Firestarter {
   const char** Argv;
   const std::chrono::seconds Timeout;
   const unsigned LoadPercent;
-  std::chrono::microseconds Load;
+  std::chrono::microseconds Load{};
   std::chrono::microseconds Period;
   const bool DumpRegisters;
   const std::chrono::seconds DumpRegistersTimeDelta;
diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp
index 0205bc03..019c6753 100644
--- a/include/firestarter/Measurement/MeasurementWorker.hpp
+++ b/include/firestarter/Measurement/MeasurementWorker.hpp
@@ -41,8 +41,8 @@ namespace firestarter::measurement {
 
 class MeasurementWorker {
 private:
-  pthread_t WorkerThread;
-  pthread_t StdinThread;
+  pthread_t WorkerThread{};
+  pthread_t StdinThread{};
 
   std::vector<const MetricInterface*> Metrics = {&RaplMetric, &PerfIpcMetric, &PerfFreqMetric, &IpcEstimateMetric};
 
@@ -73,7 +73,8 @@ class MeasurementWorker {
 public:
   // creates the worker thread
   MeasurementWorker(std::chrono::milliseconds UpdateInterval, uint64_t NumThreads,
-                    std::vector<std::string> const& MetricDylibs, std::vector<std::string> const& StdinMetrics);
+                    std::vector<std::string> const& MetricDylibsNames,
+                    std::vector<std::string> const& StdinMetricsNames);
 
   // stops the worker threads
   ~MeasurementWorker();
diff --git a/include/firestarter/Measurement/MetricInterface.h b/include/firestarter/Measurement/MetricInterface.h
index 87352868..6d738661 100644
--- a/include/firestarter/Measurement/MetricInterface.h
+++ b/include/firestarter/Measurement/MetricInterface.h
@@ -25,8 +25,7 @@
 extern "C" {
 #endif
 
-#include <stdint.h>
-
+#include <cstdint>
 // NOLINTBEGIN(modernize-use-using)
 typedef struct {
   uint32_t
diff --git a/include/firestarter/OneAPI/OneAPI.hpp b/include/firestarter/OneAPI/OneAPI.hpp
index f6931e4d..603d6df7 100644
--- a/include/firestarter/OneAPI/OneAPI.hpp
+++ b/include/firestarter/OneAPI/OneAPI.hpp
@@ -33,7 +33,7 @@ class OneAPI {
   std::condition_variable WaitForInitCv;
   std::mutex WaitForInitCvMutex;
 
-  static void initGpus(std::condition_variable& Cv, volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble,
+  static void initGpus(std::condition_variable& Cv, const volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble,
                        unsigned MatrixSize, int Gpus);
 
 public:
diff --git a/include/firestarter/Optimizer/History.hpp b/include/firestarter/Optimizer/History.hpp
index 8c573d72..21d969f5 100644
--- a/include/firestarter/Optimizer/History.hpp
+++ b/include/firestarter/Optimizer/History.hpp
@@ -73,7 +73,7 @@ struct History {
 
   static auto find(std::vector<unsigned> const& Individual)
       -> std::optional<std::map<std::string, firestarter::measurement::Summary>> {
-    auto FindEqual = [Individual](auto const& ind) { return ind == Individual; };
+    auto FindEqual = [&Individual](auto const& Ind) { return Ind == Individual; };
     auto Ind = std::find_if(X.begin(), X.end(), FindEqual);
     if (Ind == X.end()) {
       return {};
@@ -126,7 +126,7 @@ struct History {
             continue;
           }
 
-          if (Result.size() != 0) {
+          if (!Result.empty()) {
             Result += ",";
           }
           Result += PayloadItems[I] + ":" + std::to_string(Individual[I]);
diff --git a/include/firestarter/Optimizer/OptimizerWorker.hpp b/include/firestarter/Optimizer/OptimizerWorker.hpp
index f6c3a37f..8fe35abc 100644
--- a/include/firestarter/Optimizer/OptimizerWorker.hpp
+++ b/include/firestarter/Optimizer/OptimizerWorker.hpp
@@ -38,9 +38,9 @@ class OptimizerWorker {
 
   ~OptimizerWorker() = default;
 
-  void join();
+  void join() const;
 
-  void kill();
+  void kill() const;
 
 private:
   static auto optimizerThread(void* OptimizerWorker) -> void*;
diff --git a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
index 64373716..7b43e5e4 100644
--- a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
+++ b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
@@ -34,18 +34,18 @@ namespace firestarter::optimizer::problem {
 class CLIArgumentProblem final : public firestarter::optimizer::Problem {
 public:
   CLIArgumentProblem(std::function<void(std::vector<std::pair<std::string, unsigned>> const&)>&& ChangePayloadFunction,
-                     std::shared_ptr<firestarter::measurement::MeasurementWorker> const& MeasurementWorker,
+                     std::shared_ptr<firestarter::measurement::MeasurementWorker> MeasurementWorker,
                      std::vector<std::string> const& Metrics, std::chrono::seconds Timeout,
                      std::chrono::milliseconds StartDelta, std::chrono::milliseconds StopDelta,
-                     std::vector<std::string> const& InstructionGroups)
+                     std::vector<std::string> InstructionGroups)
       : ChangePayloadFunction(std::move(ChangePayloadFunction))
-      , MeasurementWorker(MeasurementWorker)
+      , MeasurementWorker(std::move(MeasurementWorker))
       , Metrics(Metrics)
       , Timeout(Timeout)
       , StartDelta(StartDelta)
       , StopDelta(StopDelta)
-      , InstructionGroups(InstructionGroups) {
-    assert(Metrics.size() != 0);
+      , InstructionGroups(std::move(InstructionGroups)) {
+    assert(!Metrics.empty());
   }
 
   ~CLIArgumentProblem() override = default;
@@ -90,7 +90,7 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem {
     for (auto const& MetricName : Metrics) {
       auto FindName = [MetricName](auto const& Summary) {
         auto InvertedName = "-" + Summary.first;
-        return MetricName.compare(Summary.first) == 0 || MetricName.compare(InvertedName) == 0;
+        return MetricName == Summary.first || MetricName == InvertedName;
       };
 
       auto It = std::find_if(Summaries.begin(), Summaries.end(), FindName);
diff --git a/src/firestarter/Environment/Environment.cpp b/src/firestarter/Environment/Environment.cpp
index 67e62d9d..ab9b0295 100644
--- a/src/firestarter/Environment/Environment.cpp
+++ b/src/firestarter/Environment/Environment.cpp
@@ -19,10 +19,8 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <algorithm>
 #include <firestarter/Environment/Environment.hpp>
 #include <firestarter/Logging/Log.hpp>
-
 #include <regex>
 #include <string>
 
@@ -34,27 +32,6 @@ extern "C" {
 #include <sched.h>
 }
 
-// this code is from the C version of FIRESTARTER
-// TODO: replace this with cpu affinity of hwloc
-#define ADD_CPU_SET(cpu, cpuset)                                                                                       \
-  do {                                                                                                                 \
-    if (this->cpuAllowed(cpu)) {                                                                                       \
-      CPU_SET(cpu, &cpuset);                                                                                           \
-    } else {                                                                                                           \
-      if (cpu >= this->topology().numThreads()) {                                                                      \
-        log::error() << "The given bind argument (-b/--bind) includes CPU " << cpu                                     \
-                     << " that is not available on this system.";                                                      \
-      } else {                                                                                                         \
-        log::error() << "The given bind argument (-b/--bind) cannot "                                                  \
-                        "be implemented with the cpuset given from the OS\n"                                           \
-                     << "This can be caused by the taskset tool, cgroups, "                                            \
-                        "the batch system, or similar mechanisms.\n"                                                   \
-                     << "Please fix the argument to match the restrictions.";                                          \
-      }                                                                                                                \
-      return EACCES;                                                                                                   \
-    }                                                                                                                  \
-  } while (0)
-
 auto Environment::cpuSet(unsigned Id) -> int {
   cpu_set_t Mask;
 
@@ -75,14 +52,32 @@ auto Environment::cpuAllowed(unsigned Id) -> int {
 
   return 0;
 }
+
+auto Environment::addCpuSet(unsigned Cpu, cpu_set_t& Mask) const -> bool {
+  if (cpuAllowed(Cpu)) {
+    CPU_SET(Cpu, &Mask);
+    return true;
+  }
+  if (Cpu >= topology().numThreads()) {
+    log::error() << "The given bind argument (-b/--bind) includes CPU " << std::to_string(Cpu)
+                 << " that is not available on this system.";
+  } else {
+    log::error() << "The given bind argument (-b/--bind) cannot "
+                    "be implemented with the cpuset given from the OS\n"
+                 << "This can be caused by the taskset tool, cgroups, "
+                    "the batch system, or similar mechanisms.\n"
+                 << "Please fix the argument to match the restrictions.";
+  }
+  return false;
+}
 #endif
 
-auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, std::string cpuBind) -> int {
+auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::string& CpuBind) -> int {
 #if not((defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY))
-  (void)cpuBind;
+  (void)CpuBind;
 #endif
 
-  if (RequestedNumThreads > 0 && RequestedNumThreads > this->topology().numThreads()) {
+  if (RequestedNumThreads > 0 && RequestedNumThreads > topology().numThreads()) {
     log::warn() << "Not enough CPUs for requested number of threads";
   }
 
@@ -91,13 +86,13 @@ auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, std::string
 
   CPU_ZERO(&Cpuset);
 
-  if (cpuBind.empty()) {
+  if (CpuBind.empty()) {
     // no cpu binding defined
 
     // use all CPUs if not defined otherwise
     if (RequestedNumThreads == 0) {
-      for (unsigned I = 0; I < this->topology().maxNumThreads(); I++) {
-        if (this->cpuAllowed(I)) {
+      for (unsigned I = 0; I < topology().maxNumThreads(); I++) {
+        if (cpuAllowed(I)) {
           CPU_SET(I, &Cpuset);
           RequestedNumThreads++;
         }
@@ -105,12 +100,14 @@ auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, std::string
     } else {
       // if -n / --threads is set
       unsigned CpuCount = 0;
-      for (unsigned I = 0; I < this->topology().maxNumThreads(); I++) {
+      for (unsigned I = 0; I < topology().maxNumThreads(); I++) {
         // skip if cpu is not available
-        if (!this->cpuAllowed(I)) {
+        if (!cpuAllowed(I)) {
           continue;
         }
-        ADD_CPU_SET(I, Cpuset);
+        if (!addCpuSet(I, Cpuset)) {
+          return EACCES;
+        }
         CpuCount++;
         // we reached the desired amounts of threads
         if (CpuCount >= RequestedNumThreads) {
@@ -130,20 +127,19 @@ auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, std::string
     }
   } else {
     // parse CPULIST for binding
-    const std::string Delimiter = ",";
+    const auto Delimiter = ',';
     const std::regex Re(R"(^(?:(\d+)(?:-([1-9]\d*)(?:\/([1-9]\d*))?)?)$)");
 
-    std::stringstream Ss(cpuBind);
+    std::stringstream Ss(CpuBind);
 
     while (Ss.good()) {
       std::string Token;
       std::smatch M;
-      std::getline(Ss, Token, ',');
-      ;
+      std::getline(Ss, Token, Delimiter);
 
       if (std::regex_match(Token, M, Re)) {
-        unsigned long Y;
-        unsigned long S;
+        unsigned long Y = 0;
+        unsigned long S = 0;
 
         unsigned long X = std::stoul(M[1].str());
         if (M[2].matched) {
@@ -160,8 +156,10 @@ auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, std::string
           log::error() << "y has to be >= x in x-y expressions of CPU list: " << Token;
           return EXIT_FAILURE;
         }
-        for (unsigned long I = X; I <= Y; I += S) {
-          ADD_CPU_SET(I, Cpuset);
+        for (auto I = X; I <= Y; I += S) {
+          if (!addCpuSet(I, Cpuset)) {
+            return EACCES;
+          }
           RequestedNumThreads++;
         }
       } else {
@@ -171,8 +169,8 @@ auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, std::string
     }
   }
 #else
-  if (requestedNumThreads == 0) {
-    requestedNumThreads = this->topology().maxNumThreads();
+  if (RequestedNumThreads == 0) {
+    RequestedNumThreads = topology().maxNumThreads();
   }
 #endif
 
@@ -180,41 +178,39 @@ auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, std::string
     log::error() << "Found no usable CPUs!";
     return 127;
   }
+
 #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
-  for (unsigned I = 0; I < this->topology().maxNumThreads(); I++) {
+  for (unsigned I = 0; I < topology().maxNumThreads(); I++) {
     if (CPU_ISSET(I, &Cpuset)) {
       this->CpuBind.push_back(I);
     }
   }
-
 #endif
 
-  RequestedNumThreads = std::min(RequestedNumThreads, this->topology().maxNumThreads());
-
-  this->RequestedNumThreads = RequestedNumThreads;
+  this->RequestedNumThreads = std::min(RequestedNumThreads, topology().maxNumThreads());
 
   return EXIT_SUCCESS;
 }
 
 void Environment::printThreadSummary() {
-  log::info() << "\n  using " << this->requestedNumThreads() << " threads";
+  log::info() << "\n  using " << requestedNumThreads() << " threads";
 
 #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
   bool PrintCoreIdInfo = false;
-  size_t i = 0;
+  size_t I = 0;
 
   std::vector<unsigned> CpuBind(this->CpuBind);
-  CpuBind.resize(this->requestedNumThreads());
+  CpuBind.resize(requestedNumThreads());
   for (auto const& Bind : CpuBind) {
-    int CoreId = this->topology().getCoreIdFromPU(Bind);
-    int PkgId = this->topology().getPkgIdFromPU(Bind);
+    int CoreId = topology().getCoreIdFromPU(Bind);
+    int PkgId = topology().getPkgIdFromPU(Bind);
 
     if (CoreId != -1 && PkgId != -1) {
-      log::info() << "    - Thread " << i << " run on CPU " << Bind << ", core " << CoreId << " in package: " << PkgId;
+      log::info() << "    - Thread " << I << " run on CPU " << Bind << ", core " << CoreId << " in package: " << PkgId;
       PrintCoreIdInfo = true;
     }
 
-    i++;
+    I++;
   }
 
   if (PrintCoreIdInfo) {
@@ -224,13 +220,13 @@ void Environment::printThreadSummary() {
 }
 
 auto Environment::setCpuAffinity(unsigned Thread) -> int {
-  if (Thread >= this->requestedNumThreads()) {
+  if (Thread >= requestedNumThreads()) {
     log::error() << "Trying to set more CPUs than available.";
     return EXIT_FAILURE;
   }
 
 #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
-  this->cpuSet(this->CpuBind.at(Thread));
+  cpuSet(CpuBind.at(Thread));
 #endif
 
   return EXIT_SUCCESS;
diff --git a/src/firestarter/Environment/Payload/Payload.cpp b/src/firestarter/Environment/Payload/Payload.cpp
index 39c0e6a2..c7ced50b 100644
--- a/src/firestarter/Environment/Payload/Payload.cpp
+++ b/src/firestarter/Environment/Payload/Payload.cpp
@@ -46,7 +46,7 @@ auto Payload::generateSequence(std::vector<std::pair<std::string, unsigned>> con
 
   std::vector<std::string> Sequence = {};
 
-  if (Prop.size() == 0) {
+  if (Prop.empty()) {
     return Sequence;
   }
 
diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index 473041c5..1a169df2 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -69,16 +69,16 @@ void X86Payload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, uint64_t
 }
 
 void X86Payload::init(double* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue) {
-  uint64_t i = 0;
+  uint64_t I = 0;
 
-  for (; i < InitBlocksize; i++) {
-    MemoryAddr[i] = 0.25 + static_cast<double>(i) * 8.0 * FirstValue;
+  for (; I < InitBlocksize; I++) {
+    MemoryAddr[I] = 0.25 + static_cast<double>(I) * 8.0 * FirstValue;
   }
-  for (; i <= BufferSize - InitBlocksize; i += InitBlocksize) {
-    std::memcpy(MemoryAddr + i, MemoryAddr + i - InitBlocksize, sizeof(uint64_t) * InitBlocksize);
+  for (; I <= BufferSize - InitBlocksize; I += InitBlocksize) {
+    std::memcpy(MemoryAddr + I, MemoryAddr + I - InitBlocksize, sizeof(uint64_t) * InitBlocksize);
   }
-  for (; i < BufferSize; i++) {
-    MemoryAddr[i] = 0.25 + static_cast<double>(i) * 8.0 * LastValue;
+  for (; I < BufferSize; I++) {
+    MemoryAddr[I] = 0.25 + static_cast<double>(I) * 8.0 * LastValue;
   }
 }
 
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index 2f24b683..3d3a70e8 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -175,7 +175,7 @@ void X86Environment::printAvailableInstructionGroups() {
   }
 
   auto S = Ss.str();
-  if (S.size() > 0) {
+  if (!S.empty()) {
     S.pop_back();
   }
 
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 25cdc374..68e07b0d 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -159,7 +159,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
     auto All = MeasurementWorker->metricNames();
     auto Initialized = MeasurementWorker->initMetrics(All);
 
-    if (Initialized.size() == 0) {
+    if (Initialized.empty()) {
       log::error() << "No metrics initialized";
       std::exit(EXIT_FAILURE);
     }
@@ -168,7 +168,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
     for (auto const& OptimizationMetric : OptimizationMetrics) {
       auto NameEqual = [OptimizationMetric](auto const& Name) {
         auto InvertedName = "-" + Name;
-        return Name.compare(OptimizationMetric) == 0 || InvertedName.compare(OptimizationMetric) == 0;
+        return Name == OptimizationMetric || InvertedName == OptimizationMetric;
       };
       // metric name is not found
       if (std::find_if(All.begin(), All.end(), NameEqual) == All.end()) {
diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp
index e6d3305b..f0ae3900 100644
--- a/src/firestarter/Measurement/MeasurementWorker.cpp
+++ b/src/firestarter/Measurement/MeasurementWorker.cpp
@@ -36,8 +36,8 @@ void insertCallback(void* Cls, const char* MetricName, int64_t TimeSinceEpoch, d
 namespace firestarter::measurement {
 
 MeasurementWorker::MeasurementWorker(std::chrono::milliseconds UpdateInterval, uint64_t NumThreads,
-                                     std::vector<std::string> const& MetricDylibs,
-                                     std::vector<std::string> const& StdinMetrics)
+                                     std::vector<std::string> const& MetricDylibsNames,
+                                     std::vector<std::string> const& StdinMetricsNames)
     : UpdateInterval(UpdateInterval)
     , NumThreads(NumThreads) {
 
@@ -45,7 +45,7 @@ MeasurementWorker::MeasurementWorker(std::chrono::milliseconds UpdateInterval, u
   // open dylibs and find metric symbol.
   // create an entry in _metricDylibs with handle from dlopen and
   // metric_interface_t structure. add this structe as a pointer to metrics.
-  for (auto const& Dylib : MetricDylibs) {
+  for (auto const& Dylib : MetricDylibsNames) {
     void* Handle = nullptr;
     const char* Filename = Dylib.c_str();
 
@@ -70,35 +70,35 @@ MeasurementWorker::MeasurementWorker(std::chrono::milliseconds UpdateInterval, u
       continue;
     }
 
-    if (this->findMetricByName(Metric->Name) != nullptr) {
+    if (findMetricByName(Metric->Name) != nullptr) {
       firestarter::log::error() << "A metric named \"" << Metric->Name << "\" is already loaded.";
       dlclose(Handle);
       continue;
     }
 
     // lets push our metric object and the handle
-    this->MetricDylibs.push_back(Handle);
-    this->Metrics.push_back(Metric);
+    MetricDylibs.push_back(Handle);
+    Metrics.push_back(Metric);
   }
 #else
   (void)MetricDylibs;
 #endif
 
   // setup metric objects for metric names passed from stdin.
-  for (auto const& Name : StdinMetrics) {
-    if (this->findMetricByName(Name) != nullptr) {
+  for (auto const& Name : StdinMetricsNames) {
+    if (findMetricByName(Name) != nullptr) {
       firestarter::log::error() << "A metric named \"" << Name << "\" is already loaded.";
       continue;
     }
 
-    this->StdinMetrics.push_back(Name);
+    StdinMetrics.push_back(Name);
   }
 
   std::stringstream Ss;
   unsigned MaxLength = 0;
   std::map<std::string, bool> Available;
 
-  for (auto const& Metric : this->Metrics) {
+  for (auto const& Metric : Metrics) {
     std::string Name(Metric->Name);
     MaxLength = MaxLength < Name.size() ? Name.size() : MaxLength;
     auto ReturnCode = Metric->Init();
@@ -114,31 +114,31 @@ MeasurementWorker::MeasurementWorker(std::chrono::milliseconds UpdateInterval, u
     Ss << (value ? "yes" : "no") << "\n";
   }
 
-  this->AvailableMetricsString = Ss.str();
+  AvailableMetricsString = Ss.str();
 
-  pthread_create(&this->WorkerThread, nullptr,
-                 reinterpret_cast<void* (*)(void*)>(MeasurementWorker::dataAcquisitionWorker), this);
+  pthread_create(&WorkerThread, nullptr, reinterpret_cast<void* (*)(void*)>(MeasurementWorker::dataAcquisitionWorker),
+                 this);
 
   // create a worker for getting metric values from stdin
-  if (this->StdinMetrics.size() > 0) {
-    pthread_create(&this->StdinThread, nullptr,
+  if (!StdinMetrics.empty()) {
+    pthread_create(&StdinThread, nullptr,
                    reinterpret_cast<void* (*)(void*)>(MeasurementWorker::stdinDataAcquisitionWorker), this);
   }
 }
 
 MeasurementWorker::~MeasurementWorker() {
-  pthread_cancel(this->WorkerThread);
+  pthread_cancel(WorkerThread);
 
-  pthread_join(this->WorkerThread, nullptr);
+  pthread_join(WorkerThread, nullptr);
 
-  if (this->StdinMetrics.size() > 0) {
-    pthread_cancel(this->StdinThread);
+  if (!StdinMetrics.empty()) {
+    pthread_cancel(StdinThread);
 
-    pthread_join(this->StdinThread, nullptr);
+    pthread_join(StdinThread, nullptr);
   }
 
-  for (auto const& [key, value] : this->Values) {
-    const auto* Metric = this->findMetricByName(key);
+  for (auto const& [key, value] : Values) {
+    const auto* Metric = findMetricByName(key);
     if (Metric == nullptr) {
       continue;
     }
@@ -147,29 +147,29 @@ MeasurementWorker::~MeasurementWorker() {
   }
 
 #ifndef FIRESTARTER_LINK_STATIC
-  for (auto* Handle : this->MetricDylibs) {
+  for (auto* Handle : MetricDylibs) {
     dlclose(Handle);
   }
 #endif
 }
 
 auto MeasurementWorker::metricNames() -> std::vector<std::string> {
-  std::vector<std::string> Metrics;
-  std::transform(this->Metrics.begin(), this->Metrics.end(), std::back_inserter(Metrics),
+  std::vector<std::string> MetricNames;
+  std::transform(Metrics.begin(), Metrics.end(), std::back_inserter(MetricNames),
                  [](auto& Metric) -> std::string { return std::string(Metric->Name); });
-  for (auto const& Name : this->StdinMetrics) {
-    Metrics.push_back(Name);
+  for (auto const& Name : StdinMetrics) {
+    MetricNames.push_back(Name);
   }
 
-  return Metrics;
+  return MetricNames;
 }
 
 auto MeasurementWorker::findMetricByName(std::string MetricName) -> const MetricInterface* {
-  auto NameEqual = [MetricName](auto& MetricInterface) { return MetricName.compare(MetricInterface->Name) == 0; };
-  auto Metric = std::find_if(this->Metrics.begin(), this->Metrics.end(), NameEqual);
+  auto NameEqual = [&MetricName](auto& MetricInterface) { return MetricName == MetricInterface->Name; };
+  auto Metric = std::find_if(Metrics.begin(), Metrics.end(), NameEqual);
 
   // metric not found
-  if (Metric == this->Metrics.end()) {
+  if (Metric == Metrics.end()) {
     return nullptr;
   }
   // metric found
@@ -179,19 +179,19 @@ auto MeasurementWorker::findMetricByName(std::string MetricName) -> const Metric
 // this must be called by the main thread.
 // if not done so things like perf_event_attr.inherit might not work as expected
 auto MeasurementWorker::initMetrics(std::vector<std::string> const& MetricNames) -> std::vector<std::string> {
-  this->ValuesMutex.lock();
+  ValuesMutex.lock();
 
   std::vector<std::string> Initialized = {};
 
   // try to find each metric and initialize it
   for (auto const& MetricName : MetricNames) {
     // init values map with empty vector
-    auto NameEqual = [MetricName](auto const& Pair) { return MetricName.compare(Pair.first) == 0; };
-    auto Pair = std::find_if(this->Values.begin(), this->Values.end(), NameEqual);
-    if (Pair != this->Values.end()) {
+    auto NameEqual = [&MetricName](auto const& Pair) { return MetricName == Pair.first; };
+    auto Pair = std::find_if(Values.begin(), Values.end(), NameEqual);
+    if (Pair != Values.end()) {
       Pair->second.clear();
     } else {
-      const auto* Metric = this->findMetricByName(MetricName);
+      const auto* Metric = findMetricByName(MetricName);
       if (Metric != nullptr) {
         int ReturnValue = Metric->Init();
         if (ReturnValue != EXIT_SUCCESS) {
@@ -199,7 +199,7 @@ auto MeasurementWorker::initMetrics(std::vector<std::string> const& MetricNames)
           continue;
         }
       }
-      this->Values[MetricName] = std::vector<TimeValue>();
+      Values[MetricName] = std::vector<TimeValue>();
       if (Metric != nullptr) {
         if (Metric->Type.InsertCallback) {
           Metric->RegisterInsertCallback(::insertCallback, this);
@@ -209,38 +209,38 @@ auto MeasurementWorker::initMetrics(std::vector<std::string> const& MetricNames)
     }
   }
 
-  this->ValuesMutex.unlock();
+  ValuesMutex.unlock();
 
   return Initialized;
 }
 
 void MeasurementWorker::insertCallback(const char* MetricName, int64_t TimeSinceEpoch, double Value) {
-  this->ValuesMutex.lock();
+  ValuesMutex.lock();
 
   using Duration = std::chrono::duration<int64_t, std::nano>;
   auto Time = std::chrono::time_point<std::chrono::high_resolution_clock, Duration>(Duration(TimeSinceEpoch));
-  auto NameEqual = [MetricName](auto const& Pair) { return std::string(MetricName).compare(Pair.first) == 0; };
-  auto Pair = std::find_if(this->Values.begin(), this->Values.end(), NameEqual);
+  auto NameEqual = [&MetricName](auto const& Pair) { return std::string(MetricName) == Pair.first; };
+  auto Pair = std::find_if(Values.begin(), Values.end(), NameEqual);
 
-  if (Pair != this->Values.end()) {
+  if (Pair != Values.end()) {
     Pair->second.emplace_back(Time, Value);
   }
 
-  this->ValuesMutex.unlock();
+  ValuesMutex.unlock();
 }
 
-void MeasurementWorker::startMeasurement() { this->StartTime = std::chrono::high_resolution_clock::now(); }
+void MeasurementWorker::startMeasurement() { StartTime = std::chrono::high_resolution_clock::now(); }
 
 auto MeasurementWorker::getValues(std::chrono::milliseconds StartDelta, std::chrono::milliseconds StopDelta)
     -> std::map<std::string, Summary> {
   std::map<std::string, Summary> Measurment = {};
 
-  this->ValuesMutex.lock();
+  ValuesMutex.lock();
 
-  for (auto& [key, values] : this->Values) {
+  for (auto& [key, values] : Values) {
     auto StartTime = this->StartTime;
     auto EndTime = std::chrono::high_resolution_clock::now();
-    const auto* Metric = this->findMetricByName(key);
+    const auto* Metric = findMetricByName(key);
 
     MetricType Type;
     std::memset(&Type, 0, sizeof(Type));
@@ -260,16 +260,16 @@ auto MeasurementWorker::getValues(std::chrono::milliseconds StartDelta, std::chr
 
     decltype(values) CroppedValues(values.size());
 
-    auto FindAll = [StartTime, EndTime](auto const& Tv) { return StartTime <= Tv.Time && Tv.Time <= EndTime; };
+    auto FindAll = [&StartTime, &EndTime](auto const& Tv) { return StartTime <= Tv.Time && Tv.Time <= EndTime; };
     auto It = std::copy_if(values.begin(), values.end(), CroppedValues.begin(), FindAll);
     CroppedValues.resize(std::distance(CroppedValues.begin(), It));
 
-    Summary Sum = Summary::calculate(CroppedValues.begin(), CroppedValues.end(), Type, this->NumThreads);
+    Summary Sum = Summary::calculate(CroppedValues.begin(), CroppedValues.end(), Type, NumThreads);
 
     Measurment[key] = Sum;
   }
 
-  this->ValuesMutex.unlock();
+  ValuesMutex.unlock();
 
   return Measurment;
 }
@@ -388,7 +388,7 @@ auto MeasurementWorker::stdinDataAcquisitionWorker(void* MeasurementWorker) -> i
     double Value = NAN;
     char Name[128];
     if (std::sscanf(Line.c_str(), "%127s %ld %lf", Name, &Time, &Value) == 3) {
-      auto NameEqual = [Name](auto const& AllowedName) { return AllowedName.compare(std::string(Name)) == 0; };
+      auto NameEqual = [&Name](auto const& AllowedName) { return AllowedName == std::string(Name); };
       auto Item = std::find_if(This->stdinMetrics().begin(), This->stdinMetrics().end(), NameEqual);
       // metric name is allowed
       if (Item != This->stdinMetrics().end()) {
diff --git a/src/firestarter/Measurement/Metric/Perf.cpp b/src/firestarter/Measurement/Metric/Perf.cpp
index d49cc0a0..3ce749c0 100644
--- a/src/firestarter/Measurement/Metric/Perf.cpp
+++ b/src/firestarter/Measurement/Metric/Perf.cpp
@@ -134,17 +134,17 @@ static auto init() -> int32_t {
 
   ioctl(CpuCyclesFd, PERF_EVENT_IOC_ID, &CpuCyclesId);
 
-  struct perf_event_attr instructions_attr;
-  std::memset(&instructions_attr, 0, sizeof(struct perf_event_attr));
-  instructions_attr.type = PERF_TYPE_HARDWARE;
-  instructions_attr.size = sizeof(struct perf_event_attr);
-  instructions_attr.config = PERF_COUNT_HW_INSTRUCTIONS;
-  instructions_attr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
-  instructions_attr.inherit = 1;
-  instructions_attr.exclude_kernel = 1;
-  instructions_attr.exclude_hv = 1;
-
-  if ((InstructionsFd = perfEventOpen(&instructions_attr,
+  struct perf_event_attr InstructionsAttr {};
+  std::memset(&InstructionsAttr, 0, sizeof(struct perf_event_attr));
+  InstructionsAttr.type = PERF_TYPE_HARDWARE;
+  InstructionsAttr.size = sizeof(struct perf_event_attr);
+  InstructionsAttr.config = PERF_COUNT_HW_INSTRUCTIONS;
+  InstructionsAttr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
+  InstructionsAttr.inherit = 1;
+  InstructionsAttr.exclude_kernel = 1;
+  InstructionsAttr.exclude_hv = 1;
+
+  if ((InstructionsFd = perfEventOpen(&InstructionsAttr,
                                       // pid == 0 and cpu == -1
                                       // This measures the calling process/thread on any CPU.
                                       0, -1,
diff --git a/src/firestarter/Measurement/Metric/RAPL.cpp b/src/firestarter/Measurement/Metric/RAPL.cpp
index e9910fe7..b05fa626 100644
--- a/src/firestarter/Measurement/Metric/RAPL.cpp
+++ b/src/firestarter/Measurement/Metric/RAPL.cpp
@@ -115,7 +115,7 @@ static auto init() -> int32_t {
 
   // paths now contains all interesting nodes
 
-  if (Paths.size() == 0) {
+  if (Paths.empty()) {
     errorString = "No valid entries in " + RaplPath;
     return EXIT_FAILURE;
   }
@@ -156,9 +156,9 @@ static auto init() -> int32_t {
     Read = std::sscanf(Buffer.c_str(), "%lu", &Max);
 
     if (Read == 0) {
-      std::stringstream ss;
-      ss << "Contents in file " << MaxEnergyUjRangePath.str() << " do not conform to mask (uint64_t)";
-      errorString = ss.str();
+      std::stringstream Ss;
+      Ss << "Contents in file " << MaxEnergyUjRangePath.str() << " do not conform to mask (uint64_t)";
+      errorString = Ss.str();
       break;
     }
 
@@ -176,7 +176,7 @@ static auto init() -> int32_t {
     Readers.push_back(Def);
   }
 
-  if (errorString.size() != 0) {
+  if (!errorString.empty()) {
     fini();
     return EXIT_FAILURE;
   }
diff --git a/src/firestarter/Optimizer/OptimizerWorker.cpp b/src/firestarter/Optimizer/OptimizerWorker.cpp
index 0e7f235a..8e753f80 100644
--- a/src/firestarter/Optimizer/OptimizerWorker.cpp
+++ b/src/firestarter/Optimizer/OptimizerWorker.cpp
@@ -37,12 +37,12 @@ OptimizerWorker::OptimizerWorker(std::unique_ptr<firestarter::optimizer::Algorit
   pthread_create(&this->WorkerThread, nullptr, OptimizerWorker::optimizerThread, this);
 }
 
-void OptimizerWorker::kill() {
+void OptimizerWorker::kill() const {
   // we ignore ESRCH errno if thread already exited
   pthread_cancel(WorkerThread);
 }
 
-void OptimizerWorker::join() {
+void OptimizerWorker::join() const {
   // we ignore ESRCH errno if thread already exited
   pthread_join(WorkerThread, nullptr);
 }
diff --git a/src/firestarter/Optimizer/Population.cpp b/src/firestarter/Optimizer/Population.cpp
index 901ad5a0..2394f0d2 100644
--- a/src/firestarter/Optimizer/Population.cpp
+++ b/src/firestarter/Optimizer/Population.cpp
@@ -127,7 +127,7 @@ auto Population::bestIndividual() const -> std::optional<Individual> {
   }
 
   // assert that we have individuals
-  assert(this->X.size() > 0);
+  assert(!this->X.empty());
 
   auto Best = std::max_element(this->X.begin(), this->X.end(), [](const auto& A, const auto& B) { return A < B; });
 
diff --git a/src/firestarter/Optimizer/Util/MultiObjective.cpp b/src/firestarter/Optimizer/Util/MultiObjective.cpp
index 9a757b11..59ceedd7 100644
--- a/src/firestarter/Optimizer/Util/MultiObjective.cpp
+++ b/src/firestarter/Optimizer/Util/MultiObjective.cpp
@@ -166,7 +166,7 @@ auto fastNonDominatedSorting(const std::vector<std::vector<double>>& Points)
   auto DomCountCopy(DomCount);
   auto CurrentFront = NonDomFronts[0];
   std::vector<std::vector<std::size_t>>::size_type FrontCounter(0U);
-  while (CurrentFront.size() != 0U) {
+  while (!CurrentFront.empty()) {
     std::vector<std::size_t> NextFront;
     for (const auto& P : CurrentFront) {
       for (const auto& Q : DomList[P]) {
@@ -179,7 +179,7 @@ auto fastNonDominatedSorting(const std::vector<std::vector<double>>& Points)
     }
     ++FrontCounter;
     CurrentFront = NextFront;
-    if (CurrentFront.size() != 0U) {
+    if (!CurrentFront.empty()) {
       NonDomFronts.push_back(CurrentFront);
     }
   }
@@ -366,7 +366,7 @@ auto selectBestNMo(const std::vector<std::vector<double>>& InputF, std::size_t N
   if (N == 0U) { // corner case
     return {};
   }
-  if (InputF.size() == 0U) { // corner case
+  if (InputF.empty()) { // corner case
     return {};
   }
   if (InputF.size() == 1U) { // corner case
@@ -435,7 +435,7 @@ auto selectBestNMo(const std::vector<std::vector<double>>& InputF, std::size_t N
  */
 auto ideal(const std::vector<std::vector<double>>& Points) -> std::vector<double> {
   // Corner case
-  if (Points.size() == 0U) {
+  if (Points.empty()) {
     return {};
   }
 

From 5e04c5de856420312378cc42545b14719478f525 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 5 Oct 2024 22:54:30 +0200
Subject: [PATCH 027/167] cleanup load worker thread

---
 include/firestarter/Firestarter.hpp    | 44 +++++++++++++++++-------
 include/firestarter/LoadWorkerData.hpp | 28 ++++++++++-----
 src/firestarter/Firestarter.cpp        | 44 +++---------------------
 src/firestarter/LoadWorker.cpp         | 47 +++++++++++++++-----------
 4 files changed, 84 insertions(+), 79 deletions(-)

diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 5ebb885f..7760fd59 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -139,8 +139,38 @@ class Firestarter {
   void printThreadErrorReport();
   void printPerformanceReport();
 
+  /// Set the load workers to the ThreadInit state.
+  void signalInit() { signalLoadWorkers(LoadThreadState::ThreadInit); }
+
+  /// Set the load workers to the ThreadWork state.
   void signalWork() { signalLoadWorkers(LoadThreadState::ThreadWork); };
 
+  /// Set the load workers to the ThreadSwitch state.
+  /// \arg Setting The new setting to switch to.
+  void signalSwitch(std::vector<std::pair<std::string, unsigned>> const& Setting) {
+    struct SwitchLoad {
+      static void func() { LoadVar = LoadThreadWorkType::LoadSwitch; };
+    };
+
+    for (auto& Thread : LoadThreads) {
+      auto Td = Thread.second;
+
+      Td->config().setPayloadSettings(Setting);
+    }
+
+    signalLoadWorkers(LoadThreadState::ThreadSwitch, SwitchLoad::func);
+  };
+
+  /// Execute a state change in the load worker threads. This should happen at the same time in all threads. First the
+  /// mutexes of all threads are locked, then the state is updated and we wait until we get an acknowledgement from the
+  /// threads.
+  /// \arg State The new state of the threads.
+  /// \arg Function An optional function that will be executed after the state in all threads has been updated and
+  /// before we wait for the acknowledgement of the thread.
+  void signalLoadWorkers(LoadThreadState State, void (*Function)() = nullptr);
+
+  static void loadThreadWorker(std::shared_ptr<LoadWorkerData> Td);
+
   // WatchdogWorker.cpp
   static auto watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load,
                              std::chrono::seconds Timeout) -> int;
@@ -149,15 +179,9 @@ class Firestarter {
   // DumpRegisterWorker.cpp
   auto initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, const std::string& DumpFilePath) -> int;
   void joinDumpRegisterWorker();
-#endif
-
-  // LoadThreadWorker.cpp
-  void signalLoadWorkers(LoadThreadState State);
-  static void loadThreadWorker(std::shared_ptr<LoadWorkerData> Td);
-
-#ifdef FIRESTARTER_DEBUG_FEATURES
-  // DumpRegisterWorker.cpp
   static void dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Data);
+
+  std::thread DumpRegisterWorkerThread;
 #endif
 
   static void setLoad(LoadThreadWorkType Value);
@@ -176,10 +200,6 @@ class Firestarter {
   std::vector<std::pair<std::thread, std::shared_ptr<LoadWorkerData>>> LoadThreads;
 
   std::vector<std::shared_ptr<uint64_t>> ErrorCommunication;
-
-#ifdef FIRESTARTER_DEBUG_FEATURES
-  std::thread DumpRegisterWorkerThread;
-#endif
 };
 
 } // namespace firestarter
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index ddd181b4..823b17c5 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -93,6 +93,19 @@ struct LoadWorkerMemory {
 
 class LoadWorkerData {
 public:
+  struct Metrics {
+    std::atomic<uint64_t> Iterations{};
+    std::atomic<uint64_t> StartTsc{};
+    std::atomic<uint64_t> StopTsc{};
+
+    auto operator=(const Metrics& Other) -> Metrics& {
+      Iterations.store(Other.Iterations.load());
+      StartTsc.store(Other.StartTsc.load());
+      StopTsc.store(Other.StopTsc.load());
+      return *this;
+    }
+  };
+
   LoadWorkerData(int Id, environment::Environment& Environment, volatile LoadThreadWorkType& LoadVar, uint64_t Period,
                  bool DumpRegisters, bool ErrorDetection)
       : LoadVar(LoadVar)
@@ -137,14 +150,13 @@ class LoadWorkerData {
 
   volatile LoadThreadWorkType& LoadVar;
   uint64_t BuffersizeMem{};
-  uint64_t Iterations = 0;
-  // save the last iteration count when switching payloads
-  std::atomic<uint64_t> LastIterations{};
-  uint64_t Flops{};
-  uint64_t StartTsc{};
-  uint64_t StopTsc{};
-  std::atomic<uint64_t> LastStartTsc{};
-  std::atomic<uint64_t> LastStopTsc{};
+
+  /// The collected metrics from the current execution of the LoadThreadState::ThreadWork state. Do not read from it.
+  Metrics CurrentRun;
+
+  /// The collected metrics from the last execution of the LoadThreadState::ThreadWork state.
+  Metrics LastRun;
+
   // period in usecs
   // used in low load routine to sleep 1/100th of this time
   uint64_t Period;
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 68e07b0d..ba0cdb3b 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -189,59 +189,25 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
           using Clock = std::chrono::high_resolution_clock;
           auto Start = Clock::now();
 
-          for (auto& Thread : LoadThreads) {
-            auto Td = Thread.second;
-
-            Td->config().setPayloadSettings(Setting);
-          }
-
-          for (auto const& Thread : LoadThreads) {
-            auto Td = Thread.second;
-
-            Td->Mutex.lock();
-          }
-
-          for (auto const& Thread : LoadThreads) {
-            auto Td = Thread.second;
-
-            Td->State = LoadThreadState::ThreadSwitch;
-            Td->Mutex.unlock();
-          }
-
-          LoadVar = LoadThreadWorkType::LoadSwitch;
-
-          for (auto const& Thread : LoadThreads) {
-            auto Td = Thread.second;
-            bool Ack = false;
-
-            do {
-              Td->Mutex.lock();
-              Ack = Td->Ack;
-              Td->Mutex.unlock();
-            } while (!Ack);
-
-            Td->Mutex.lock();
-            Td->Ack = false;
-            Td->Mutex.unlock();
-          }
+          signalSwitch(Setting);
 
           LoadVar = LoadThreadWorkType::LoadHigh;
 
           signalWork();
 
-          uint64_t StartTimestamp = 0xffffffffffffffff;
+          uint64_t StartTimestamp = std::numeric_limits<uint64_t>::max();
           uint64_t StopTimestamp = 0;
 
           for (auto const& Thread : LoadThreads) {
             auto Td = Thread.second;
 
-            StartTimestamp = std::min<uint64_t>(StartTimestamp, Td->LastStartTsc);
-            StopTimestamp = std::max<uint64_t>(StopTimestamp, Td->LastStopTsc);
+            StartTimestamp = std::min<uint64_t>(StartTimestamp, Td->LastRun.StartTsc);
+            StopTimestamp = std::max<uint64_t>(StopTimestamp, Td->LastRun.StopTsc);
           }
 
           for (auto const& Thread : LoadThreads) {
             auto Td = Thread.second;
-            ipcEstimateMetricInsert(static_cast<double>(Td->LastIterations) *
+            ipcEstimateMetricInsert(static_cast<double>(Td->LastRun.Iterations) *
                                     static_cast<double>(LoadThreads.front().second->config().payload().instructions()) /
                                     static_cast<double>(StopTimestamp - StartTimestamp));
           }
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index d55a07b6..edef1281 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -22,10 +22,12 @@
 #include "firestarter/AlignedAlloc.hpp"
 #include "firestarter/Constants.hpp"
 #include "firestarter/LoadWorkerData.hpp"
+#include <cstdint>
 #include <firestarter/ErrorDetectionStruct.hpp>
 #include <firestarter/Firestarter.hpp>
 #include <firestarter/Logging/Log.hpp>
 #include <iomanip>
+#include <limits>
 
 #if defined(linux) || defined(__linux__)
 #include <firestarter/Measurement/Metric/IPCEstimate.h>
@@ -105,16 +107,17 @@ auto Firestarter::initLoadWorkers(bool LowLoad, uint64_t Period) -> int {
   return EXIT_SUCCESS;
 }
 
-void Firestarter::signalLoadWorkers(LoadThreadState State) {
+void Firestarter::signalLoadWorkers(const LoadThreadState State, void (*Function)()) {
   bool Ack = false;
 
-  // start the work
+  // acquire the lock on all threads
   for (auto const& Thread : LoadThreads) {
     auto Td = Thread.second;
 
     Td->Mutex.lock();
   }
 
+  // switch the state on all threads
   for (auto const& Thread : LoadThreads) {
     auto Td = Thread.second;
 
@@ -122,6 +125,13 @@ void Firestarter::signalLoadWorkers(LoadThreadState State) {
     Td->Mutex.unlock();
   }
 
+  // Execute a function after the state in the threads has been updated. This may be required to terminate an inner
+  // loop.
+  if (Function) {
+    Function();
+  }
+
+  // wait for all threads to finish
   for (auto const& Thread : LoadThreads) {
     auto Td = Thread.second;
 
@@ -172,7 +182,7 @@ void Firestarter::printThreadErrorReport() {
 
 void Firestarter::printPerformanceReport() {
   // performance report
-  uint64_t StartTimestamp = 0xffffffffffffffff;
+  uint64_t StartTimestamp = std::numeric_limits<uint64_t>::max();
   uint64_t StopTimestamp = 0;
 
   uint64_t Iterations = 0;
@@ -182,13 +192,13 @@ void Firestarter::printPerformanceReport() {
   for (auto const& Thread : LoadThreads) {
     auto Td = Thread.second;
 
-    log::debug() << "Thread " << Td->id() << ": " << Td->Iterations
-                 << " iterations, tsc_delta: " << Td->StopTsc - Td->StartTsc;
+    log::debug() << "Thread " << Td->id() << ": " << Td->LastRun.Iterations
+                 << " iterations, tsc_delta: " << Td->LastRun.StopTsc - Td->LastRun.StartTsc;
 
-    StartTimestamp = std::min(StartTimestamp, Td->StartTsc);
-    StopTimestamp = std::max(StopTimestamp, Td->StopTsc);
+    StartTimestamp = std::min(StartTimestamp, Td->LastRun.StartTsc.load());
+    StopTimestamp = std::max(StopTimestamp, Td->LastRun.StopTsc.load());
 
-    Iterations += Td->Iterations;
+    Iterations += Td->LastRun.Iterations.load();
   }
 
   double Runtime =
@@ -204,7 +214,7 @@ void Firestarter::printPerformanceReport() {
   if (Measurement) {
     for (auto const& Thread : LoadThreads) {
       auto Td = Thread.second;
-      ipcEstimateMetricInsert(static_cast<double>(Td->Iterations) *
+      ipcEstimateMetricInsert(static_cast<double>(Td->LastRun.Iterations) *
                               static_cast<double>(LoadThreads.front().second->config().payload().instructions()) /
                               static_cast<double>(StopTimestamp - StartTimestamp));
     }
@@ -306,8 +316,9 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
       break;
     // perform stress test
     case LoadThreadState::ThreadWork:
+      Td->CurrentRun.Iterations = 0;
       // record threads start timestamp
-      Td->StartTsc = Td->environment().topology().timestamp();
+      Td->CurrentRun.StartTsc = Td->environment().topology().timestamp();
 
       // will be terminated by watchdog
       for (;;) {
@@ -318,8 +329,8 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
 #ifdef ENABLE_SCOREP
         SCOREP_USER_REGION_BY_NAME_BEGIN("HIGH", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
-        Td->Iterations =
-            Td->config().payload().highLoadFunction(Td->Memory->getMemoryAddress(), Td->LoadVar, Td->Iterations);
+        Td->CurrentRun.Iterations = Td->config().payload().highLoadFunction(Td->Memory->getMemoryAddress(), Td->LoadVar,
+                                                                            Td->CurrentRun.Iterations);
 
         // call low load function
 #ifdef ENABLE_VTRACING
@@ -340,13 +351,15 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
 
         // terminate if master signals end of run and record stop timestamp
         if (Td->LoadVar == LoadThreadWorkType::LoadStop) {
-          Td->StopTsc = Td->environment().topology().timestamp();
+          Td->CurrentRun.StopTsc = Td->environment().topology().timestamp();
+          Td->LastRun = Td->CurrentRun;
 
           return;
         }
 
         if (Td->LoadVar == LoadThreadWorkType::LoadSwitch) {
-          Td->StopTsc = Td->environment().topology().timestamp();
+          Td->CurrentRun.StopTsc = Td->environment().topology().timestamp();
+          Td->LastRun = Td->CurrentRun;
 
           break;
         }
@@ -361,12 +374,6 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
 
       // call init function
       Td->config().payload().init(Td->Memory->getMemoryAddress(), Td->BuffersizeMem);
-
-      // save old iteration count
-      Td->LastIterations = Td->Iterations;
-      Td->LastStartTsc = Td->StartTsc;
-      Td->LastStopTsc = Td->StopTsc;
-      Td->Iterations = 0;
       break;
     case LoadThreadState::ThreadWait:
       break;

From 43fd6cba151a30a9b0f41b3b2770d837d74afab8 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 5 Oct 2024 23:12:06 +0200
Subject: [PATCH 028/167] refactor load worker data

---
 include/firestarter/LoadWorkerData.hpp | 12 ++++++++---
 src/firestarter/LoadWorker.cpp         | 30 +++++++++++++-------------
 2 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index 823b17c5..21994e0c 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -142,9 +142,15 @@ class LoadWorkerData {
     return Memory->ExtraVars.Eds;
   }
 
-  LoadThreadState State = LoadThreadState::ThreadWait;
-  bool Ack = false;
-  std::mutex Mutex;
+  /// The members in this struct are used for the communication between the main thread and the load thread.
+  struct Communication {
+    /// The state of the load worker.
+    LoadThreadState State = LoadThreadState::ThreadWait;
+    /// This variable will be set to true when the state change was acknowledged by the load thread.
+    bool Ack = false;
+    /// The mutex that is used to lock access to the Ack and State variables.
+    std::mutex Mutex;
+  } Communication;
 
   LoadWorkerMemory::UniquePtr Memory = {nullptr, nullptr};
 
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index edef1281..252969f6 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -114,15 +114,15 @@ void Firestarter::signalLoadWorkers(const LoadThreadState State, void (*Function
   for (auto const& Thread : LoadThreads) {
     auto Td = Thread.second;
 
-    Td->Mutex.lock();
+    Td->Communication.Mutex.lock();
   }
 
   // switch the state on all threads
   for (auto const& Thread : LoadThreads) {
     auto Td = Thread.second;
 
-    Td->State = State;
-    Td->Mutex.unlock();
+    Td->Communication.State = State;
+    Td->Communication.Mutex.unlock();
   }
 
   // Execute a function after the state in the threads has been updated. This may be required to terminate an inner
@@ -136,14 +136,14 @@ void Firestarter::signalLoadWorkers(const LoadThreadState State, void (*Function
     auto Td = Thread.second;
 
     do {
-      Td->Mutex.lock();
-      Ack = Td->Ack;
-      Td->Mutex.unlock();
+      Td->Communication.Mutex.lock();
+      Ack = Td->Communication.Ack;
+      Td->Communication.Mutex.unlock();
     } while (!Ack);
 
-    Td->Mutex.lock();
-    Td->Ack = false;
-    Td->Mutex.unlock();
+    Td->Communication.Mutex.lock();
+    Td->Communication.Ack = false;
+    Td->Communication.Mutex.unlock();
   }
 }
 
@@ -255,16 +255,16 @@ void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
 #endif
 
   for (;;) {
-    Td->Mutex.lock();
-    auto CurState = Td->State;
-    Td->Mutex.unlock();
+    Td->Communication.Mutex.lock();
+    auto CurState = Td->Communication.State;
+    Td->Communication.Mutex.unlock();
 
     if (CurState != OldState) {
       OldState = CurState;
 
-      Td->Mutex.lock();
-      Td->Ack = true;
-      Td->Mutex.unlock();
+      Td->Communication.Mutex.lock();
+      Td->Communication.Ack = true;
+      Td->Communication.Mutex.unlock();
     } else {
       std::this_thread::sleep_for(std::chrono::microseconds(1));
       continue;

From d608e98684ded16c063439abcefd860f3c5f5b90 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 5 Oct 2024 23:29:39 +0200
Subject: [PATCH 029/167] use std::chrono::microseconds instead of uint64_t

---
 include/firestarter/Environment/Payload/Payload.hpp        | 3 ++-
 include/firestarter/Environment/X86/Payload/X86Payload.hpp | 2 +-
 include/firestarter/Firestarter.hpp                        | 2 +-
 include/firestarter/LoadWorkerData.hpp                     | 7 ++++---
 src/firestarter/Environment/X86/Payload/X86Payload.cpp     | 4 ++--
 src/firestarter/Firestarter.cpp                            | 2 +-
 src/firestarter/LoadWorker.cpp                             | 2 +-
 7 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index cedf1041..a4611ed0 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include "firestarter/Constants.hpp"
+#include <chrono>
 #include <list>
 #include <memory>
 #include <string>
@@ -93,7 +94,7 @@ class Payload {
 
   [[nodiscard]] virtual auto isAvailable() const -> bool = 0;
 
-  virtual void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, uint64_t Period) = 0;
+  virtual void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) = 0;
 
   [[nodiscard]] virtual auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
                                             unsigned InstructionCacheSize,
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index 924a3861..f9d2bb2e 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -500,7 +500,7 @@ class X86Payload : public environment::payload::Payload {
 #pragma clang diagnostic pop
 #endif
   // use cpuid and usleep as low load
-  void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, uint64_t Period) override;
+  void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) override;
 
   auto highLoadFunction(double* AddrMem, volatile LoadThreadWorkType& LoadVar, uint64_t Iterations)
       -> uint64_t override;
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 7760fd59..96f9c2fe 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -134,7 +134,7 @@ class Firestarter {
 #endif
 
   // LoadThreadWorker.cpp
-  auto initLoadWorkers(bool LowLoad, uint64_t Period) -> int;
+  auto initLoadWorkers(bool LowLoad, std::chrono::microseconds Period) -> int;
   void joinLoadWorkers();
   void printThreadErrorReport();
   void printPerformanceReport();
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index 21994e0c..cf5164f5 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -106,8 +106,8 @@ class LoadWorkerData {
     }
   };
 
-  LoadWorkerData(int Id, environment::Environment& Environment, volatile LoadThreadWorkType& LoadVar, uint64_t Period,
-                 bool DumpRegisters, bool ErrorDetection)
+  LoadWorkerData(int Id, environment::Environment& Environment, volatile LoadThreadWorkType& LoadVar,
+                 std::chrono::microseconds Period, bool DumpRegisters, bool ErrorDetection)
       : LoadVar(LoadVar)
       , Period(Period)
       , DumpRegisters(DumpRegisters)
@@ -152,6 +152,7 @@ class LoadWorkerData {
     std::mutex Mutex;
   } Communication;
 
+  /// The memory which is used by the load worker.
   LoadWorkerMemory::UniquePtr Memory = {nullptr, nullptr};
 
   volatile LoadThreadWorkType& LoadVar;
@@ -165,7 +166,7 @@ class LoadWorkerData {
 
   // period in usecs
   // used in low load routine to sleep 1/100th of this time
-  uint64_t Period;
+  std::chrono::microseconds Period;
   bool DumpRegisters;
   bool ErrorDetection;
   std::shared_ptr<uint64_t> CommunicationLeft;
diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index 1a169df2..dedf80d3 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -33,7 +33,7 @@
 
 namespace firestarter::environment::x86::payload {
 
-void X86Payload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, uint64_t Period) {
+void X86Payload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) {
   auto Nap = Period / 100;
 
 #ifndef _MSC_VER
@@ -56,7 +56,7 @@ void X86Payload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, uint64_t
     _mm_mfence();
     __cpuid(Cpuid.data(), 0);
 #endif
-    std::this_thread::sleep_for(std::chrono::microseconds(Nap));
+    std::this_thread::sleep_for(Nap);
 #ifndef _MSC_VER
     __asm__ __volatile__("mfence;"
                          "cpuid;" ::
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index ba0cdb3b..eba84fc4 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -241,7 +241,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
 
   // setup thread with either high or low load configured at the start
   // low loads has to know the length of the period
-  if (EXIT_SUCCESS != (ReturnCode = initLoadWorkers((LoadPercent == 0), Period.count()))) {
+  if (EXIT_SUCCESS != (ReturnCode = initLoadWorkers((LoadPercent == 0), Period))) {
     std::exit(ReturnCode);
   }
 
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index 252969f6..b3c4a680 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -46,7 +46,7 @@
 
 namespace firestarter {
 
-auto Firestarter::initLoadWorkers(bool LowLoad, uint64_t Period) -> int {
+auto Firestarter::initLoadWorkers(bool LowLoad, std::chrono::microseconds Period) -> int {
   auto ReturnCode = environment().setCpuAffinity(0);
 
   if (EXIT_SUCCESS != ReturnCode) {

From 5583723d6213ef0614f57e92679bf7321fe579dc Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 5 Oct 2024 23:47:21 +0200
Subject: [PATCH 030/167] use throw instead of returns for Environment class

---
 .../firestarter/Environment/Environment.hpp   | 10 +--
 .../Environment/X86/X86Environment.hpp        |  4 +-
 include/firestarter/Firestarter.hpp           |  4 +-
 src/firestarter/DumpRegisterWorker.cpp        |  4 +-
 src/firestarter/Environment/Environment.cpp   | 62 +++++++------------
 .../Environment/X86/X86Environment.cpp        | 51 +++++++--------
 src/firestarter/Firestarter.cpp               | 32 +++-------
 src/firestarter/LoadWorker.cpp                | 10 +--
 8 files changed, 66 insertions(+), 111 deletions(-)

diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index 9be6c374..ff67efa7 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -36,13 +36,13 @@ class Environment {
       : Topology(std::move(Topology)) {}
   virtual ~Environment() { delete SelectedConfig; }
 
-  auto evaluateCpuAffinity(unsigned RequestedNumThreads, const std::string& CpuBind) -> int;
-  auto setCpuAffinity(unsigned Thread) -> int;
+  void evaluateCpuAffinity(unsigned RequestedNumThreads, const std::string& CpuBind);
+  void setCpuAffinity(unsigned Thread);
   void printThreadSummary();
 
   virtual void evaluateFunctions() = 0;
-  virtual auto selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) -> int = 0;
-  virtual auto selectInstructionGroups(std::string Groups) -> int = 0;
+  virtual void selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) = 0;
+  virtual void selectInstructionGroups(std::string Groups) = 0;
   virtual void printAvailableInstructionGroups() = 0;
   virtual void setLineCount(unsigned LineCount) = 0;
   virtual void printSelectedCodePathSummary() = 0;
@@ -80,7 +80,7 @@ class Environment {
   // TODO: replace these functions with the builtins one from hwlocom hwloc
   static auto cpuAllowed(unsigned Id) -> int;
   static auto cpuSet(unsigned Id) -> int;
-  auto addCpuSet(unsigned Cpu, cpu_set_t& Mask) const -> bool;
+  void addCpuSet(unsigned Cpu, cpu_set_t& Mask) const;
 
   std::vector<unsigned> CpuBind;
 };
diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp
index 3e358d90..99d5a2f6 100644
--- a/include/firestarter/Environment/X86/X86Environment.hpp
+++ b/include/firestarter/Environment/X86/X86Environment.hpp
@@ -55,8 +55,8 @@ class X86Environment final : public Environment {
   auto topology() -> X86CPUTopology const& { return *dynamic_cast<X86CPUTopology*>(Topology.get()); }
 
   void evaluateFunctions() override;
-  auto selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) -> int override;
-  auto selectInstructionGroups(std::string Groups) -> int override;
+  void selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) override;
+  void selectInstructionGroups(std::string Groups) override;
   void printAvailableInstructionGroups() override;
   void setLineCount(unsigned LineCount) override;
   void printSelectedCodePathSummary() override;
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 96f9c2fe..6ed91e94 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -134,7 +134,7 @@ class Firestarter {
 #endif
 
   // LoadThreadWorker.cpp
-  auto initLoadWorkers(bool LowLoad, std::chrono::microseconds Period) -> int;
+  void initLoadWorkers(bool LowLoad, std::chrono::microseconds Period);
   void joinLoadWorkers();
   void printThreadErrorReport();
   void printPerformanceReport();
@@ -177,7 +177,7 @@ class Firestarter {
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
   // DumpRegisterWorker.cpp
-  auto initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, const std::string& DumpFilePath) -> int;
+  void initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, const std::string& DumpFilePath);
   void joinDumpRegisterWorker();
   static void dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Data);
 
diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp
index 2e5e08e0..19b7ca6e 100644
--- a/src/firestarter/DumpRegisterWorker.cpp
+++ b/src/firestarter/DumpRegisterWorker.cpp
@@ -55,15 +55,13 @@ auto registerNameBySize(unsigned RegisterSize) -> std::string {
 
 namespace firestarter {
 
-auto Firestarter::initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, const std::string& DumpFilePath) -> int {
+void Firestarter::initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, const std::string& DumpFilePath) {
   // Create the data for the worker thread. The thread will dump the register contents periodically and calculate the
   // hamming distance between dumps.
   auto Data = std::make_unique<DumpRegisterWorkerData>(this->LoadThreads.begin()->second, DumpTimeDelta, DumpFilePath);
 
   // Spawn the thread.
   DumpRegisterWorkerThread = std::thread(Firestarter::dumpRegisterWorker, std::move(Data));
-
-  return EXIT_SUCCESS;
 }
 
 void Firestarter::joinDumpRegisterWorker() { this->DumpRegisterWorkerThread.join(); }
diff --git a/src/firestarter/Environment/Environment.cpp b/src/firestarter/Environment/Environment.cpp
index ab9b0295..e3040955 100644
--- a/src/firestarter/Environment/Environment.cpp
+++ b/src/firestarter/Environment/Environment.cpp
@@ -22,6 +22,7 @@
 #include <firestarter/Environment/Environment.hpp>
 #include <firestarter/Logging/Log.hpp>
 #include <regex>
+#include <stdexcept>
 #include <string>
 
 namespace firestarter::environment {
@@ -53,26 +54,24 @@ auto Environment::cpuAllowed(unsigned Id) -> int {
   return 0;
 }
 
-auto Environment::addCpuSet(unsigned Cpu, cpu_set_t& Mask) const -> bool {
+void Environment::addCpuSet(unsigned Cpu, cpu_set_t& Mask) const {
   if (cpuAllowed(Cpu)) {
     CPU_SET(Cpu, &Mask);
-    return true;
+    return; // CPU was added to the mask; the throws below only handle CPUs that are not allowed
   }
   if (Cpu >= topology().numThreads()) {
-    log::error() << "The given bind argument (-b/--bind) includes CPU " << std::to_string(Cpu)
-                 << " that is not available on this system.";
-  } else {
-    log::error() << "The given bind argument (-b/--bind) cannot "
-                    "be implemented with the cpuset given from the OS\n"
-                 << "This can be caused by the taskset tool, cgroups, "
-                    "the batch system, or similar mechanisms.\n"
-                 << "Please fix the argument to match the restrictions.";
+    throw std::invalid_argument("The given bind argument (-b/--bind) includes CPU " + std::to_string(Cpu) +
+                                " that is not available on this system.");
   }
-  return false;
+  throw std::invalid_argument("The given bind argument (-b/--bind) cannot "
+                              "be implemented with the cpuset given from the OS\n"
+                              "This can be caused by the taskset tool, cgroups, "
+                              "the batch system, or similar mechanisms.\n"
+                              "Please fix the argument to match the restrictions.");
 }
 #endif
 
-auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::string& CpuBind) -> int {
+void Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::string& CpuBind) {
 #if not((defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY))
   (void)CpuBind;
 #endif
@@ -105,9 +103,7 @@ auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::s
         if (!cpuAllowed(I)) {
           continue;
         }
-        if (!addCpuSet(I, Cpuset)) {
-          return EACCES;
-        }
+        addCpuSet(I, Cpuset);
         CpuCount++;
         // we reached the desired amounts of threads
         if (CpuCount >= RequestedNumThreads) {
@@ -116,13 +112,12 @@ auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::s
       }
       // requested to many threads
       if (CpuCount < RequestedNumThreads) {
-        log::error() << "You are requesting more threads than "
-                        "there are CPUs available in the given cpuset.\n"
-                     << "This can be caused by the taskset tool, cgrous, "
-                        "the batch system, or similar mechanisms.\n"
-                     << "Please fix the -n/--threads argument to match the "
-                        "restrictions.";
-        return EACCES;
+        throw std::invalid_argument("You are requesting more threads than "
+                                    "there are CPUs available in the given cpuset.\n"
+                                    "This can be caused by the taskset tool, cgrous, "
+                                    "the batch system, or similar mechanisms.\n"
+                                    "Please fix the -n/--threads argument to match the "
+                                    "restrictions.");
       }
     }
   } else {
@@ -153,18 +148,14 @@ auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::s
           S = 1;
         }
         if (Y < X) {
-          log::error() << "y has to be >= x in x-y expressions of CPU list: " << Token;
-          return EXIT_FAILURE;
+          throw std::invalid_argument("y has to be >= x in x-y expressions of CPU list: " + Token);
         }
         for (auto I = X; I <= Y; I += S) {
-          if (!addCpuSet(I, Cpuset)) {
-            return EACCES;
-          }
+          addCpuSet(I, Cpuset);
           RequestedNumThreads++;
         }
       } else {
-        log::error() << "Invalid symbols in CPU list: " << Token;
-        return EXIT_FAILURE;
+        throw std::invalid_argument("Invalid symbols in CPU list: " + Token);
       }
     }
   }
@@ -175,8 +166,7 @@ auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::s
 #endif
 
   if (RequestedNumThreads == 0) {
-    log::error() << "Found no usable CPUs!";
-    return 127;
+    throw std::invalid_argument("Found no usable CPUs!");
   }
 
 #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
@@ -188,8 +178,6 @@ auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::s
 #endif
 
   this->RequestedNumThreads = std::min(RequestedNumThreads, topology().maxNumThreads());
-
-  return EXIT_SUCCESS;
 }
 
 void Environment::printThreadSummary() {
@@ -219,17 +207,13 @@ void Environment::printThreadSummary() {
 #endif
 }
 
-auto Environment::setCpuAffinity(unsigned Thread) -> int {
+void Environment::setCpuAffinity(unsigned Thread) {
   if (Thread >= requestedNumThreads()) {
-    log::error() << "Trying to set more CPUs than available.";
-    return EXIT_FAILURE;
+    throw std::invalid_argument("Trying to set more CPUs than available.");
   }
 
 #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
   cpuSet(CpuBind.at(Thread));
 #endif
-
-  return EXIT_SUCCESS;
 }
-
 }; // namespace firestarter::environment
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index 3d3a70e8..9255ffcf 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -41,7 +41,7 @@ void X86Environment::evaluateFunctions() {
   }
 }
 
-auto X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) -> int {
+void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) {
   unsigned Id = 1;
   std::string DefaultPayloadName;
 
@@ -51,23 +51,25 @@ auto X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
       // the selected function
       if (Id == FunctionId) {
         if (!Config->isAvailable()) {
-          log::error() << "Function " << FunctionId << " (\"" << functionName << "\") requires "
-                       << Config->payload().name() << ", which is not supported by the processor.";
-          if (!AllowUnavailablePayload) {
-            return EXIT_FAILURE;
+          const auto ErrorString = "Function " + std::to_string(FunctionId) + " (\"" + functionName + "\") requires " +
+                                   Config->payload().name() + ", which is not supported by the processor.";
+          if (AllowUnavailablePayload) {
+            log::error() << ErrorString;
+          } else {
+            throw std::invalid_argument(ErrorString);
           }
         }
         // found function
         SelectedConfig =
             new ::firestarter::environment::platform::RuntimeConfig(*Config, thread, topology().instructionCacheSize());
-        return EXIT_SUCCESS;
+        return;
       }
       // default function
       if (0 == FunctionId && Config->isDefault()) {
         if (thread == topology().numThreadsPerCore()) {
           SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(*Config, thread,
                                                                                    topology().instructionCacheSize());
-          return EXIT_SUCCESS;
+          return;
         }
         DefaultPayloadName = Config->payload().name();
       }
@@ -109,21 +111,19 @@ auto X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
         log::warn() << "Using function " << SelectedFunctionName << " as fallback.\n"
                     << "You can use the parameter --function to try other "
                        "functions.";
-        return EXIT_SUCCESS;
+        return;
       }
     }
 
     // no fallback found
-    log::error() << "No fallback implementation found for available ISA "
-                    "extensions.";
-    return EXIT_FAILURE;
+    throw std::invalid_argument("No fallback implementation found for available ISA "
+                                "extensions.");
   }
 
-  log::error() << "unknown function id: " << FunctionId << ", see --avail for available ids";
-  return EXIT_FAILURE;
+  throw std::invalid_argument("unknown function id: " + std::to_string(FunctionId) + ", see --avail for available ids");
 }
 
-auto X86Environment::selectInstructionGroups(std::string Groups) -> int {
+void X86Environment::selectInstructionGroups(std::string Groups) {
   const std::string Delimiter = ",";
   const std::regex Re("^(\\w+):(\\d+)$");
   const auto AvailableInstructionGroups = selectedConfig().platformConfig().payload().getAvailableInstructions();
@@ -139,32 +139,27 @@ auto X86Environment::selectInstructionGroups(std::string Groups) -> int {
     if (std::regex_match(Token, M, Re)) {
       if (std::find(AvailableInstructionGroups.begin(), AvailableInstructionGroups.end(), M[1].str()) ==
           AvailableInstructionGroups.end()) {
-        log::error() << "Invalid instruction-group: " << M[1].str()
-                     << "\n       --run-instruction-groups format: multiple INST:VAL "
-                        "pairs comma-seperated";
-        return EXIT_FAILURE;
+        throw std::invalid_argument("Invalid instruction-group: " + M[1].str() +
+                                    "\n       --run-instruction-groups format: multiple INST:VAL "
+                                    "pairs comma-seperated");
       }
       int Num = std::stoul(M[2].str());
       if (Num == 0) {
-        log::error() << "instruction-group VAL may not contain number 0"
-                     << "\n       --run-instruction-groups format: multiple INST:VAL "
-                        "pairs comma-seperated";
-        return EXIT_FAILURE;
+        throw std::invalid_argument("instruction-group VAL may not contain number 0"
+                                    "\n       --run-instruction-groups format: multiple INST:VAL "
+                                    "pairs comma-seperated");
       }
       PayloadSettings.emplace_back(M[1].str(), Num);
     } else {
-      log::error() << "Invalid symbols in instruction-group: " << Token
-                   << "\n       --run-instruction-groups format: multiple INST:VAL "
-                      "pairs comma-seperated";
-      return EXIT_FAILURE;
+      throw std::invalid_argument("Invalid symbols in instruction-group: " + Token +
+                                  "\n       --run-instruction-groups format: multiple INST:VAL "
+                                  "pairs comma-seperated");
     }
   }
 
   selectedConfig().setPayloadSettings(PayloadSettings);
 
   log::info() << "  Running custom instruction group: " << Groups;
-
-  return EXIT_SUCCESS;
 }
 
 void X86Environment::printAvailableInstructionGroups() {
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index eba84fc4..ff1fb6c1 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -80,8 +80,6 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
     , Generations(Generations)
     , Nsga2Cr(Nsga2Cr)
     , Nsga2M(Nsga2M) {
-  int ReturnCode = 0;
-
   Load = (Period * LoadPercent) / 100;
   if (LoadPercent == 100 || Load == std::chrono::microseconds::zero()) {
     this->Period = std::chrono::microseconds::zero();
@@ -99,9 +97,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
   Environment = new environment::x86::X86Environment();
 #endif
 
-  if (EXIT_SUCCESS != (ReturnCode = environment().evaluateCpuAffinity(RequestedNumThreads, CpuBind))) {
-    std::exit(ReturnCode);
-  }
+  environment().evaluateCpuAffinity(RequestedNumThreads, CpuBind);
 
 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
   // Error detection uses crc32 instruction added by the SSE4.2 extension to x86
@@ -126,9 +122,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
     std::exit(EXIT_SUCCESS);
   }
 
-  if (EXIT_SUCCESS != (ReturnCode = environment().selectFunction(FunctionId, AllowUnavailablePayload))) {
-    std::exit(ReturnCode);
-  }
+  environment().selectFunction(FunctionId, AllowUnavailablePayload);
 
   if (ListInstructionGroups) {
     environment().printAvailableInstructionGroups();
@@ -136,9 +130,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
   }
 
   if (!InstructionGroups.empty()) {
-    if (EXIT_SUCCESS != (ReturnCode = environment().selectInstructionGroups(InstructionGroups))) {
-      std::exit(ReturnCode);
-    }
+    environment().selectInstructionGroups(InstructionGroups);
   }
 
   if (LineCount != 0) {
@@ -160,8 +152,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
     auto Initialized = MeasurementWorker->initMetrics(All);
 
     if (Initialized.empty()) {
-      log::error() << "No metrics initialized";
-      std::exit(EXIT_FAILURE);
+      throw std::invalid_argument("No metrics initialized");
     }
 
     // check if selected metrics are initialized
@@ -172,13 +163,11 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
       };
       // metric name is not found
       if (std::find_if(All.begin(), All.end(), NameEqual) == All.end()) {
-        log::error() << "Metric \"" << OptimizationMetric << "\" does not exist.";
-        std::exit(EXIT_FAILURE);
+        throw std::invalid_argument("Metric \"" + OptimizationMetric + "\" does not exist.");
       }
       // metric has not initialized properly
       if (std::find_if(Initialized.begin(), Initialized.end(), NameEqual) == Initialized.end()) {
-        log::error() << "Metric \"" << OptimizationMetric << "\" failed to initialize.";
-        std::exit(EXIT_FAILURE);
+        throw std::invalid_argument("Metric \"" + OptimizationMetric + "\" failed to initialize.");
       }
     }
   }
@@ -241,9 +230,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
 
   // setup thread with either high or low load configured at the start
   // low loads has to know the length of the period
-  if (EXIT_SUCCESS != (ReturnCode = initLoadWorkers((LoadPercent == 0), Period))) {
-    std::exit(ReturnCode);
-  }
+  initLoadWorkers((LoadPercent == 0), Period);
 
   // add some signal handler for aborting FIRESTARTER
 #ifndef _WIN32
@@ -287,10 +274,7 @@ void Firestarter::mainThread() {
 
 #ifdef FIRESTARTER_DEBUG_FEATURES
   if (DumpRegisters) {
-    int ReturnCode = 0;
-    if (EXIT_SUCCESS != (ReturnCode = initDumpRegisterWorker(DumpRegistersTimeDelta, DumpRegistersOutpath))) {
-      std::exit(ReturnCode);
-    }
+    initDumpRegisterWorker(DumpRegistersTimeDelta, DumpRegistersOutpath);
   }
 #endif
 
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index b3c4a680..53bbbed7 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -46,12 +46,8 @@
 
 namespace firestarter {
 
-auto Firestarter::initLoadWorkers(bool LowLoad, std::chrono::microseconds Period) -> int {
-  auto ReturnCode = environment().setCpuAffinity(0);
-
-  if (EXIT_SUCCESS != ReturnCode) {
-    return EXIT_FAILURE;
-  }
+void Firestarter::initLoadWorkers(bool LowLoad, std::chrono::microseconds Period) {
+  environment().setCpuAffinity(0);
 
   // setup load variable to execute low or high load once the threads switch to
   // work.
@@ -103,8 +99,6 @@ auto Firestarter::initLoadWorkers(bool LowLoad, std::chrono::microseconds Period
   }
 
   signalLoadWorkers(LoadThreadState::ThreadInit);
-
-  return EXIT_SUCCESS;
 }
 
 void Firestarter::signalLoadWorkers(const LoadThreadState State, void (*Function)()) {

From 8ba518a5b2118322112f0f3e11ff8e6dada5db6b Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 6 Oct 2024 00:44:22 +0200
Subject: [PATCH 031/167] clang-tidy fixes

---
 include/firestarter/Firestarter.hpp           |  2 +-
 src/firestarter/Environment/CPUTopology.cpp   | 12 +++++-----
 src/firestarter/Environment/Environment.cpp   |  4 ++--
 .../Environment/X86/Payload/AVX512Payload.cpp |  2 +-
 .../Environment/X86/Payload/AVXPayload.cpp    |  2 +-
 .../Environment/X86/Payload/FMA4Payload.cpp   |  4 ++--
 .../Environment/X86/Payload/FMAPayload.cpp    |  2 +-
 .../Environment/X86/Payload/SSE2Payload.cpp   |  2 +-
 .../Environment/X86/Payload/ZENFMAPayload.cpp |  4 ++--
 .../Environment/X86/X86CPUTopology.cpp        | 22 +++++++++++--------
 .../Environment/X86/X86Environment.cpp        |  4 ++--
 src/firestarter/LoadWorker.cpp                | 19 ++++++++--------
 src/firestarter/Main.cpp                      |  2 +-
 src/firestarter/WatchdogWorker.cpp            | 16 +++++++-------
 14 files changed, 49 insertions(+), 48 deletions(-)

diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 6ed91e94..c48e975a 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -169,7 +169,7 @@ class Firestarter {
   /// before we wait for the acknowledgement of the thread.
   void signalLoadWorkers(LoadThreadState State, void (*Function)() = nullptr);
 
-  static void loadThreadWorker(std::shared_ptr<LoadWorkerData> Td);
+  static void loadThreadWorker(const std::shared_ptr<LoadWorkerData>& Td);
 
   // WatchdogWorker.cpp
   static auto watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load,
diff --git a/src/firestarter/Environment/CPUTopology.cpp b/src/firestarter/Environment/CPUTopology.cpp
index 62c9224c..fa4d97ca 100644
--- a/src/firestarter/Environment/CPUTopology.cpp
+++ b/src/firestarter/Environment/CPUTopology.cpp
@@ -53,13 +53,11 @@ auto CPUTopology::print(std::ostream& Stream) const -> std::ostream& {
          << "    supported features: " << Ss.str() << "\n"
          << "    Caches:";
 
-  std::vector<hwloc_obj_type_t> Caches = {
+  const std::vector<hwloc_obj_type_t> Caches = {
       HWLOC_OBJ_L1CACHE, HWLOC_OBJ_L1ICACHE, HWLOC_OBJ_L2CACHE, HWLOC_OBJ_L2ICACHE,
       HWLOC_OBJ_L3CACHE, HWLOC_OBJ_L3ICACHE, HWLOC_OBJ_L4CACHE, HWLOC_OBJ_L5CACHE,
   };
 
-  std::vector<std::string> CacheStrings = {};
-
   for (hwloc_obj_type_t const& Cache : Caches) {
     std::stringstream Ss;
 
@@ -128,7 +126,7 @@ CPUTopology::CPUTopology(std::string Architecture)
   hwloc_topology_load(Topology);
 
   // check for hybrid processor
-  int NrCpukinds = hwloc_cpukinds_get_nr(Topology, 0);
+  const auto NrCpukinds = hwloc_cpukinds_get_nr(Topology, 0);
 
   switch (NrCpukinds) {
   case -1:
@@ -300,7 +298,7 @@ CPUTopology::CPUTopology(std::string Architecture)
 #endif
 
   // get L1i-Cache size
-  int Width = hwloc_get_nbobjs_by_type(Topology, HWLOC_OBJ_L1ICACHE);
+  const auto Width = hwloc_get_nbobjs_by_type(Topology, HWLOC_OBJ_L1ICACHE);
 
   if (Width >= 1) {
     hwloc_obj_t CacheObj = hwloc_get_obj_by_type(Topology, HWLOC_OBJ_L1ICACHE, 0);
@@ -370,7 +368,7 @@ auto CPUTopology::maxNumThreads() const -> unsigned {
   unsigned Max = 0;
 
   // There might be more then one kind of cores
-  int NrCpukinds = hwloc_cpukinds_get_nr(Topology, 0);
+  const auto NrCpukinds = hwloc_cpukinds_get_nr(Topology, 0);
 
   // fallback in case this did not work ... can happen on some platforms
   // already printed a warning earlier
@@ -395,7 +393,7 @@ auto CPUTopology::maxNumThreads() const -> unsigned {
 
   // Find CPUs per kind
   for (int KindIndex = 0; KindIndex < NrCpukinds; KindIndex++) {
-    int Result = hwloc_cpukinds_get_info(Topology, KindIndex, Bitmap, nullptr, nullptr, nullptr, 0);
+    const auto Result = hwloc_cpukinds_get_info(Topology, KindIndex, Bitmap, nullptr, nullptr, nullptr, 0);
     if (Result) {
       log::warn() << "Could not get information for CPU kind " << KindIndex;
     }
diff --git a/src/firestarter/Environment/Environment.cpp b/src/firestarter/Environment/Environment.cpp
index e3040955..6019f388 100644
--- a/src/firestarter/Environment/Environment.cpp
+++ b/src/firestarter/Environment/Environment.cpp
@@ -190,8 +190,8 @@ void Environment::printThreadSummary() {
   std::vector<unsigned> CpuBind(this->CpuBind);
   CpuBind.resize(requestedNumThreads());
   for (auto const& Bind : CpuBind) {
-    int CoreId = topology().getCoreIdFromPU(Bind);
-    int PkgId = topology().getPkgIdFromPU(Bind);
+    const auto CoreId = topology().getCoreIdFromPU(Bind);
+    const auto PkgId = topology().getPkgIdFromPU(Bind);
 
     if (CoreId != -1 && PkgId != -1) {
       log::info() << "    - Thread " << I << " run on CPU " << Bind << ", core " << CoreId << " in package: " << PkgId;
diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index ff53e7c5..3230e334 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -363,7 +363,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   // String sb;
   // cb.dump(sb);
 
-  Error Err = Rt.add(&LoadFunction, &Code);
+  const auto Err = Rt.add(&LoadFunction, &Code);
   if (Err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
index 4a31c3f2..65e78885 100644
--- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
@@ -395,7 +395,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   // String sb;
   // cb.dump(sb);
 
-  Error Err = Rt.add(&LoadFunction, &Code);
+  const auto Err = Rt.add(&LoadFunction, &Code);
   if (Err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
index 40697855..ca416516 100644
--- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
@@ -161,7 +161,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   auto TransStart = AddRegs + MulRegs;
   auto TransEnd = AddRegs + MulRegs + AltDestRegs - 1;
   for (int I = AddStart; I <= TransEnd; I++) {
-    Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 256 + I * 32));
+    Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 256 + (I * 32)));
   }
   Cb.mov(L1Addr, PointerReg); // address for L1-buffer
   Cb.mov(L2Addr, PointerReg);
@@ -363,7 +363,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   // String sb;
   // cb.dump(sb);
 
-  Error Err = Rt.add(&LoadFunction, &Code);
+  const auto Err = Rt.add(&LoadFunction, &Code);
   if (Err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
index 3dae4736..451a37ad 100644
--- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
@@ -400,7 +400,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   // String sb;
   // cb.dump(sb);
 
-  Error Err = Rt.add(&LoadFunction, &Code);
+  const auto Err = Rt.add(&LoadFunction, &Code);
   if (Err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index 1c416ae6..bd2aaba9 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -387,7 +387,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   // String sb;
   // cb.dump(sb);
 
-  Error Err = Rt.add(&LoadFunction, &Code);
+  const auto Err = Rt.add(&LoadFunction, &Code);
   if (Err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
index c23f637d..e102bb1a 100644
--- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
@@ -157,7 +157,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   auto AddRegsStart = 2;
   auto AddRegsEnd = AddRegsStart + NbAddRegs - 1;
   for (int I = AddRegsStart; I <= AddRegsEnd; I++) {
-    Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 256 + I * 32));
+    Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 256 + (I * 32)));
   }
 
   // Initialize xmm14 for shift operation
@@ -352,7 +352,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   // String sb;
   // cb.dump(sb);
 
-  Error Err = Rt.add(&LoadFunction, &Code);
+  const auto Err = Rt.add(&LoadFunction, &Code);
   if (Err) {
     workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
     return EXIT_FAILURE;
diff --git a/src/firestarter/Environment/X86/X86CPUTopology.cpp b/src/firestarter/Environment/X86/X86CPUTopology.cpp
index df579c38..153fc9df 100644
--- a/src/firestarter/Environment/X86/X86CPUTopology.cpp
+++ b/src/firestarter/Environment/X86/X86CPUTopology.cpp
@@ -160,7 +160,7 @@ auto X86CPUTopology::clockrate() const -> uint64_t {
 
   MinMeasurements = 5;
 #else
-  min_measurements = 20;
+  MinMeasurements = 20;
 #endif
 
   int I = 3;
@@ -170,9 +170,9 @@ auto X86CPUTopology::clockrate() const -> uint64_t {
     uint64_t End2Tsc = 0;
 
     // start timestamp
-    uint64_t Start1Tsc = timestamp();
+    const uint64_t Start1Tsc = timestamp();
     StartTime = ClockT::now();
-    uint64_t Start2Tsc = timestamp();
+    const uint64_t Start2Tsc = timestamp();
 
     // waiting
     do {
@@ -188,20 +188,20 @@ auto X86CPUTopology::clockrate() const -> uint64_t {
       TimeDiff = std::chrono::duration_cast<TicksT>(EndTime - StartTime).count();
     } while (0 == TimeDiff);
 
-    uint64_t ClockLowerBound = (((End1Tsc - Start2Tsc) * 1000000) / (TimeDiff));
-    uint64_t ClockUpperBound = (((End2Tsc - Start1Tsc) * 1000000) / (TimeDiff));
+    const uint64_t ClockLowerBound = (((End1Tsc - Start2Tsc) * 1000000) / (TimeDiff));
+    const uint64_t ClockUpperBound = (((End2Tsc - Start1Tsc) * 1000000) / (TimeDiff));
 
     // if both values differ significantly, the measurement could have been
     // interrupted between 2 rdtsc's
     if ((static_cast<double>(ClockLowerBound) > ((static_cast<double>(ClockUpperBound)) * 0.999)) &&
         ((TimeDiff) > 2000)) {
       NumMeasurements++;
-      uint64_t Clock = (ClockLowerBound + ClockUpperBound) / 2;
-      bool ClockrateUpdateCondition = Clockrate == 0 ||
+      const uint64_t Clock = (ClockLowerBound + ClockUpperBound) / 2;
+      const bool ClockrateUpdateCondition = Clockrate == 0 ||
 #ifndef _WIN32
-                                      Clock < Clockrate;
+                                            Clock < Clockrate;
 #else
-                                      Clock > Clockrate;
+                                            Clock > Clockrate;
 #endif
       if (ClockrateUpdateCondition) {
         Clockrate = Clock;
@@ -219,8 +219,10 @@ auto X86CPUTopology::timestamp() const -> uint64_t {
   }
 
 #ifndef _MSC_VER
+  // NOLINTBEGIN(misc-const-correctness)
   uint64_t Rax = 0;
   uint64_t Rdx = 0;
+  // NOLINTEND(misc-const-correctness)
   __asm__ __volatile__("rdtsc;" : "=a"(Rax), "=d"(Rdx));
   return (Rdx << 32) | (Rax & 0xffffffffULL);
 #else
@@ -230,10 +232,12 @@ auto X86CPUTopology::timestamp() const -> uint64_t {
 
 void X86CPUTopology::cpuid(uint64_t* Rax, uint64_t* Rbx, uint64_t* Rcx, uint64_t* Rdx) {
 #ifndef _MSC_VER
+  // NOLINTBEGIN(misc-const-correctness)
   uint64_t RaxOut = 0;
   uint64_t RbxOut = 0;
   uint64_t RcxOut = 0;
   uint64_t RdxOut = 0;
+  // NOLINTEND(misc-const-correctness)
   __asm__ __volatile__("cpuid;"
                        : "=a"(RaxOut), "=b"(RbxOut), "=c"(RcxOut), "=d"(RdxOut)
                        : "a"(*Rax), "b"(*Rbx), "c"(*Rcx), "d"(*Rdx));
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index 9255ffcf..951c5d05 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -124,7 +124,7 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
 }
 
 void X86Environment::selectInstructionGroups(std::string Groups) {
-  const std::string Delimiter = ",";
+  const auto Delimiter = ',';
   const std::regex Re("^(\\w+):(\\d+)$");
   const auto AvailableInstructionGroups = selectedConfig().platformConfig().payload().getAvailableInstructions();
 
@@ -134,7 +134,7 @@ void X86Environment::selectInstructionGroups(std::string Groups) {
   while (Ss.good()) {
     std::string Token;
     std::smatch M;
-    std::getline(Ss, Token, ',');
+    std::getline(Ss, Token, Delimiter);
 
     if (std::regex_match(Token, M, Re)) {
       if (std::find(AvailableInstructionGroups.begin(), AvailableInstructionGroups.end(), M[1].str()) ==
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index 53bbbed7..7db035f9 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -102,8 +102,6 @@ void Firestarter::initLoadWorkers(bool LowLoad, std::chrono::microseconds Period
 }
 
 void Firestarter::signalLoadWorkers(const LoadThreadState State, void (*Function)()) {
-  bool Ack = false;
-
   // aquire the lock on all threads
   for (auto const& Thread : LoadThreads) {
     auto Td = Thread.second;
@@ -129,11 +127,12 @@ void Firestarter::signalLoadWorkers(const LoadThreadState State, void (*Function
   for (auto const& Thread : LoadThreads) {
     auto Td = Thread.second;
 
-    do {
+    // Wait until we receive the acknowledge
+    for (bool Ack = false; !Ack;) {
       Td->Communication.Mutex.lock();
       Ack = Td->Communication.Ack;
       Td->Communication.Mutex.unlock();
-    } while (!Ack);
+    }
 
     Td->Communication.Mutex.lock();
     Td->Communication.Ack = false;
@@ -195,12 +194,12 @@ void Firestarter::printPerformanceReport() {
     Iterations += Td->LastRun.Iterations.load();
   }
 
-  double Runtime =
+  double const Runtime =
       static_cast<double>(StopTimestamp - StartTimestamp) / static_cast<double>(environment().topology().clockrate());
-  double GFlops = static_cast<double>(LoadThreads.front().second->config().payload().flops()) * 0.000000001 *
-                  static_cast<double>(Iterations) / Runtime;
-  double Bandwidth = static_cast<double>(LoadThreads.front().second->config().payload().bytes()) * 0.000000001 *
-                     static_cast<double>(Iterations) / Runtime;
+  double const GFlops = static_cast<double>(LoadThreads.front().second->config().payload().flops()) * 0.000000001 *
+                        static_cast<double>(Iterations) / Runtime;
+  double const Bandwidth = static_cast<double>(LoadThreads.front().second->config().payload().bytes()) * 0.000000001 *
+                           static_cast<double>(Iterations) / Runtime;
 
   // insert values for ipc-estimate metric
   // if we are on linux
@@ -240,7 +239,7 @@ void Firestarter::printPerformanceReport() {
                << "  executed on an unsupported architecture!";
 }
 
-void Firestarter::loadThreadWorker(std::shared_ptr<LoadWorkerData> Td) {
+void Firestarter::loadThreadWorker(const std::shared_ptr<LoadWorkerData>& Td) {
 
   auto OldState = LoadThreadState::ThreadWait;
 
diff --git a/src/firestarter/Main.cpp b/src/firestarter/Main.cpp
index 318d246d..ac2632c7 100644
--- a/src/firestarter/Main.cpp
+++ b/src/firestarter/Main.cpp
@@ -450,7 +450,7 @@ auto main(int argc, const char** argv) -> int {
                            << "\n";
 #endif
 
-  Config Cfg{argc, argv};
+  Config const Cfg{argc, argv};
 
   try {
     firestarter::Firestarter Firestarter(
diff --git a/src/firestarter/WatchdogWorker.cpp b/src/firestarter/WatchdogWorker.cpp
index f45a7f7d..2dafbb94 100644
--- a/src/firestarter/WatchdogWorker.cpp
+++ b/src/firestarter/WatchdogWorker.cpp
@@ -49,28 +49,28 @@ auto Firestarter::watchdogWorker(std::chrono::microseconds Period, std::chrono::
   if (Period > usec::zero()) {
     // this first time is critical as the period will be alligend from this
     // point
-    std::chrono::time_point<clock> StartTime = clock::now();
+    const auto StartTime = clock::now();
 
     // this loop will set the load level periodically.
     for (;;) {
-      std::chrono::time_point<clock> CurrentTime = clock::now();
+      const auto CurrentTime = clock::now();
 
       // get the time already advanced in the current timeslice
       // this can happen if a load function does not terminates just on time
-      nsec Advance =
+      const auto Advance =
           std::chrono::duration_cast<nsec>(CurrentTime - StartTime) % std::chrono::duration_cast<nsec>(Period);
 
       // subtract the advaned time from our timeslice by spilting it based on
       // the load level
-      nsec LoadReduction =
+      const auto LoadReduction =
           (std::chrono::duration_cast<nsec>(Load).count() * Advance) / std::chrono::duration_cast<nsec>(Period).count();
-      nsec IdleReduction = Advance - LoadReduction;
+      const auto IdleReduction = Advance - LoadReduction;
 
       // signal high load level
       setLoad(LoadThreadWorkType::LoadHigh);
 
       // calculate values for nanosleep
-      nsec LoadNsec = Load - LoadReduction;
+      const auto LoadNsec = Load - LoadReduction;
 
       // wait for time to be ellapsed with high load
 #ifdef ENABLE_VTRACING
@@ -99,7 +99,7 @@ auto Firestarter::watchdogWorker(std::chrono::microseconds Period, std::chrono::
       setLoad(LoadThreadWorkType::LoadLow);
 
       // calculate values for nanosleep
-      nsec IdleNsec = Idle - IdleReduction;
+      const auto IdleNsec = Idle - IdleReduction;
 
       // wait for time to be ellapsed with low load
 #ifdef ENABLE_VTRACING
@@ -129,7 +129,7 @@ auto Firestarter::watchdogWorker(std::chrono::microseconds Period, std::chrono::
 
       // exit when termination signal is received or timeout is reached
       {
-        std::lock_guard<std::mutex> Lk(WatchdogTerminateMutex);
+        const std::lock_guard<std::mutex> Lk(WatchdogTerminateMutex);
         if (WatchdogTerminate || (Timeout > sec::zero() && (Time > Timeout))) {
           setLoad(LoadThreadWorkType::LoadStop);
 

From e87ef3c500742711ca8aa9087477932698e47503 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 6 Oct 2024 12:20:51 +0200
Subject: [PATCH 032/167] remove some ifdefs. clang-tidy fixes

---
 .../firestarter/Environment/CPUTopology.hpp   |  6 ++
 .../firestarter/Environment/Environment.hpp   |  2 +-
 .../Environment/X86/Payload/X86Payload.hpp    |  2 -
 .../Environment/X86/X86Environment.hpp        |  6 +-
 include/firestarter/Firestarter.hpp           | 14 +---
 include/firestarter/LoadWorkerData.hpp        |  4 +-
 src/firestarter/Environment/Environment.cpp   | 19 +++---
 src/firestarter/Firestarter.cpp               | 66 +++++++++----------
 src/firestarter/LoadWorker.cpp                |  8 +--
 src/firestarter/Optimizer/Algorithm/NSGA2.cpp |  4 +-
 src/firestarter/Optimizer/OptimizerWorker.cpp |  4 +-
 .../Optimizer/Util/MultiObjective.cpp         |  2 +-
 12 files changed, 67 insertions(+), 70 deletions(-)

diff --git a/include/firestarter/Environment/CPUTopology.hpp b/include/firestarter/Environment/CPUTopology.hpp
index a7ac4681..2091daee 100644
--- a/include/firestarter/Environment/CPUTopology.hpp
+++ b/include/firestarter/Environment/CPUTopology.hpp
@@ -37,6 +37,8 @@ class CPUTopology {
   explicit CPUTopology(std::string Architecture);
   virtual ~CPUTopology();
 
+  friend auto operator<<(std::ostream& Stream, CPUTopology const& CpuTopology) -> std::ostream&;
+
   [[nodiscard]] auto numThreads() const -> unsigned { return NumThreadsPerCore * NumCoresTotal; }
   [[nodiscard]] auto maxNumThreads() const -> unsigned;
   [[nodiscard]] auto numThreadsPerCore() const -> unsigned { return NumThreadsPerCore; }
@@ -82,4 +84,8 @@ class CPUTopology {
   hwloc_topology_t Topology{};
 };
 
+inline auto operator<<(std::ostream& Stream, CPUTopology const& CpuTopology) -> std::ostream& {
+  return CpuTopology.print(Stream);
+}
+
 } // namespace firestarter::environment
diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index ff67efa7..86aa9dd2 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -66,7 +66,7 @@ class Environment {
   [[nodiscard]] auto requestedNumThreads() const -> uint64_t { return RequestedNumThreads; }
 
   [[nodiscard]] auto topology() const -> CPUTopology const& {
-    assert(Topology != nullptr);
+    assert(Topology != nullptr && "Topology is a nullptr");
     return *Topology;
   }
 
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index f9d2bb2e..7de87098 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -44,9 +44,7 @@ class X86Payload : public environment::payload::Payload {
   std::list<asmjit::CpuFeatures::X86::Id> FeatureRequests;
 
 protected:
-  //  asmjit::CodeHolder code;
   asmjit::JitRuntime Rt;
-  // typedef int (*LoadFunction)(firestarter::ThreadData *);
   using LoadFunctionType = uint64_t (*)(double*, volatile LoadThreadWorkType*, uint64_t);
   LoadFunctionType LoadFunction = nullptr;
 
diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp
index 99d5a2f6..fec0143f 100644
--- a/include/firestarter/Environment/X86/X86Environment.hpp
+++ b/include/firestarter/Environment/X86/X86Environment.hpp
@@ -52,7 +52,11 @@ class X86Environment final : public Environment {
   X86Environment()
       : Environment(std::make_unique<X86CPUTopology>()) {}
 
-  auto topology() -> X86CPUTopology const& { return *dynamic_cast<X86CPUTopology*>(Topology.get()); }
+  [[nodiscard]] auto topology() const -> X86CPUTopology const& {
+    const auto* X86Topology = dynamic_cast<X86CPUTopology*>(Topology.get());
+    assert(X86Topology != nullptr && "X86Topology is a nullptr");
+    return *X86Topology;
+  }
 
   void evaluateFunctions() override;
   void selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) override;
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index c48e975a..f56f7164 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -41,10 +41,6 @@
 #include "DumpRegisterWorkerData.hpp"
 #include "LoadWorkerData.hpp"
 
-#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
-#include "Environment/X86/X86Environment.hpp"
-#endif
-
 #include <chrono>
 #include <condition_variable>
 #include <memory>
@@ -77,7 +73,7 @@ class Firestarter {
               std::vector<std::string> const& OptimizationMetrics, std::chrono::seconds const& EvaluationDuration,
               unsigned Individuals, std::string OptimizeOutfile, unsigned Generations, double Nsga2Cr, double Nsga2M);
 
-  ~Firestarter();
+  ~Firestarter() = default;
 
   void mainThread();
 
@@ -110,13 +106,7 @@ class Firestarter {
   const double Nsga2Cr;
   const double Nsga2M;
 
-#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
-  environment::x86::X86Environment* Environment = nullptr;
-
-  [[nodiscard]] auto environment() const -> environment::x86::X86Environment& { return *Environment; }
-#else
-#error "FIRESTARTER is not implemented for this ISA"
-#endif
+  std::unique_ptr<environment::Environment> Environment;
 
 #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
   std::unique_ptr<cuda::Cuda> _cuda;
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index cf5164f5..ef45404c 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -67,7 +67,7 @@ struct LoadWorkerMemory {
 
   /// This padding makes shure that we are aligned to a cache line. The allocated memory will most probably reach beyond
   /// this array.
-  EightBytesType DoNotUsePadding[7];
+  std::array<EightBytesType, 7> DoNotUsePadding;
 
   /// Get the pointer to the start of the memory use for computations.
   /// \returns the pointer to the memory.
@@ -85,7 +85,7 @@ struct LoadWorkerMemory {
     // Allocate the memory for the ExtraLoadWorkerVariables (which are 64B aligned) and the data for the high-load
     // routine which may not be 64B aligned.
     static_assert(sizeof(ExtraLoadWorkerVariables) % 64 == 0,
-                  "ExtraLoadWorkerVariables is not a size of 64B i.e., a cacheline.");
+                  "ExtraLoadWorkerVariables is not a multiple of 64B i.e., multiple cachelines.");
     auto* Ptr = AlignedAlloc::malloc(Bytes + sizeof(ExtraLoadWorkerVariables));
     return {static_cast<LoadWorkerMemory*>(Ptr), deallocate};
   }
diff --git a/src/firestarter/Environment/Environment.cpp b/src/firestarter/Environment/Environment.cpp
index 6019f388..a8c55cbc 100644
--- a/src/firestarter/Environment/Environment.cpp
+++ b/src/firestarter/Environment/Environment.cpp
@@ -57,16 +57,17 @@ auto Environment::cpuAllowed(unsigned Id) -> int {
 void Environment::addCpuSet(unsigned Cpu, cpu_set_t& Mask) const {
   if (cpuAllowed(Cpu)) {
     CPU_SET(Cpu, &Mask);
+  } else {
+    if (Cpu >= topology().numThreads()) {
+      throw std::invalid_argument("The given bind argument (-b/--bind) includes CPU " + std::to_string(Cpu) +
+                                  " that is not available on this system.");
+    }
+    throw std::invalid_argument("The given bind argument (-b/--bind) cannot "
+                                "be implemented with the cpuset given from the OS\n"
+                                "This can be caused by the taskset tool, cgroups, "
+                                "the batch system, or similar mechanisms.\n"
+                                "Please fix the argument to match the restrictions.");
   }
-  if (Cpu >= topology().numThreads()) {
-    throw std::invalid_argument("The given bind argument (-b/--bind) includes CPU " + std::to_string(Cpu) +
-                                " that is not available on this system.");
-  }
-  throw std::invalid_argument("The given bind argument (-b/--bind) cannot "
-                              "be implemented with the cpuset given from the OS\n"
-                              "This can be caused by the taskset tool, cgroups, "
-                              "the batch system, or similar mechanisms.\n"
-                              "Please fix the argument to match the restrictions.");
 }
 #endif
 
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index ff1fb6c1..d92d1b44 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -19,9 +19,6 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <algorithm>
-#include <firestarter/Firestarter.hpp>
-#include <firestarter/Logging/Log.hpp>
 #if defined(linux) || defined(__linux__)
 #include <firestarter/Measurement/Metric/IPCEstimate.h>
 #include <firestarter/Optimizer/Algorithm/NSGA2.hpp>
@@ -29,14 +26,21 @@
 #include <firestarter/Optimizer/Problem/CLIArgumentProblem.hpp>
 #endif
 
-#include <csignal>
-#include <functional>
-#include <utility>
+#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#include <firestarter/Environment/X86/X86Environment.hpp>
+#endif
 
 #ifdef _MSC_VER
 #include <intrin.h>
 #endif
 
+#include <algorithm>
+#include <csignal>
+#include <firestarter/Firestarter.hpp>
+#include <firestarter/Logging/Log.hpp>
+#include <functional>
+#include <utility>
+
 namespace firestarter {
 
 Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds const& Timeout, unsigned LoadPercent,
@@ -94,53 +98,56 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
 #endif
 
 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
-  Environment = new environment::x86::X86Environment();
+  Environment = std::make_unique<environment::x86::X86Environment>();
+  const auto& X86Env = *dynamic_cast<environment::x86::X86Environment*>(Environment.get());
+#else
+#error "FIRESTARTER is not implemented for this ISA"
 #endif
 
-  environment().evaluateCpuAffinity(RequestedNumThreads, CpuBind);
+  Environment->evaluateCpuAffinity(RequestedNumThreads, CpuBind);
 
 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
   // Error detection uses crc32 instruction added by the SSE4.2 extension to x86
   if (ErrorDetection) {
-    if (!Environment->topology().featuresAsmjit().has(asmjit::CpuFeatures::X86::kSSE4_2)) {
+    if (!X86Env.topology().featuresAsmjit().has(asmjit::CpuFeatures::X86::kSSE4_2)) {
       throw std::invalid_argument("Option --error-detection requires the crc32 "
                                   "instruction added with SSE_4_2.\n");
     }
   }
 #endif
 
-  if (ErrorDetection && environment().requestedNumThreads() < 2) {
+  if (ErrorDetection && Environment->requestedNumThreads() < 2) {
     throw std::invalid_argument("Option --error-detection must run with 2 or more threads. Number of "
                                 "threads is " +
-                                std::to_string(environment().requestedNumThreads()) + "\n");
+                                std::to_string(Environment->requestedNumThreads()) + "\n");
   }
 
-  environment().evaluateFunctions();
+  Environment->evaluateFunctions();
 
   if (PrintFunctionSummary) {
-    environment().printFunctionSummary();
+    Environment->printFunctionSummary();
     std::exit(EXIT_SUCCESS);
   }
 
-  environment().selectFunction(FunctionId, AllowUnavailablePayload);
+  Environment->selectFunction(FunctionId, AllowUnavailablePayload);
 
   if (ListInstructionGroups) {
-    environment().printAvailableInstructionGroups();
+    Environment->printAvailableInstructionGroups();
     std::exit(EXIT_SUCCESS);
   }
 
   if (!InstructionGroups.empty()) {
-    environment().selectInstructionGroups(InstructionGroups);
+    Environment->selectInstructionGroups(InstructionGroups);
   }
 
   if (LineCount != 0) {
-    environment().setLineCount(LineCount);
+    Environment->setLineCount(LineCount);
   }
 
 #if defined(linux) || defined(__linux__)
   if (Measurement || ListMetrics || Optimize) {
     MeasurementWorker = std::make_shared<measurement::MeasurementWorker>(
-        MeasurementInterval, environment().requestedNumThreads(), MetricPaths, StdinMetrics);
+        MeasurementInterval, Environment->requestedNumThreads(), MetricPaths, StdinMetrics);
 
     if (ListMetrics) {
       log::info() << MeasurementWorker->availableMetrics();
@@ -210,7 +217,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
 
     auto Prob = std::make_shared<firestarter::optimizer::problem::CLIArgumentProblem>(
         std::move(ApplySettings), MeasurementWorker, OptimizationMetrics, EvaluationDuration, StartDelta, StopDelta,
-        environment().selectedConfig().payloadItems());
+        Environment->selectedConfig().payloadItems());
 
     Population = firestarter::optimizer::Population(std::move(Prob));
 
@@ -224,9 +231,9 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
   }
 #endif
 
-  environment().printSelectedCodePathSummary();
+  Environment->printSelectedCodePathSummary();
 
-  log::info() << environment().topology();
+  log::info() << Environment->topology();
 
   // setup thread with either high or low load configured at the start
   // low loads has to know the length of the period
@@ -241,19 +248,8 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
   std::signal(SIGINT, Firestarter::sigtermHandler);
 }
 
-Firestarter::~Firestarter() {
-#if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
-  _cuda.reset();
-#endif
-#ifdef FIRESTARTER_BUILD_ONEAPI
-  _oneapi.reset();
-#endif
-
-  delete Environment;
-}
-
 void Firestarter::mainThread() {
-  environment().printThreadSummary();
+  Environment->printThreadSummary();
 
 #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
   _cuda = std::make_unique<cuda::Cuda>(&loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus);
@@ -292,7 +288,7 @@ void Firestarter::mainThread() {
     // wait here until optimizer thread terminates
     Firestarter::Optimizer->join();
 
-    auto PayloadItems = environment().selectedConfig().payloadItems();
+    auto PayloadItems = Environment->selectedConfig().payloadItems();
 
     firestarter::optimizer::History::save(OptimizeOutfile, StartTime, PayloadItems, Argc, Argv);
 
@@ -357,7 +353,7 @@ void Firestarter::sigtermHandler(int Signum) {
   // used in case of 0 < load < 100
   // or interrupt sleep for timeout
   {
-    std::lock_guard<std::mutex> Lk(Firestarter::WatchdogTerminateMutex);
+    const std::lock_guard<std::mutex> Lk(Firestarter::WatchdogTerminateMutex);
     Firestarter::WatchdogTerminate = true;
   }
   Firestarter::WatchdogTerminateAlert.notify_all();
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index 7db035f9..2ecf5d43 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -47,13 +47,13 @@
 namespace firestarter {
 
 void Firestarter::initLoadWorkers(bool LowLoad, std::chrono::microseconds Period) {
-  environment().setCpuAffinity(0);
+  Environment->setCpuAffinity(0);
 
   // setup load variable to execute low or high load once the threads switch to
   // work.
   LoadVar = LowLoad ? LoadThreadWorkType::LoadLow : LoadThreadWorkType::LoadHigh;
 
-  auto NumThreads = environment().requestedNumThreads();
+  auto NumThreads = Environment->requestedNumThreads();
 
   // create a std::vector<std::shared_ptr<>> of requestenNumThreads()
   // communication pointers and add these to the threaddata
@@ -69,7 +69,7 @@ void Firestarter::initLoadWorkers(bool LowLoad, std::chrono::microseconds Period
   }
 
   for (uint64_t I = 0; I < NumThreads; I++) {
-    auto Td = std::make_shared<LoadWorkerData>(I, environment(), LoadVar, Period, DumpRegisters, ErrorDetection);
+    auto Td = std::make_shared<LoadWorkerData>(I, *Environment, LoadVar, Period, DumpRegisters, ErrorDetection);
 
     if (ErrorDetection) {
       // distribute pointers for error deteciton. (set threads in a ring)
@@ -195,7 +195,7 @@ void Firestarter::printPerformanceReport() {
   }
 
   double const Runtime =
-      static_cast<double>(StopTimestamp - StartTimestamp) / static_cast<double>(environment().topology().clockrate());
+      static_cast<double>(StopTimestamp - StartTimestamp) / static_cast<double>(Environment->topology().clockrate());
   double const GFlops = static_cast<double>(LoadThreads.front().second->config().payload().flops()) * 0.000000001 *
                         static_cast<double>(Iterations) / Runtime;
   double const Bandwidth = static_cast<double>(LoadThreads.front().second->config().payload().bytes()) * 0.000000001 *
diff --git a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
index 1da46011..6824ea0e 100644
--- a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
+++ b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
@@ -100,11 +100,11 @@ auto NSGA2::evolve(firestarter::optimizer::Population& Pop) -> firestarter::opti
   for (auto I = 1U; I <= Gen; ++I) {
     {
       // Print the logs
-      std::vector<double> IdealPoint = util::ideal(Pop.f());
+      const auto IdealPoint = util::ideal(Pop.f());
       std::stringstream Ss;
 
       Ss << std::setw(7) << I << std::setw(15) << Prob.getFevals() - Fevals0;
-      for (double I : IdealPoint) {
+      for (const auto I : IdealPoint) {
         Ss << std::setw(15) << I;
       }
 
diff --git a/src/firestarter/Optimizer/OptimizerWorker.cpp b/src/firestarter/Optimizer/OptimizerWorker.cpp
index 8e753f80..bbbee14b 100644
--- a/src/firestarter/Optimizer/OptimizerWorker.cpp
+++ b/src/firestarter/Optimizer/OptimizerWorker.cpp
@@ -48,9 +48,11 @@ void OptimizerWorker::join() const {
 }
 
 auto OptimizerWorker::optimizerThread(void* OptimizerWorker) -> void* {
+  // NOLINTBEGIN(cert-pos47-c,concurrency-thread-canceltype-asynchronous)
   pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, nullptr);
+  // NOLINTEND(cert-pos47-c,concurrency-thread-canceltype-asynchronous)
 
-  auto* This = reinterpret_cast<class OptimizerWorker*>(OptimizerWorker);
+  auto* This = static_cast<class OptimizerWorker*>(OptimizerWorker);
 
 #ifndef __APPLE__
   pthread_setname_np(pthread_self(), "Optimizer");
diff --git a/src/firestarter/Optimizer/Util/MultiObjective.cpp b/src/firestarter/Optimizer/Util/MultiObjective.cpp
index 59ceedd7..3c0d1c8f 100644
--- a/src/firestarter/Optimizer/Util/MultiObjective.cpp
+++ b/src/firestarter/Optimizer/Util/MultiObjective.cpp
@@ -242,7 +242,7 @@ auto crowdingDistance(const std::vector<std::vector<double>>& NonDomFront) -> st
     });
     Retval[Indexes[0]] = std::numeric_limits<double>::infinity();
     Retval[Indexes[N - 1U]] = std::numeric_limits<double>::infinity();
-    double Df = NonDomFront[Indexes[N - 1U]][I] - NonDomFront[Indexes[0]][I];
+    const double Df = NonDomFront[Indexes[N - 1U]][I] - NonDomFront[Indexes[0]][I];
     for (decltype(N - 2U) J = 1U; J < N - 1U; ++J) {
       Retval[Indexes[J]] += (NonDomFront[Indexes[J + 1U]][I] - NonDomFront[Indexes[J - 1U]][I]) / Df;
     }

From 7208aebb78ba508b62738fe79e35e37e89bd8e2c Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 6 Oct 2024 13:00:50 +0200
Subject: [PATCH 033/167] remove more ifdefs

---
 include/firestarter/Constants.hpp |  41 +++-
 src/firestarter/Main.cpp          | 306 +++++++++++++++---------------
 2 files changed, 194 insertions(+), 153 deletions(-)

diff --git a/include/firestarter/Constants.hpp b/include/firestarter/Constants.hpp
index 958fd035..f9e8ac26 100644
--- a/include/firestarter/Constants.hpp
+++ b/include/firestarter/Constants.hpp
@@ -23,6 +23,8 @@
 
 #include <cstdint>
 
+namespace firestarter {
+
 using EightBytesType = uint64_t;
 
 // We want enum to have the size of 8B. Disable the warnings for bigger enum size than needed.
@@ -38,4 +40,41 @@ enum class LoadThreadWorkType : EightBytesType {
   LoadStop = 2,
   LoadSwitch = 4
 };
-// NOLINTEND(performance-enum-size)
\ No newline at end of file
+// NOLINTEND(performance-enum-size)
+
+/// This struct holds information about enabled or disabled compile time features for FIRESTARTER.
+struct FirestarterOptionalFeatures {
+  /// Do we have a build that enabled optimization?
+  bool OptimizationEnabled = false;
+  /// Do we have a build that enabled CUDA or HIP?
+  bool CudaEnabled = false;
+  /// Do we have a build that enabled OneAPI?
+  bool OneAPIEnabled = false;
+  /// Is error detection enabled?
+  bool ErrorDetectionEnabled = false;
+  /// Are debug features enabled?
+  bool DebugFeatureEnabled = false;
+  /// Is dumping registers enabled?
+  bool DumpRegisterEnabled = false;
+
+  /// Is one of the GPU features enabled?
+  [[nodiscard]] constexpr auto gpuEnabled() const -> bool { return CudaEnabled || OneAPIEnabled; }
+};
+
+static constexpr const FirestarterOptionalFeatures OptionalFeatures {
+#if defined(linux) || defined(__linux__)
+  .OptimizationEnabled = true,
+#endif
+#if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
+  .CudaEnabled = true,
+#endif
+#ifdef FIRESTARTER_BUILD_ONEAPI
+  .OneAPIEnabled = true,
+#endif
+  .ErrorDetectionEnabled = true,
+#ifdef FIRESTARTER_DEBUG_FEATURES
+  .DebugFeatureEnabled = true, .DumpRegisterEnabled = true,
+#endif
+};
+
+} // namespace firestarter
\ No newline at end of file
diff --git a/src/firestarter/Main.cpp b/src/firestarter/Main.cpp
index ac2632c7..5c7ea3ba 100644
--- a/src/firestarter/Main.cpp
+++ b/src/firestarter/Main.cpp
@@ -132,27 +132,25 @@ void printHelp(cxxopts::Options const& Parser, std::string const& Section) {
     << "  ./FIRESTARTER -t 300          starts a 5 minute run of FIRESTARTER\n"
     << "  ./FIRESTARTER -l 50 -t 600    starts a 10 minute run of FIRESTARTER with\n"
     << "                                50\% high load and 50\% idle time\n"
-#if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_ONEAPI) || defined(FIRESTARTER_BUILD_HIP)
-    << "                                on CPUs and full load on GPUs\n"
-#endif
+    << (firestarter::OptionalFeatures.gpuEnabled() ? 
+       "                                on CPUs and full load on GPUs\n"
+     : "")
     << "  ./FIRESTARTER -l 75 -p 20000000\n"
     << "                                starts FIRESTARTER with an interval length\n"
     << "                                of 2 sec, 1.5s high load"
-#if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_ONEAPI) || defined(FIRESTARTER_BUILD_HIP)
-    << "                                on CPUs and full load on GPUs\n"
-#else
-    << "\n"
-#endif
-#if defined(linux) || defined(__linux__) 
-    << "  ./FIRESTARTER --measurement --start-delta=300000 -t 900\n"
-    << "                                starts FIRESTARTER measuring all available\n"
-    << "                                metrics for 15 minutes disregarding the first\n"
-    << "                                5 minutes and last two seconds (default to `--stop-delta`)\n"
-    << "  ./FIRESTARTER -t 20 --optimize=NSGA2 --optimization-metric sysfs-powercap-rapl,perf-ipc\n"
-    << "                                starts FIRESTARTER optimizing with the sysfs-powercap-rapl\n"
-    << "                                and perf-ipc metric. The duration is 20s long. The default\n"
-    << "                                instruction groups for the current platform will be used.\n"
-#endif
+    << (firestarter::OptionalFeatures.gpuEnabled() ? 
+       "                                on CPUs and full load on GPUs\n"
+     : "\n")
+    << (firestarter::OptionalFeatures.OptimizationEnabled ?
+       "  ./FIRESTARTER --measurement --start-delta=300000 -t 900\n"
+       "                                starts FIRESTARTER measuring all available\n"
+       "                                metrics for 15 minutes disregarding the first\n"
+       "                                5 minutes and last two seconds (default to `--stop-delta`)\n"
+       "  ./FIRESTARTER -t 20 --optimize=NSGA2 --optimization-metric sysfs-powercap-rapl,perf-ipc\n"
+       "                                starts FIRESTARTER optimizing with the sysfs-powercap-rapl\n"
+       "                                and perf-ipc metric. The duration is 20s long. The default\n"
+       "                                instruction groups for the current platform will be used.\n"
+     : "")
     ;
   // clang-format on
 }
@@ -164,16 +162,19 @@ Config::Config(int Argc, const char** Argv) {
 
   cxxopts::Options Parser(ExecutableName);
 
+  const auto HelpDescription =
+      std::string("Display usage information. SECTION can be any of: information | general | specialized-workloads") +
+      (firestarter::OptionalFeatures.DebugFeatureEnabled ? " | debug" : "") +
+      (firestarter::OptionalFeatures.OptimizationEnabled ? "\n| measurement | optimization" : "");
+
+  const auto LoadDescription =
+      std::string("Set the percentage of high CPU load to LOAD\n(%) default: 100, valid values: 0 <= LOAD <=\n100, "
+                  "threads will be idle in the remaining time,\nfrequency of load changes is determined by -p.") +
+      (firestarter::OptionalFeatures.gpuEnabled() ? " This option does NOT influence the GPU\nworkload!" : "");
+
   // clang-format off
   Parser.add_options("information")
-    ("h,help", "Display usage information. SECTION can be any of: information | general | specialized-workloads"
-#ifdef FIRESTARTER_DEBUG_FEATURES
-     " | debug"
-#endif
-#if defined(linux) || defined(__linux__)
-     "\n| measurement | optimization"
-#endif
-     ,
+    ("h,help", HelpDescription,
       cxxopts::value<std::string>()->implicit_value(""), "SECTION")
     ("v,version", "Display version information")
     ("c,copyright", "Display copyright information")
@@ -185,22 +186,23 @@ Config::Config(int Argc, const char** Argv) {
 
   Parser.add_options("general")
     ("i,function", "Specify integer ID of the load-function to be\nused (as listed by --avail)",
-      cxxopts::value<unsigned>()->default_value("0"), "ID")
-#if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_ONEAPI) || defined(FIRESTARTER_BUILD_HIP)
-    ("f,usegpufloat", "Use single precision matrix multiplications\ninstead of default")
-    ("d,usegpudouble", "Use double precision matrix multiplications\ninstead of default")
-    ("g,gpus", "Number of gpus to use, default: -1 (all)",
-      cxxopts::value<int>()->default_value("-1"))
-    ("m,matrixsize", "Size of the matrix to calculate, default: 0 (maximum)",
-      cxxopts::value<unsigned>()->default_value("0"))
-#endif
+      cxxopts::value<unsigned>()->default_value("0"), "ID");
+
+  if (firestarter::OptionalFeatures.gpuEnabled()) {
+    Parser.add_options("general")
+      ("f,usegpufloat", "Use single precision matrix multiplications\ninstead of default")
+      ("d,usegpudouble", "Use double precision matrix multiplications\ninstead of default")
+      ("g,gpus", "Number of gpus to use, default: -1 (all)",
+        cxxopts::value<int>()->default_value("-1"))
+      ("m,matrixsize", "Size of the matrix to calculate, default: 0 (maximum)",
+        cxxopts::value<unsigned>()->default_value("0"));
+  }
+
+  Parser.add_options("general")
     ("t,timeout", "Set the timeout (seconds) after which FIRESTARTER\nterminates itself, default: 0 (no timeout)",
       cxxopts::value<unsigned>()->default_value("0"), "TIMEOUT")
-    ("l,load", "Set the percentage of high CPU load to LOAD\n(%) default: 100, valid values: 0 <= LOAD <=\n100, threads will be idle in the remaining time,\nfrequency of load changes is determined by -p."
-#if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_ONEAPI) || defined(FIRESTARTER_BUILD_HIP)
-     " This option does NOT influence the GPU\nworkload!"
-#endif
-     , cxxopts::value<unsigned>()->default_value("100"), "LOAD")
+    ("l,load", LoadDescription,
+      cxxopts::value<unsigned>()->default_value("100"), "LOAD")
     ("p,period", "Set the interval length for CPUs to PERIOD\n(usec), default: 100000, each interval contains\na high load and an idle phase, the percentage\nof high load is defined by -l.",
       cxxopts::value<unsigned>()->default_value("100000"), "PERIOD")
     ("n,threads", "Specify the number of threads. Cannot be\ncombined with -b | --bind, which impicitly\nspecifies the number of threads.",
@@ -218,50 +220,50 @@ Config::Config(int Argc, const char** Argv) {
     ("set-line-count", "Set the number of lines for a payload.",
       cxxopts::value<unsigned>());
 
-#ifdef FIRESTARTER_DEBUG_FEATURES
-  Parser.add_options("debug")
-    ("allow-unavailable-payload", "")
-    ("dump-registers", "Dump the working registers on the first\nthread. Depending on the payload these are mm, xmm,\nymm or zmm. Only use it without a timeout and\n100 percent load. DELAY between dumps in secs. Cannot be used with --error-detection.",
-      cxxopts::value<unsigned>()->implicit_value("10"), "DELAY")
-    ("dump-registers-outpath", "Path for the dump of the output files. If\nPATH is not given, current working directory will\nbe used.",
-      cxxopts::value<std::string>()->default_value(""), "PATH");
-#endif
+  if (firestarter::OptionalFeatures.DebugFeatureEnabled) {
+    Parser.add_options("debug")
+      ("allow-unavailable-payload", "")
+      ("dump-registers", "Dump the working registers on the first\nthread. Depending on the payload these are mm, xmm,\nymm or zmm. Only use it without a timeout and\n100 percent load. DELAY between dumps in secs. Cannot be used with --error-detection.",
+        cxxopts::value<unsigned>()->implicit_value("10"), "DELAY")
+      ("dump-registers-outpath", "Path for the dump of the output files. If\nPATH is not given, current working directory will\nbe used.",
+        cxxopts::value<std::string>()->default_value(""), "PATH");
+  }
 
-#if defined(linux) || defined(__linux__)
-  Parser.add_options("measurement")
-    ("list-metrics", "List the available metrics.")
+  if (firestarter::OptionalFeatures.OptimizationEnabled) {
+    Parser.add_options("measurement")
+      ("list-metrics", "List the available metrics.")
 #ifndef FIRESTARTER_LINK_STATIC
-    ("metric-path", "Add a path to a shared library representing an interface for a metric. This option can be specified multiple times.",
-      cxxopts::value<std::vector<std::string>>()->default_value(""))
-#endif
-    ("metric-from-stdin", "Add a metric NAME with values from stdin.\nFormat of input: \"NAME TIME_SINCE_EPOCH VALUE\\n\".\nTIME_SINCE_EPOCH is a int64 in nanoseconds. VALUE is a double. (Do not forget to flush\nlines!)",
-      cxxopts::value<std::vector<std::string>>(), "NAME")
-    ("measurement", "Start a measurement for the time specified by\n-t | --timeout. (The timeout must be greater\nthan the start and stop deltas.) Cannot be\ncombined with --optimize.")
-    ("measurement-interval", "Interval of measurements in milliseconds, default: 100",
-      cxxopts::value<unsigned>()->default_value("100"))
-    ("start-delta", "Cut of first N milliseconds of measurement, default: 5000",
-      cxxopts::value<unsigned>()->default_value("5000"), "N")
-    ("stop-delta", "Cut of last N milliseconds of measurement, default: 2000",
-      cxxopts::value<unsigned>()->default_value("2000"), "N")
-    ("preheat", "Preheat for N seconds, default: 240",
-      cxxopts::value<unsigned>()->default_value("240"), "N");
-
-  Parser.add_options("optimization")
-    ("optimize", "Run the optimization with one of these algorithms: NSGA2.\nCannot be combined with --measurement.",
-      cxxopts::value<std::string>())
-    ("optimize-outfile", "Dump the output of the optimization into this\nfile, default: $PWD/$HOSTNAME_$DATE.json",
-      cxxopts::value<std::string>())
-    ("optimization-metric", "Use a metric for optimization. Metrics listed\nwith cli argument --list-metrics or specified\nwith --metric-from-stdin are valid.",
-      cxxopts::value<std::vector<std::string>>())
-    ("individuals", "Number of individuals for the population. For\nNSGA2 specify at least 5 and a multiple of 4,\ndefault: 20",
-      cxxopts::value<unsigned>()->default_value("20"))
-    ("generations", "Number of generations, default: 20",
-      cxxopts::value<unsigned>()->default_value("20"))
-    ("nsga2-cr", "Crossover probability. Must be in range [0,1[\ndefault: 0.6",
-      cxxopts::value<double>()->default_value("0.6"))
-    ("nsga2-m", "Mutation probability. Must be in range [0,1]\ndefault: 0.4",
-      cxxopts::value<double>()->default_value("0.4"));
+      ("metric-path", "Add a path to a shared library representing an interface for a metric. This option can be specified multiple times.",
+        cxxopts::value<std::vector<std::string>>()->default_value(""))
 #endif
+      ("metric-from-stdin", "Add a metric NAME with values from stdin.\nFormat of input: \"NAME TIME_SINCE_EPOCH VALUE\\n\".\nTIME_SINCE_EPOCH is a int64 in nanoseconds. VALUE is a double. (Do not forget to flush\nlines!)",
+        cxxopts::value<std::vector<std::string>>(), "NAME")
+      ("measurement", "Start a measurement for the time specified by\n-t | --timeout. (The timeout must be greater\nthan the start and stop deltas.) Cannot be\ncombined with --optimize.")
+      ("measurement-interval", "Interval of measurements in milliseconds, default: 100",
+        cxxopts::value<unsigned>()->default_value("100"))
+      ("start-delta", "Cut of first N milliseconds of measurement, default: 5000",
+        cxxopts::value<unsigned>()->default_value("5000"), "N")
+      ("stop-delta", "Cut of last N milliseconds of measurement, default: 2000",
+        cxxopts::value<unsigned>()->default_value("2000"), "N")
+      ("preheat", "Preheat for N seconds, default: 240",
+        cxxopts::value<unsigned>()->default_value("240"), "N");
+  
+    Parser.add_options("optimization")
+      ("optimize", "Run the optimization with one of these algorithms: NSGA2.\nCannot be combined with --measurement.",
+        cxxopts::value<std::string>())
+      ("optimize-outfile", "Dump the output of the optimization into this\nfile, default: $PWD/$HOSTNAME_$DATE.json",
+        cxxopts::value<std::string>())
+      ("optimization-metric", "Use a metric for optimization. Metrics listed\nwith cli argument --list-metrics or specified\nwith --metric-from-stdin are valid.",
+        cxxopts::value<std::vector<std::string>>())
+      ("individuals", "Number of individuals for the population. For\nNSGA2 specify at least 5 and a multiple of 4,\ndefault: 20",
+        cxxopts::value<unsigned>()->default_value("20"))
+      ("generations", "Number of generations, default: 20",
+        cxxopts::value<unsigned>()->default_value("20"))
+      ("nsga2-cr", "Crossover probability. Must be in range [0,1[\ndefault: 0.6",
+        cxxopts::value<double>()->default_value("0.6"))
+      ("nsga2-m", "Mutation probability. Must be in range [0,1]\ndefault: 0.4",
+        cxxopts::value<double>()->default_value("0.4"));
+  }
   // clang-format on
 
   try {
@@ -323,21 +325,21 @@ Config::Config(int Argc, const char** Argv) {
                                   "with -l/--load equal 100.");
     }
 
-#ifdef FIRESTARTER_DEBUG_FEATURES
-    AllowUnavailablePayload = static_cast<bool>(Options.count("allow-unavailable-payload"));
-    DumpRegisters = static_cast<bool>(Options.count("dump-registers"));
-    if (DumpRegisters) {
-      DumpRegistersTimeDelta = std::chrono::seconds(Options["dump-registers"].as<unsigned>());
-      if (Timeout != std::chrono::microseconds::zero() && LoadPercent != 100) {
-        throw std::invalid_argument("Option --dump-registers may only be used "
-                                    "without a timeout and full load.");
-      }
-      if (ErrorDetection) {
-        throw std::invalid_argument("Options --dump-registers and --error-detection cannot be used "
-                                    "together.");
+    if (firestarter::OptionalFeatures.DebugFeatureEnabled) {
+      AllowUnavailablePayload = static_cast<bool>(Options.count("allow-unavailable-payload"));
+      DumpRegisters = static_cast<bool>(Options.count("dump-registers"));
+      if (DumpRegisters) {
+        DumpRegistersTimeDelta = std::chrono::seconds(Options["dump-registers"].as<unsigned>());
+        if (Timeout != std::chrono::microseconds::zero() && LoadPercent != 100) {
+          throw std::invalid_argument("Option --dump-registers may only be used "
+                                      "without a timeout and full load.");
+        }
+        if (ErrorDetection) {
+          throw std::invalid_argument("Options --dump-registers and --error-detection cannot be used "
+                                      "together.");
+        }
       }
     }
-#endif
 
     RequestedNumThreads = Options["threads"].as<unsigned>();
 
@@ -350,22 +352,22 @@ Config::Config(int Argc, const char** Argv) {
     }
 #endif
 
-#if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_ONEAPI) || defined(FIRESTARTER_BUILD_HIP)
-    GpuUseFloat = Options.count("usegpufloat");
-    GpuUseDouble = Options.count("usegpudouble");
+    if (firestarter::OptionalFeatures.gpuEnabled()) {
+      GpuUseFloat = static_cast<bool>(Options.count("usegpufloat"));
+      GpuUseDouble = static_cast<bool>(Options.count("usegpudouble"));
 
-    if (GpuUseFloat && GpuUseDouble) {
-      throw std::invalid_argument("Options -f/--usegpufloat and "
-                                  "-d/--usegpudouble cannot be used together.");
-    }
+      if (GpuUseFloat && GpuUseDouble) {
+        throw std::invalid_argument("Options -f/--usegpufloat and "
+                                    "-d/--usegpudouble cannot be used together.");
+      }
 
-    GpuMatrixSize = Options["matrixsize"].as<unsigned>();
-    if (GpuMatrixSize > 0 && GpuMatrixSize < 64) {
-      throw std::invalid_argument("Option -m/--matrixsize may not be below 64.");
-    }
+      GpuMatrixSize = Options["matrixsize"].as<unsigned>();
+      if (GpuMatrixSize > 0 && GpuMatrixSize < 64) {
+        throw std::invalid_argument("Option -m/--matrixsize may not be below 64.");
+      }
 
-    Gpus = Options["gpus"].as<int>();
-#endif
+      Gpus = Options["gpus"].as<int>();
+    }
 
     PrintFunctionSummary = static_cast<bool>(Options.count("avail"));
 
@@ -377,56 +379,56 @@ Config::Config(int Argc, const char** Argv) {
       LineCount = Options["set-line-count"].as<unsigned>();
     }
 
-#if defined(linux) || defined(__linux__)
-    StartDelta = std::chrono::milliseconds(Options["start-delta"].as<unsigned>());
-    StopDelta = std::chrono::milliseconds(Options["stop-delta"].as<unsigned>());
-    MeasurementInterval = std::chrono::milliseconds(Options["measurement-interval"].as<unsigned>());
+    if (firestarter::OptionalFeatures.OptimizationEnabled) {
+      StartDelta = std::chrono::milliseconds(Options["start-delta"].as<unsigned>());
+      StopDelta = std::chrono::milliseconds(Options["stop-delta"].as<unsigned>());
+      MeasurementInterval = std::chrono::milliseconds(Options["measurement-interval"].as<unsigned>());
 #ifndef FIRESTARTER_LINK_STATIC
-    MetricPaths = Options["metric-path"].as<std::vector<std::string>>();
+      MetricPaths = Options["metric-path"].as<std::vector<std::string>>();
 #endif
-    if (static_cast<bool>(Options.count("metric-from-stdin"))) {
-      StdinMetrics = Options["metric-from-stdin"].as<std::vector<std::string>>();
-    }
-    Measurement = static_cast<bool>(Options.count("measurement"));
-    ListMetrics = static_cast<bool>(Options.count("list-metrics"));
-    Optimize = static_cast<bool>(Options.count("optimize"));
-
-    if (Optimize) {
-      if (ErrorDetection) {
-        throw std::invalid_argument("Options --error-detection and --optimize "
-                                    "cannot be used together.");
-      }
-      if (Measurement) {
-        throw std::invalid_argument("Options --measurement and --optimize cannot be used together.");
-      }
-      Preheat = std::chrono::seconds(Options["preheat"].as<unsigned>());
-      OptimizationAlgorithm = Options["optimize"].as<std::string>();
-      if (static_cast<bool>(Options.count("optimization-metric"))) {
-        OptimizationMetrics = Options["optimization-metric"].as<std::vector<std::string>>();
-      }
-      if (LoadPercent != 100) {
-        throw std::invalid_argument("Options -p | --period and -l | --load are "
-                                    "not compatible with --optimize.");
+      if (static_cast<bool>(Options.count("metric-from-stdin"))) {
+        StdinMetrics = Options["metric-from-stdin"].as<std::vector<std::string>>();
       }
-      if (Timeout == std::chrono::seconds::zero()) {
-        throw std::invalid_argument("Option -t | --timeout must be specified for optimization.");
-      }
-      EvaluationDuration = Timeout;
-      // this will deactivate the watchdog worker
-      Timeout = std::chrono::seconds::zero();
-      Individuals = Options["individuals"].as<unsigned>();
-      if (static_cast<bool>(Options.count("optimize-outfile"))) {
-        OptimizeOutfile = Options["optimize-outfile"].as<std::string>();
-      }
-      Generations = Options["generations"].as<unsigned>();
-      Nsga2Cr = Options["nsga2-cr"].as<double>();
-      Nsga2M = Options["nsga2-m"].as<double>();
-
-      if (OptimizationAlgorithm != "NSGA2") {
-        throw std::invalid_argument("Option --optimize must be any of: NSGA2");
+      Measurement = static_cast<bool>(Options.count("measurement"));
+      ListMetrics = static_cast<bool>(Options.count("list-metrics"));
+      Optimize = static_cast<bool>(Options.count("optimize"));
+
+      if (Optimize) {
+        if (ErrorDetection) {
+          throw std::invalid_argument("Options --error-detection and --optimize "
+                                      "cannot be used together.");
+        }
+        if (Measurement) {
+          throw std::invalid_argument("Options --measurement and --optimize cannot be used together.");
+        }
+        Preheat = std::chrono::seconds(Options["preheat"].as<unsigned>());
+        OptimizationAlgorithm = Options["optimize"].as<std::string>();
+        if (static_cast<bool>(Options.count("optimization-metric"))) {
+          OptimizationMetrics = Options["optimization-metric"].as<std::vector<std::string>>();
+        }
+        if (LoadPercent != 100) {
+          throw std::invalid_argument("Options -p | --period and -l | --load are "
+                                      "not compatible with --optimize.");
+        }
+        if (Timeout == std::chrono::seconds::zero()) {
+          throw std::invalid_argument("Option -t | --timeout must be specified for optimization.");
+        }
+        EvaluationDuration = Timeout;
+        // this will deactivate the watchdog worker
+        Timeout = std::chrono::seconds::zero();
+        Individuals = Options["individuals"].as<unsigned>();
+        if (static_cast<bool>(Options.count("optimize-outfile"))) {
+          OptimizeOutfile = Options["optimize-outfile"].as<std::string>();
+        }
+        Generations = Options["generations"].as<unsigned>();
+        Nsga2Cr = Options["nsga2-cr"].as<double>();
+        Nsga2M = Options["nsga2-m"].as<double>();
+
+        if (OptimizationAlgorithm != "NSGA2") {
+          throw std::invalid_argument("Option --optimize must be any of: NSGA2");
+        }
       }
     }
-#endif
 
   } catch (std::exception& E) {
     firestarter::log::error() << E.what() << "\n";

From 3f32f6f0a10d92f1a1a7f6ae76f42acb13522d12 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 6 Oct 2024 13:54:36 +0200
Subject: [PATCH 034/167] remove more ifdefs

---
 include/firestarter/Constants.hpp             |  17 ++
 .../firestarter/Environment/Environment.hpp   |  12 +-
 .../Environment/Platform/RuntimeConfig.hpp    |  14 +-
 include/firestarter/Firestarter.hpp           |   2 -
 include/firestarter/WindowsCompat.hpp         |  49 ++++
 src/firestarter/DumpRegisterWorker.cpp        |   6 +-
 .../Environment/X86/Payload/X86Payload.cpp    |  61 ++--
 src/firestarter/Firestarter.cpp               | 274 +++++++++---------
 8 files changed, 230 insertions(+), 205 deletions(-)
 create mode 100644 include/firestarter/WindowsCompat.hpp

diff --git a/include/firestarter/Constants.hpp b/include/firestarter/Constants.hpp
index f9e8ac26..31591866 100644
--- a/include/firestarter/Constants.hpp
+++ b/include/firestarter/Constants.hpp
@@ -56,6 +56,12 @@ struct FirestarterOptionalFeatures {
   bool DebugFeatureEnabled = false;
   /// Is dumping registers enabled?
   bool DumpRegisterEnabled = false;
+  /// Is the current build for X86?
+  bool IsX86 = false;
+  /// Is the current build for Windows?
+  bool IsWin32 = false;
+  /// Is the current build built with Windows MSC?
+  bool IsMsc = false;
 
   /// Is one of the GPU features enabled?
   [[nodiscard]] constexpr auto gpuEnabled() const -> bool { return CudaEnabled || OneAPIEnabled; }
@@ -75,6 +81,17 @@ static constexpr const FirestarterOptionalFeatures OptionalFeatures {
 #ifdef FIRESTARTER_DEBUG_FEATURES
   .DebugFeatureEnabled = true, .DumpRegisterEnabled = true,
 #endif
+#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+  .IsX86 = true,
+#else
+#error "FIRESTARTER is not implemented for this ISA"
+#endif
+#ifdef _WIN32
+  .IsWin32 = true,
+#endif
+#ifdef _MSC_VER
+  .IsMsc = true,
+#endif
 };
 
 } // namespace firestarter
\ No newline at end of file
diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index 86aa9dd2..9c348cc2 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -49,17 +49,7 @@ class Environment {
   virtual void printFunctionSummary() = 0;
 
   [[nodiscard]] auto selectedConfig() const -> platform::RuntimeConfig& {
-#if defined(__clang__)
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-value"
-#endif
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-value"
-    assert(("No RuntimeConfig selected", SelectedConfig != nullptr));
-#pragma GCC diagnostic pop
-#if defined(__clang__)
-#pragma clang diagnostic pop
-#endif
+    assert(SelectedConfig != nullptr && "No RuntimeConfig selected");
     return *SelectedConfig;
   }
 
diff --git a/include/firestarter/Environment/Platform/RuntimeConfig.hpp b/include/firestarter/Environment/Platform/RuntimeConfig.hpp
index 17770e97..46c3a142 100644
--- a/include/firestarter/Environment/Platform/RuntimeConfig.hpp
+++ b/include/firestarter/Environment/Platform/RuntimeConfig.hpp
@@ -67,19 +67,7 @@ class RuntimeConfig {
 
   [[nodiscard]] auto platformConfig() const -> PlatformConfig const& { return PlatformConfigValue; }
   [[nodiscard]] auto payload() const -> payload::Payload& {
-#if defined(__clang__)
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-value"
-#endif
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-value"
-    assert(("Payload pointer is null. Each thread has to use it's own "
-            "RuntimeConfig",
-            Payload != nullptr));
-#pragma GCC diagnostic pop
-#if defined(__clang__)
-#pragma clang diagnostic pop
-#endif
+    assert(Payload != nullptr && "Payload pointer is null. Each thread has to use it's own RuntimeConfig");
     return *Payload;
   }
   [[nodiscard]] auto thread() const -> unsigned { return Thread; }
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index f56f7164..06f69a38 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -165,14 +165,12 @@ class Firestarter {
   static auto watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load,
                              std::chrono::seconds Timeout) -> int;
 
-#ifdef FIRESTARTER_DEBUG_FEATURES
   // DumpRegisterWorker.cpp
   void initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, const std::string& DumpFilePath);
   void joinDumpRegisterWorker();
   static void dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Data);
 
   std::thread DumpRegisterWorkerThread;
-#endif
 
   static void setLoad(LoadThreadWorkType Value);
 
diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
new file mode 100644
index 00000000..3181dad3
--- /dev/null
+++ b/include/firestarter/WindowsCompat.hpp
@@ -0,0 +1,49 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2024 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+#pragma once
+
+// NOLINTBEGIN(cert-dcl59-cpp,google-build-namespaces)
+#ifdef _MSC_VER
+#include <intrin.h>
+#else
+namespace {
+
+/// Define the _mm_mfence and __cpuid function when we are not using MSC to enable the use of if constexpr instead of
+/// ifdefs.
+// NOLINTBEGIN(readability-identifier-naming,cert-dcl37-c,cert-dcl37-cpp,cert-dcl51-cpp,bugprone-reserved-identifier)
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-function"
+#endif
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+void _mm_mfence() noexcept {};
+void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
+#pragma GCC diagnostic pop
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+// NOLINTEND(readability-identifier-naming,cert-dcl37-c,cert-dcl37-cpp,cert-dcl51-cpp,bugprone-reserved-identifier)
+
+} // namespace
+#endif
+// NOLINTEND(cert-dcl59-cpp,google-build-namespaces)
\ No newline at end of file
diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp
index 19b7ca6e..1ab1563b 100644
--- a/src/firestarter/DumpRegisterWorker.cpp
+++ b/src/firestarter/DumpRegisterWorker.cpp
@@ -19,8 +19,6 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#ifdef FIRESTARTER_DEBUG_FEATURES
-
 #include <firestarter/Firestarter.hpp>
 #include <firestarter/Logging/Log.hpp>
 
@@ -169,6 +167,4 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Dat
   DumpFile.close();
 }
 
-} // namespace firestarter
-
-#endif
\ No newline at end of file
+} // namespace firestarter
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index dedf80d3..6d63a187 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -19,52 +19,49 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include "firestarter/Constants.hpp"
 #include <cassert>
 #include <chrono>
-#include <thread>
-
-#ifdef _MSC_VER
-#include <array>
-#include <intrin.h>
-#endif
-
+#include <firestarter/Constants.hpp>
 #include <firestarter/Environment/X86/Payload/X86Payload.hpp>
+#include <firestarter/WindowsCompat.hpp>
+#include <thread>
 
 namespace firestarter::environment::x86::payload {
 
 void X86Payload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) {
   auto Nap = Period / 100;
 
-#ifndef _MSC_VER
-  __asm__ __volatile__("mfence;"
-                       "cpuid;" ::
-                           : "eax", "ebx", "ecx", "edx");
-#else
-  std::array<int, 4> Cpuid;
-  _mm_mfence();
-  __cpuid(Cpuid.data(), 0);
-#endif
-
-  // while signal low load
-  while (LoadVar == LoadThreadWorkType::LoadLow) {
-#ifndef _MSC_VER
-    __asm__ __volatile__("mfence;"
-                         "cpuid;" ::
-                             : "eax", "ebx", "ecx", "edx");
-#else
+  if constexpr (firestarter::OptionalFeatures.IsMsc) {
+    std::array<int, 4> Cpuid{};
     _mm_mfence();
     __cpuid(Cpuid.data(), 0);
-#endif
-    std::this_thread::sleep_for(Nap);
-#ifndef _MSC_VER
+  } else {
     __asm__ __volatile__("mfence;"
                          "cpuid;" ::
                              : "eax", "ebx", "ecx", "edx");
-#else
-    _mm_mfence();
-    __cpuid(Cpuid.data(), 0);
-#endif
+  }
+
+  // while signal low load
+  while (LoadVar == LoadThreadWorkType::LoadLow) {
+    if constexpr (firestarter::OptionalFeatures.IsMsc) {
+      std::array<int, 4> Cpuid{};
+      _mm_mfence();
+      __cpuid(Cpuid.data(), 0);
+    } else {
+      __asm__ __volatile__("mfence;"
+                           "cpuid;" ::
+                               : "eax", "ebx", "ecx", "edx");
+    }
+    std::this_thread::sleep_for(Nap);
+    if constexpr (firestarter::OptionalFeatures.IsMsc) {
+      std::array<int, 4> Cpuid{};
+      _mm_mfence();
+      __cpuid(Cpuid.data(), 0);
+    } else {
+      __asm__ __volatile__("mfence;"
+                           "cpuid;" ::
+                               : "eax", "ebx", "ecx", "edx");
+    }
   }
 }
 
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index d92d1b44..b3e85763 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -26,18 +26,12 @@
 #include <firestarter/Optimizer/Problem/CLIArgumentProblem.hpp>
 #endif
 
-#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
-#include <firestarter/Environment/X86/X86Environment.hpp>
-#endif
-
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
-
 #include <algorithm>
 #include <csignal>
+#include <firestarter/Environment/X86/X86Environment.hpp>
 #include <firestarter/Firestarter.hpp>
 #include <firestarter/Logging/Log.hpp>
+#include <firestarter/WindowsCompat.hpp>
 #include <functional>
 #include <utility>
 
@@ -89,32 +83,29 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
     this->Period = std::chrono::microseconds::zero();
   }
 
-#if defined(linux) || defined(__linux__)
-#else
-  (void)ListMetrics;
-  (void)MeasurementInterval;
-  (void)MetricPaths;
-  (void)StdinMetrics;
-#endif
+  if constexpr (!firestarter::OptionalFeatures.OptimizationEnabled) {
+    (void)ListMetrics;
+    (void)MeasurementInterval;
+    (void)MetricPaths;
+    (void)StdinMetrics;
+  }
 
-#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
-  Environment = std::make_unique<environment::x86::X86Environment>();
-  const auto& X86Env = *dynamic_cast<environment::x86::X86Environment*>(Environment.get());
-#else
-#error "FIRESTARTER is not implemented for this ISA"
-#endif
+  if constexpr (firestarter::OptionalFeatures.IsX86) {
+    Environment = std::make_unique<environment::x86::X86Environment>();
+  }
 
   Environment->evaluateCpuAffinity(RequestedNumThreads, CpuBind);
 
-#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
-  // Error detection uses crc32 instruction added by the SSE4.2 extension to x86
-  if (ErrorDetection) {
-    if (!X86Env.topology().featuresAsmjit().has(asmjit::CpuFeatures::X86::kSSE4_2)) {
-      throw std::invalid_argument("Option --error-detection requires the crc32 "
-                                  "instruction added with SSE_4_2.\n");
+  if constexpr (firestarter::OptionalFeatures.IsX86) {
+    // Error detection uses crc32 instruction added by the SSE4.2 extension to x86
+    if (ErrorDetection) {
+      const auto& X86Env = *dynamic_cast<environment::x86::X86Environment*>(Environment.get());
+      if (!X86Env.topology().featuresAsmjit().has(asmjit::CpuFeatures::X86::kSSE4_2)) {
+        throw std::invalid_argument("Option --error-detection requires the crc32 "
+                                    "instruction added with SSE_4_2.\n");
+      }
     }
   }
-#endif
 
   if (ErrorDetection && Environment->requestedNumThreads() < 2) {
     throw std::invalid_argument("Option --error-detection must run with 2 or more threads. Number of "
@@ -144,92 +135,93 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
     Environment->setLineCount(LineCount);
   }
 
-#if defined(linux) || defined(__linux__)
-  if (Measurement || ListMetrics || Optimize) {
-    MeasurementWorker = std::make_shared<measurement::MeasurementWorker>(
-        MeasurementInterval, Environment->requestedNumThreads(), MetricPaths, StdinMetrics);
+  if constexpr (firestarter::OptionalFeatures.OptimizationEnabled) {
+    if (Measurement || ListMetrics || Optimize) {
+      MeasurementWorker = std::make_shared<measurement::MeasurementWorker>(
+          MeasurementInterval, Environment->requestedNumThreads(), MetricPaths, StdinMetrics);
 
-    if (ListMetrics) {
-      log::info() << MeasurementWorker->availableMetrics();
-      std::exit(EXIT_SUCCESS);
-    }
+      if (ListMetrics) {
+        log::info() << MeasurementWorker->availableMetrics();
+        std::exit(EXIT_SUCCESS);
+      }
 
-    // init all metrics
-    auto All = MeasurementWorker->metricNames();
-    auto Initialized = MeasurementWorker->initMetrics(All);
+      // init all metrics
+      auto All = MeasurementWorker->metricNames();
+      auto Initialized = MeasurementWorker->initMetrics(All);
 
-    if (Initialized.empty()) {
-      std::invalid_argument("No metrics initialized");
-    }
-
-    // check if selected metrics are initialized
-    for (auto const& OptimizationMetric : OptimizationMetrics) {
-      auto NameEqual = [OptimizationMetric](auto const& Name) {
-        auto InvertedName = "-" + Name;
-        return Name == OptimizationMetric || InvertedName == OptimizationMetric;
-      };
-      // metric name is not found
-      if (std::find_if(All.begin(), All.end(), NameEqual) == All.end()) {
-        std::invalid_argument("Metric \"" + OptimizationMetric + "\" does not exist.");
+      if (Initialized.empty()) {
+        std::invalid_argument("No metrics initialized");
       }
-      // metric has not initialized properly
-      if (std::find_if(Initialized.begin(), Initialized.end(), NameEqual) == Initialized.end()) {
-        std::invalid_argument("Metric \"" + OptimizationMetric + "\" failed to initialize.");
+
+      // check if selected metrics are initialized
+      for (auto const& OptimizationMetric : OptimizationMetrics) {
+        auto NameEqual = [OptimizationMetric](auto const& Name) {
+          auto InvertedName = "-" + Name;
+          return Name == OptimizationMetric || InvertedName == OptimizationMetric;
+        };
+        // metric name is not found
+        if (std::find_if(All.begin(), All.end(), NameEqual) == All.end()) {
+          std::invalid_argument("Metric \"" + OptimizationMetric + "\" does not exist.");
+        }
+        // metric has not initialized properly
+        if (std::find_if(Initialized.begin(), Initialized.end(), NameEqual) == Initialized.end()) {
+          std::invalid_argument("Metric \"" + OptimizationMetric + "\" failed to initialize.");
+        }
       }
     }
-  }
 
-  if (Optimize) {
-    auto ApplySettings = std::bind(
-        [this](std::vector<std::pair<std::string, unsigned>> const& Setting) {
-          using Clock = std::chrono::high_resolution_clock;
-          auto Start = Clock::now();
+    if (Optimize) {
+      auto ApplySettings = std::bind(
+          [this](std::vector<std::pair<std::string, unsigned>> const& Setting) {
+            using Clock = std::chrono::high_resolution_clock;
+            auto Start = Clock::now();
 
-          signalSwitch(Setting);
+            signalSwitch(Setting);
 
-          LoadVar = LoadThreadWorkType::LoadHigh;
+            LoadVar = LoadThreadWorkType::LoadHigh;
 
-          signalWork();
+            signalWork();
 
-          uint64_t StartTimestamp = std::numeric_limits<uint64_t>::max();
-          uint64_t StopTimestamp = 0;
+            uint64_t StartTimestamp = std::numeric_limits<uint64_t>::max();
+            uint64_t StopTimestamp = 0;
 
-          for (auto const& Thread : LoadThreads) {
-            auto Td = Thread.second;
+            for (auto const& Thread : LoadThreads) {
+              auto Td = Thread.second;
 
-            StartTimestamp = std::min<uint64_t>(StartTimestamp, Td->LastRun.StartTsc);
-            StopTimestamp = std::max<uint64_t>(StopTimestamp, Td->LastRun.StopTsc);
-          }
+              StartTimestamp = std::min<uint64_t>(StartTimestamp, Td->LastRun.StartTsc);
+              StopTimestamp = std::max<uint64_t>(StopTimestamp, Td->LastRun.StopTsc);
+            }
 
-          for (auto const& Thread : LoadThreads) {
-            auto Td = Thread.second;
-            ipcEstimateMetricInsert(static_cast<double>(Td->LastRun.Iterations) *
-                                    static_cast<double>(LoadThreads.front().second->config().payload().instructions()) /
-                                    static_cast<double>(StopTimestamp - StartTimestamp));
-          }
+            for (auto const& Thread : LoadThreads) {
+              auto Td = Thread.second;
+              ipcEstimateMetricInsert(
+                  static_cast<double>(Td->LastRun.Iterations) *
+                  static_cast<double>(LoadThreads.front().second->config().payload().instructions()) /
+                  static_cast<double>(StopTimestamp - StartTimestamp));
+            }
 
-          auto End = Clock::now();
+            auto End = Clock::now();
 
-          log::trace() << "Switching payload took "
-                       << std::chrono::duration_cast<std::chrono::milliseconds>(End - Start).count() << "ms";
-        },
-        std::placeholders::_1);
+            log::trace() << "Switching payload took "
+                         << std::chrono::duration_cast<std::chrono::milliseconds>(End - Start).count() << "ms";
+          },
+          std::placeholders::_1);
 
-    auto Prob = std::make_shared<firestarter::optimizer::problem::CLIArgumentProblem>(
-        std::move(ApplySettings), MeasurementWorker, OptimizationMetrics, EvaluationDuration, StartDelta, StopDelta,
-        Environment->selectedConfig().payloadItems());
+      auto Prob = std::make_shared<firestarter::optimizer::problem::CLIArgumentProblem>(
+          std::move(ApplySettings), MeasurementWorker, OptimizationMetrics, EvaluationDuration, StartDelta, StopDelta,
+          Environment->selectedConfig().payloadItems());
 
-    Population = firestarter::optimizer::Population(std::move(Prob));
+      Population = firestarter::optimizer::Population(std::move(Prob));
 
-    if (OptimizationAlgorithm == "NSGA2") {
-      Algorithm = std::make_unique<firestarter::optimizer::algorithm::NSGA2>(Generations, Nsga2Cr, Nsga2M);
-    } else {
-      throw std::invalid_argument("Algorithm " + OptimizationAlgorithm + " unknown.");
-    }
+      if (OptimizationAlgorithm == "NSGA2") {
+        Algorithm = std::make_unique<firestarter::optimizer::algorithm::NSGA2>(Generations, Nsga2Cr, Nsga2M);
+      } else {
+        throw std::invalid_argument("Algorithm " + OptimizationAlgorithm + " unknown.");
+      }
 
-    Algorithm->checkPopulation(static_cast<firestarter::optimizer::Population const&>(Population), Individuals);
+      Algorithm->checkPopulation(static_cast<firestarter::optimizer::Population const&>(Population), Individuals);
+    }
   }
-#endif
 
   Environment->printSelectedCodePathSummary();
 
@@ -240,9 +232,9 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
   initLoadWorkers((LoadPercent == 0), Period);
 
   // add some signal handler for aborting FIRESTARTER
-#ifndef _WIN32
-  std::signal(SIGALRM, Firestarter::sigalrmHandler);
-#endif
+  if constexpr (!firestarter::OptionalFeatures.IsWin32) {
+    std::signal(SIGALRM, Firestarter::sigalrmHandler);
+  }
 
   std::signal(SIGTERM, Firestarter::sigtermHandler);
   std::signal(SIGINT, Firestarter::sigtermHandler);
@@ -259,70 +251,70 @@ void Firestarter::mainThread() {
   _oneapi = std::make_unique<oneapi::OneAPI>(&loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus);
 #endif
 
-#if defined(linux) || defined(__linux__)
-  // if measurement is enabled, start it here
-  if (Measurement) {
-    MeasurementWorker->startMeasurement();
+  if constexpr (firestarter::OptionalFeatures.OptimizationEnabled) {
+    // if measurement is enabled, start it here
+    if (Measurement) {
+      MeasurementWorker->startMeasurement();
+    }
   }
-#endif
 
   signalWork();
 
-#ifdef FIRESTARTER_DEBUG_FEATURES
-  if (DumpRegisters) {
-    initDumpRegisterWorker(DumpRegistersTimeDelta, DumpRegistersOutpath);
+  if constexpr (firestarter::OptionalFeatures.DumpRegisterEnabled) {
+    if (DumpRegisters) {
+      initDumpRegisterWorker(DumpRegistersTimeDelta, DumpRegistersOutpath);
+    }
   }
-#endif
 
   // worker thread for load control
   watchdogWorker(Period, Load, Timeout);
 
-#if defined(linux) || defined(__linux__)
-  // check if optimization is selected
-  if (Optimize) {
-    auto StartTime = optimizer::History::getTime();
+  if constexpr (firestarter::OptionalFeatures.OptimizationEnabled) {
+    // check if optimization is selected
+    if (Optimize) {
+      auto StartTime = optimizer::History::getTime();
 
-    Firestarter::Optimizer = std::make_unique<optimizer::OptimizerWorker>(std::move(Algorithm), Population,
-                                                                          OptimizationAlgorithm, Individuals, Preheat);
+      Firestarter::Optimizer = std::make_unique<optimizer::OptimizerWorker>(
+          std::move(Algorithm), Population, OptimizationAlgorithm, Individuals, Preheat);
 
-    // wait here until optimizer thread terminates
-    Firestarter::Optimizer->join();
+      // wait here until optimizer thread terminates
+      Firestarter::Optimizer->join();
 
-    auto PayloadItems = Environment->selectedConfig().payloadItems();
+      auto PayloadItems = Environment->selectedConfig().payloadItems();
 
-    firestarter::optimizer::History::save(OptimizeOutfile, StartTime, PayloadItems, Argc, Argv);
+      firestarter::optimizer::History::save(OptimizeOutfile, StartTime, PayloadItems, Argc, Argv);
 
-    // print the best 20 according to each metric
-    firestarter::optimizer::History::printBest(OptimizationMetrics, PayloadItems);
+      // print the best 20 according to each metric
+      firestarter::optimizer::History::printBest(OptimizationMetrics, PayloadItems);
 
-    // stop all the load threads
-    std::raise(SIGTERM);
+      // stop all the load threads
+      std::raise(SIGTERM);
+    }
   }
-#endif
 
   // wait for watchdog to timeout or until user terminates
   joinLoadWorkers();
-#ifdef FIRESTARTER_DEBUG_FEATURES
-  if (DumpRegisters) {
-    joinDumpRegisterWorker();
+  if constexpr (firestarter::OptionalFeatures.DumpRegisterEnabled) {
+    if (DumpRegisters) {
+      joinDumpRegisterWorker();
+    }
   }
-#endif
 
   if (!Optimize) {
     printPerformanceReport();
   }
 
-#if defined(linux) || defined(__linux__)
-  // if measurment is enabled, stop it here
-  if (Measurement) {
-    // TODO: clear this up
-    log::info() << "metric,num_timepoints,duration_ms,average,stddev";
-    for (auto const& [name, sum] : MeasurementWorker->getValues(StartDelta, StopDelta)) {
-      log::info() << std::quoted(name) << "," << sum.NumTimepoints << "," << sum.Duration.count() << "," << sum.Average
-                  << "," << sum.Stddev;
+  if constexpr (firestarter::OptionalFeatures.OptimizationEnabled) {
+    // if measurment is enabled, stop it here
+    if (Measurement) {
+      // TODO: clear this up
+      log::info() << "metric,num_timepoints,duration_ms,average,stddev";
+      for (auto const& [name, sum] : MeasurementWorker->getValues(StartDelta, StopDelta)) {
+        log::info() << std::quoted(name) << "," << sum.NumTimepoints << "," << sum.Duration.count() << ","
+                    << sum.Average << "," << sum.Stddev;
+      }
     }
   }
-#endif
 
   if (ErrorDetection) {
     printThreadErrorReport();
@@ -332,15 +324,13 @@ void Firestarter::mainThread() {
 void Firestarter::setLoad(LoadThreadWorkType Value) {
   // signal load change to workers
   Firestarter::LoadVar = Value;
-#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
-#ifndef _MSC_VER
-  __asm__ __volatile__("mfence;");
-#else
-  _mm_mfence();
-#endif
-#else
-#error "FIRESTARTER is not implemented for this ISA"
-#endif
+  if constexpr (firestarter::OptionalFeatures.IsX86) {
+    if constexpr (firestarter::OptionalFeatures.IsMsc) {
+      _mm_mfence();
+    } else {
+      __asm__ __volatile__("mfence;");
+    }
+  }
 }
 
 void Firestarter::sigalrmHandler(int Signum) { (void)Signum; }

From 35a73b12a7d7d4aa8391f6e057f5f4440d6bb2fe Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 6 Oct 2024 14:08:30 +0200
Subject: [PATCH 035/167] remove more ifdefs (CUDA/HIP/OneAPI build guards)

---
 include/firestarter/Cuda/Cuda.hpp     | 18 +++++++++++++++---
 include/firestarter/Firestarter.hpp   | 22 +++++-----------------
 include/firestarter/OneAPI/OneAPI.hpp | 19 ++++++++++++++++---
 src/firestarter/Firestarter.cpp       |  9 ++-------
 4 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/include/firestarter/Cuda/Cuda.hpp b/include/firestarter/Cuda/Cuda.hpp
index 1c6f234e..23d6c4bf 100644
--- a/include/firestarter/Cuda/Cuda.hpp
+++ b/include/firestarter/Cuda/Cuda.hpp
@@ -21,6 +21,7 @@
 
 #pragma once
 
+#include "firestarter/Constants.hpp"
 #include <condition_variable>
 #include <mutex>
 #include <thread>
@@ -33,11 +34,22 @@ class Cuda {
   std::condition_variable WaitForInitCv;
   std::mutex WaitForInitCvMutex;
 
-  static void initGpus(std::condition_variable& Cv, volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble,
-                       unsigned MatrixSize, int Gpus);
+  static void initGpus(std::condition_variable& Cv, const volatile firestarter::LoadThreadWorkType& LoadVar,
+                       bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus);
 
 public:
-  Cuda(volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus);
+  Cuda(volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus)
+#if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
+      ;
+#else
+  {
+    (void)&LoadVar;
+    (void)UseFloat;
+    (void)UseDouble;
+    (void)MatrixSize;
+    (void)Gpus;
+  }
+#endif
 
   ~Cuda() {
     if (InitThread.joinable()) {
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 06f69a38..2d42543b 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -21,16 +21,6 @@
 
 #pragma once
 
-#if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
-#include "Cuda/Cuda.hpp"
-#endif
-
-#ifdef FIRESTARTER_BUILD_ONEAPI
-#include "OneAPI/OneAPI.hpp"
-#endif
-
-#include "Constants.hpp"
-
 #if defined(linux) || defined(__linux__)
 #include "Measurement/MeasurementWorker.hpp"
 #include "Optimizer/Algorithm.hpp"
@@ -38,8 +28,11 @@
 #include "Optimizer/Population.hpp"
 #endif
 
+#include "Constants.hpp"
+#include "Cuda/Cuda.hpp"
 #include "DumpRegisterWorkerData.hpp"
 #include "LoadWorkerData.hpp"
+#include "OneAPI/OneAPI.hpp"
 
 #include <chrono>
 #include <condition_variable>
@@ -108,13 +101,8 @@ class Firestarter {
 
   std::unique_ptr<environment::Environment> Environment;
 
-#if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
-  std::unique_ptr<cuda::Cuda> _cuda;
-#endif
-
-#ifdef FIRESTARTER_BUILD_ONEAPI
-  std::unique_ptr<oneapi::OneAPI> _oneapi;
-#endif
+  std::unique_ptr<cuda::Cuda> Cuda;
+  std::unique_ptr<oneapi::OneAPI> Oneapi;
 
 #if defined(linux) || defined(__linux__)
   inline static std::unique_ptr<optimizer::OptimizerWorker> Optimizer;
diff --git a/include/firestarter/OneAPI/OneAPI.hpp b/include/firestarter/OneAPI/OneAPI.hpp
index 603d6df7..a15a1572 100644
--- a/include/firestarter/OneAPI/OneAPI.hpp
+++ b/include/firestarter/OneAPI/OneAPI.hpp
@@ -21,6 +21,7 @@
 
 #pragma once
 
+#include "firestarter/Constants.hpp"
 #include <condition_variable>
 #include <mutex>
 #include <thread>
@@ -33,11 +34,23 @@ class OneAPI {
   std::condition_variable WaitForInitCv;
   std::mutex WaitForInitCvMutex;
 
-  static void initGpus(std::condition_variable& Cv, const volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble,
-                       unsigned MatrixSize, int Gpus);
+  static void initGpus(std::condition_variable& Cv, const volatile firestarter::LoadThreadWorkType& LoadVar,
+                       bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus);
 
 public:
-  OneAPI(volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus);
+  OneAPI(volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize,
+         int Gpus)
+#if defined(FIRESTARTER_BUILD_ONEAPI)
+      ;
+#else
+  {
+    (void)&LoadVar;
+    (void)UseFloat;
+    (void)UseDouble;
+    (void)MatrixSize;
+    (void)Gpus;
+  }
+#endif
 
   ~OneAPI() {
     if (InitThread.joinable()) {
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index b3e85763..dda9dd54 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -243,13 +243,8 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
 void Firestarter::mainThread() {
   Environment->printThreadSummary();
 
-#if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
-  _cuda = std::make_unique<cuda::Cuda>(&loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus);
-#endif
-
-#ifdef FIRESTARTER_BUILD_ONEAPI
-  _oneapi = std::make_unique<oneapi::OneAPI>(&loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus);
-#endif
+  Cuda = std::make_unique<cuda::Cuda>(LoadVar, GpuUseFloat, GpuUseDouble, GpuMatrixSize, Gpus);
+  Oneapi = std::make_unique<oneapi::OneAPI>(LoadVar, GpuUseFloat, GpuUseDouble, GpuMatrixSize, Gpus);
 
   if constexpr (firestarter::OptionalFeatures.OptimizationEnabled) {
     // if measurement is enabled, start it here

From 6fdcf47081c6c09d2fc3fd67d0710d0fd861d91a Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 6 Oct 2024 14:12:22 +0200
Subject: [PATCH 036/167] remove more ifdefs (linux guards around measurement/optimizer includes)

---
 include/firestarter/Firestarter.hpp | 11 ++++-------
 src/firestarter/Firestarter.cpp     | 11 ++++-------
 src/firestarter/Main.cpp            |  2 +-
 3 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 2d42543b..ad33374a 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -21,18 +21,15 @@
 
 #pragma once
 
-#if defined(linux) || defined(__linux__)
-#include "Measurement/MeasurementWorker.hpp"
-#include "Optimizer/Algorithm.hpp"
-#include "Optimizer/OptimizerWorker.hpp"
-#include "Optimizer/Population.hpp"
-#endif
-
 #include "Constants.hpp"
 #include "Cuda/Cuda.hpp"
 #include "DumpRegisterWorkerData.hpp"
 #include "LoadWorkerData.hpp"
+#include "Measurement/MeasurementWorker.hpp"
 #include "OneAPI/OneAPI.hpp"
+#include "Optimizer/Algorithm.hpp"
+#include "Optimizer/OptimizerWorker.hpp"
+#include "Optimizer/Population.hpp"
 
 #include <chrono>
 #include <condition_variable>
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index dda9dd54..d2a80226 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -19,18 +19,15 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#if defined(linux) || defined(__linux__)
-#include <firestarter/Measurement/Metric/IPCEstimate.h>
-#include <firestarter/Optimizer/Algorithm/NSGA2.hpp>
-#include <firestarter/Optimizer/History.hpp>
-#include <firestarter/Optimizer/Problem/CLIArgumentProblem.hpp>
-#endif
-
 #include <algorithm>
 #include <csignal>
 #include <firestarter/Environment/X86/X86Environment.hpp>
 #include <firestarter/Firestarter.hpp>
 #include <firestarter/Logging/Log.hpp>
+#include <firestarter/Measurement/Metric/IPCEstimate.h>
+#include <firestarter/Optimizer/Algorithm/NSGA2.hpp>
+#include <firestarter/Optimizer/History.hpp>
+#include <firestarter/Optimizer/Problem/CLIArgumentProblem.hpp>
 #include <firestarter/WindowsCompat.hpp>
 #include <functional>
 #include <utility>
diff --git a/src/firestarter/Main.cpp b/src/firestarter/Main.cpp
index 5c7ea3ba..ce16bcd9 100644
--- a/src/firestarter/Main.cpp
+++ b/src/firestarter/Main.cpp
@@ -472,4 +472,4 @@ auto main(int argc, const char** argv) -> int {
   }
 
   return EXIT_SUCCESS;
-}
+}
\ No newline at end of file

From 4da8714dc14f04e7006c6ed067930ecee8b63ccc Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 6 Oct 2024 14:17:07 +0200
Subject: [PATCH 037/167] remove more ifdefs (FIRESTARTER_DEBUG_FEATURES in DumpRegisterWorkerData)

---
 include/firestarter/DumpRegisterWorkerData.hpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/firestarter/DumpRegisterWorkerData.hpp b/include/firestarter/DumpRegisterWorkerData.hpp
index cf5e22ba..4e4f9e18 100644
--- a/include/firestarter/DumpRegisterWorkerData.hpp
+++ b/include/firestarter/DumpRegisterWorkerData.hpp
@@ -26,8 +26,6 @@
 #include <chrono>
 #include <utility>
 
-#ifdef FIRESTARTER_DEBUG_FEATURES
-
 namespace firestarter {
 
 class DumpRegisterWorkerData {
@@ -58,6 +56,4 @@ class DumpRegisterWorkerData {
   std::string DumpFilePath;
 };
 
-} // namespace firestarter
-
-#endif
+} // namespace firestarter
\ No newline at end of file

From 05ab3120ca1a46e928cb591bd822990d36ce38f6 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 6 Oct 2024 14:41:42 +0200
Subject: [PATCH 038/167] remove more ifdefs (linux guards around optimizer members and sigtermHandler)

---
 include/firestarter/Firestarter.hpp |  2 --
 src/firestarter/Firestarter.cpp     | 10 +++++-----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index ad33374a..22de1d33 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -101,12 +101,10 @@ class Firestarter {
   std::unique_ptr<cuda::Cuda> Cuda;
   std::unique_ptr<oneapi::OneAPI> Oneapi;
 
-#if defined(linux) || defined(__linux__)
   inline static std::unique_ptr<optimizer::OptimizerWorker> Optimizer;
   std::shared_ptr<measurement::MeasurementWorker> MeasurementWorker;
   std::unique_ptr<firestarter::optimizer::Algorithm> Algorithm;
   firestarter::optimizer::Population Population;
-#endif
 
   // LoadThreadWorker.cpp
   void initLoadWorkers(bool LowLoad, std::chrono::microseconds Period);
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index d2a80226..abc02acb 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -340,12 +340,12 @@ void Firestarter::sigtermHandler(int Signum) {
   }
   Firestarter::WatchdogTerminateAlert.notify_all();
 
-#if defined(linux) || defined(__linux__)
-  // if we have optimization running stop it
-  if (Firestarter::Optimizer) {
-    Firestarter::Optimizer->kill();
+  if constexpr (firestarter::OptionalFeatures.OptimizationEnabled) {
+    // if we have optimization running stop it
+    if (Firestarter::Optimizer) {
+      Firestarter::Optimizer->kill();
+    }
   }
-#endif
 }
 
 } // namespace firestarter
\ No newline at end of file

From f58bfd52f15740838aba8cc52ab1dafd92af3380 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 6 Oct 2024 16:07:44 +0200
Subject: [PATCH 039/167] fix mingw compatibility

---
 include/firestarter/DumpRegisterWorkerData.hpp  |  7 ++++---
 include/firestarter/Environment/CPUTopology.hpp |  1 +
 include/firestarter/Environment/Environment.hpp |  2 ++
 include/firestarter/WindowsCompat.hpp           | 14 +++++++++++++-
 src/firestarter/DumpRegisterWorker.cpp          |  2 +-
 5 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/include/firestarter/DumpRegisterWorkerData.hpp b/include/firestarter/DumpRegisterWorkerData.hpp
index 4e4f9e18..3de34fa1 100644
--- a/include/firestarter/DumpRegisterWorkerData.hpp
+++ b/include/firestarter/DumpRegisterWorkerData.hpp
@@ -23,6 +23,7 @@
 
 #include "LoadWorkerData.hpp"
 #include "Logging/Log.hpp"
+#include "WindowsCompat.hpp" // IWYU pragma: keep
 #include <chrono>
 #include <utility>
 
@@ -37,9 +38,9 @@ class DumpRegisterWorkerData {
       : LoadWorkerDataPtr(std::move(LoadWorkerDataPtr))
       , DumpTimeDelta(DumpTimeDelta) {
     if (DumpFilePath.empty()) {
-      char Cwd[PATH_MAX];
-      if (getcwd(Cwd, sizeof(Cwd)) != nullptr) {
-        this->DumpFilePath = Cwd;
+      char* Pwd = get_current_dir_name();
+      if (Pwd) {
+        this->DumpFilePath = Pwd;
       } else {
         log::error() << "getcwd() failed. Set --dump-registers-outpath to /tmp";
         this->DumpFilePath = "/tmp";
diff --git a/include/firestarter/Environment/CPUTopology.hpp b/include/firestarter/Environment/CPUTopology.hpp
index 2091daee..e50a0215 100644
--- a/include/firestarter/Environment/CPUTopology.hpp
+++ b/include/firestarter/Environment/CPUTopology.hpp
@@ -21,6 +21,7 @@
 
 #pragma once
 
+#include <cstdint>
 #include <list>
 #include <ostream>
 #include <sstream>
diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index 9c348cc2..60e2f338 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -67,10 +67,12 @@ class Environment {
 private:
   uint64_t RequestedNumThreads = 0;
 
+#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
   // TODO: replace these functions with the builtins one from hwlocom hwloc
   static auto cpuAllowed(unsigned Id) -> int;
   static auto cpuSet(unsigned Id) -> int;
   void addCpuSet(unsigned Cpu, cpu_set_t& Mask) const;
+#endif
 
   std::vector<unsigned> CpuBind;
 };
diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index 3181dad3..b82aacb7 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -36,7 +36,9 @@ namespace {
 #endif
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-function"
+#if not(defined(__MINGW32__) || defined(__MINGW64__))
 void _mm_mfence() noexcept {};
+#endif
 void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
 #pragma GCC diagnostic pop
 #if defined(__clang__)
@@ -46,4 +48,14 @@ void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
 
 } // namespace
 #endif
-// NOLINTEND(cert-dcl59-cpp,google-build-namespaces)
\ No newline at end of file
+// NOLINTEND(cert-dcl59-cpp,google-build-namespaces)
+
+#ifdef _WIN32
+// SIGALRM is not available on Windows
+#define SIGALRM 0
+
+namespace {
+#include <direct.h>
+inline auto get_current_dir_name() -> char* { return _getcwd(nullptr, 0); }
+} // namespace
+#endif
\ No newline at end of file
diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp
index 1ab1563b..97d5e135 100644
--- a/src/firestarter/DumpRegisterWorker.cpp
+++ b/src/firestarter/DumpRegisterWorker.cpp
@@ -84,7 +84,7 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Dat
   std::stringstream DumpFilePath;
   DumpFilePath << Data->DumpFilePath;
 #if defined(__MINGW32__) || defined(__MINGW64__)
-  dumpFilePath << "\\";
+  DumpFilePath << "\\";
 #else
   DumpFilePath << "/";
 #endif

From f0e9439931176808d2abc7281ceccd32f27ab2fc Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 6 Oct 2024 21:52:16 +0200
Subject: [PATCH 040/167] fix some MSC errors

---
 include/firestarter/Constants.hpp             | 54 +++++++++++++------
 .../Measurement/MeasurementWorker.hpp         |  5 +-
 .../firestarter/Optimizer/OptimizerWorker.hpp |  5 +-
 include/firestarter/WindowsCompat.hpp         | 10 ++++
 4 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/include/firestarter/Constants.hpp b/include/firestarter/Constants.hpp
index 31591866..9549f581 100644
--- a/include/firestarter/Constants.hpp
+++ b/include/firestarter/Constants.hpp
@@ -45,52 +45,72 @@ enum class LoadThreadWorkType : EightBytesType {
 /// This struct holds infomation about enabled or disabled compile time features for FIRESTARTER.
 struct FirestarterOptionalFeatures {
   /// Do we have a build that enabled optimization?
-  bool OptimizationEnabled = false;
+  bool OptimizationEnabled;
   /// Do we have a build that enabled CUDA or HIP?
-  bool CudaEnabled = false;
+  bool CudaEnabled;
   /// Do we have a build that enabled OneAPU?
-  bool OneAPIEnabled = false;
+  bool OneAPIEnabled;
   /// Is error detection enabled?
-  bool ErrorDetectionEnabled = false;
+  bool ErrorDetectionEnabled;
   /// Are debug features enabled?
-  bool DebugFeatureEnabled = false;
+  bool DebugFeatureEnabled;
   /// Is dumping registers enabled?
-  bool DumpRegisterEnabled = false;
+  bool DumpRegisterEnabled;
   /// Is the current build for X86?
-  bool IsX86 = false;
+  bool IsX86;
   /// Is the current build for Windows?
-  bool IsWin32 = false;
+  bool IsWin32;
   /// Is the current build built with Windows MSC?
-  bool IsMsc = false;
+  bool IsMsc;
 
   /// Is one of the GPU features enabled?
   [[nodiscard]] constexpr auto gpuEnabled() const -> bool { return CudaEnabled || OneAPIEnabled; }
 };
 
+// MSC only supports designated initializers from C++20
 static constexpr const FirestarterOptionalFeatures OptionalFeatures {
 #if defined(linux) || defined(__linux__)
-  .OptimizationEnabled = true,
+  /*OptimizationEnabled=*/true,
+#else
+  /*OptimizationEnabled=*/false,
 #endif
+
 #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
-  .CudaEnabled = true,
+      /*CudaEnabled=*/true,
+#else
+      /*CudaEnabled=*/false,
 #endif
+
 #ifdef FIRESTARTER_BUILD_ONEAPI
-  .OneAPIEnabled = true,
+      /*OneAPIEnabled=*/true,
+#else
+      /*OneAPIEnabled=*/false,
 #endif
-  .ErrorDetectionEnabled = true,
+
+      /*ErrorDetectionEnabled=*/true,
+
 #ifdef FIRESTARTER_DEBUG_FEATURES
-  .DebugFeatureEnabled = true, .DumpRegisterEnabled = true,
+      /*DebugFeatureEnabled=*/true, /*DumpRegisterEnabled =*/true,
+#else
+      /*DebugFeatureEnabled=*/false, /*DumpRegisterEnabled =*/false,
 #endif
+
 #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
-  .IsX86 = true,
+      /*IsX86=*/true,
 #else
 #error "FIRESTARTER is not implemented for this ISA"
 #endif
+
 #ifdef _WIN32
-  .IsWin32 = true,
+      /*IsWin32=*/true,
+#else
+      /*IsWin32=*/false,
 #endif
+
 #ifdef _MSC_VER
-  .IsMsc = true,
+      /*IsMsc=*/true,
+#else
+      /*IsMsc=*/false,
 #endif
 };
 
diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp
index 019c6753..7396e713 100644
--- a/include/firestarter/Measurement/MeasurementWorker.hpp
+++ b/include/firestarter/Measurement/MeasurementWorker.hpp
@@ -27,14 +27,11 @@
 #include "MetricInterface.h"
 #include "Summary.hpp"
 #include "TimeValue.hpp"
+#include "firestarter/WindowsCompat.hpp" // IWYU pragma: keep
 #include <chrono>
 #include <map>
 #include <mutex>
 
-extern "C" {
-#include <pthread.h>
-}
-
 void insertCallback(void* Cls, const char* MetricName, int64_t TimeSinceEpoch, double Value);
 
 namespace firestarter::measurement {
diff --git a/include/firestarter/Optimizer/OptimizerWorker.hpp b/include/firestarter/Optimizer/OptimizerWorker.hpp
index 8fe35abc..2ac242cb 100644
--- a/include/firestarter/Optimizer/OptimizerWorker.hpp
+++ b/include/firestarter/Optimizer/OptimizerWorker.hpp
@@ -21,13 +21,10 @@
 
 #include "Algorithm.hpp"
 #include "Population.hpp"
+#include "firestarter/WindowsCompat.hpp" // IWYU pragma: keep
 #include <chrono>
 #include <memory>
 
-extern "C" {
-#include <pthread.h>
-}
-
 namespace firestarter::optimizer {
 
 class OptimizerWorker {
diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index b82aacb7..cfff3a41 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -58,4 +58,14 @@ namespace {
 #include <direct.h>
 inline auto get_current_dir_name() -> char* { return _getcwd(nullptr, 0); }
 } // namespace
+#endif
+
+// Make references in header files to pthread_t compatible to MSC. This will not make them functionally work.
+#ifdef _MSC_VER
+struct Placeholder {};
+using pthread_t = Placeholder;
+#else
+extern "C" {
+#include <pthread.h>
+}
 #endif
\ No newline at end of file

From 230103dd1f4ec3756bb7b0f77119c534bc1458c6 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Mon, 7 Oct 2024 16:55:45 +0200
Subject: [PATCH 041/167] fix errors in MSC compile

---
 include/firestarter/Logging/Log.hpp | 18 ++++++++++++++----
 src/firestarter/Main.cpp            |  8 ++++----
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/include/firestarter/Logging/Log.hpp b/include/firestarter/Logging/Log.hpp
index a2874ebb..d5fc9d5e 100644
--- a/include/firestarter/Logging/Log.hpp
+++ b/include/firestarter/Logging/Log.hpp
@@ -54,10 +54,20 @@ class StdOut {
   }
 };
 
-using Record = nitro::log::record<nitro::log::severity_attribute, nitro::log::message_attribute,
+// NOLINTBEGIN(readability-identifier-naming)
+// The class may not be named Record since this is used as a template argument name in nitro which will cause errors
+// when compiling with MSC.
+using record = nitro::log::record<nitro::log::severity_attribute, nitro::log::message_attribute,
                                   nitro::log::timestamp_attribute, nitro::log::std_thread_id_attribute>;
+// NOLINTEND(readability-identifier-naming)
 
-template <typename Record> class Formater {
+template <typename Record>
+// NOLINTBEGIN(readability-identifier-naming)
+// The class may not be named Formater since this is used as a template argument name in nitro which will cause errors
+// when compiling with MSC. We will also write it with lower case and the correct spelling in case it gets renamed
+// correctly there.
+class formatter {
+  // NOLINTEND(readability-identifier-naming)
 public:
   auto format(Record& R) -> std::string {
     std::stringstream S;
@@ -92,9 +102,9 @@ using WorkerFilter = nitro::log::filter::and_filter<Filter<Record>, FirstWorkerT
 
 } // namespace logging
 
-using log = nitro::log::logger<logging::Record, logging::Formater, firestarter::logging::StdOut, logging::Filter>;
+using log = nitro::log::logger<logging::record, logging::formatter, firestarter::logging::StdOut, logging::Filter>;
 
 using workerLog =
-    nitro::log::logger<logging::Record, logging::Formater, firestarter::logging::StdOut, logging::WorkerFilter>;
+    nitro::log::logger<logging::record, logging::formatter, firestarter::logging::StdOut, logging::WorkerFilter>;
 
 } // namespace firestarter
diff --git a/src/firestarter/Main.cpp b/src/firestarter/Main.cpp
index ce16bcd9..04b17a28 100644
--- a/src/firestarter/Main.cpp
+++ b/src/firestarter/Main.cpp
@@ -270,13 +270,13 @@ Config::Config(int Argc, const char** Argv) {
     auto Options = Parser.parse(Argc, Argv);
 
     if (static_cast<bool>(Options.count("quiet"))) {
-      firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::warn);
+      firestarter::logging::Filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::warn);
     } else if (static_cast<bool>(Options.count("report"))) {
-      firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::debug);
+      firestarter::logging::Filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::debug);
     } else if (static_cast<bool>(Options.count("debug"))) {
-      firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::trace);
+      firestarter::logging::Filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::trace);
     } else {
-      firestarter::logging::Filter<firestarter::logging::Record>::set_severity(nitro::log::severity_level::info);
+      firestarter::logging::Filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::info);
     }
 
     if (static_cast<bool>(Options.count("version"))) {

From bfc21677ade733efe5049e2ca835ee934ccae8c8 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Mon, 7 Oct 2024 18:51:15 +0200
Subject: [PATCH 042/167] fix errors in MSC compile

---
 include/firestarter/LoadWorkerData.hpp      |  7 ++++---
 include/firestarter/Optimizer/History.hpp   |  5 +----
 include/firestarter/WindowsCompat.hpp       | 20 ++++++++++++++++++++
 src/firestarter/DumpRegisterWorker.cpp      |  3 ++-
 src/firestarter/Environment/CPUTopology.cpp |  5 +++--
 src/firestarter/Environment/Environment.cpp |  2 +-
 src/firestarter/Firestarter.cpp             |  3 ++-
 src/firestarter/LoadWorker.cpp              |  8 ++++----
 8 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index ef45404c..65c3286d 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -26,6 +26,7 @@
 #include "DumpRegisterStruct.hpp"
 #include "Environment/Environment.hpp"
 #include "ErrorDetectionStruct.hpp"
+#include <array>
 #include <atomic>
 #include <cmath>
 #include <cstddef>
@@ -106,7 +107,7 @@ class LoadWorkerData {
     }
   };
 
-  LoadWorkerData(int Id, environment::Environment& Environment, volatile LoadThreadWorkType& LoadVar,
+  LoadWorkerData(uint64_t Id, environment::Environment& Environment, volatile LoadThreadWorkType& LoadVar,
                  std::chrono::microseconds Period, bool DumpRegisters, bool ErrorDetection)
       : LoadVar(LoadVar)
       , Period(Period)
@@ -124,7 +125,7 @@ class LoadWorkerData {
     this->CommunicationRight = std::move(CommunicationRight);
   }
 
-  [[nodiscard]] auto id() const -> int { return Id; }
+  [[nodiscard]] auto id() const -> uint64_t { return Id; }
   [[nodiscard]] auto environment() const -> environment::Environment& { return Environment; }
   [[nodiscard]] auto config() const -> environment::platform::RuntimeConfig& { return *Config; }
 
@@ -173,7 +174,7 @@ class LoadWorkerData {
   std::shared_ptr<uint64_t> CommunicationRight;
 
 private:
-  int Id;
+  uint64_t Id;
   environment::Environment& Environment;
   environment::platform::RuntimeConfig* Config;
 };
diff --git a/include/firestarter/Optimizer/History.hpp b/include/firestarter/Optimizer/History.hpp
index 21d969f5..b6acc566 100644
--- a/include/firestarter/Optimizer/History.hpp
+++ b/include/firestarter/Optimizer/History.hpp
@@ -25,6 +25,7 @@
 #include "../Logging/Log.hpp"
 #include "../Measurement/Summary.hpp"
 #include "Individual.hpp"
+#include "firestarter/WindowsCompat.hpp" // IWYU pragma: keep
 #include <algorithm>
 #include <cassert>
 #include <cstring>
@@ -35,10 +36,6 @@
 #include <optional>
 #include <vector>
 
-extern "C" {
-#include <unistd.h>
-}
-
 namespace firestarter::optimizer {
 
 struct History {
diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index cfff3a41..8eb46144 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -58,9 +58,21 @@ namespace {
 #include <direct.h>
 inline auto get_current_dir_name() -> char* { return _getcwd(nullptr, 0); }
 } // namespace
+#else
+#include <unistd.h>
+#endif
+
+// correct include for gethostname
+#ifdef _MSC_VER
+#include <winsock.h>
+#else
+// NOLINTBEGIN(readability-duplicate-include)
+#include <unistd.h>
+// NOLINTEND(readability-duplicate-include)
 #endif
 
 // Make references in header files to pthread_t compatible to MSC. This will not make them functionally work.
+// We will be able to remove this hack once we transition from using pthread to std::thread
 #ifdef _MSC_VER
 struct Placeholder {};
 using pthread_t = Placeholder;
@@ -68,4 +80,12 @@ using pthread_t = Placeholder;
 extern "C" {
 #include <pthread.h>
 }
+#endif
+
+// Disable __asm__ __volatile__ in MSC
+// Static assert wont work, since if constexpr doesn't seem to work correctly
+#ifdef _MSC_VER
+#define __volatile__(X, ...)                                                                                           \
+  assert(false && "Attempted to use code path that uses the incorrect inline assembly macros for MSC.")
+#define __asm__
 #endif
\ No newline at end of file
diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp
index 97d5e135..37b7bb67 100644
--- a/src/firestarter/DumpRegisterWorker.cpp
+++ b/src/firestarter/DumpRegisterWorker.cpp
@@ -65,8 +65,9 @@ void Firestarter::initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, con
 void Firestarter::joinDumpRegisterWorker() { this->DumpRegisterWorkerThread.join(); }
 
 void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Data) {
-
+#if defined(linux) || defined(__linux__)
   pthread_setname_np(pthread_self(), "DumpRegWorker");
+#endif
 
   const auto RegisterCount = Data->LoadWorkerDataPtr->config().payload().registerCount();
   const auto RegisterSize = Data->LoadWorkerDataPtr->config().payload().registerSize();
diff --git a/src/firestarter/Environment/CPUTopology.cpp b/src/firestarter/Environment/CPUTopology.cpp
index fa4d97ca..4283c34b 100644
--- a/src/firestarter/Environment/CPUTopology.cpp
+++ b/src/firestarter/Environment/CPUTopology.cpp
@@ -68,7 +68,8 @@ auto CPUTopology::print(std::ostream& Stream) const -> std::ostream& {
 
       auto* CacheObj = hwloc_get_obj_by_type(Topology, Cache, 0);
       std::array<char, 128> String{};
-      hwloc_obj_type_snprintf(String.begin(), sizeof(String), CacheObj, 0);
+      auto* StringPtr = String.data();
+      hwloc_obj_type_snprintf(StringPtr, sizeof(String), CacheObj, 0);
 
       switch (CacheObj->attr->cache.type) {
       case HWLOC_OBJ_CACHE_DATA:
@@ -378,7 +379,7 @@ auto CPUTopology::maxNumThreads() const -> unsigned {
 
     for (int I = 0; I < Width; I++) {
       auto* Obj = hwloc_get_obj_by_type(Topology, HWLOC_OBJ_PU, I);
-      Max = std::max(Max, Obj->os_index);
+      Max = (std::max)(Max, Obj->os_index);
     }
 
     return Max + 1;
diff --git a/src/firestarter/Environment/Environment.cpp b/src/firestarter/Environment/Environment.cpp
index a8c55cbc..88664044 100644
--- a/src/firestarter/Environment/Environment.cpp
+++ b/src/firestarter/Environment/Environment.cpp
@@ -178,7 +178,7 @@ void Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::s
   }
 #endif
 
-  this->RequestedNumThreads = std::min(RequestedNumThreads, topology().maxNumThreads());
+  this->RequestedNumThreads = (std::min)(RequestedNumThreads, topology().maxNumThreads());
 }
 
 void Environment::printThreadSummary() {
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index abc02acb..534dec53 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -19,6 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/WindowsCompat.hpp"
 #include <algorithm>
 #include <csignal>
 #include <firestarter/Environment/X86/X86Environment.hpp>
@@ -179,7 +180,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
 
             signalWork();
 
-            uint64_t StartTimestamp = std::numeric_limits<uint64_t>::max();
+            uint64_t StartTimestamp = (std::numeric_limits<uint64_t>::max)();
             uint64_t StopTimestamp = 0;
 
             for (auto const& Thread : LoadThreads) {
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index 2ecf5d43..5a136a40 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -92,7 +92,7 @@ void Firestarter::initLoadWorkers(bool LowLoad, std::chrono::microseconds Period
 
     if (I == 0) {
       // only show error for all worker threads except first.
-      firestarter::logging::FirstWorkerThreadFilter<firestarter::logging::Record>::setFirstThread(T.get_id());
+      firestarter::logging::FirstWorkerThreadFilter<firestarter::logging::record>::setFirstThread(T.get_id());
     }
 
     LoadThreads.emplace_back(std::move(T), Td);
@@ -175,7 +175,7 @@ void Firestarter::printThreadErrorReport() {
 
 void Firestarter::printPerformanceReport() {
   // performance report
-  uint64_t StartTimestamp = std::numeric_limits<uint64_t>::max();
+  uint64_t StartTimestamp = (std::numeric_limits<uint64_t>::max)();
   uint64_t StopTimestamp = 0;
 
   uint64_t Iterations = 0;
@@ -188,8 +188,8 @@ void Firestarter::printPerformanceReport() {
     log::debug() << "Thread " << Td->id() << ": " << Td->LastRun.Iterations
                  << " iterations, tsc_delta: " << Td->LastRun.StopTsc - Td->LastRun.StartTsc;
 
-    StartTimestamp = std::min(StartTimestamp, Td->LastRun.StartTsc.load());
-    StopTimestamp = std::max(StopTimestamp, Td->LastRun.StopTsc.load());
+    StartTimestamp = (std::min)(StartTimestamp, Td->LastRun.StartTsc.load());
+    StopTimestamp = (std::max)(StopTimestamp, Td->LastRun.StopTsc.load());
 
     Iterations += Td->LastRun.Iterations.load();
   }

From bc3e37d684ac37d582c33cf4b4f70498b17c4f36 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Mon, 7 Oct 2024 18:57:57 +0200
Subject: [PATCH 043/167] add inline to functions defined in the windows compat
 header

---
 include/firestarter/WindowsCompat.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index 8eb46144..846084d5 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -37,9 +37,9 @@ namespace {
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-function"
 #if not(defined(__MINGW32__) || defined(__MINGW64__))
-void _mm_mfence() noexcept {};
+inline void _mm_mfence() noexcept {};
 #endif
-void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
+inline void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
 #pragma GCC diagnostic pop
 #if defined(__clang__)
 #pragma clang diagnostic pop

From 4910835066d2faf8dcd781f44a70c7ff64f794ae Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Mon, 7 Oct 2024 19:00:48 +0200
Subject: [PATCH 044/167] AlignedAlloc: try to fix infinite recursion in free
 function

---
 include/firestarter/AlignedAlloc.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/firestarter/AlignedAlloc.hpp b/include/firestarter/AlignedAlloc.hpp
index 3c8605e9..7c5714fb 100644
--- a/include/firestarter/AlignedAlloc.hpp
+++ b/include/firestarter/AlignedAlloc.hpp
@@ -63,7 +63,7 @@ struct AlignedAlloc {
   static void free(void* Ptr) {
     // NOLINTBEGIN(cppcoreguidelines-owning-memory,cppcoreguidelines-no-malloc)
 #if defined(__APPLE__)
-    free(Ptr);
+    ::free(Ptr);
 #elif defined(__MINGW64__)
     _mm_free(Ptr);
 #elif defined(_MSC_VER)

From 6169843ba8b50d4c8b65e3c2c41fde10d768c357 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 8 Oct 2024 01:01:10 +0200
Subject: [PATCH 045/167] WindowsCompat: fix _mm_mfence compatibility

---
 include/firestarter/WindowsCompat.hpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index 846084d5..9b02c4ea 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -36,7 +36,9 @@ namespace {
 #endif
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-function"
-#if not(defined(__MINGW32__) || defined(__MINGW64__))
+#if defined(__clang__)
+#include <emmintrin.h>
+#elif not(defined(__MINGW32__) || defined(__MINGW64__))
 inline void _mm_mfence() noexcept {};
 #endif
 inline void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};

From 33fc035153b0d2c6774041871d88a104acdda974 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 8 Oct 2024 01:12:20 +0200
Subject: [PATCH 046/167] fix Firestarter.cpp includes

---
 src/firestarter/Firestarter.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 534dec53..776688b5 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -19,8 +19,6 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include "firestarter/WindowsCompat.hpp"
-#include <algorithm>
 #include <csignal>
 #include <firestarter/Environment/X86/X86Environment.hpp>
 #include <firestarter/Firestarter.hpp>
@@ -30,8 +28,6 @@
 #include <firestarter/Optimizer/History.hpp>
 #include <firestarter/Optimizer/Problem/CLIArgumentProblem.hpp>
 #include <firestarter/WindowsCompat.hpp>
-#include <functional>
-#include <utility>
 
 namespace firestarter {
 

From 7799e06632db3ff7e893c228849d4385a78db9b9 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 8 Oct 2024 01:57:53 +0200
Subject: [PATCH 047/167] WindowsCompat: fix _mm_mfence compatibility

---
 include/firestarter/WindowsCompat.hpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index 9b02c4ea..1a929cf0 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -36,10 +36,8 @@ namespace {
 #endif
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-function"
-#if defined(__clang__)
-#include <emmintrin.h>
-#elif not(defined(__MINGW32__) || defined(__MINGW64__))
-inline void _mm_mfence() noexcept {};
+#if not(defined(__MINGW32__) || defined(__MINGW64__))
+inline void _mm_mfence(){};
 #endif
 inline void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
 #pragma GCC diagnostic pop

From e7efd15b0261896e0ac64b4831bfe051b67c563a Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 8 Oct 2024 02:49:00 +0200
Subject: [PATCH 048/167] WindowsCompat: fix _mm_mfence compatibility

---
 include/firestarter/WindowsCompat.hpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index 1a929cf0..72efae51 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -36,8 +36,9 @@ namespace {
 #endif
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-function"
-#if not(defined(__MINGW32__) || defined(__MINGW64__))
-inline void _mm_mfence(){};
+#if defined(__clang__)
+#elif not(defined(__MINGW32__) || defined(__MINGW64__))
+inline void _mm_mfence() noexcept {};
 #endif
 inline void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
 #pragma GCC diagnostic pop

From c29ccf832be3d630dfba98039e390901f57737fd Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 8 Oct 2024 02:49:37 +0200
Subject: [PATCH 049/167] fix static build

---
 src/firestarter/Measurement/MeasurementWorker.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp
index f0ae3900..a6584daf 100644
--- a/src/firestarter/Measurement/MeasurementWorker.cpp
+++ b/src/firestarter/Measurement/MeasurementWorker.cpp
@@ -81,7 +81,7 @@ MeasurementWorker::MeasurementWorker(std::chrono::milliseconds UpdateInterval, u
     Metrics.push_back(Metric);
   }
 #else
-  (void)MetricDylibs;
+  (void)MetricDylibsNames;
 #endif
 
   // setup metric objects for metric names passed from stdin.

From 21c10be98efdcb7b6b60fff7f4c4246363fab1de Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 8 Oct 2024 02:56:17 +0200
Subject: [PATCH 050/167] fix get_current_dir_name in apple build

---
 include/firestarter/WindowsCompat.hpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index 72efae51..fcc46664 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -59,6 +59,12 @@ namespace {
 #include <direct.h>
 inline auto get_current_dir_name() -> char* { return _getcwd(nullptr, 0); }
 } // namespace
+#elif defined(__APPLE__)
+#include <unistd.h>
+namespace {
+#include <direct.h>
+inline auto get_current_dir_name() -> char* { return getcwd(nullptr, 0); }
+} // namespace
 #else
 #include <unistd.h>
 #endif

From e8370bf1b0780a9a972bd2f237e6ea6bc3487ca2 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 8 Oct 2024 02:58:04 +0200
Subject: [PATCH 051/167] fix get_current_dir_name in apple build

---
 include/firestarter/WindowsCompat.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index fcc46664..3af72364 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -62,7 +62,6 @@ inline auto get_current_dir_name() -> char* { return _getcwd(nullptr, 0); }
 #elif defined(__APPLE__)
 #include <unistd.h>
 namespace {
-#include <direct.h>
 inline auto get_current_dir_name() -> char* { return getcwd(nullptr, 0); }
 } // namespace
 #else

From e196bf247dc4ef0fee50b79380ca9a6e08fa6d83 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Wed, 16 Oct 2024 22:21:14 +0200
Subject: [PATCH 052/167] cleanup cuda and hip code

---
 include/firestarter/Cuda/CudaHipCompat.hpp | 759 +++++++++++++++++++++
 src/firestarter/Cuda/Cuda.cpp              | 588 +++++-----------
 2 files changed, 922 insertions(+), 425 deletions(-)
 create mode 100644 include/firestarter/Cuda/CudaHipCompat.hpp

diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp
new file mode 100644
index 00000000..bb5a1a87
--- /dev/null
+++ b/include/firestarter/Cuda/CudaHipCompat.hpp
@@ -0,0 +1,759 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2020-2023 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+// This file provides compatibility for the minor differences between the CUDA and HIP APIs. We do this by:
+// 1. Include the required header files for CUDA or HIP
+// 2. Define compatibility types between CUDA and HIP. This results in all enum names to be the same in the source code.
+// These types are mapped to the ones with the correct prefix. These are cu and hip, CU and HIP, cuda and hip or CUDA
+// and HIP.
+// 3. Define functions that convert the error code enums into strings.
+// 4. Define compatibility functions for calls to CUDA, HIP or one of their libraries (blas, rand etc.)
+
+#pragma once
+
+#include <cstddef>
+#include <optional>
+#include <sstream>
+#include <type_traits>
+
+namespace firestarter::cuda::compat {
+
+#ifdef FIRESTARTER_BUILD_CUDA
+// Start of CUDA compatibility types
+#include <cublas_v2.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <curand_kernel.h>
+
+enum class BlasStatusT : std::underlying_type_t<cublasStatus_t> {
+  BLAS_STATUS_SUCCESS = CUBLAS_STATUS_SUCCESS,
+  BLAS_STATUS_NOT_INITIALIZED = CUBLAS_STATUS_NOT_INITIALIZED,
+  BLAS_STATUS_ALLOC_FAILED = CUBLAS_STATUS_ALLOC_FAILED,
+  BLAS_STATUS_INVALID_VALUE = CUBLAS_STATUS_INVALID_VALUE,
+  BLAS_STATUS_ARCH_MISMATCH = CUBLAS_STATUS_ARCH_MISMATCH,
+  BLAS_STATUS_MAPPING_ERROR = CUBLAS_STATUS_MAPPING_ERROR,
+  BLAS_STATUS_EXECUTION_FAILED = CUBLAS_STATUS_EXECUTION_FAILED,
+  BLAS_STATUS_INTERNAL_ERROR = CUBLAS_STATUS_INTERNAL_ERROR,
+  BLAS_STATUS_NOT_SUPPORTED = CUBLAS_STATUS_NOT_SUPPORTED,
+  BLAS_STATUS_LICENSE_ERROR = CUBLAS_STATUS_LICENSE_ERROR,
+};
+
+constexpr const char* AccelleratorString = "CUDA";
+
+enum class ErrorT : std::underlying_type_t<cudaError_t> { // compat wrapper around the native cudaError_t codes
+  Success = cudaSuccess,
+};
+
+enum class RandStatusT : std::underlying_type_t<curandStatus_t> {
+  RAND_STATUS_SUCCESS = CURAND_STATUS_SUCCESS,
+  RAND_STATUS_VERSION_MISMATCH = CURAND_STATUS_VERSION_MISMATCH,
+  RAND_STATUS_NOT_INITIALIZED = CURAND_STATUS_NOT_INITIALIZED,
+  RAND_STATUS_ALLOCATION_FAILED = CURAND_STATUS_ALLOCATION_FAILED,
+  RAND_STATUS_TYPE_ERROR = CURAND_STATUS_TYPE_ERROR,
+  RAND_STATUS_OUT_OF_RANGE = CURAND_STATUS_OUT_OF_RANGE,
+  RAND_STATUS_LENGTH_NOT_MULTIPLE = CURAND_STATUS_LENGTH_NOT_MULTIPLE,
+  RAND_STATUS_DOUBLE_PRECISION_REQUIRED = CURAND_STATUS_DOUBLE_PRECISION_REQUIRED,
+  RAND_STATUS_LAUNCH_FAILURE = CURAND_STATUS_LAUNCH_FAILURE,
+  RAND_STATUS_PREEXISTING_FAILURE = CURAND_STATUS_PREEXISTING_FAILURE,
+  RAND_STATUS_INITIALIZATION_FAILED = CURAND_STATUS_INITIALIZATION_FAILED,
+  RAND_STATUS_ARCH_MISMATCH = CURAND_STATUS_ARCH_MISMATCH,
+  RAND_STATUS_INTERNAL_ERROR = CURAND_STATUS_INTERNAL_ERROR,
+};
+
+using StreamOrContext = CUcontext;
+
+template <typename FloatingPointType> using DevicePtr = CUdeviceptr;
+
+using DeviceProperties = struct cudaDeviceProp;
+
+using RandGenerator = curandGenerator_t;
+
+using BlasHandle = cublasHandle_t;
+
+using BlasStatus = cublasStatus_t;
+
+enum class BlasOperation : std::underlying_type_t<cublasOperation_t> {
+  BLAS_OP_N = CUBLAS_OP_N,
+  BLAS_OP_T = CUBLAS_OP_T,
+  BLAS_OP_C = CUBLAS_OP_C,
+};
+
+#elif defined(FIRESTARTER_BUILD_HIP)
+// Start of HIP compatibility types
+
+#include <hip/hip_runtime.h>
+#include <hip/hip_runtime_api.h>
+#include <hipblas/hipblas.h>
+#include <hiprand_kernel.h>
+
+enum class BlasStatusT : std::underlying_type_t<hipblasStatus_t> {
+  BLAS_STATUS_SUCCESS = HIPBLAS_STATUS_SUCCESS,
+  BLAS_STATUS_NOT_INITIALIZED = HIPBLAS_STATUS_NOT_INITIALIZED,
+  BLAS_STATUS_ALLOC_FAILED = HIPBLAS_STATUS_ALLOC_FAILED,
+  BLAS_STATUS_INVALID_VALUE = HIPBLAS_STATUS_INVALID_VALUE,
+  BLAS_STATUS_ARCH_MISMATCH = HIPBLAS_STATUS_ARCH_MISMATCH,
+  BLAS_STATUS_MAPPING_ERROR = HIPBLAS_STATUS_MAPPING_ERROR,
+  BLAS_STATUS_EXECUTION_FAILED = HIPBLAS_STATUS_EXECUTION_FAILED,
+  BLAS_STATUS_INTERNAL_ERROR = HIPBLAS_STATUS_INTERNAL_ERROR,
+  BLAS_STATUS_NOT_SUPPORTED = HIPBLAS_STATUS_NOT_SUPPORTED,
+  BLAS_STATUS_UNKNOWN = HIPBLAS_STATUS_UNKNOWN,
+  BLAS_STATUS_HANDLE_IS_NULLPTR = HIPBLAS_STATUS_HANDLE_IS_NULLPTR,
+  BLAS_STATUS_INVALID_ENUM = HIPBLAS_STATUS_INVALID_ENUM,
+};
+
+constexpr const char* AccelleratorString = "HIP";
+
+enum class ErrorT : std::underlying_type_t<hipError_t> { // compat wrapper around the native hipError_t codes
+  Success = hipSuccess,
+};
+
+enum class RandStatusT : std::underlying_type_t<hiprandStatus_t> {
+  RAND_STATUS_SUCCESS = HIPRAND_STATUS_SUCCESS,
+  RAND_STATUS_VERSION_MISMATCH = HIPRAND_STATUS_VERSION_MISMATCH,
+  RAND_STATUS_NOT_INITIALIZED = HIPRAND_STATUS_NOT_INITIALIZED,
+  RAND_STATUS_ALLOCATION_FAILED = HIPRAND_STATUS_ALLOCATION_FAILED,
+  RAND_STATUS_TYPE_ERROR = HIPRAND_STATUS_TYPE_ERROR,
+  RAND_STATUS_OUT_OF_RANGE = HIPRAND_STATUS_OUT_OF_RANGE,
+  RAND_STATUS_LENGTH_NOT_MULTIPLE = HIPRAND_STATUS_LENGTH_NOT_MULTIPLE,
+  RAND_STATUS_DOUBLE_PRECISION_REQUIRED = HIPRAND_STATUS_DOUBLE_PRECISION_REQUIRED,
+  RAND_STATUS_LAUNCH_FAILURE = HIPRAND_STATUS_LAUNCH_FAILURE,
+  RAND_STATUS_PREEXISTING_FAILURE = HIPRAND_STATUS_PREEXISTING_FAILURE,
+  RAND_STATUS_INITIALIZATION_FAILED = HIPRAND_STATUS_INITIALIZATION_FAILED,
+  RAND_STATUS_ARCH_MISMATCH = HIPRAND_STATUS_ARCH_MISMATCH,
+  RAND_STATUS_INTERNAL_ERROR = HIPRAND_STATUS_INTERNAL_ERROR,
+  RAND_STATUS_NOT_IMPLEMENTED = HIPRAND_STATUS_NOT_IMPLEMENTED,
+};
+
+using StreamOrContext = hipStream_t;
+
+template <typename FloatingPointType> using DevicePtr = FloatingPointType*;
+
+using DeviceProperties = struct hipDeviceProp_t;
+
+using RandGenerator = hiprandGenerator_t;
+
+using BlasHandle = hipblasHandle_t;
+
+using BlasStatus = hipblasStatus_t;
+
+enum class BlasOperation : std::underlying_type_t<hipblasOperation_t> {
+  BLAS_OP_N = HIPBLAS_OP_N,
+  BLAS_OP_T = HIPBLAS_OP_T,
+  BLAS_OP_C = HIPBLAS_OP_C,
+};
+
+#else
+
+#error "Attempting to compile file but neither CUDA nor HIP is used"
+
+// Start of compatibility types for clangd
+
+enum class BlasStatusT {
+  BLAS_STATUS_SUCCESS = 0,
+};
+
+constexpr const char* AccelleratorString = "unknown";
+
+enum class ErrorT {
+  Success = 0,
+};
+
+enum class RandStatusT {
+  RAND_STATUS_SUCCESS = 0,
+};
+
+using StreamOrContext = void*;
+
+template <typename FloatingPointType> using DevicePtr = std::size_t;
+
+using DeviceProperties = void*;
+
+using RandGenerator = void*;
+
+using BlasHandle = void*;
+
+using BlasStatus = void*;
+
+enum class BlasOperation {
+  BLAS_OP_N,
+  BLAS_OP_T,
+  BLAS_OP_C,
+};
+
+#endif
+
+// abstracted function for both CUDA and HIP
+
+/// Get the error string for a status code returned from a call to the CUDA or HIP libraries.
+/// \arg Error The status code that is returned by these calls.
+/// \return The error as a string.
+inline auto getErrorString(ErrorT Error) -> const char* { // inline: defined in a header, avoids ODR violations
+#ifdef FIRESTARTER_BUILD_CUDA
+  return cudaGetErrorString(static_cast<cudaError_t>(Error));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return hipGetErrorString(static_cast<hipError_t>(Error));
+#else
+  (void)Error;
+  return "unknown";
+#endif
+}
+
+/// Get the error string for a status code returned from a call to the CUDA or HIP blas library.
+/// \arg Status The status code that is returned by these calls.
+/// \return The error as a string, or "unknown" if the code is not one of the listed enumerators.
+constexpr auto getErrorString(BlasStatusT Status) -> const char* {
+  switch (Status) {
+  case BlasStatusT::BLAS_STATUS_SUCCESS:
+    return "blas status: success";
+#if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
+  case BlasStatusT::BLAS_STATUS_NOT_INITIALIZED:
+    return "blas status: not initialized";
+  case BlasStatusT::BLAS_STATUS_ALLOC_FAILED:
+    return "blas status: alloc failed";
+  case BlasStatusT::BLAS_STATUS_INVALID_VALUE:
+    return "blas status: invalid value";
+  case BlasStatusT::BLAS_STATUS_ARCH_MISMATCH:
+    return "blas status: arch mismatch";
+  case BlasStatusT::BLAS_STATUS_MAPPING_ERROR:
+    return "blas status: mapping error";
+  case BlasStatusT::BLAS_STATUS_EXECUTION_FAILED:
+    return "blas status: execution failed";
+  case BlasStatusT::BLAS_STATUS_INTERNAL_ERROR:
+    return "blas status: internal error";
+  case BlasStatusT::BLAS_STATUS_NOT_SUPPORTED:
+    return "blas status: not supported";
+#endif
+#ifdef FIRESTARTER_BUILD_CUDA
+  case BlasStatusT::BLAS_STATUS_LICENSE_ERROR:
+    return "blas status: license error";
+#endif
+#ifdef FIRESTARTER_BUILD_HIP
+  case BlasStatusT::BLAS_STATUS_UNKNOWN:
+    return "blas status: unknown";
+  case BlasStatusT::BLAS_STATUS_HANDLE_IS_NULLPTR:
+    return "blas status: handle is null pointer";
+  case BlasStatusT::BLAS_STATUS_INVALID_ENUM:
+    return "blas status: invalid enum";
+#endif
+  default:
+    return "unknown";
+  }
+}
+
+/// Get the error string for a status code returned from a call to the CUDA or HIP random library.
+/// \arg Status The status code that is returned by these calls.
+/// \return The error as a string, or "unknown" if the code is not one of the listed enumerators.
+constexpr auto getErrorString(RandStatusT Status) -> const char* {
+  switch (Status) {
+  case RandStatusT::RAND_STATUS_SUCCESS:
+    return "rand status: success";
+#if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
+  case RandStatusT::RAND_STATUS_VERSION_MISMATCH:
+    return "rand status: version mismatch";
+  case RandStatusT::RAND_STATUS_NOT_INITIALIZED:
+    return "rand status: not initialized";
+  case RandStatusT::RAND_STATUS_ALLOCATION_FAILED:
+    return "rand status: allocation failed";
+  case RandStatusT::RAND_STATUS_TYPE_ERROR:
+    return "rand status: type error";
+  case RandStatusT::RAND_STATUS_OUT_OF_RANGE:
+    return "rand status: out of range";
+  case RandStatusT::RAND_STATUS_LENGTH_NOT_MULTIPLE:
+    return "rand status: length not multiple";
+  case RandStatusT::RAND_STATUS_DOUBLE_PRECISION_REQUIRED:
+    return "rand status: double precision required";
+  case RandStatusT::RAND_STATUS_LAUNCH_FAILURE:
+    return "rand status: launch failure";
+  case RandStatusT::RAND_STATUS_PREEXISTING_FAILURE:
+    return "rand status: preexisting failure";
+  case RandStatusT::RAND_STATUS_INITIALIZATION_FAILED:
+    return "rand status: initialization failed";
+  case RandStatusT::RAND_STATUS_ARCH_MISMATCH:
+    return "rand status: arch mismatch";
+  case RandStatusT::RAND_STATUS_INTERNAL_ERROR:
+    return "rand status: internal error";
+#endif
+#ifdef FIRESTARTER_BUILD_HIP
+  case RandStatusT::RAND_STATUS_NOT_IMPLEMENTED:
+    return "rand status: not implemented";
+#endif
+  default:
+    return "unknown";
+  }
+}
+
+#ifdef FIRESTARTER_BUILD_CUDA
+/// Get the error name for a status code returned from a call to the CUDA library.
+/// \arg Result The status code that is returned by these calls.
+/// \return The error name, or "unknown" if cuGetErrorName does not provide a name for the code.
+inline auto getErrorString(CUresult Result) -> const char* {
+  const char* ErrorString = nullptr; // accellSafeCall is not declared yet and would call back into this function
+  const bool Resolved = cuGetErrorName(Result, &ErrorString) == CUDA_SUCCESS && ErrorString != nullptr;
+  return Resolved ? ErrorString : "unknown";
+}
+#else
+// define types to not run into compile errors with if constexpr
+
+enum class CUresult {};
+// NOLINTBEGIN(readability-identifier-naming)
+constexpr const int CUDA_SUCCESS = 0;
+// NOLINTEND(readability-identifier-naming)
+#endif
+
+/// Use this function as a wrapper to all calls of CUDA or HIP functions. If an error occurred we abort and print the
+/// error code. The static_asserts depend on T so that they only fire when the offending branch is instantiated.
+/// \tparam T The type of the error code returned from calls to CUDA or HIP. This may be one of BlasStatusT, ErrorT,
+/// RandStatusT or CUresult.
+/// \arg TVal The error code returned from calls to CUDA or HIP.
+/// \arg File The file for the log message in which the error occurred.
+/// \arg Line The line for the log message in which the error occurred.
+/// \arg DeviceIndex if the CUDA or HIP call is associated to a specific device, the index of the device should be
+/// provided here for the log message.
+template <typename T>
+inline void accellSafeCall(T TVal, const char* File, const int Line, std::optional<int> DeviceIndex = std::nullopt) {
+  if constexpr (std::is_same_v<T, BlasStatusT>) {
+    if (TVal == BlasStatusT::BLAS_STATUS_SUCCESS) {
+      return;
+    }
+  } else if constexpr (std::is_same_v<T, ErrorT>) {
+    if (TVal == ErrorT::Success) {
+      return;
+    }
+  } else if constexpr (std::is_same_v<T, RandStatusT>) {
+    if (TVal == RandStatusT::RAND_STATUS_SUCCESS) {
+      return;
+    }
+  } else if constexpr (std::is_same_v<T, CUresult>) {
+#ifndef FIRESTARTER_BUILD_CUDA
+    static_assert(sizeof(T) == 0, "Tried to call accellSafeCall with CUresult, but not building for CUDA.");
+#endif
+    if (TVal == CUDA_SUCCESS) {
+      return;
+    }
+  } else {
+    static_assert(sizeof(T) == 0, "Tried to call accellSafeCall with an unknown type.");
+  }
+
+  std::stringstream Ss;
+  Ss << AccelleratorString << " error at " << File << ":" << Line << ": error code = " << static_cast<int>(TVal)
+     << " (" << getErrorString(TVal) << ")";
+
+  if (DeviceIndex) {
+    Ss << ", device index: " << *DeviceIndex;
+  }
+
+  firestarter::log::error() << Ss.str();
+  exit(static_cast<int>(TVal));
+}
+
+/// Wrapper to cuInit or hipInit.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as ErrorT.
+/// \arg Flags The Flags forwarded to cuInit or hipInit.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType> auto init(unsigned int Flags) -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(cuInit(Flags));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hipInit(Flags));
+#else
+  (void)Flags;
+  static_assert(false, "Tried to call init, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Get the number of GPU devices. Wrapper to cuDeviceGetCount or hipGetDeviceCount.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as ErrorT.
+/// \arg DevCount The reference to where the number of GPU devices will be written.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType> auto getDeviceCount(int& DevCount) -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(cuDeviceGetCount(&DevCount));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hipGetDeviceCount(&DevCount));
+#else
+  (void)DevCount;
+  static_assert(false, "Tried to call getDeviceCount, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Create a context in case of CUDA or a stream in case of HIP on a specific device. It must be deleted with
+/// destroyContextOrStream. HIP status codes are cast to ErrorT, the type accellSafeCall accepts.
+/// \arg DeviceIndex The device on which to create the context or stream.
+/// \return The created context or stream.
+inline auto createContextOrStream(int DeviceIndex) -> StreamOrContext {
+  StreamOrContext Soc{};
+#ifdef FIRESTARTER_BUILD_CUDA
+  firestarter::log::trace() << "Creating " << AccelleratorString << " context for computation on device nr. "
+                            << DeviceIndex;
+  CUdevice Device{};
+  accellSafeCall(cuDeviceGet(&Device, DeviceIndex), __FILE__, __LINE__, DeviceIndex);
+  accellSafeCall(cuCtxCreate(&Soc, 0, Device), __FILE__, __LINE__, DeviceIndex);
+
+  firestarter::log::trace() << "Set created " << AccelleratorString << " context on device nr. " << DeviceIndex;
+  accellSafeCall(cuCtxSetCurrent(Soc), __FILE__, __LINE__, DeviceIndex);
+#elif defined(FIRESTARTER_BUILD_HIP)
+  firestarter::log::trace() << "Creating " << AccelleratorString << " Stream for computation on device nr. "
+                            << DeviceIndex;
+  accellSafeCall(static_cast<ErrorT>(hipSetDevice(DeviceIndex)), __FILE__, __LINE__, DeviceIndex);
+  accellSafeCall(static_cast<ErrorT>(hipStreamCreate(&Soc)), __FILE__, __LINE__, DeviceIndex);
+#else
+  (void)DeviceIndex;
+  static_assert(false, "Tried to call createContextOrStream, but neither building for CUDA nor HIP.");
+#endif
+  return Soc;
+}
+
+/// Destroy the context (CUDA) or stream (HIP) with cuCtxDestroy and hipStreamDestroy respectively.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as ErrorT.
+/// \arg Soc The reference to the context or stream.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType> auto destroyContextOrStream(StreamOrContext& Soc) -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(cuCtxDestroy(Soc));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hipStreamDestroy(Soc));
+#else
+  (void)Soc;
+  static_assert(false, "Tried to call destroyContextOrStream, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Create a blas handle. Wrapper to cublasCreate or hipblasCreate.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as BlasStatusT.
+/// \arg BlasHandle The reference to a BlasHandle object which will be initialized.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType> auto blasCreate(BlasHandle& BlasHandle) -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(cublasCreate(&BlasHandle));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hipblasCreate(&BlasHandle));
+#else
+  (void)BlasHandle;
+  static_assert(false, "Tried to call blasCreate, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Destroy a blas handle. Wrapper to cublasDestroy or hipblasDestroy.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as BlasStatusT.
+/// \arg BlasHandle The reference to a BlasHandle object which will be destroyed.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType> auto blasDestory(BlasHandle& BlasHandle) -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(cublasDestroy(BlasHandle));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hipblasDestroy(BlasHandle));
+#else
+  (void)BlasHandle;
+  static_assert(false, "Tried to call blasDestory, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Get the properties of a specific GPU device. Wrapper to cudaGetDeviceProperties or hipGetDeviceProperties.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as ErrorT.
+/// \arg Property The reference to the properties that are retrieved.
+/// \arg DeviceIndex The index of the GPU device for which to retrieve the device properties.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType> auto getDeviceProperties(DeviceProperties& Property, int DeviceIndex) -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(cudaGetDeviceProperties(&Property, DeviceIndex));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hipGetDeviceProperties(&Property, DeviceIndex));
+#else
+  (void)Property;
+  (void)DeviceIndex;
+  static_assert(false, "Tried to call getDeviceProperties, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Get the amount of available and total memory in the current CUDA or HIP context. Wrapper to cuMemGetInfo or
+/// hipMemGetInfo.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as ErrorT.
+/// \arg MemoryAvail The reference to the available memory that is retrieved.
+/// \arg MemoryTotal The reference to the total memory that is retrieved.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType> auto memGetInfo(std::size_t& MemoryAvail, std::size_t& MemoryTotal) -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(cuMemGetInfo(&MemoryAvail, &MemoryTotal));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hipMemGetInfo(&MemoryAvail, &MemoryTotal));
+#else
+  (void)MemoryAvail;
+  (void)MemoryTotal;
+  static_assert(false, "Tried to call memGetInfo, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Allocate device memory in the current CUDA or HIP context. Wrapper to cuMemAlloc or
+/// hipMalloc.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as ErrorT.
+/// \tparam FloatingPointType The type of the floating point used. Either float or double.
+/// \arg Ptr The reference to the device pointer which is retrieved by the malloc call.
+/// \arg MemorySize The memory that is allocated on the device in bytes.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType, typename FloatingPointType>
+auto malloc(DevicePtr<FloatingPointType>& Ptr, std::size_t MemorySize) -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(cuMemAlloc(&Ptr, MemorySize));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hipMalloc(&Ptr, MemorySize));
+#else
+  (void)Ptr;
+  (void)MemorySize;
+  static_assert(false, "Tried to call malloc, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Free device memory in the current CUDA or HIP context. Wrapper to cuMemFree or
+/// hipFree.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as ErrorT.
+/// \tparam FloatingPointType The type of the floating point used. Either float or double.
+/// \arg Ptr The reference to the device pointer which is used in the free call.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType, typename FloatingPointType> auto free(DevicePtr<FloatingPointType>& Ptr) -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(cuMemFree(Ptr));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hipFree(Ptr));
+#else
+  (void)Ptr;
+  static_assert(false, "Tried to call free, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Create a random generator in the current CUDA or HIP context. Wrapper to curandCreateGenerator or
+/// hiprandCreateGenerator.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as RandStatusT.
+/// \arg RandomGen The reference to the random generator which is retrieved by the calls.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType> auto randCreateGeneratorPseudoRandom(RandGenerator& RandomGen) -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(curandCreateGenerator(&RandomGen, CURAND_RNG_PSEUDO_DEFAULT));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hiprandCreateGenerator(&RandomGen, HIPRAND_RNG_PSEUDO_DEFAULT));
+#else
+  (void)RandomGen;
+  static_assert(false, "Tried to call randCreateGeneratorPseudoRandom, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Set the pseudo random generator seed in the current CUDA or HIP context. Wrapper to
+/// curandSetPseudoRandomGeneratorSeed or hiprandSetPseudoRandomGeneratorSeed.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as RandStatusT.
+/// \arg RandomGen The reference to the random generator.
+/// \arg Seed The seed used to initialize the pseudo random generator.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType> auto randSetPseudoRandomGeneratorSeed(RandGenerator& RandomGen, int Seed) -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(curandSetPseudoRandomGeneratorSeed(RandomGen, Seed));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hiprandSetPseudoRandomGeneratorSeed(RandomGen, Seed));
+#else
+  (void)RandomGen;
+  (void)Seed;
+  static_assert(false, "Tried to call randSetPseudoRandomGeneratorSeed, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Initialize the provided memory with a specific number of uniform random floats. Wrapper to
+/// curandGenerateUniform or hiprandGenerateUniform.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as RandStatusT.
+/// \arg RandomGen The reference to the random generator.
+/// \arg OutputPtr The device pointer to the memory that is initialized with uniform random floats.
+/// \arg Num The number of uniform random floats.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType>
+auto randGenerateUniform(RandGenerator& RandomGen, DevicePtr<float> OutputPtr, std::size_t Num) -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(curandGenerateUniform(RandomGen, reinterpret_cast<float*>(OutputPtr), Num));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hiprandGenerateUniform(RandomGen, OutputPtr, Num));
+#else
+  (void)RandomGen;
+  (void)OutputPtr;
+  (void)Num;
+  static_assert(false, "Tried to call randGenerateUniform, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Initialize the provided memory with a specific number of uniform random doubles. Wrapper to
+/// curandGenerateUniformDouble or hiprandGenerateUniformDouble.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as RandStatusT.
+/// \arg RandomGen The reference to the random generator.
+/// \arg OutputPtr The device pointer to the memory that is initialized with uniform random doubles.
+/// \arg Num The number of uniform random doubles.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType>
+auto randGenerateUniformDouble(RandGenerator& RandomGen, DevicePtr<double> OutputPtr, std::size_t Num) -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(curandGenerateUniformDouble(RandomGen, reinterpret_cast<double*>(OutputPtr), Num));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hiprandGenerateUniformDouble(RandomGen, OutputPtr, Num));
+#else
+  (void)RandomGen;
+  (void)OutputPtr;
+  (void)Num;
+  static_assert(false, "Tried to call randGenerateUniformDouble, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Fill device memory with uniform random values. Dispatches to randGenerateUniform or randGenerateUniformDouble.
+/// \tparam ReturnType The type of the return code to these calls.
+/// \tparam FloatPointType The floating point type that is used. Either float or double.
+/// \arg Generator The reference to the random generator.
+/// \arg OutputPtr The device pointer to the memory that is initialized with uniform random values.
+/// \arg Num The number of uniform random values.
+/// \returns The error code returned from these calls.
+template <typename ReturnType, typename FloatPointType>
+auto generateUniform(RandGenerator& Generator, DevicePtr<FloatPointType> OutputPtr, size_t Num) -> ReturnType {
+  if constexpr (std::is_same_v<FloatPointType, float>) {
+    return randGenerateUniform<ReturnType>(Generator, OutputPtr, Num);
+  } else if constexpr (std::is_same_v<FloatPointType, double>) {
+    return randGenerateUniformDouble<ReturnType>(Generator, OutputPtr, Num);
+  } else {
+    static_assert(sizeof(FloatPointType) == 0, // dependent condition: only fires if this branch is instantiated
+                  "generateUniform<FloatPointType>: Template argument must be either float or double");
+  }
+}
+
+/// Destroy a random generator in the current CUDA or HIP context. Wrapper to curandDestroyGenerator or
+/// hiprandDestroyGenerator.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as RandStatusT.
+/// \arg RandomGen The reference to the random generator which should be destroyed.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType> auto randDestroyGenerator(RandGenerator& RandomGen) -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(curandDestroyGenerator(RandomGen));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hiprandDestroyGenerator(RandomGen));
+#else
+  (void)RandomGen;
+  static_assert(false, "Tried to call randDestroyGenerator, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Copy memory from a device pointer to another device pointer. Wrapper to cuMemcpyDtoD or hipMemcpyDtoD.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as ErrorT.
+/// \arg DestinationPtr The destination address.
+/// \arg SourcePtr The source address.
+/// \arg Size The number of bytes to copy.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType, typename FloatPointType>
+auto memcpyDtoD(DevicePtr<FloatPointType> DestinationPtr, DevicePtr<FloatPointType> SourcePtr, std::size_t Size)
+    -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(cuMemcpyDtoD(DestinationPtr, SourcePtr, Size));
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hipMemcpyDtoD(DestinationPtr, SourcePtr, Size));
+#else
+  (void)DestinationPtr;
+  (void)SourcePtr;
+  (void)Size;
+  static_assert(false, "Tried to call memcpyDtoD, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// Block until the current device finished. Wrapper to cudaDeviceSynchronize or hipDeviceSynchronize.
+/// \tparam ReturnType The type of the return code to these calls; may be a scoped compat enum such as ErrorT.
+/// \returns The error code returned from these calls, explicitly converted to ReturnType.
+template <typename ReturnType> auto deviceSynchronize() -> ReturnType {
+#ifdef FIRESTARTER_BUILD_CUDA
+  return static_cast<ReturnType>(cudaDeviceSynchronize());
+#elif defined(FIRESTARTER_BUILD_HIP)
+  return static_cast<ReturnType>(hipDeviceSynchronize());
+#else
+  static_assert(false, "Tried to call deviceSynchronize, but neither building for CUDA nor HIP.");
+#endif
+}
+
+/// This function performs the matrix-matrix multiplication C = Alpha * op(A) * op(B) + Beta * C with op(A) and op(B)
+/// described by the selected operation for Transa and Transb. BlasOperation::BLAS_OP_N maps to op(X) = X,
+/// BlasOperation::BLAS_OP_T to op(X) = X transposed and BlasOperation::BLAS_OP_C to op(X) = conjugate transpose of X.
+/// It wraps (cu|hip)blas(S|D)gemm.
+/// The scoped BlasOperation values are cast to the native operation enum of the selected blas library.
+/// \tparam FloatPointType The floating point type that is used. Either float or double.
+/// \arg Handle The blas handle
+/// \arg Transa The operation selected for op(A)
+/// \arg Transb The operation selected for op(B)
+/// \arg M Number of rows of matrix op(A) and C.
+/// \arg N Number of columns of matrix op(B) and C.
+/// \arg K Number of columns of op(A) and rows of op(B).
+/// \arg Alpha Pointer to the scalar Alpha.
+/// \arg A Device pointer to the matrix A.
+/// \arg Lda Leading dimension of two-dimensional array used to store the matrix A.
+/// \arg B Device pointer to the matrix B.
+/// \arg Ldb Leading dimension of two-dimensional array used to store matrix B.
+/// \arg Beta Pointer to the scalar Beta.
+/// \arg C Device pointer to the matrix C.
+/// \arg Ldc Leading dimension of a two-dimensional array used to store the matrix C.
+/// \returns The Error code returned from these calls.
+template <typename FloatPointType>
+auto gemm(BlasHandle Handle, BlasOperation Transa, BlasOperation Transb, int& M, int& N, int& K,
+          const FloatPointType* Alpha, const DevicePtr<FloatPointType>& A, int& Lda, const DevicePtr<FloatPointType>& B,
+          int& Ldb, const FloatPointType* Beta, DevicePtr<FloatPointType>& C, int& Ldc) -> BlasStatus {
+  // BlasOperation is a scoped enum and therefore needs an explicit cast to the native (cu|hip)blas operation type.
+  // NOTE(review): with CUDA, DevicePtr is CUdeviceptr (an integer handle in the driver API) while cublas expects
+  // typed pointers, hence the reinterpret_casts - please confirm. With HIP, DevicePtr already is a typed pointer.
+  if constexpr (std::is_same_v<FloatPointType, float>) {
+#ifdef FIRESTARTER_BUILD_CUDA
+    return cublasSgemm(Handle, static_cast<cublasOperation_t>(Transa), static_cast<cublasOperation_t>(Transb), M, N, K,
+                       Alpha, reinterpret_cast<const float*>(A), Lda, reinterpret_cast<const float*>(B), Ldb, Beta,
+                       reinterpret_cast<float*>(C), Ldc);
+#elif defined(FIRESTARTER_BUILD_HIP)
+    return hipblasSgemm(Handle, static_cast<hipblasOperation_t>(Transa), static_cast<hipblasOperation_t>(Transb), M, N,
+                        K, Alpha, A, Lda, B, Ldb, Beta, C, Ldc);
+#endif
+  } else if constexpr (std::is_same_v<FloatPointType, double>) {
+#ifdef FIRESTARTER_BUILD_CUDA
+    return cublasDgemm(Handle, static_cast<cublasOperation_t>(Transa), static_cast<cublasOperation_t>(Transb), M, N, K,
+                       Alpha, reinterpret_cast<const double*>(A), Lda, reinterpret_cast<const double*>(B), Ldb, Beta,
+                       reinterpret_cast<double*>(C), Ldc);
+#elif defined(FIRESTARTER_BUILD_HIP)
+    return hipblasDgemm(Handle, static_cast<hipblasOperation_t>(Transa), static_cast<hipblasOperation_t>(Transb), M, N,
+                        K, Alpha, A, Lda, B, Ldb, Beta, C, Ldc);
+#endif
+  } else {
+    // The condition depends on the template parameter so that the assertion only fires if this branch is
+    // instantiated; a plain static_assert(false) is rejected by compilers that do not implement P2593.
+    static_assert(sizeof(FloatPointType) == 0,
+                  "gemm<FloatPointType>: Template argument must be either float or double");
+  }
+
+  // Fallback for builds without CUDA and HIP: silence unused parameters and reject the function at compile time.
+#if not(defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP))
+  (void)Handle;
+  (void)Transa;
+  (void)Transb;
+  (void)M;
+  (void)N;
+  (void)K;
+  (void)Alpha;
+  (void)A;
+  (void)Lda;
+  (void)B;
+  (void)Ldb;
+  (void)Beta;
+  (void)C;
+  (void)Ldc;
+  static_assert(false, "Tried to call gemm, but neither building for CUDA nor HIP.");
+#endif
+}
+
+} // namespace firestarter::cuda::compat
\ No newline at end of file
diff --git a/src/firestarter/Cuda/Cuda.cpp b/src/firestarter/Cuda/Cuda.cpp
index 2e5290a2..9d6b7fed 100644
--- a/src/firestarter/Cuda/Cuda.cpp
+++ b/src/firestarter/Cuda/Cuda.cpp
@@ -26,176 +26,17 @@
  * http://wili.cc/blog/gpu-burn.html
  *****************************************************************************/
 
+#include <algorithm>
+#include <atomic>
 #include <firestarter/Cuda/Cuda.hpp>
+#include <firestarter/Cuda/CudaHipCompat.hpp>
 #include <firestarter/LoadWorkerData.hpp>
 #include <firestarter/Logging/Log.hpp>
-
-#ifdef FIRESTARTER_BUILD_CUDA
-#include <cublas_v2.h>
-#include <cuda.h>
-#include <cuda_runtime_api.h>
-#include <curand_kernel.h>
-#define FS_ACCEL_PREFIX_LC_LONG cuda
-#define FS_ACCEL_PREFIX_LC cu
-#define FS_ACCEL_PREFIX_UC CU
-#define FS_ACCEL_PREFIX_UC_LONG CUDA
-#define FS_ACCEL_STRING "CUDA"
-#else
-#ifdef FIRESTARTER_BUILD_HIP
-#include <hip/hip_runtime.h>
-#include <hip/hip_runtime_api.h>
-#include <hipblas/hipblas.h>
-#include <hiprand_kernel.h>
-#define FS_ACCEL_PREFIX_LC_LONG hip
-#define FS_ACCEL_PREFIX_LC hip
-#define FS_ACCEL_PREFIX_UC HIP
-#define FS_ACCEL_PREFIX_UC_LONG HIP
-#define FS_ACCEL_STRING "HIP"
-#else
-#error "Attempting to compile file but neither CUDA nor HIP is used"
-#endif
-#endif
-#define CONCAT_(prefix, suffix) prefix##suffix
-/// Concatenate `prefix, suffix` into `prefixsuffix`
-#define CONCAT(prefix, suffix) CONCAT_(prefix, suffix)
-//#define FS_ACCEL_ERROR_TYPE CONCAT(FS_ACCEL_PREFIX_LC_LONG,Error_t)
-//#define FS_ACCEL_BLAS_STATUS_TYPE cublasStatus_t
-//#define FS_ACCEL_RAND_STATUS_TYPE curandStatus_t
-
-#include <algorithm>
-#include <atomic>
 #include <type_traits>
 
-#define ACCELL_SAFE_CALL(cuerr, dev_index) accell_safe_call(cuerr, dev_index, __FILE__, __LINE__)
-#define SEED 123
-
-using namespace firestarter::cuda;
-
-// CUDA error checking
-static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC_LONG, Error_t) cuerr, int dev_index, const char* file,
-                                    const int line) {
-  if (cuerr != CONCAT(FS_ACCEL_PREFIX_LC_LONG, Success) && cuerr != 1) {
-    firestarter::log::error() << FS_ACCEL_STRING " error at " << file << ":" << line << ": error code = " << cuerr
-                              << " (" << CONCAT(FS_ACCEL_PREFIX_LC_LONG, GetErrorString)(cuerr)
-                              << "), device index: " << dev_index;
-    exit(cuerr);
-  }
-
-  return;
-}
-
-static const char* _accellGetErrorEnum(CONCAT(FS_ACCEL_PREFIX_LC, blasStatus_t) error) {
-  switch (error) {
-  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_SUCCESS):
-    return FS_ACCEL_STRING "blas status: success";
-  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_NOT_INITIALIZED):
-    return FS_ACCEL_STRING "blas status: not initialized";
-  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_ALLOC_FAILED):
-    return FS_ACCEL_STRING "blas status: alloc failed";
-  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_INVALID_VALUE):
-    return FS_ACCEL_STRING "blas status: invalid value";
-  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_ARCH_MISMATCH):
-    return FS_ACCEL_STRING "blas status: arch mismatch";
-  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_MAPPING_ERROR):
-    return FS_ACCEL_STRING "blas status: mapping error";
-  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_EXECUTION_FAILED):
-    return FS_ACCEL_STRING "blas status: execution failed";
-  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_INTERNAL_ERROR):
-    return FS_ACCEL_STRING "blas status: internal error";
-  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_NOT_SUPPORTED):
-    return FS_ACCEL_STRING "blas status: not supported";
-#ifdef FIRESTARTER_BUILD_CUDA
-  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_LICENSE_ERROR):
-    return FS_ACCEL_STRING "blas status: license error";
-#endif
-#ifdef FIRESTARTER_BUILD_HIP
-  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_UNKNOWN):
-    return FS_ACCEL_STRING "blas status: unknown";
-  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_HANDLE_IS_NULLPTR):
-    return FS_ACCEL_STRING "blas status: handle is null pointer";
-  case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_INVALID_ENUM):
-    return FS_ACCEL_STRING "blas status: invalid enum";
-#endif
-  }
-
-  return "<unknown>";
-}
-
-static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC, blasStatus_t) cuerr, int dev_index, const char* file,
-                                    const int line) {
-  if (cuerr != CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_SUCCESS)) {
-    firestarter::log::error() << FS_ACCEL_STRING "BLAS error at " << file << ":" << line << ": error code = " << cuerr
-                              << " (" << _accellGetErrorEnum(cuerr) << "), device index: " << dev_index;
-    exit(cuerr);
-  }
-
-  return;
-}
-
-#ifdef FIRESTARTER_BUILD_CUDA
-static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_UC, result) cuerr, int dev_index, const char* file,
-                                    const int line) {
-  if (cuerr != CONCAT(FS_ACCEL_PREFIX_UC_LONG, _SUCCESS)) {
-    const char* errorString;
-
-    ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, GetErrorName)(cuerr, &errorString), dev_index);
+namespace firestarter::cuda {
 
-    firestarter::log::error() << FS_ACCEL_STRING " error at " << file << ":" << line << ": error code = " << cuerr
-                              << " (" << errorString << "), device index: " << dev_index;
-    exit(cuerr);
-  }
-
-  return;
-}
-#endif
-
-static const char* _accellrandGetErrorEnum(CONCAT(FS_ACCEL_PREFIX_LC, randStatus_t) cuerr) {
-  switch (cuerr) {
-  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_SUCCESS):
-    return FS_ACCEL_STRING "rand status: success";
-  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_VERSION_MISMATCH):
-    return FS_ACCEL_STRING "rand status: version mismatch";
-  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_NOT_INITIALIZED):
-    return FS_ACCEL_STRING "rand status: not initialized";
-  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_ALLOCATION_FAILED):
-    return FS_ACCEL_STRING "rand status: allocation failed";
-  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_TYPE_ERROR):
-    return FS_ACCEL_STRING "rand status: type error";
-  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_OUT_OF_RANGE):
-    return FS_ACCEL_STRING "rand status: out of range";
-  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_LENGTH_NOT_MULTIPLE):
-    return FS_ACCEL_STRING "rand status: length not multiple";
-  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_DOUBLE_PRECISION_REQUIRED):
-    return FS_ACCEL_STRING "rand status: double precision required";
-  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_LAUNCH_FAILURE):
-    return FS_ACCEL_STRING "rand status: launch failure";
-  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_PREEXISTING_FAILURE):
-    return FS_ACCEL_STRING "rand status: preexisting failure";
-  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_INITIALIZATION_FAILED):
-    return FS_ACCEL_STRING "rand status: initialization failed";
-  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_ARCH_MISMATCH):
-    return FS_ACCEL_STRING "rand status: arch mismatch";
-  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_INTERNAL_ERROR):
-    return FS_ACCEL_STRING "rand status: internal error";
-#ifdef FIRESTARTER_BUILD_HIP
-  case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_NOT_IMPLEMENTED):
-    return FS_ACCEL_STRING "rand status: not implemented";
-#endif
-  }
-
-  return "<unknown>";
-}
-
-static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC, randStatus_t) cuerr, int dev_index, const char* file,
-                                    const int line) {
-  if (cuerr != CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_SUCCESS)) {
-    firestarter::log::error() << FS_ACCEL_STRING "RAND error at " << file << ":" << line << ": error code = " << cuerr
-                              << " (" << _accellrandGetErrorEnum(cuerr) << "), device index: " << dev_index;
-    exit(cuerr);
-  }
-
-  return;
-}
+constexpr const int Seed = 123;
 
 static int round_up(int num_to_round, int multiple) {
   if (multiple == 0) {
@@ -210,61 +51,61 @@ static int round_up(int num_to_round, int multiple) {
   return num_to_round + multiple - remainder;
 }
 
-#ifdef FIRESTARTER_BUILD_CUDA
-static int get_precision(int useDouble, struct cudaDeviceProp properties) {
-#else
-#ifdef FIRESTARTER_BUILD_HIP
-static int get_precision(int useDouble, struct hipDeviceProp_t properties) {
-#endif
-#endif
+/// Convert the UseDouble input (0 -> single precision, 1 -> double precision, 2 -> automatic) to either 0 or 1 for
+/// float or double respectively. For CUDART_VERSION of at least 8000 and automatic selection, we check if the card has
+/// a singleToDoublePrecisionPerfRatio bigger than 3 and select float in this case, otherwise double. In all other
+/// cases automatic results in double.
+/// \arg UseDouble The input that specifies either single precision, double precision or automatic selection.
+/// \arg Properties The device properties.
+/// \return The selected precision, either 0 or 1 for float or double respectively.
+static int get_precision(int UseDouble, const compat::DeviceProperties& Properties) {
 #if (CUDART_VERSION >= 8000)
   // read precision ratio (dp/sp) of GPU to choose the right variant for maximum
   // workload
-  if (useDouble == 2 && properties.singleToDoublePrecisionPerfRatio > 3) {
+  if (UseDouble == 2 && Properties.singleToDoublePrecisionPerfRatio > 3) {
     return 0;
-  } else if (useDouble) {
+  } else if (UseDouble) {
     return 1;
   } else {
     return 0;
   }
-}
 #else
   // as precision ratio is not supported return default/user input value
-  (void)properties;
+  (void)Properties;
 
-  if (useDouble) {
+  if (UseDouble) {
     return 1;
   } else {
     return 0;
   }
-}
 #endif
+}
 
 static int get_precision(int device_index, int useDouble) {
   size_t memory_avail, memory_total;
+  compat::DeviceProperties properties;
+
+  auto stream_or_context = compat::createContextOrStream(device_index);
+
+  compat::accell_safe_call(compat::memGetInfo<>(memory_avail, memory_total), __FILE__, __LINE__, device_index);
+  compat::accell_safe_call(compat::getDeviceProperties<>(properties, device_index), __FILE__, __LINE__, device_index);
+
+  useDouble = get_precision(useDouble, properties);
+
+  bool DoubleNotSupported =
 #ifdef FIRESTARTER_BUILD_CUDA
-  CUcontext context;
-  CUdevice device;
-  struct cudaDeviceProp properties;
-  ACCELL_SAFE_CALL(cuDeviceGet(&device, device_index), device_index);
-  ACCELL_SAFE_CALL(cuCtxCreate(&context, 0, device), device_index);
-  ACCELL_SAFE_CALL(cuCtxSetCurrent(context), device_index);
+      properties.major <= 1 && properties.minor <= 2;
+#elif defined(FIRESTARTER_BUILD_HIP)
+      !properties.hasDoubles;
 #else
-#ifdef FIRESTARTER_BUILD_HIP
-  struct hipDeviceProp_t properties;
-  ACCELL_SAFE_CALL(hipSetDevice(device_index), device_index);
+      true;
 #endif
-#endif
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, MemGetInfo)(&memory_avail, &memory_total), device_index);
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG, GetDeviceProperties)(&properties, device_index), device_index);
-
-  useDouble = get_precision(useDouble, properties);
 
   // we check for double precision support on the GPU and print errormsg, when
   // the user wants to compute DP on a SP-only-Card.
-  if (useDouble && properties.major <= 1 && properties.minor <= 2) {
+  if (useDouble && DoubleNotSupported) {
     std::stringstream ss;
-    ss << FS_ACCEL_STRING " GPU " << device_index << ": " << properties.name << " ";
+    ss << compat::AccelleratorString << " GPU " << device_index << ": " << properties.name << " ";
 
     firestarter::log::error() << ss.str() << "Doesn't support double precision.\n"
                               << ss.str() << "Compute Capability: " << properties.major << "." << properties.minor
@@ -274,65 +115,21 @@ static int get_precision(int device_index, int useDouble) {
     useDouble = 0;
   }
 
-#ifdef FIRESTARTER_BUILD_CUDA
-  ACCELL_SAFE_CALL(cuCtxDestroy(context), device_index);
-#endif
+  compat::accell_safe_call(compat::destroyContextOrStream<>(stream_or_context), __FILE__, __LINE__, device_index);
 
   return useDouble;
 }
 
-#ifdef FIRESTARTER_BUILD_CUDA
-static int get_msize(int device_index, int useDouble) {
-  CUcontext context;
-  CUdevice device;
-  size_t memory_avail, memory_total;
-
-  ACCELL_SAFE_CALL(cuDeviceGet(&device, device_index), device_index);
-  ACCELL_SAFE_CALL(cuCtxCreate(&context, 0, device), device_index);
-  ACCELL_SAFE_CALL(cuCtxSetCurrent(context), device_index);
-  ACCELL_SAFE_CALL(cuMemGetInfo(&memory_avail, &memory_total), device_index);
-
-  ACCELL_SAFE_CALL(cuCtxDestroy(context), device_index);
-
-  return round_up((int)(0.8 * sqrt(((memory_avail) / ((useDouble ? sizeof(double) : sizeof(float)) * 3)))),
-                  1024); // a multiple of 1024 works always well
-}
-#endif
-
-static CONCAT(FS_ACCEL_PREFIX_LC, blasStatus_t)
-    gemm(CONCAT(FS_ACCEL_PREFIX_LC, blasHandle_t) handle, CONCAT(FS_ACCEL_PREFIX_LC, blasOperation_t) transa,
-         CONCAT(FS_ACCEL_PREFIX_LC, blasOperation_t) transb, int& m, int& n, int& k, const float* alpha, const float* A,
-         int& lda, const float* B, int& ldb, const float* beta, float* C, int& ldc) {
-  return CONCAT(FS_ACCEL_PREFIX_LC, blasSgemm)(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
-}
-
-static CONCAT(FS_ACCEL_PREFIX_LC, blasStatus_t)
-    gemm(CONCAT(FS_ACCEL_PREFIX_LC, blasHandle_t) handle, CONCAT(FS_ACCEL_PREFIX_LC, blasOperation_t) transa,
-         CONCAT(FS_ACCEL_PREFIX_LC, blasOperation_t) transb, int& m, int& n, int& k, const double* alpha,
-         const double* A, int& lda, const double* B, int& ldb, const double* beta, double* C, int& ldc) {
-  return CONCAT(FS_ACCEL_PREFIX_LC, blasDgemm)(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
-}
-
-static CONCAT(FS_ACCEL_PREFIX_LC, randStatus_t)
-    generateUniform(CONCAT(FS_ACCEL_PREFIX_LC, randGenerator_t) generator, float* outputPtr, size_t num) {
-  return CONCAT(FS_ACCEL_PREFIX_LC, randGenerateUniform)(generator, outputPtr, num);
-}
-
-static CONCAT(FS_ACCEL_PREFIX_LC, randStatus_t)
-    generateUniform(CONCAT(FS_ACCEL_PREFIX_LC, randGenerator_t) generator, double* outputPtr, size_t num) {
-  return CONCAT(FS_ACCEL_PREFIX_LC, randGenerateUniformDouble)(generator, outputPtr, num);
-}
-
 // GPU index. Used to pin this thread to the GPU.
-template <typename T>
+template <typename FloatingPointType>
 static void create_load(std::condition_variable& waitForInitCv, std::mutex& waitForInitCvMutex, int device_index,
                         std::atomic<int>& initCount, volatile uint64_t* loadVar, int matrixSize) {
-  static_assert(std::is_same<T, float>::value || std::is_same<T, double>::value,
-                "create_load<T>: Template argument T must be either float or double");
+  static_assert(std::is_same_v<FloatingPointType, float> || std::is_same_v<FloatingPointType, double>,
+                "create_load<FloatingPointType>: Template argument must be either float or double");
 
   int iterations, i;
 
-  firestarter::log::trace() << "Starting CUDA/HIP with given matrix size " << matrixSize;
+  firestarter::log::trace() << "Starting " << compat::AccelleratorString << " with given matrix size " << matrixSize;
 
   size_t size_use = 0;
   if (matrixSize > 0) {
@@ -340,217 +137,158 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait
   }
 
   size_t use_bytes, memory_size;
-#ifdef FIRESTARTER_BUILD_CUDA
-  CUcontext context;
-  struct cudaDeviceProp properties;
-  CUdevice device;
-  cublasHandle_t cublas;
-#else
-#ifdef FIRESTARTER_BUILD_HIP
-  hipStream_t stream;
-  struct hipDeviceProp_t properties;
-  hipDevice_t device;
-  hipblasHandle_t cublas;
-#endif
-#endif
+  compat::DeviceProperties properties;
+  compat::BlasHandle blas;
   // reserving the GPU and initializing cublas
 
-  firestarter::log::trace() << "Getting " FS_ACCEL_STRING " device nr. " << device_index;
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, DeviceGet)(&device, device_index), device_index);
+  auto stream_or_context = compat::createContextOrStream(device_index);
 
-#ifdef FIRESTARTER_BUILD_CUDA
-  firestarter::log::trace() << "Creating " FS_ACCEL_STRING " context for computation on device nr. " << device_index;
-  ACCELL_SAFE_CALL(cuCtxCreate(&context, 0, device), device_index);
+  firestarter::log::trace() << "Create " << compat::AccelleratorString << " Blas on device nr. " << device_index;
+  compat::accell_safe_call(compat::blasCreate<>(blas), __FILE__, __LINE__, device_index);
 
-  firestarter::log::trace() << "Set created " FS_ACCEL_STRING " context on device nr. " << device_index;
-  ACCELL_SAFE_CALL(cuCtxSetCurrent(context), device_index);
-#else
-#ifdef FIRESTARTER_BUILD_HIP
-  firestarter::log::trace() << "Creating " FS_ACCEL_STRING " Stream for computation on device nr. " << device_index;
-  ACCELL_SAFE_CALL(hipSetDevice(device_index), device_index);
-  ACCELL_SAFE_CALL(hipStreamCreate(&stream), device_index);
-#endif
-#endif
-
-  firestarter::log::trace() << "Create " FS_ACCEL_STRING " Blas on device nr. " << device_index;
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, blasCreate)(&cublas), device_index);
-
-  firestarter::log::trace() << "Get " FS_ACCEL_STRING " device properties (e.g., support for double)"
+  firestarter::log::trace() << "Get " << compat::AccelleratorString << " device properties (e.g., support for double)"
                             << " on device nr. " << device_index;
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG, GetDeviceProperties)(&properties, device_index), device_index);
+  compat::accell_safe_call(compat::getDeviceProperties<>(properties, device_index), __FILE__, __LINE__, device_index);
 
   // getting information about the GPU memory
   size_t memory_avail, memory_total;
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, MemGetInfo)(&memory_avail, &memory_total), device_index);
-
-  firestarter::log::trace() << "Get " FS_ACCEL_STRING " Memory info on device nr. " << device_index << ": "
-                            << memory_avail << " B avail. from " << memory_total << " B total";
+  compat::accell_safe_call(compat::memGetInfo<>(memory_avail, memory_total), __FILE__, __LINE__, device_index);
+  firestarter::log::trace() << "Get " << compat::AccelleratorString << " Memory info on device nr. " << device_index
+                            << ": " << memory_avail << " B avail. from " << memory_total << " B total";
 
   // defining memory pointers
-#ifdef FIRESTARTER_BUILD_CUDA
-  CUdeviceptr a_data_ptr;
-  CUdeviceptr b_data_ptr;
-  CUdeviceptr c_data_ptr;
-#else
-#ifdef FIRESTARTER_BUILD_HIP
-  T* a_data_ptr;
-  T* b_data_ptr;
-  T* c_data_ptr;
-#endif
-#endif
+  compat::DevicePtr<FloatingPointType> a_data_ptr;
+  compat::DevicePtr<FloatingPointType> b_data_ptr;
+  compat::DevicePtr<FloatingPointType> c_data_ptr;
 
   // check if the user has not set a matrix OR has set a too big matrixsite and
   // if this is true: set a good matrixsize
-  if (!size_use || ((size_use * size_use * sizeof(T) * 3 > memory_avail))) {
-    size_use = round_up((int)(0.8 * sqrt(((memory_avail) / (sizeof(T) * 3)))),
+  if (!size_use || ((size_use * size_use * sizeof(FloatingPointType) * 3 > memory_avail))) {
+    size_use = round_up((int)(0.8 * sqrt(((memory_avail) / (sizeof(FloatingPointType) * 3)))),
                         1024); // a multiple of 1024 works always well
   }
-  firestarter::log::trace() << "Set " FS_ACCEL_STRING " matrix size: " << matrixSize;
-  use_bytes = (size_t)((T)memory_avail);
-  memory_size = sizeof(T) * size_use * size_use;
+  firestarter::log::trace() << "Set " << compat::AccelleratorString << " matrix size: " << matrixSize;
+  use_bytes = (size_t)((FloatingPointType)memory_avail);
+  memory_size = sizeof(FloatingPointType) * size_use * size_use;
   iterations = (use_bytes - 2 * memory_size) / memory_size; // = 1;
 
-  firestarter::log::trace() << "Allocating " FS_ACCEL_STRING " memory on device nr. " << device_index;
+  firestarter::log::trace() << "Allocating " << compat::AccelleratorString << " memory on device nr. " << device_index;
 
   // allocating memory on the GPU
-#ifdef FIRESTARTER_BUILD_CUDA
-  ACCELL_SAFE_CALL(cuMemAlloc(&a_data_ptr, memory_size), device_index);
-  ACCELL_SAFE_CALL(cuMemAlloc(&b_data_ptr, memory_size), device_index);
-  ACCELL_SAFE_CALL(cuMemAlloc(&c_data_ptr, iterations * memory_size), device_index);
-#else
-#ifdef FIRESTARTER_BUILD_HIP
-  ACCELL_SAFE_CALL(hipMalloc(&a_data_ptr, memory_size), device_index);
-  ACCELL_SAFE_CALL(hipMalloc(&b_data_ptr, memory_size), device_index);
-  ACCELL_SAFE_CALL(hipMalloc(&c_data_ptr, iterations * memory_size), device_index);
-#endif
-#endif
+  compat::accell_safe_call(compat::malloc<>(a_data_ptr, memory_size), __FILE__, __LINE__, device_index);
+  compat::accell_safe_call(compat::malloc<>(b_data_ptr, memory_size), __FILE__, __LINE__, device_index);
+  compat::accell_safe_call(compat::malloc<>(c_data_ptr, iterations * memory_size), __FILE__, __LINE__, device_index);
 
-  firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. " << device_index
+  firestarter::log::trace() << "Allocated " << compat::AccelleratorString << " memory on device nr. " << device_index
                             << ". A: " << a_data_ptr << "(Size: " << memory_size << "B)"
                             << "\n";
 
-  firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. " << device_index
+  firestarter::log::trace() << "Allocated " << compat::AccelleratorString << " memory on device nr. " << device_index
                             << ". B: " << b_data_ptr << "(Size: " << memory_size << "B)"
                             << "\n";
-  firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. " << device_index
+  firestarter::log::trace() << "Allocated " << compat::AccelleratorString << " memory on device nr. " << device_index
                             << ". C: " << c_data_ptr << "(Size: " << iterations * memory_size << "B)"
                             << "\n";
 
-  firestarter::log::trace() << "Initializing " FS_ACCEL_STRING " matrices a, b on device nr. " << device_index
-                            << ". Using " << size_use * size_use << " elements of size " << sizeof(T) << " Byte";
+  firestarter::log::trace() << "Initializing " << compat::AccelleratorString << " matrices a, b on device nr. "
+                            << device_index << ". Using " << size_use * size_use << " elements of size "
+                            << sizeof(FloatingPointType) << " Byte";
   // initialize matrix A and B on the GPU with random values
-  CONCAT(FS_ACCEL_PREFIX_LC, randGenerator_t) random_gen;
-  ACCELL_SAFE_CALL(
-      CONCAT(FS_ACCEL_PREFIX_LC, randCreateGenerator)(&random_gen, CONCAT(FS_ACCEL_PREFIX_UC, RAND_RNG_PSEUDO_DEFAULT)),
-      device_index);
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, randSetPseudoRandomGeneratorSeed)(random_gen, SEED), device_index);
-  ACCELL_SAFE_CALL(generateUniform(random_gen, (T*)a_data_ptr, size_use * size_use), device_index);
-  ACCELL_SAFE_CALL(generateUniform(random_gen, (T*)b_data_ptr, size_use * size_use), device_index);
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, randDestroyGenerator)(random_gen), device_index);
+  {
+    compat::RandGenerator random_gen;
+    compat::accell_safe_call(compat::randCreateGeneratorPseudoRandom<>(random_gen), __FILE__, __LINE__, device_index);
+    compat::accell_safe_call(compat::randSetPseudoRandomGeneratorSeed<>(random_gen, Seed), __FILE__, __LINE__,
+                             device_index);
+    compat::accell_safe_call(compat::generateUniform<>(random_gen, a_data_ptr, size_use * size_use), __FILE__, __LINE__,
+                             device_index);
+    compat::accell_safe_call(compat::generateUniform<>(random_gen, b_data_ptr, size_use * size_use), __FILE__, __LINE__,
+                             device_index);
+    compat::accell_safe_call(compat::randDestroyGenerator<>(random_gen), __FILE__, __LINE__, device_index);
+  }
 
   // initialize c_data_ptr with copies of A
   for (i = 0; i < iterations; i++) {
-    firestarter::log::trace() << "Initializing " FS_ACCEL_STRING " matrix c-" << i << " by copying " << memory_size
-                              << " byte from " << a_data_ptr << " to "
-                              << c_data_ptr +
-                                     (size_t)(i * size_use * size_use * (float)sizeof(T) / (float)sizeof(c_data_ptr))
-                              << "\n";
-    ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, MemcpyDtoD)(
-                         c_data_ptr + (size_t)(i * size_use * size_use * (float)sizeof(T) / (float)sizeof(c_data_ptr)),
-                         a_data_ptr, memory_size),
-                     device_index);
+    auto DestinationPtr =
+        c_data_ptr + (size_t)(i * size_use * size_use * (float)sizeof(FloatingPointType) / (float)sizeof(c_data_ptr));
+    firestarter::log::trace() << "Initializing " << compat::AccelleratorString << " matrix c-" << i << " by copying "
+                              << memory_size << " byte from " << a_data_ptr << " to " << DestinationPtr << "\n";
+    compat::accell_safe_call(compat::memcpyDtoD<>(DestinationPtr, a_data_ptr, memory_size), __FILE__, __LINE__,
+                             device_index);
   }
 
   // save gpuvar->init_count and sys.out
   {
     std::lock_guard<std::mutex> lk(waitForInitCvMutex);
 
-#define TO_MB(x) (unsigned long)(x / 1024 / 1024)
+    auto ToMiB = [](const size_t Val) { return Val / 1024 / 1024; };
     firestarter::log::info() << "   GPU " << device_index << "\n"
                              << "    name:           " << properties.name << "\n"
-                             << "    memory:         " << TO_MB(memory_avail) << "/" << TO_MB(memory_total)
-                             << " MiB available (using " << TO_MB(use_bytes) << " MiB)\n"
+                             << "    memory:         " << ToMiB(memory_avail) << "/" << ToMiB(memory_total)
+                             << " MiB available (using " << ToMiB(use_bytes) << " MiB)\n"
                              << "    matrix size:    " << size_use << "\n"
-                             << "    used precision: " << ((sizeof(T) == sizeof(double)) ? "double" : "single");
-#undef TO_MB
+                             << "    used precision: "
+                             << ((sizeof(FloatingPointType) == sizeof(double)) ? "double" : "single");
 
     initCount++;
   }
   waitForInitCv.notify_all();
 
-  const T alpha = 1.0;
-  const T beta = 0.0;
+  const FloatingPointType alpha = 1.0;
+  const FloatingPointType beta = 0.0;
 
   int size_use_i = size_use;
   // actual stress begins here
   while (*loadVar != LOAD_STOP) {
     for (i = 0; i < iterations; i++) {
-      ACCELL_SAFE_CALL(gemm(cublas, CONCAT(FS_ACCEL_PREFIX_UC, BLAS_OP_N), CONCAT(FS_ACCEL_PREFIX_UC, BLAS_OP_N),
-                            size_use_i, size_use_i, size_use_i, &alpha, (const T*)a_data_ptr, size_use_i,
-                            (const T*)b_data_ptr, size_use_i, &beta, (T*)c_data_ptr + i * size_use * size_use,
-                            size_use_i),
-                       device_index);
-      ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG, DeviceSynchronize)(), device_index);
+      compat::accell_safe_call(compat::gemm<FloatingPointType>(
+                                   blas, compat::BlasOperation::BLAS_OP_N, compat::BlasOperation::BLAS_OP_N, size_use_i,
+                                   size_use_i, size_use_i, &alpha, a_data_ptr, size_use_i, b_data_ptr, size_use_i,
+                                   &beta, c_data_ptr + i * size_use * size_use, size_use_i),
+                               __FILE__, __LINE__, device_index);
+      compat::accell_safe_call(compat::deviceSynchronize<>(), __FILE__, __LINE__, device_index);
     }
   }
 
-#ifdef FIRESTARTER_BUILD_CUDA
-  ACCELL_SAFE_CALL(cuMemFree(a_data_ptr), device_index);
-  ACCELL_SAFE_CALL(cuMemFree(b_data_ptr), device_index);
-  ACCELL_SAFE_CALL(cuMemFree(c_data_ptr), device_index);
-#else
-#ifdef FIRESTARTER_BUILD_HIP
-  ACCELL_SAFE_CALL(hipFree(a_data_ptr), device_index);
-  ACCELL_SAFE_CALL(hipFree(b_data_ptr), device_index);
-  ACCELL_SAFE_CALL(hipFree(c_data_ptr), device_index);
-#endif
-#endif
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, blasDestroy)(cublas), device_index);
-#ifdef FIRESTARTER_BUILD_CUDA
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, CtxDestroy)(context), device_index);
-#else
-#ifdef FIRESTARTER_BUILD_HIP
-  ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, StreamDestroy)(stream), device_index);
-#endif
-#endif
+  compat::accell_safe_call(compat::free<>(a_data_ptr), __FILE__, __LINE__, device_index);
+  compat::accell_safe_call(compat::free<>(b_data_ptr), __FILE__, __LINE__, device_index);
+  compat::accell_safe_call(compat::free<>(c_data_ptr), __FILE__, __LINE__, device_index);
+
+  compat::accell_safe_call(compat::blasDestroy<>(blas), __FILE__, __LINE__, device_index);
+
+  compat::accell_safe_call(compat::destroyContextOrStream<>(stream_or_context), __FILE__, __LINE__, device_index);
 }
 
-Cuda::Cuda(volatile uint64_t* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) {
-  std::thread t(Cuda::initGpus, std::ref(_waitForInitCv), loadVar, useFloat, useDouble, matrixSize, gpus);
-  _initThread = std::move(t);
+Cuda::Cuda(volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus) {
+  std::thread T(Cuda::initGpus, std::ref(WaitForInitCv), LoadVar, UseFloat, UseDouble, MatrixSize, Gpus);
+  InitThread = std::move(T);
 
-  std::unique_lock<std::mutex> lk(_waitForInitCvMutex);
+  std::unique_lock<std::mutex> Lk(WaitForInitCvMutex); // must not be const: wait() takes a non-const reference
   // wait for gpus to initialize
-  _waitForInitCv.wait(lk);
+  WaitForInitCv.wait(Lk);
 }
 
-void Cuda::initGpus(std::condition_variable& cv, volatile uint64_t* loadVar, bool useFloat, bool useDouble,
-                    unsigned matrixSize, int gpus) {
-  std::condition_variable waitForInitCv;
-  std::mutex waitForInitCvMutex;
+void Cuda::initGpus(std::condition_variable& WaitForInitCv, volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble,
+                    unsigned MatrixSize, int Gpus) {
+  std::condition_variable GpuThreadsWaitForInitCv;
+  std::mutex GpuThreadsWaitForInitCvMutex;
+  std::vector<std::thread> GpuThreads;
 
-  if (gpus) {
-    ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, Init)(0), -1);
-    int devCount;
-#ifdef FIRESTARTER_BUILD_CUDA
-    ACCELL_SAFE_CALL(cuDeviceGetCount(&devCount), -1);
-#else
-#ifdef FIRESTARTER_BUILD_HIP
-    ACCELL_SAFE_CALL(hipGetDeviceCount(&devCount), -1);
-#endif
-#endif
+  if (Gpus) {
+    accell_safe_call(compat::init<>(0), __FILE__, __LINE__);
 
-    if (devCount) {
-      std::vector<std::thread> gpuThreads;
-      std::atomic<int> initCount = 0;
-      int use_double;
+    int DevCount;
+    accell_safe_call(compat::getDeviceCount<>(DevCount), __FILE__, __LINE__);
 
-      if (useFloat) {
-        use_double = 0;
-      } else if (useDouble) {
-        use_double = 1;
+    if (DevCount) {
+      std::atomic<int> InitCount = 0;
+      int UseDoubleConverted;
+
+      if (UseFloat) {
+        UseDoubleConverted = 0;
+      } else if (UseDouble) {
+        UseDoubleConverted = 1;
       } else {
-        use_double = 2;
+        UseDoubleConverted = 2;
       }
 
       firestarter::log::info()
@@ -560,59 +298,59 @@ void Cuda::initGpus(std::condition_variable& cv, volatile uint64_t* loadVar, boo
           << "\n  graphics processor characteristics:";
 
       // use all GPUs if the user gave no information about use_device
-      if (gpus < 0) {
-        gpus = devCount;
+      if (Gpus < 0) {
+        Gpus = DevCount;
       }
 
-      if (gpus > devCount) {
-        firestarter::log::warn() << "You requested more " FS_ACCEL_STRING " devices than available. "
-                                    "Maybe you set " FS_ACCEL_STRING "_VISIBLE_DEVICES?";
-        firestarter::log::warn() << "FIRESTARTER will use " << devCount << " of the requested " << gpus
-                                 << " " FS_ACCEL_STRING " device(s)";
-        gpus = devCount;
+      if (Gpus > DevCount) {
+        firestarter::log::warn() << "You requested more " << compat::AccelleratorString
+                                 << " devices than available. "
+                                    "Maybe you set "
+                                 << compat::AccelleratorString << "_VISIBLE_DEVICES?";
+        firestarter::log::warn() << "FIRESTARTER will use " << DevCount << " of the requested " << Gpus << " "
+                                 << compat::AccelleratorString << " device(s)";
+        Gpus = DevCount;
       }
 
       {
-        std::lock_guard<std::mutex> lk(waitForInitCvMutex);
+        std::lock_guard<std::mutex> Lk(WaitForInitCvMutex);
 
-        for (int i = 0; i < gpus; ++i) {
+        for (int I = 0; I < Gpus; ++I) {
           // if there's a GPU in the system without Double Precision support, we
           // have to correct this.
-          int precision = get_precision(i, use_double);
-
-          if (precision) {
-            std::thread t(create_load<double>, std::ref(waitForInitCv), std::ref(waitForInitCvMutex), i,
-                          std::ref(initCount), loadVar, (int)matrixSize);
-            gpuThreads.push_back(std::move(t));
-          } else {
-            std::thread t(create_load<float>, std::ref(waitForInitCv), std::ref(waitForInitCvMutex), i,
-                          std::ref(initCount), loadVar, (int)matrixSize);
-            gpuThreads.push_back(std::move(t));
-          }
+          int Precision = get_precision(I, UseDoubleConverted);
+          void (*LoadFunc)(std::condition_variable&, std::mutex&, int, std::atomic<int>&, volatile uint64_t*, int) =
+              Precision ? create_load<double> : create_load<float>;
+
+          std::thread t(LoadFunc, std::ref(GpuThreadsWaitForInitCv), std::ref(GpuThreadsWaitForInitCvMutex), I,
+                        std::ref(InitCount), LoadVar, (int)MatrixSize);
         }
       }
 
       {
-        std::unique_lock<std::mutex> lk(waitForInitCvMutex);
+        std::unique_lock<std::mutex> Lk(GpuThreadsWaitForInitCvMutex);
         // wait for all threads to initialize
-        waitForInitCv.wait(lk, [&] { return initCount == gpus; });
-      }
-
-      // notify that init is done
-      cv.notify_all();
-
-      /* join computation threads */
-      for (auto& t : gpuThreads) {
-        t.join();
+        GpuThreadsWaitForInitCv.wait(lk, [&] { return InitCount == Gpus; });
       }
     } else {
-      firestarter::log::info() << "    - No " FS_ACCEL_STRING " devices. Just stressing CPU(s). Maybe use "
-                                  "FIRESTARTER instead of FIRESTARTER_" FS_ACCEL_STRING "?";
-      cv.notify_all();
+      firestarter::log::info() << "    - No " << compat::AccelleratorString
+                               << " devices. Just stressing CPU(s). Maybe use "
+                                  "FIRESTARTER instead of FIRESTARTER_"
+                               << compat::AccelleratorString << "?";
     }
   } else {
     firestarter::log::info() << "    --gpus 0 is set. Just stressing CPU(s). Maybe use "
-                                "FIRESTARTER instead of FIRESTARTER_" FS_ACCEL_STRING "?";
-    cv.notify_all();
+                                "FIRESTARTER instead of FIRESTARTER_"
+                             << compat::AccelleratorString << "?";
+  }
+
+  // notify that init is done
+  WaitForInitCv.notify_all();
+
+  /* join computation threads */
+  for (auto& Thread : GpuThreads) {
+    Thread.join();
   }
 }
+
+} // namespace firestarter::cuda
\ No newline at end of file

From eb0d6e50f8218044f93231afa2f33bcb8e078711 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Wed, 16 Oct 2024 22:51:42 +0200
Subject: [PATCH 053/167] Cuda: rename accellSafeCall

---
 include/firestarter/Cuda/CudaHipCompat.hpp | 10 ++--
 src/firestarter/Cuda/Cuda.cpp              | 58 +++++++++++-----------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp
index bb5a1a87..cc061430 100644
--- a/include/firestarter/Cuda/CudaHipCompat.hpp
+++ b/include/firestarter/Cuda/CudaHipCompat.hpp
@@ -404,16 +404,16 @@ auto createContextOrStream(int DeviceIndex) -> StreamOrContext {
   firestarter::log::trace() << "Creating " << AccelleratorString << " context for computation on device nr. "
                             << DeviceIndex;
   CUdevice Device;
-  accell_safe_call(cuDeviceGet(&Device, DeviceIndex), __FILE__, __LINE__, DeviceIndex);
-  accell_safe_call(cuCtxCreate(&Soc, 0, device), __FILE__, __LINE__, DeviceIndex);
+  accellSafeCall(cuDeviceGet(&Device, DeviceIndex), __FILE__, __LINE__, DeviceIndex);
+  accellSafeCall(cuCtxCreate(&Soc, 0, device), __FILE__, __LINE__, DeviceIndex);
 
   firestarter::log::trace() << "Set created " << AccelleratorString << " context on device nr. " << DeviceIndex;
-  ACCELL_SAFE_CALL(cuCtxSetCurrent(Soc), DeviceIndex);
+  accellSafeCall(cuCtxSetCurrent(Soc), __FILE__, __LINE__, DeviceIndex);
 #elif defined(FIRESTARTER_BUILD_HIP)
   firestarter::log::trace() << "Creating " << AccelleratorString << " Stream for computation on device nr. "
                             << DeviceIndex;
-  accell_safe_call(hipSetDevice(DeviceIndex), __FILE__, __LINE__, DeviceIndex);
-  accell_safe_call(hipStreamCreate(&Soc), __FILE__, __LINE__, DeviceIndex);
+  accellSafeCall(hipSetDevice(DeviceIndex), __FILE__, __LINE__, DeviceIndex);
+  accellSafeCall(hipStreamCreate(&Soc), __FILE__, __LINE__, DeviceIndex);
 #else
   (void)DeviceIndex;
   static_assert(false, "Tried to call createContextOrStream, but neither building for CUDA nor HIP.");
diff --git a/src/firestarter/Cuda/Cuda.cpp b/src/firestarter/Cuda/Cuda.cpp
index 9d6b7fed..7bc85641 100644
--- a/src/firestarter/Cuda/Cuda.cpp
+++ b/src/firestarter/Cuda/Cuda.cpp
@@ -87,8 +87,8 @@ static int get_precision(int device_index, int useDouble) {
 
   auto stream_or_context = compat::createContextOrStream(device_index);
 
-  compat::accell_safe_call(compat::memGetInfo<>(memory_avail, memory_total), __FILE__, __LINE__, device_index);
-  compat::accell_safe_call(compat::getDeviceProperties<>(properties, device_index), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::memGetInfo<>(memory_avail, memory_total), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::getDeviceProperties<>(properties, device_index), __FILE__, __LINE__, device_index);
 
   useDouble = get_precision(useDouble, properties);
 
@@ -115,7 +115,7 @@ static int get_precision(int device_index, int useDouble) {
     useDouble = 0;
   }
 
-  compat::accell_safe_call(compat::destroyContextOrStream<>(stream_or_context), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::destroyContextOrStream<>(stream_or_context), __FILE__, __LINE__, device_index);
 
   return useDouble;
 }
@@ -144,15 +144,15 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait
   auto stream_or_context = compat::createContextOrStream(device_index);
 
   firestarter::log::trace() << "Create " << compat::AccelleratorString << " Blas on device nr. " << device_index;
-  compat::accell_safe_call(compat::blasCreate<>(blas), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::blasCreate<>(blas), __FILE__, __LINE__, device_index);
 
   firestarter::log::trace() << "Get " << compat::AccelleratorString << " device properties (e.g., support for double)"
                             << " on device nr. " << device_index;
-  compat::accell_safe_call(compat::getDeviceProperties<>(properties, device_index), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::getDeviceProperties<>(properties, device_index), __FILE__, __LINE__, device_index);
 
   // getting information about the GPU memory
   size_t memory_avail, memory_total;
-  compat::accell_safe_call(compat::memGetInfo<>(memory_avail, memory_total), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::memGetInfo<>(memory_avail, memory_total), __FILE__, __LINE__, device_index);
   firestarter::log::trace() << "Get " << compat::AccelleratorString << " Memory info on device nr. " << device_index
                             << ": " << memory_avail << " B avail. from " << memory_total << " B total";
 
@@ -175,9 +175,9 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait
   firestarter::log::trace() << "Allocating " << compat::AccelleratorString << " memory on device nr. " << device_index;
 
   // allocating memory on the GPU
-  compat::accell_safe_call(compat::malloc<>(a_data_ptr, memory_size), __FILE__, __LINE__, device_index);
-  compat::accell_safe_call(compat::malloc<>(b_data_ptr, memory_size), __FILE__, __LINE__, device_index);
-  compat::accell_safe_call(compat::malloc<>(c_data_ptr, iterations * memory_size), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::malloc<>(a_data_ptr, memory_size), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::malloc<>(b_data_ptr, memory_size), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::malloc<>(c_data_ptr, iterations * memory_size), __FILE__, __LINE__, device_index);
 
   firestarter::log::trace() << "Allocated " << compat::AccelleratorString << " memory on device nr. " << device_index
                             << ". A: " << a_data_ptr << "(Size: " << memory_size << "B)"
@@ -196,14 +196,14 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait
   // initialize matrix A and B on the GPU with random values
   {
     compat::RandGenerator random_gen;
-    compat::accell_safe_call(compat::randCreateGeneratorPseudoRandom<>(random_gen), __FILE__, __LINE__, device_index);
-    compat::accell_safe_call(compat::randSetPseudoRandomGeneratorSeed<>(random_gen, Seed), __FILE__, __LINE__,
-                             device_index);
-    compat::accell_safe_call(compat::generateUniform<>(random_gen, a_data_ptr, size_use * size_use), __FILE__, __LINE__,
-                             device_index);
-    compat::accell_safe_call(compat::generateUniform<>(random_gen, b_data_ptr, size_use * size_use), __FILE__, __LINE__,
-                             device_index);
-    compat::accell_safe_call(compat::randDestroyGenerator<>(random_gen), __FILE__, __LINE__, device_index);
+    compat::accellSafeCall(compat::randCreateGeneratorPseudoRandom<>(random_gen), __FILE__, __LINE__, device_index);
+    compat::accellSafeCall(compat::randSetPseudoRandomGeneratorSeed<>(random_gen, Seed), __FILE__, __LINE__,
+                           device_index);
+    compat::accellSafeCall(compat::generateUniform<>(random_gen, a_data_ptr, size_use * size_use), __FILE__, __LINE__,
+                           device_index);
+    compat::accellSafeCall(compat::generateUniform<>(random_gen, b_data_ptr, size_use * size_use), __FILE__, __LINE__,
+                           device_index);
+    compat::accellSafeCall(compat::randDestroyGenerator<>(random_gen), __FILE__, __LINE__, device_index);
   }
 
   // initialize c_data_ptr with copies of A
@@ -212,8 +212,8 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait
         c_data_ptr + (size_t)(i * size_use * size_use * (float)sizeof(FloatingPointType) / (float)sizeof(c_data_ptr));
     firestarter::log::trace() << "Initializing " << compat::AccelleratorString << " matrix c-" << i << " by copying "
                               << memory_size << " byte from " << a_data_ptr << " to " << DestinationPtr << "\n";
-    compat::accell_safe_call(compat::memcpyDtoD<>(DestinationPtr, a_data_ptr, memory_size), __FILE__, __LINE__,
-                             device_index);
+    compat::accellSafeCall(compat::memcpyDtoD<>(DestinationPtr, a_data_ptr, memory_size), __FILE__, __LINE__,
+                           device_index);
   }
 
   // save gpuvar->init_count and sys.out
@@ -240,22 +240,22 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait
   // actual stress begins here
   while (*loadVar != LOAD_STOP) {
     for (i = 0; i < iterations; i++) {
-      compat::accell_safe_call(compat::gemm<FloatingPointType>(
-                                   blas, compat::BlasOperation::BLAS_OP_N, compat::BlasOperation::BLAS_OP_N, size_use_i,
-                                   size_use_i, size_use_i, &alpha, a_data_ptr, size_use_i, b_data_ptr, size_use_i,
-                                   &beta, c_data_ptr + i * size_use * size_use, size_use_i),
-                               __FILE__, __LINE__, device_index);
-      compat::accell_safe_call(compat::deviceSynchronize<>(), __FILE__, __LINE__, device_index);
+      compat::accellSafeCall(compat::gemm<FloatingPointType>(
+                                 blas, compat::BlasOperation::BLAS_OP_N, compat::BlasOperation::BLAS_OP_N, size_use_i,
+                                 size_use_i, size_use_i, &alpha, a_data_ptr, size_use_i, b_data_ptr, size_use_i, &beta,
+                                 c_data_ptr + i * size_use * size_use, size_use_i),
+                             __FILE__, __LINE__, device_index);
+      compat::accellSafeCall(compat::deviceSynchronize<>(), __FILE__, __LINE__, device_index);
     }
   }
 
-  compat::accell_safe_call(compat::free<>(a_data_ptr), __FILE__, __LINE__, device_index);
-  compat::accell_safe_call(compat::free<>(b_data_ptr), __FILE__, __LINE__, device_index);
-  compat::accell_safe_call(compat::free<>(c_data_ptr), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::free<>(a_data_ptr), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::free<>(b_data_ptr), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::free<>(c_data_ptr), __FILE__, __LINE__, device_index);
 
   compat::accell_safe_call(compat::blasDestroy<>(blas), __FILE__, __LINE__, device_index);
 
-  compat::accell_safe_call(compat::destroyContextOrStream<>(stream_or_context), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::destroyContextOrStream<>(stream_or_context), __FILE__, __LINE__, device_index);
 }
 
 Cuda::Cuda(volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus) {

From b961fb8c814e3692721ed90786fe116e2c157566 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 17 Oct 2024 12:16:29 +0200
Subject: [PATCH 054/167] fix some cuda compile errors

---
 include/firestarter/Cuda/Cuda.hpp          | 10 +++-----
 include/firestarter/Cuda/CudaHipCompat.hpp |  1 +
 src/firestarter/Cuda/Cuda.cpp              | 29 +++++++++++++---------
 3 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/include/firestarter/Cuda/Cuda.hpp b/include/firestarter/Cuda/Cuda.hpp
index 23d6c4bf..df16a731 100644
--- a/include/firestarter/Cuda/Cuda.hpp
+++ b/include/firestarter/Cuda/Cuda.hpp
@@ -23,7 +23,6 @@
 
 #include "firestarter/Constants.hpp"
 #include <condition_variable>
-#include <mutex>
 #include <thread>
 
 namespace firestarter::cuda {
@@ -31,19 +30,18 @@ namespace firestarter::cuda {
 class Cuda {
 private:
   std::thread InitThread;
-  std::condition_variable WaitForInitCv;
-  std::mutex WaitForInitCvMutex;
 
-  static void initGpus(std::condition_variable& Cv, const volatile firestarter::LoadThreadWorkType& LoadVar,
+  static void initGpus(std::condition_variable& WaitForInitCv, const volatile firestarter::LoadThreadWorkType& LoadVar,
                        bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus);
 
 public:
-  Cuda(volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus)
+  Cuda(const volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize,
+       int Gpus)
 #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
       ;
 #else
   {
-    (void)&LoadVar;
+    (void)LoadVar;
     (void)UseFloat;
     (void)UseDouble;
     (void)MatrixSize;
diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp
index cc061430..6961e9b7 100644
--- a/include/firestarter/Cuda/CudaHipCompat.hpp
+++ b/include/firestarter/Cuda/CudaHipCompat.hpp
@@ -30,6 +30,7 @@
 #pragma once
 
 #include <cstddef>
+#include <firestarter/Logging/Log.hpp>
 #include <optional>
 #include <sstream>
 #include <type_traits>
diff --git a/src/firestarter/Cuda/Cuda.cpp b/src/firestarter/Cuda/Cuda.cpp
index 7bc85641..94978e20 100644
--- a/src/firestarter/Cuda/Cuda.cpp
+++ b/src/firestarter/Cuda/Cuda.cpp
@@ -123,7 +123,8 @@ static int get_precision(int device_index, int useDouble) {
 // GPU index. Used to pin this thread to the GPU.
 template <typename FloatingPointType>
 static void create_load(std::condition_variable& waitForInitCv, std::mutex& waitForInitCvMutex, int device_index,
-                        std::atomic<int>& initCount, volatile uint64_t* loadVar, int matrixSize) {
+                        std::atomic<int>& initCount, const volatile firestarter::LoadThreadWorkType& LoadVar,
+                        int matrixSize) {
   static_assert(std::is_same_v<FloatingPointType, float> || std::is_same_v<FloatingPointType, double>,
                 "create_load<FloatingPointType>: Template argument must be either float or double");
 
@@ -238,7 +239,7 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait
 
   int size_use_i = size_use;
   // actual stress begins here
-  while (*loadVar != LOAD_STOP) {
+  while (LoadVar != LoadThreadWorkType::LOAD_STOP) {
     for (i = 0; i < iterations; i++) {
       compat::accellSafeCall(compat::gemm<FloatingPointType>(
                                  blas, compat::BlasOperation::BLAS_OP_N, compat::BlasOperation::BLAS_OP_N, size_use_i,
@@ -253,13 +254,17 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait
   compat::accellSafeCall(compat::free<>(b_data_ptr), __FILE__, __LINE__, device_index);
   compat::accellSafeCall(compat::free<>(c_data_ptr), __FILE__, __LINE__, device_index);
 
-  compat::accell_safe_call(compat::blasDestroy<>(blas), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::blasDestroy<>(blas), __FILE__, __LINE__, device_index);
 
   compat::accellSafeCall(compat::destroyContextOrStream<>(stream_or_context), __FILE__, __LINE__, device_index);
 }
 
-Cuda::Cuda(volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus) {
-  std::thread T(Cuda::initGpus, std::ref(WaitForInitCv), LoadVar, UseFloat, UseDouble, MatrixSize, Gpus);
+Cuda::Cuda(const volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize,
+           int Gpus) {
+  std::condition_variable WaitForInitCv;
+  std::mutex WaitForInitCvMutex;
+
+  std::thread T(Cuda::initGpus, std::ref(WaitForInitCv), std::cref(LoadVar), UseFloat, UseDouble, MatrixSize, Gpus);
   InitThread = std::move(T);
 
   const std::unique_lock<std::mutex> Lk(WaitForInitCvMutex);
@@ -267,17 +272,17 @@ Cuda::Cuda(volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble, unsigned M
   WaitForInitCv.wait(Lk);
 }
 
-void Cuda::initGpus(std::condition_variable& WaitForInitCv, volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble,
-                    unsigned MatrixSize, int Gpus) {
+void Cuda::initGpus(std::condition_variable& WaitForInitCv, const volatile firestarter::LoadThreadWorkType& LoadVar,
+                    bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus) {
   std::condition_variable GpuThreadsWaitForInitCv;
   std::mutex GpuThreadsWaitForInitCvMutex;
   std::vector<std::thread> GpuThreads;
 
   if (Gpus) {
-    accell_safe_call(compat::init<>(0), __FILE__, __LINE__);
+    compat::accellSafeCall(compat::init<>(0), __FILE__, __LINE__);
 
     int DevCount;
-    accell_safe_call(compat::getDeviceCount<>(DevCount), __FILE__, __LINE__);
+    compat::accellSafeCall(compat::getDeviceCount<>(DevCount), __FILE__, __LINE__);
 
     if (DevCount) {
       std::atomic<int> InitCount = 0;
@@ -313,7 +318,7 @@ void Cuda::initGpus(std::condition_variable& WaitForInitCv, volatile uint64_t* L
       }
 
       {
-        std::lock_guard<std::mutex> Lk(WaitForInitCvMutex);
+        std::lock_guard<std::mutex> Lk(GpuThreadsWaitForInitCvMutex);
 
         for (int I = 0; I < Gpus; ++I) {
           // if there's a GPU in the system without Double Precision support, we
@@ -323,14 +328,14 @@ void Cuda::initGpus(std::condition_variable& WaitForInitCv, volatile uint64_t* L
               Precision ? create_load<double> : create_load<float>;
 
           std::thread t(LoadFunc, std::ref(GpuThreadsWaitForInitCv), std::ref(GpuThreadsWaitForInitCvMutex), I,
-                        std::ref(InitCount), LoadVar, (int)MatrixSize);
+                        std::ref(InitCount), std::cref(LoadVar), (int)MatrixSize);
         }
       }
 
       {
         std::unique_lock<std::mutex> Lk(GpuThreadsWaitForInitCvMutex);
         // wait for all threads to initialize
-        GpuThreadsWaitForInitCv.wait(lk, [&] { return InitCount == Gpus; });
+        GpuThreadsWaitForInitCv.wait(Lk, [&] { return InitCount == Gpus; });
       }
     } else {
       firestarter::log::info() << "    - No " << compat::AccelleratorString

From 0edf8d74c98b33bdd4498d0c88590132957f1e33 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 17 Oct 2024 14:32:46 +0200
Subject: [PATCH 055/167] fix some cuda compile errors

---
 include/firestarter/Cuda/CudaHipCompat.hpp | 225 ++++++++++---------
 src/firestarter/Cuda/Cuda.cpp              | 248 +++++++++++----------
 2 files changed, 244 insertions(+), 229 deletions(-)

diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp
index 6961e9b7..04ccf70b 100644
--- a/include/firestarter/Cuda/CudaHipCompat.hpp
+++ b/include/firestarter/Cuda/CudaHipCompat.hpp
@@ -35,15 +35,42 @@
 #include <sstream>
 #include <type_traits>
 
-namespace firestarter::cuda::compat {
-
 #ifdef FIRESTARTER_BUILD_CUDA
-// Start of CUDA compatibility types
+
 #include <cublas_v2.h>
 #include <cuda.h>
 #include <cuda_runtime_api.h>
 #include <curand_kernel.h>
 
+#elif defined(FIRESTARTER_BUILD_HIP)
+
+#include <hip/hip_runtime.h>
+#include <hip/hip_runtime_api.h>
+#include <hipblas/hipblas.h>
+#include <hiprand_kernel.h>
+
+#else
+
+#error "Attempting to compile file but neither CUDA nor HIP is used"
+
+#endif
+
+namespace firestarter::cuda::compat {
+
+/// Use this function as a wrapper to all calls of CUDA or HIP functions. If an error occured we abort and print the
+/// error code.
+/// \tparam T The type of the error code returned from calls to CUDA or HIP. This may be one of BlasStatusT, ErrorT,
+/// RandStatusT or CUresult.
+/// \arg TVal The errorcode returned from calls to CUDA or HIP.
+/// \arg File The file for the log message in which the error occured.
+/// \arg Line The line for the log message in which the error occured.
+/// \arg DeviceIndex if the CUDA or HIP call is associated to a specific device, the index of the device should be
+/// provided here for the log message.
+template <typename T> void accellSafeCall(T TVal, const char* File, int Line, std::optional<int> DeviceIndex = {});
+
+#ifdef FIRESTARTER_BUILD_CUDA
+// Start of CUDA compatibility types
+
 enum class BlasStatusT : std::underlying_type_t<cublasStatus_t> {
   BLAS_STATUS_SUCCESS = CUBLAS_STATUS_SUCCESS,
   BLAS_STATUS_NOT_INITIALIZED = CUBLAS_STATUS_NOT_INITIALIZED,
@@ -59,7 +86,7 @@ enum class BlasStatusT : std::underlying_type_t<cublasStatus_t> {
 
 constexpr const char* AccelleratorString = "CUDA";
 
-enum class ErrorT : std::underlying_type_t<cuError_t> {
+enum class ErrorT : std::underlying_type_t<cudaError_t> {
   Success = cudaSuccess,
 };
 
@@ -83,7 +110,7 @@ using StreamOrContext = CUcontext;
 
 template <typename FloatingPointType> using DevicePtr = CUdeviceptr;
 
-using DeviceProperties = struct cudaDeviceProp;
+using DeviceProperties = cudaDeviceProp;
 
 using RandGenerator = curandGenerator_t;
 
@@ -97,14 +124,13 @@ enum class BlasOperation : std::underlying_type_t<cublasOperation_t> {
   BLAS_OP_C = CUBLAS_OP_C,
 };
 
+using BlasOperationT = cublasOperation_t;
+
+using CUResultOrHipErrorT = CUresult;
+
 #elif defined(FIRESTARTER_BUILD_HIP)
 // Start of HIP compatibility types
 
-#include <hip/hip_runtime.h>
-#include <hip/hip_runtime_api.h>
-#include <hipblas/hipblas.h>
-#include <hiprand_kernel.h>
-
 enum class BlasStatusT : std::underlying_type_t<hipblasStatus_t> {
   BLAS_STATUS_SUCCESS = HIPBLAS_STATUS_SUCCESS,
   BLAS_STATUS_NOT_INITIALIZED = HIPBLAS_STATUS_NOT_INITIALIZED,
@@ -147,7 +173,7 @@ using StreamOrContext = hipStream_t;
 
 template <typename FloatingPointType> using DevicePtr = FloatingPointType*;
 
-using DeviceProperties = struct hipDeviceProp_t;
+using DeviceProperties = hipDeviceProp_t;
 
 using RandGenerator = hiprandGenerator_t;
 
@@ -161,9 +187,11 @@ enum class BlasOperation : std::underlying_type_t<hipblasOperation_t> {
   BLAS_OP_C = HIPBLAS_OP_C,
 };
 
-#else
+using BlasOperationT = hipblasOperation_t;
 
-#error "Attempting to compile file but neither CUDA nor HIP is used"
+using CUResultOrHipErrorT = ErrorT;
+
+#else
 
 // Start of compatibility types for clangd
 
@@ -199,6 +227,10 @@ enum class BlasOperation {
   BLAS_OP_C,
 };
 
+using BlasOperationT = std::size_t;
+
+using CUResultOrHipErrorT = void*;
+
 #endif
 
 // abstracted function for both CUDA and HIP
@@ -319,17 +351,7 @@ constexpr const int CUDA_SUCCESS = 0;
 // NOLINTEND(readability-identifier-naming)
 #endif
 
-/// Use this function as a wrapper to all calls of CUDA or HIP functions. If an error occured we abort and print the
-/// error code.
-/// \tparam T The type of the error code returned from calls to CUDA or HIP. This may be one of BlasStatusT, ErrorT,
-/// RandStatusT or CUresult.
-/// \arg TVal The errorcode returned from calls to CUDA or HIP.
-/// \arg File The file for the log message in which the error occured.
-/// \arg Line The line for the log message in which the error occured.
-/// \arg DeviceIndex if the CUDA or HIP call is associated to a specific device, the index of the device should be
-/// provided here for the log message.
-template <typename T>
-inline void accellSafeCall(T TVal, const char* File, const int Line, std::optional<int> DeviceIndex = std::nullopt_t) {
+template <typename T> void accellSafeCall(T TVal, const char* File, const int Line, std::optional<int> DeviceIndex) {
   if constexpr (std::is_same_v<T, BlasStatusT>) {
     if (TVal == BlasStatusT::BLAS_STATUS_SUCCESS) {
       return;
@@ -344,18 +366,18 @@ inline void accellSafeCall(T TVal, const char* File, const int Line, std::option
     }
   } else if constexpr (std::is_same_v<T, CUresult>) {
 #ifndef FIRESTARTER_BUILD_CUDA
-    static_assert(false, "Tried to call accell_safe_call with CUresult, but not building for CUDA.");
+    static_assert(false, "Tried to call accellSafeCall with CUresult, but not building for CUDA.");
 #endif
     if (TVal == CUDA_SUCCESS) {
       return;
     }
   } else {
-    static_assert(false, "Tried to call accell_safe_call with an unknown type.");
+    static_assert(false, "Tried to call accellSafeCall with an unknown type.");
   }
 
   std::stringstream Ss;
-  Ss << AccelleratorString << " error at " << File << ":" << Line << ": error code = " << TVal << " ("
-     << getErrorString(TVal) << ")";
+  Ss << AccelleratorString << " error at " << File << ":" << Line
+     << ": error code = " << static_cast<std::underlying_type_t<T>>(TVal) << " (" << getErrorString(TVal) << ")";
 
   if (DeviceIndex) {
     Ss << ", device index: " << *DeviceIndex;
@@ -366,10 +388,9 @@ inline void accellSafeCall(T TVal, const char* File, const int Line, std::option
 }
 
 /// Wrapper to cuInit or hipInit.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \arg Flags The Flags forwarded to cuInit or hipInit.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType> auto init(unsigned int Flags) -> ReturnType {
+auto init(unsigned int Flags) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return cuInit(Flags);
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -381,10 +402,9 @@ template <typename ReturnType> auto init(unsigned int Flags) -> ReturnType {
 }
 
 /// Get the number GPU devices. Wrapper to cuDeviceGetCount or hipGetDeviceCount.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \arg DevCount The reference to where the number of GPU devices will be written.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType> auto getDeviceCount(int& DevCount) -> ReturnType {
+auto getDeviceCount(int& DevCount) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return cuDeviceGetCount(&DevCount);
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -406,7 +426,7 @@ auto createContextOrStream(int DeviceIndex) -> StreamOrContext {
                             << DeviceIndex;
   CUdevice Device;
   accellSafeCall(cuDeviceGet(&Device, DeviceIndex), __FILE__, __LINE__, DeviceIndex);
-  accellSafeCall(cuCtxCreate(&Soc, 0, device), __FILE__, __LINE__, DeviceIndex);
+  accellSafeCall(cuCtxCreate(&Soc, 0, Device), __FILE__, __LINE__, DeviceIndex);
 
   firestarter::log::trace() << "Set created " << AccelleratorString << " context on device nr. " << DeviceIndex;
   accellSafeCall(cuCtxSetCurrent(Soc), __FILE__, __LINE__, DeviceIndex);
@@ -423,14 +443,13 @@ auto createContextOrStream(int DeviceIndex) -> StreamOrContext {
 }
 
 /// Destroy the context (CUDA) or stream (HIP) with cuCtxDestroy and hipStreamDestroy respectively.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \arg Soc The reference to the context or stream.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType> auto destroyContextOrStream(StreamOrContext& Soc) -> ReturnType {
+auto destroyContextOrStream(StreamOrContext& Soc) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return cuCtxDestroy(Soc);
+  return static_cast<CUResultOrHipErrorT>(cuCtxDestroy(Soc));
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hipStreamDestroy(Soc);
+  return static_cast<CUResultOrHipErrorT>(hipStreamDestroy(Soc));
 #else
   (void)Soc;
   static_assert(false, "Tried to call destroyContextOrStream, but neither building for CUDA nor HIP.");
@@ -438,14 +457,13 @@ template <typename ReturnType> auto destroyContextOrStream(StreamOrContext& Soc)
 }
 
 /// Create a blas handle. Wrapper to cublasCreate or hipblasCreate.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \arg BlasHandle The reference to a BlasHandle object which will be initialized.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType> auto blasCreate(BlasHandle& BlasHandle) -> ReturnType {
+auto blasCreate(BlasHandle& BlasHandle) -> BlasStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return cublasCreate(&BlasHandle);
+  return static_cast<BlasStatusT>(cublasCreate(&BlasHandle));
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hipblasCreate(&BlasHandle);
+  return static_cast<BlasStatusT>(hipblasCreate(&BlasHandle));
 #else
   (void)BlasHandle;
   static_assert(false, "Tried to call blasCreate, but neither building for CUDA nor HIP.");
@@ -453,30 +471,28 @@ template <typename ReturnType> auto blasCreate(BlasHandle& BlasHandle) -> Return
 }
 
 /// Destory a blas handle. Wrapper to cublasDestroy or hipblasDestroy.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \arg BlasHandle The reference to a BlasHandle object which will be destroyed.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType> auto blasDestory(BlasHandle& BlasHandle) -> ReturnType {
+auto blasDestroy(BlasHandle& BlasHandle) -> BlasStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return cublasDestroy(BlasHandle);
+  return static_cast<BlasStatusT>(cublasDestroy(BlasHandle));
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hipblasDestroy(BlasHandle);
+  return static_cast<BlasStatusT>(hipblasDestroy(BlasHandle));
 #else
   (void)BlasHandle;
-  static_assert(false, "Tried to call blasDestory, but neither building for CUDA nor HIP.");
+  static_assert(false, "Tried to call blasDestroy, but neither building for CUDA nor HIP.");
 #endif
 }
 
 /// Get the properties of a specific GPU device. Wrapper to cudaGetDeviceProperties or hipGetDeviceProperties.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \arg Property The reference to the properties that are retrived.
 /// \arg DeviceIndex The index of the GPU device for which to retrive the device properties.s
 /// \returns The Error code returned from these calls.
-template <typename ReturnType> auto getDeviceProperties(DeviceProperties& Property, int DeviceIndex) -> ReturnType {
+auto getDeviceProperties(DeviceProperties& Property, int DeviceIndex) -> ErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return cudaGetDeviceProperties(&Property, DeviceIndex);
+  return static_cast<ErrorT>(cudaGetDeviceProperties(&Property, DeviceIndex));
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hipGetDeviceProperties(&Property, DeviceIndex);
+  return static_cast<ErrorT>(hipGetDeviceProperties(&Property, DeviceIndex));
 #else
   (void)Property;
   (void)DeviceIndex;
@@ -486,15 +502,14 @@ template <typename ReturnType> auto getDeviceProperties(DeviceProperties& Proper
 
 /// Get the number of memory in the current CUDA or HIP context. Wrapper to cuMemGetInfo or
 /// hipMemGetInfo.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \arg MemoryAvail The reference to the available memory that is retrived.
 /// \arg MemoryTotal The reference to the total memory that is retrived.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType> auto memGetInfo(std::size_t& MemoryAvail, std::size_t& MemoryTotal) -> ReturnType {
+auto memGetInfo(std::size_t& MemoryAvail, std::size_t& MemoryTotal) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return cuMemGetInfo(&MemoryAvail, &MemoryTotal);
+  return static_cast<CUResultOrHipErrorT>(cuMemGetInfo(&MemoryAvail, &MemoryTotal));
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hipMemGetInfo(&MemoryAvail, &MemoryTotal);
+  return static_cast<CUResultOrHipErrorT>(hipMemGetInfo(&MemoryAvail, &MemoryTotal));
 #else
   (void)MemoryAvail;
   (void)MemoryTotal;
@@ -504,17 +519,16 @@ template <typename ReturnType> auto memGetInfo(std::size_t& MemoryAvail, std::si
 
 /// Malloc device memory in the current CUDA or HIP context. Wrapper to cuMemAlloc or
 /// hipMalloc.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \tparam FloatingPointType The type of the floating point used. Either float or double.
 /// \arg Ptr The reference to the device pointer which is retrieved by the malloc call.
 /// \arg MemorySize The memory that is allocated on the device in bytes.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType, typename FloatingPointType>
-auto malloc(DevicePtr<FloatingPointType>& Ptr, std::size_t MemorySize) -> ReturnType {
+template <typename FloatingPointType>
+auto malloc(DevicePtr<FloatingPointType>& Ptr, std::size_t MemorySize) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return cuMemAlloc(&Ptr, MemorySize);
+  return static_cast<CUResultOrHipErrorT>(cuMemAlloc(&Ptr, MemorySize));
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hipMalloc(&Ptr, MemorySize);
+  return static_cast<CUResultOrHipErrorT>(hipMalloc(&Ptr, MemorySize));
 #else
   (void)Ptr;
   (void)MemorySize;
@@ -524,15 +538,14 @@ auto malloc(DevicePtr<FloatingPointType>& Ptr, std::size_t MemorySize) -> Return
 
 /// Free device memory in the current CUDA or HIP context. Wrapper to cuMemFree or
 /// hipFree.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \tparam FloatingPointType The type of the floating point used. Either float or double.
 /// \arg Ptr The reference to the device pointer which is used in the free call.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType, typename FloatingPointType> auto free(DevicePtr<FloatingPointType>& Ptr) -> ReturnType {
+template <typename FloatingPointType> auto free(DevicePtr<FloatingPointType>& Ptr) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return cuMemFree(Ptr);
+  return static_cast<CUResultOrHipErrorT>(cuMemFree(Ptr));
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hipFree(Ptr);
+  return static_cast<CUResultOrHipErrorT>(hipFree(Ptr));
 #else
   (void)Ptr;
   static_assert(false, "Tried to call free, but neither building for CUDA nor HIP.");
@@ -541,14 +554,13 @@ template <typename ReturnType, typename FloatingPointType> auto free(DevicePtr<F
 
 /// Create a random generator in the current CUDA or HIP context. Wrapper to curandCreateGenerator or
 /// hiprandCreateGenerator.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \arg RandomGen The reference to the random generation which is retrived by the calls.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType> auto randCreateGeneratorPseudoRandom(RandGenerator& RandomGen) -> ReturnType {
+auto randCreateGeneratorPseudoRandom(RandGenerator& RandomGen) -> RandStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return curandCreateGenerator(&RandomGen, CURAND_RNG_PSEUDO_DEFAULT);
+  return static_cast<RandStatusT>(curandCreateGenerator(&RandomGen, CURAND_RNG_PSEUDO_DEFAULT));
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hiprandCreateGenerator(&RandomGen, HIPRAND_RNG_PSEUDO_DEFAULT);
+  return static_cast<RandStatusT>(hiprandCreateGenerator(&RandomGen, HIPRAND_RNG_PSEUDO_DEFAULT));
 #else
   (void)RandomGen;
   static_assert(false, "Tried to call randCreateGeneratorPseudoRandom, but neither building for CUDA nor HIP.");
@@ -557,15 +569,14 @@ template <typename ReturnType> auto randCreateGeneratorPseudoRandom(RandGenerato
 
 /// Set the pseudo random generator seed in the current CUDA or HIP context. Wrapper to
 /// curandSetPseudoRandomGeneratorSeed or hiprandSetPseudoRandomGeneratorSeed.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \arg RandomGen The reference to the random generator.
 /// \arg Seed The seed used to initialize the pseudo random generator.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType> auto randSetPseudoRandomGeneratorSeed(RandGenerator& RandomGen, int Seed) -> ReturnType {
+auto randSetPseudoRandomGeneratorSeed(RandGenerator& RandomGen, int Seed) -> RandStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return curandSetPseudoRandomGeneratorSeed(RandomGen, Seed);
+  return static_cast<RandStatusT>(curandSetPseudoRandomGeneratorSeed(RandomGen, Seed));
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hiprandSetPseudoRandomGeneratorSeed(RandomGen, Seed);
+  return static_cast<RandStatusT>(hiprandSetPseudoRandomGeneratorSeed(RandomGen, Seed));
 #else
   (void)RandomGen;
   (void)Seed;
@@ -575,17 +586,15 @@ template <typename ReturnType> auto randSetPseudoRandomGeneratorSeed(RandGenerat
 
 /// Initialize the provided memory with with a specific number of uniform random floats. Wrapper to
 /// curandGenerateUniform or hiprandGenerateUniform.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \arg RandomGen The reference to the random generator.
 /// \arg OutputPtr The device pointer on which is initialized with specific number of uniform random floats.
 /// \arg Num The number of unifrom random floats.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType>
-auto randGenerateUniform(RandGenerator& RandomGen, DevicePtr<float> OutputPtr, std::size_t Num) -> ReturnType {
+auto randGenerateUniform(RandGenerator& RandomGen, DevicePtr<float> OutputPtr, std::size_t Num) -> RandStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return curandGenerateUniform(RandomGen, OutputPtr, Num);
+  return static_cast<RandStatusT>(curandGenerateUniform(RandomGen, reinterpret_cast<float*>(OutputPtr), Num));
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hiprandGenerateUniform(RandomGen, OutputPtr, Num);
+  return static_cast<RandStatusT>(hiprandGenerateUniform(RandomGen, OutputPtr, Num));
 #else
   (void)RandomGen;
   (void)OutputPtr;
@@ -596,17 +605,15 @@ auto randGenerateUniform(RandGenerator& RandomGen, DevicePtr<float> OutputPtr, s
 
 /// Initialize the provided memory with with a specific number of uniform random doubles. Wrapper to
 /// curandGenerateUniformDouble or hiprandGenerateUniformDouble.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \arg RandomGen The reference to the random generator.
 /// \arg OutputPtr The device pointer on which is initialized with specific number of uniform random floats.
 /// \arg Num The number of unifrom random doubles.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType>
-auto randGenerateUniformDouble(RandGenerator& RandomGen, DevicePtr<double> OutputPtr, std::size_t Num) -> ReturnType {
+auto randGenerateUniformDouble(RandGenerator& RandomGen, DevicePtr<double> OutputPtr, std::size_t Num) -> RandStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return curandGenerateUniformDouble(RandomGen, OutputPtr, Num);
+  return static_cast<RandStatusT>(curandGenerateUniformDouble(RandomGen, reinterpret_cast<double*>(OutputPtr), Num));
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hiprandGenerateUniformDouble(RandomGen, OutputPtr, Num);
+  return static_cast<RandStatusT>(hiprandGenerateUniformDouble(RandomGen, OutputPtr, Num));
 #else
   (void)RandomGen;
   (void)OutputPtr;
@@ -617,18 +624,17 @@ auto randGenerateUniformDouble(RandGenerator& RandomGen, DevicePtr<double> Outpu
 
 /// Initialize the provided memory with with a specific number of uniform random floating points. Wrapper to
 /// randGenerateUniform or randGenerateUniformDouble.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \tparam FloatPointType The float point types is used. Either float or double.
 /// \arg Generator The reference to the random generator.
 /// \arg OutputPtr The device pointer on which is initialized with specific number of uniform random floats.
 /// \arg Num The number of unifrom random doubles.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType, typename FloatPointType>
-auto generateUniform(RandGenerator& Generator, DevicePtr<FloatPointType> OutputPtr, size_t Num) -> ReturnType {
+template <typename FloatPointType>
+auto generateUniform(RandGenerator& Generator, DevicePtr<FloatPointType> OutputPtr, size_t Num) -> RandStatusT {
   if constexpr (std::is_same_v<FloatPointType, float>) {
-    return randGenerateUniform<ReturnType>(Generator, OutputPtr, Num);
+    return static_cast<RandStatusT>(randGenerateUniform(Generator, OutputPtr, Num));
   } else if constexpr (std::is_same_v<FloatPointType, double>) {
-    return randGenerateUniformDouble<ReturnType>(Generator, OutputPtr, Num);
+    return static_cast<RandStatusT>(randGenerateUniformDouble(Generator, OutputPtr, Num));
   } else {
     static_assert(false, "generateUniform<FloatPointType>: Template argument must be either float or double");
   }
@@ -636,14 +642,13 @@ auto generateUniform(RandGenerator& Generator, DevicePtr<FloatPointType> OutputP
 
 /// Destory a random generator in the current CUDA or HIP context. Wrapper to curandDestroyGenerator or
 /// hiprandDestroyGenerator.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \arg RandomGen The reference to the random generation which shoule be destroyed.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType> auto randDestroyGenerator(RandGenerator& RandomGen) -> ReturnType {
+auto randDestroyGenerator(RandGenerator& RandomGen) -> RandStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return curandDestroyGenerator(RandomGen);
+  return static_cast<RandStatusT>(curandDestroyGenerator(RandomGen));
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hiprandDestroyGenerator(RandomGen);
+  return static_cast<RandStatusT>(hiprandDestroyGenerator(RandomGen));
 #else
   (void)RandomGen;
   static_assert(false, "Tried to call randDestroyGenerator, but neither building for CUDA nor HIP.");
@@ -651,18 +656,17 @@ template <typename ReturnType> auto randDestroyGenerator(RandGenerator& RandomGe
 }
 
 /// Copy memory from a device pointer to another device pointer. Wrapper to cuMemcpyDtoD or hipMemcpyDtoD.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \arg DestinationPtr The destination address.
 /// \arg SourcePtr The source address.
 /// \arg Size The number of bytes to copy.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType, typename FloatPointType>
+template <typename FloatPointType>
 auto memcpyDtoD(DevicePtr<FloatPointType> DestinationPtr, DevicePtr<FloatPointType> SourcePtr, std::size_t Size)
-    -> ReturnType {
+    -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return cuMemcpyDtoD(DestinationPtr, SourcePtr, Size);
+  return static_cast<CUResultOrHipErrorT>(cuMemcpyDtoD(DestinationPtr, SourcePtr, Size));
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hipMemcpyDtoD(DestinationPtr, SourcePtr, Size);
+  return static_cast<CUResultOrHipErrorT>(hipMemcpyDtoD(DestinationPtr, SourcePtr, Size));
 #else
   (void)DestinationPtr;
   (void)SourcePtr;
@@ -672,13 +676,12 @@ auto memcpyDtoD(DevicePtr<FloatPointType> DestinationPtr, DevicePtr<FloatPointTy
 }
 
 /// Block until the current device finished. Wrapper to cudaDeviceSynchronize or hipcudaDeviceSynchronize.
-/// \tparam ReturnType The type of the return code to these calls.
 /// \returns The Error code returned from these calls.
-template <typename ReturnType> auto deviceSynchronize() -> ReturnType {
+auto deviceSynchronize() -> ErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return cudaDeviceSynchronize();
+  return static_cast<ErrorT>(cudaDeviceSynchronize());
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hipcudaDeviceSynchronize();
+  return static_cast<ErrorT>(hipDeviceSynchronize());
 #else
   static_assert(false, "Tried to call deviceSynchronize, but neither building for CUDA nor HIP.");
 #endif
@@ -705,20 +708,30 @@ template <typename ReturnType> auto deviceSynchronize() -> ReturnType {
 /// \arg Ldc Leading dimension of a two-dimensional array used to store the matrix C.
 /// \returns The Error code returned from these calls.
 template <typename FloatPointType>
-auto gemm(BlasHandle Handle, BlasOperation Transa, BlasOperation Transb, int& M, int& N, int& K,
-          const FloatPointType* Alpha, const DevicePtr<FloatPointType>& A, int& Lda, const DevicePtr<FloatPointType>& B,
-          int& Ldb, const FloatPointType* Beta, DevicePtr<FloatPointType>& C, int& Ldc) -> BlasStatus {
+auto gemm(BlasHandle Handle, BlasOperation Transa, BlasOperation Transb, int M, int N, int K,
+          const FloatPointType& Alpha, const DevicePtr<FloatPointType> A, int Lda, const DevicePtr<FloatPointType> B,
+          int Ldb, const FloatPointType& Beta, DevicePtr<FloatPointType> C, int Ldc) -> BlasStatusT {
   if constexpr (std::is_same_v<FloatPointType, float>) {
 #ifdef FIRESTARTER_BUILD_CUDA
-    return cublasSgemm(Handle, Transa, Transb, M, N, K, Alpha, A, Lda, B, Ldb, Beta, C, Ldc);
+    return static_cast<BlasStatusT>(
+        cublasSgemm(Handle, static_cast<BlasOperationT>(Transa), static_cast<BlasOperationT>(Transb), M, N, K, &Alpha,
+                    reinterpret_cast<const float*>(A), Lda, reinterpret_cast<const float*>(B), Ldb, &Beta,
+                    reinterpret_cast<float*>(C), Ldc));
 #elif defined(FIRESTARTER_BUILD_HIP)
-    return hipblasSgemm(Handle, Transa, Transb, M, N, K, Alpha, A, Lda, B, Ldb, Beta, C, Ldc);
+    return static_cast<BlasStatusT>(hipblasSgemm(Handle, static_cast<BlasOperationT>(Transa),
+                                                 static_cast<BlasOperationT>(Transb), M, N, K, &Alpha, A, Lda, B, Ldb,
+                                                 &Beta, C, Ldc));
 #endif
   } else if constexpr (std::is_same_v<FloatPointType, double>) {
 #ifdef FIRESTARTER_BUILD_CUDA
-    return cublasDgemm(Handle, Transa, Transb, M, N, K, Alpha, A, Lda, B, Ldb, Beta, C, Ldc);
+    return static_cast<BlasStatusT>(
+        cublasDgemm(Handle, static_cast<BlasOperationT>(Transa), static_cast<BlasOperationT>(Transb), M, N, K, &Alpha,
+                    reinterpret_cast<const double*>(A), Lda, reinterpret_cast<const double*>(B), Ldb, &Beta,
+                    reinterpret_cast<double*>(C), Ldc));
 #elif defined(FIRESTARTER_BUILD_HIP)
-    return hipblasDgemm(Handle, Transa, Transb, M, N, K, Alpha, A, Lda, B, Ldb, Beta, C, Ldc);
+    return static_cast<BlasStatusT>(hipblasDgemm(Handle, static_cast<BlasOperationT>(Transa),
+                                                 static_cast<BlasOperationT>(Transb), M, N, K, &Alpha, A, Lda, B, Ldb,
+                                                 &Beta, C, Ldc));
 #endif
   } else {
     (void)Handle;
diff --git a/src/firestarter/Cuda/Cuda.cpp b/src/firestarter/Cuda/Cuda.cpp
index 94978e20..5cd18cdc 100644
--- a/src/firestarter/Cuda/Cuda.cpp
+++ b/src/firestarter/Cuda/Cuda.cpp
@@ -26,7 +26,6 @@
  * http://wili.cc/blog/gpu-burn.html
  *****************************************************************************/
 
-#include <algorithm>
 #include <atomic>
 #include <firestarter/Cuda/Cuda.hpp>
 #include <firestarter/Cuda/CudaHipCompat.hpp>
@@ -38,17 +37,17 @@ namespace firestarter::cuda {
 
 constexpr const int Seed = 123;
 
-static int round_up(int num_to_round, int multiple) {
-  if (multiple == 0) {
-    return num_to_round;
+static auto roundUp(int NumToRound, int Multiple) -> int {
+  if (Multiple == 0) {
+    return NumToRound;
   }
 
-  int remainder = num_to_round % multiple;
-  if (remainder == 0) {
-    return num_to_round;
+  const int Remainder = NumToRound % Multiple;
+  if (Remainder == 0) {
+    return NumToRound;
   }
 
-  return num_to_round + multiple - remainder;
+  return NumToRound + Multiple - Remainder;
 }
 
 /// Convert the UseDouble input (0 -> single precision, 1 -> double precision, 2 -> automatic) to either 0 or 1 for
@@ -58,39 +57,40 @@ static int round_up(int num_to_round, int multiple) {
 /// \arg UseDouble The input that specifies either single precision, double precision or automatic selection.
 /// \arg Properties The device properties.
 /// \return The selected precision, either 0 or 1 for float or double respectively.
-static int get_precision(int UseDouble, const compat::DeviceProperties& Properties) {
+static auto getPrecision(int UseDouble, const compat::DeviceProperties& Properties) -> int {
 #if (CUDART_VERSION >= 8000)
   // read precision ratio (dp/sp) of GPU to choose the right variant for maximum
   // workload
   if (UseDouble == 2 && Properties.singleToDoublePrecisionPerfRatio > 3) {
     return 0;
-  } else if (UseDouble) {
+  }
+  if (UseDouble) {
     return 1;
-  } else {
-    return 0;
   }
+  return 0;
 #else
   // as precision ratio is not supported return default/user input value
   (void)Properties;
 
   if (UseDouble) {
     return 1;
-  } else {
-    return 0;
   }
+  return 0;
+
 #endif
 }
 
-static int get_precision(int device_index, int useDouble) {
-  size_t memory_avail, memory_total;
-  compat::DeviceProperties properties;
+static auto getPrecision(int DeviceIndex, int UseDouble) -> int {
+  size_t MemoryAvail;
+  size_t MemoryTotal;
+  compat::DeviceProperties Properties;
 
-  auto stream_or_context = compat::createContextOrStream(device_index);
+  auto StreamOrContext = compat::createContextOrStream(DeviceIndex);
 
-  compat::accellSafeCall(compat::memGetInfo<>(memory_avail, memory_total), __FILE__, __LINE__, device_index);
-  compat::accellSafeCall(compat::getDeviceProperties<>(properties, device_index), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::memGetInfo(MemoryAvail, MemoryTotal), __FILE__, __LINE__, DeviceIndex);
+  compat::accellSafeCall(compat::getDeviceProperties(Properties, DeviceIndex), __FILE__, __LINE__, DeviceIndex);
 
-  useDouble = get_precision(useDouble, properties);
+  UseDouble = getPrecision(UseDouble, Properties);
 
   bool DoubleNotSupported =
 #ifdef FIRESTARTER_BUILD_CUDA
@@ -103,160 +103,160 @@ static int get_precision(int device_index, int useDouble) {
 
   // we check for double precision support on the GPU and print errormsg, when
   // the user wants to compute DP on a SP-only-Card.
-  if (useDouble && DoubleNotSupported) {
-    std::stringstream ss;
-    ss << compat::AccelleratorString << " GPU " << device_index << ": " << properties.name << " ";
+  if (UseDouble && DoubleNotSupported) {
+    std::stringstream Ss;
+    Ss << compat::AccelleratorString << " GPU " << DeviceIndex << ": " << Properties.name << " ";
 
-    firestarter::log::error() << ss.str() << "Doesn't support double precision.\n"
-                              << ss.str() << "Compute Capability: " << properties.major << "." << properties.minor
+    firestarter::log::error() << Ss.str() << "Doesn't support double precision.\n"
+                              << Ss.str() << "Compute Capability: " << Properties.major << "." << Properties.minor
                               << ". Requiered for double precision: >=1.3\n"
-                              << ss.str() << "Stressing with single precision instead. Maybe use -f parameter.";
+                              << Ss.str() << "Stressing with single precision instead. Maybe use -f parameter.";
 
-    useDouble = 0;
+    UseDouble = 0;
   }
 
-  compat::accellSafeCall(compat::destroyContextOrStream<>(stream_or_context), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::destroyContextOrStream(StreamOrContext), __FILE__, __LINE__, DeviceIndex);
 
-  return useDouble;
+  return UseDouble;
 }
 
 // GPU index. Used to pin this thread to the GPU.
 template <typename FloatingPointType>
-static void create_load(std::condition_variable& waitForInitCv, std::mutex& waitForInitCvMutex, int device_index,
-                        std::atomic<int>& initCount, const volatile firestarter::LoadThreadWorkType& LoadVar,
-                        int matrixSize) {
+static void createLoad(std::condition_variable& WaitForInitCv, std::mutex& WaitForInitCvMutex, int DeviceIndex,
+                       std::atomic<int>& InitCount, const volatile firestarter::LoadThreadWorkType& LoadVar,
+                       unsigned MatrixSize) {
   static_assert(std::is_same_v<FloatingPointType, float> || std::is_same_v<FloatingPointType, double>,
                 "create_load<FloatingPointType>: Template argument must be either float or double");
 
-  int iterations, i;
-
-  firestarter::log::trace() << "Starting " << compat::AccelleratorString << " with given matrix size " << matrixSize;
+  firestarter::log::trace() << "Starting " << compat::AccelleratorString << " with given matrix size " << MatrixSize;
 
-  size_t size_use = 0;
-  if (matrixSize > 0) {
-    size_use = matrixSize;
+  size_t SizeUse = 0;
+  if (MatrixSize > 0) {
+    SizeUse = MatrixSize;
   }
 
-  size_t use_bytes, memory_size;
-  compat::DeviceProperties properties;
-  compat::BlasHandle blas;
+  size_t UseBytes;
+  size_t MemorySize;
+  compat::DeviceProperties Properties;
+  compat::BlasHandle Blas;
   // reserving the GPU and initializing cublas
 
-  auto stream_or_context = compat::createContextOrStream(device_index);
+  auto StreamOrContext = compat::createContextOrStream(DeviceIndex);
 
-  firestarter::log::trace() << "Create " << compat::AccelleratorString << " Blas on device nr. " << device_index;
-  compat::accellSafeCall(compat::blasCreate<>(blas), __FILE__, __LINE__, device_index);
+  firestarter::log::trace() << "Create " << compat::AccelleratorString << " Blas on device nr. " << DeviceIndex;
+  compat::accellSafeCall(compat::blasCreate(Blas), __FILE__, __LINE__, DeviceIndex);
 
   firestarter::log::trace() << "Get " << compat::AccelleratorString << " device properties (e.g., support for double)"
-                            << " on device nr. " << device_index;
-  compat::accellSafeCall(compat::getDeviceProperties<>(properties, device_index), __FILE__, __LINE__, device_index);
+                            << " on device nr. " << DeviceIndex;
+  compat::accellSafeCall(compat::getDeviceProperties(Properties, DeviceIndex), __FILE__, __LINE__, DeviceIndex);
 
   // getting information about the GPU memory
-  size_t memory_avail, memory_total;
-  compat::accellSafeCall(compat::memGetInfo<>(memory_avail, memory_total), __FILE__, __LINE__, device_index);
-  firestarter::log::trace() << "Get " << compat::AccelleratorString << " Memory info on device nr. " << device_index
-                            << ": " << memory_avail << " B avail. from " << memory_total << " B total";
+  size_t MemoryAvail;
+  size_t MemoryTotal;
+  compat::accellSafeCall(compat::memGetInfo(MemoryAvail, MemoryTotal), __FILE__, __LINE__, DeviceIndex);
+  firestarter::log::trace() << "Get " << compat::AccelleratorString << " Memory info on device nr. " << DeviceIndex
+                            << ": " << MemoryAvail << " B avail. from " << MemoryTotal << " B total";
 
   // defining memory pointers
-  compat::DevicePtr<FloatingPointType> a_data_ptr;
-  compat::DevicePtr<FloatingPointType> b_data_ptr;
-  compat::DevicePtr<FloatingPointType> c_data_ptr;
+  compat::DevicePtr<FloatingPointType> ADataPtr;
+  compat::DevicePtr<FloatingPointType> BDataPtr;
+  compat::DevicePtr<FloatingPointType> CDataPtr;
 
   // check if the user has not set a matrix OR has set a too big matrixsite and
   // if this is true: set a good matrixsize
-  if (!size_use || ((size_use * size_use * sizeof(FloatingPointType) * 3 > memory_avail))) {
-    size_use = round_up((int)(0.8 * sqrt(((memory_avail) / (sizeof(FloatingPointType) * 3)))),
-                        1024); // a multiple of 1024 works always well
+  if (!SizeUse || ((SizeUse * SizeUse * sizeof(FloatingPointType) * 3 > MemoryAvail))) {
+    SizeUse = roundUp((int)(0.8 * sqrt(((MemoryAvail) / (sizeof(FloatingPointType) * 3)))),
+                      1024); // a multiple of 1024 works always well
   }
-  firestarter::log::trace() << "Set " << compat::AccelleratorString << " matrix size: " << matrixSize;
-  use_bytes = (size_t)((FloatingPointType)memory_avail);
-  memory_size = sizeof(FloatingPointType) * size_use * size_use;
-  iterations = (use_bytes - 2 * memory_size) / memory_size; // = 1;
+  firestarter::log::trace() << "Set " << compat::AccelleratorString << " matrix size: " << MatrixSize;
+  UseBytes = (size_t)((FloatingPointType)MemoryAvail);
+  MemorySize = sizeof(FloatingPointType) * SizeUse * SizeUse;
+  int Iterations = (UseBytes - 2 * MemorySize) / MemorySize; // = 1;
 
-  firestarter::log::trace() << "Allocating " << compat::AccelleratorString << " memory on device nr. " << device_index;
+  firestarter::log::trace() << "Allocating " << compat::AccelleratorString << " memory on device nr. " << DeviceIndex;
 
   // allocating memory on the GPU
-  compat::accellSafeCall(compat::malloc<>(a_data_ptr, memory_size), __FILE__, __LINE__, device_index);
-  compat::accellSafeCall(compat::malloc<>(b_data_ptr, memory_size), __FILE__, __LINE__, device_index);
-  compat::accellSafeCall(compat::malloc<>(c_data_ptr, iterations * memory_size), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::malloc<FloatingPointType>(ADataPtr, MemorySize), __FILE__, __LINE__, DeviceIndex);
+  compat::accellSafeCall(compat::malloc<FloatingPointType>(BDataPtr, MemorySize), __FILE__, __LINE__, DeviceIndex);
+  compat::accellSafeCall(compat::malloc<FloatingPointType>(CDataPtr, Iterations * MemorySize), __FILE__, __LINE__,
+                         DeviceIndex);
 
-  firestarter::log::trace() << "Allocated " << compat::AccelleratorString << " memory on device nr. " << device_index
-                            << ". A: " << a_data_ptr << "(Size: " << memory_size << "B)"
+  firestarter::log::trace() << "Allocated " << compat::AccelleratorString << " memory on device nr. " << DeviceIndex
+                            << ". A: " << ADataPtr << "(Size: " << MemorySize << "B)"
                             << "\n";
 
-  firestarter::log::trace() << "Allocated " << compat::AccelleratorString << " memory on device nr. " << device_index
-                            << ". B: " << b_data_ptr << "(Size: " << memory_size << "B)"
+  firestarter::log::trace() << "Allocated " << compat::AccelleratorString << " memory on device nr. " << DeviceIndex
+                            << ". B: " << BDataPtr << "(Size: " << MemorySize << "B)"
                             << "\n";
-  firestarter::log::trace() << "Allocated " << compat::AccelleratorString << " memory on device nr. " << device_index
-                            << ". C: " << c_data_ptr << "(Size: " << iterations * memory_size << "B)"
+  firestarter::log::trace() << "Allocated " << compat::AccelleratorString << " memory on device nr. " << DeviceIndex
+                            << ". C: " << CDataPtr << "(Size: " << Iterations * MemorySize << "B)"
                             << "\n";
 
   firestarter::log::trace() << "Initializing " << compat::AccelleratorString << " matrices a, b on device nr. "
-                            << device_index << ". Using " << size_use * size_use << " elements of size "
+                            << DeviceIndex << ". Using " << SizeUse * SizeUse << " elements of size "
                             << sizeof(FloatingPointType) << " Byte";
   // initialize matrix A and B on the GPU with random values
   {
-    compat::RandGenerator random_gen;
-    compat::accellSafeCall(compat::randCreateGeneratorPseudoRandom<>(random_gen), __FILE__, __LINE__, device_index);
-    compat::accellSafeCall(compat::randSetPseudoRandomGeneratorSeed<>(random_gen, Seed), __FILE__, __LINE__,
-                           device_index);
-    compat::accellSafeCall(compat::generateUniform<>(random_gen, a_data_ptr, size_use * size_use), __FILE__, __LINE__,
-                           device_index);
-    compat::accellSafeCall(compat::generateUniform<>(random_gen, b_data_ptr, size_use * size_use), __FILE__, __LINE__,
-                           device_index);
-    compat::accellSafeCall(compat::randDestroyGenerator<>(random_gen), __FILE__, __LINE__, device_index);
+    compat::RandGenerator RandomGen;
+    compat::accellSafeCall(compat::randCreateGeneratorPseudoRandom(RandomGen), __FILE__, __LINE__, DeviceIndex);
+    compat::accellSafeCall(compat::randSetPseudoRandomGeneratorSeed(RandomGen, Seed), __FILE__, __LINE__, DeviceIndex);
+    compat::accellSafeCall(compat::generateUniform<FloatingPointType>(RandomGen, ADataPtr, SizeUse * SizeUse), __FILE__,
+                           __LINE__, DeviceIndex);
+    compat::accellSafeCall(compat::generateUniform<FloatingPointType>(RandomGen, BDataPtr, SizeUse * SizeUse), __FILE__,
+                           __LINE__, DeviceIndex);
+    compat::accellSafeCall(compat::randDestroyGenerator(RandomGen), __FILE__, __LINE__, DeviceIndex);
   }
 
   // initialize c_data_ptr with copies of A
-  for (i = 0; i < iterations; i++) {
+  for (int I = 0; I < Iterations; I++) {
     auto DestinationPtr =
-        c_data_ptr + (size_t)(i * size_use * size_use * (float)sizeof(FloatingPointType) / (float)sizeof(c_data_ptr));
-    firestarter::log::trace() << "Initializing " << compat::AccelleratorString << " matrix c-" << i << " by copying "
-                              << memory_size << " byte from " << a_data_ptr << " to " << DestinationPtr << "\n";
-    compat::accellSafeCall(compat::memcpyDtoD<>(DestinationPtr, a_data_ptr, memory_size), __FILE__, __LINE__,
-                           device_index);
+        CDataPtr + (size_t)(I * SizeUse * SizeUse * (float)sizeof(FloatingPointType) / (float)sizeof(CDataPtr));
+    firestarter::log::trace() << "Initializing " << compat::AccelleratorString << " matrix c-" << I << " by copying "
+                              << MemorySize << " byte from " << ADataPtr << " to " << DestinationPtr << "\n";
+    compat::accellSafeCall(compat::memcpyDtoD<FloatingPointType>(DestinationPtr, ADataPtr, MemorySize), __FILE__,
+                           __LINE__, DeviceIndex);
   }
 
   // save gpuvar->init_count and sys.out
   {
-    std::lock_guard<std::mutex> lk(waitForInitCvMutex);
-
-    auto ToMiB = [](const size_t Val) { return Val / 1024 / 1024; };
-    firestarter::log::info() << "   GPU " << device_index << "\n"
-                             << "    name:           " << properties.name << "\n"
-                             << "    memory:         " << ToMB(memory_avail) << "/" << ToMB(memory_total)
-                             << " MiB available (using " << ToMB(use_bytes) << " MiB)\n"
-                             << "    matrix size:    " << size_use << "\n"
+    const std::lock_guard<std::mutex> Lk(WaitForInitCvMutex);
+
+    auto ToiB = [](const size_t Val) { return Val / 1024 / 1024; };
+    firestarter::log::info() << "   GPU " << DeviceIndex << "\n"
+                             << "    name:           " << Properties.name << "\n"
+                             << "    memory:         " << ToiB(MemoryAvail) << "/" << ToiB(MemoryTotal)
+                             << " iB available (using " << ToiB(UseBytes) << " iB)\n"
+                             << "    matrix size:    " << SizeUse << "\n"
                              << "    used precision: "
                              << ((sizeof(FloatingPointType) == sizeof(double)) ? "double" : "single");
 
-    initCount++;
+    InitCount++;
   }
-  waitForInitCv.notify_all();
+  WaitForInitCv.notify_all();
 
-  const FloatingPointType alpha = 1.0;
-  const FloatingPointType beta = 0.0;
+  const FloatingPointType Alpha = 1.0;
+  const FloatingPointType Beta = 0.0;
 
-  int size_use_i = size_use;
+  const int SizeUseI = SizeUse;
   // actual stress begins here
-  while (LoadVar != LoadThreadWorkType::LOAD_STOP) {
-    for (i = 0; i < iterations; i++) {
-      compat::accellSafeCall(compat::gemm<FloatingPointType>(
-                                 blas, compat::BlasOperation::BLAS_OP_N, compat::BlasOperation::BLAS_OP_N, size_use_i,
-                                 size_use_i, size_use_i, &alpha, a_data_ptr, size_use_i, b_data_ptr, size_use_i, &beta,
-                                 c_data_ptr + i * size_use * size_use, size_use_i),
-                             __FILE__, __LINE__, device_index);
-      compat::accellSafeCall(compat::deviceSynchronize<>(), __FILE__, __LINE__, device_index);
+  while (LoadVar != firestarter::LoadThreadWorkType::LoadStop) {
+    for (int I = 0; I < Iterations; I++) {
+      compat::accellSafeCall(compat::gemm<FloatingPointType>(Blas, compat::BlasOperation::BLAS_OP_N,
+                                                             compat::BlasOperation::BLAS_OP_N, SizeUseI, SizeUseI,
+                                                             SizeUseI, Alpha, ADataPtr, SizeUseI, BDataPtr, SizeUseI,
+                                                             Beta, CDataPtr + (I * SizeUse * SizeUse), SizeUseI),
+                             __FILE__, __LINE__, DeviceIndex);
+      compat::accellSafeCall(compat::deviceSynchronize(), __FILE__, __LINE__, DeviceIndex);
     }
   }
 
-  compat::accellSafeCall(compat::free<>(a_data_ptr), __FILE__, __LINE__, device_index);
-  compat::accellSafeCall(compat::free<>(b_data_ptr), __FILE__, __LINE__, device_index);
-  compat::accellSafeCall(compat::free<>(c_data_ptr), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::free<FloatingPointType>(ADataPtr), __FILE__, __LINE__, DeviceIndex);
+  compat::accellSafeCall(compat::free<FloatingPointType>(BDataPtr), __FILE__, __LINE__, DeviceIndex);
+  compat::accellSafeCall(compat::free<FloatingPointType>(CDataPtr), __FILE__, __LINE__, DeviceIndex);
 
-  compat::accellSafeCall(compat::blasDestroy<>(blas), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::blasDestroy(Blas), __FILE__, __LINE__, DeviceIndex);
 
-  compat::accellSafeCall(compat::destroyContextOrStream<>(stream_or_context), __FILE__, __LINE__, device_index);
+  compat::accellSafeCall(compat::destroyContextOrStream(StreamOrContext), __FILE__, __LINE__, DeviceIndex);
 }
 
 Cuda::Cuda(const volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize,
@@ -267,7 +267,7 @@ Cuda::Cuda(const volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloa
   std::thread T(Cuda::initGpus, std::ref(WaitForInitCv), std::cref(LoadVar), UseFloat, UseDouble, MatrixSize, Gpus);
   InitThread = std::move(T);
 
-  const std::unique_lock<std::mutex> Lk(WaitForInitCvMutex);
+  std::unique_lock<std::mutex> Lk(WaitForInitCvMutex);
   // wait for gpus to initialize
   WaitForInitCv.wait(Lk);
 }
@@ -279,10 +279,10 @@ void Cuda::initGpus(std::condition_variable& WaitForInitCv, const volatile fires
   std::vector<std::thread> GpuThreads;
 
   if (Gpus) {
-    compat::accellSafeCall(compat::init<>(0), __FILE__, __LINE__);
+    compat::accellSafeCall(compat::init(0), __FILE__, __LINE__);
 
     int DevCount;
-    compat::accellSafeCall(compat::getDeviceCount<>(DevCount), __FILE__, __LINE__);
+    compat::accellSafeCall(compat::getDeviceCount(DevCount), __FILE__, __LINE__);
 
     if (DevCount) {
       std::atomic<int> InitCount = 0;
@@ -318,17 +318,19 @@ void Cuda::initGpus(std::condition_variable& WaitForInitCv, const volatile fires
       }
 
       {
-        std::lock_guard<std::mutex> Lk(GpuThreadsWaitForInitCvMutex);
+        const std::lock_guard<std::mutex> Lk(GpuThreadsWaitForInitCvMutex);
 
         for (int I = 0; I < Gpus; ++I) {
           // if there's a GPU in the system without Double Precision support, we
           // have to correct this.
-          int Precision = get_precision(I, UseDoubleConverted);
-          void (*LoadFunc)(std::condition_variable&, std::mutex&, int, std::atomic<int>&, volatile uint64_t*, int) =
-              Precision ? create_load<double> : create_load<float>;
-
-          std::thread t(LoadFunc, std::ref(GpuThreadsWaitForInitCv), std::ref(GpuThreadsWaitForInitCvMutex), I,
-                        std::ref(InitCount), std::cref(LoadVar), (int)MatrixSize);
+          const auto Precision = getPrecision(I, UseDoubleConverted);
+          void (*LoadFunc)(std::condition_variable&, std::mutex&, int, std::atomic<int>&,
+                           const volatile firestarter::LoadThreadWorkType&, unsigned) =
+              Precision ? createLoad<double> : createLoad<float>;
+
+          std::thread T(LoadFunc, std::ref(GpuThreadsWaitForInitCv), std::ref(GpuThreadsWaitForInitCvMutex), I,
+                        std::ref(InitCount), std::cref(LoadVar), MatrixSize);
+          GpuThreads.emplace_back(std::move(T));
         }
       }
 

From 4307b1347efe3f2fea593667d0b91f96e891c853 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 17 Oct 2024 14:33:41 +0200
Subject: [PATCH 056/167] fix some cuda compile errors

---
 src/firestarter/Cuda/Cuda.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/firestarter/Cuda/Cuda.cpp b/src/firestarter/Cuda/Cuda.cpp
index 5cd18cdc..23f4dfeb 100644
--- a/src/firestarter/Cuda/Cuda.cpp
+++ b/src/firestarter/Cuda/Cuda.cpp
@@ -94,9 +94,9 @@ static auto getPrecision(int DeviceIndex, int UseDouble) -> int {
 
   bool DoubleNotSupported =
 #ifdef FIRESTARTER_BUILD_CUDA
-      properties.major <= 1 && properties.minor <= 2;
+      Properties.major <= 1 && Properties.minor <= 2;
 #elif defined(FIRESTARTER_BUILD_HIP)
-      !properties.hasDoubles;
+      !Properties.hasDoubles;
 #else
       true;
 #endif

From 14786a36b6db2021a8789bba52a5c2bd447ffb2a Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 18 Oct 2024 11:26:11 +0200
Subject: [PATCH 057/167] cuda/hip compat: replace some static_assert with
 assert to cope with less powerful static analysis of older clang/gcc versions

---
 include/firestarter/Cuda/CudaHipCompat.hpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp
index 04ccf70b..6c87e914 100644
--- a/include/firestarter/Cuda/CudaHipCompat.hpp
+++ b/include/firestarter/Cuda/CudaHipCompat.hpp
@@ -29,6 +29,7 @@
 
 #pragma once
 
+#include <cassert>
 #include <cstddef>
 #include <firestarter/Logging/Log.hpp>
 #include <optional>
@@ -372,7 +373,7 @@ template <typename T> void accellSafeCall(T TVal, const char* File, const int Li
       return;
     }
   } else {
-    static_assert(false, "Tried to call accellSafeCall with an unknown type.");
+    assert(false && "Tried to call accellSafeCall with an unknown type.");
   }
 
   std::stringstream Ss;
@@ -632,11 +633,11 @@ auto randGenerateUniformDouble(RandGenerator& RandomGen, DevicePtr<double> Outpu
 template <typename FloatPointType>
 auto generateUniform(RandGenerator& Generator, DevicePtr<FloatPointType> OutputPtr, size_t Num) -> RandStatusT {
   if constexpr (std::is_same_v<FloatPointType, float>) {
-    return static_cast<RandStatusT>(randGenerateUniform(Generator, OutputPtr, Num));
+    return randGenerateUniform(Generator, OutputPtr, Num);
   } else if constexpr (std::is_same_v<FloatPointType, double>) {
-    return static_cast<RandStatusT>(randGenerateUniformDouble(Generator, OutputPtr, Num));
+    return randGenerateUniformDouble(Generator, OutputPtr, Num);
   } else {
-    static_assert(false, "generateUniform<FloatPointType>: Template argument must be either float or double");
+    assert(false && "generateUniform<FloatPointType>: Template argument must be either float or double");
   }
 }
 
@@ -748,7 +749,7 @@ auto gemm(BlasHandle Handle, BlasOperation Transa, BlasOperation Transb, int M,
     (void)Beta;
     (void)C;
     (void)Ldc;
-    static_assert(false, "gemm<FloatPointType>: Template argument must be either float or double");
+    assert(false && "gemm<FloatPointType>: Template argument must be either float or double");
   }
 
 #if not(defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP))

From 73f3d8870cdd7ebcf97ed921f92beb57e09e34bf Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 18 Oct 2024 11:51:54 +0200
Subject: [PATCH 058/167] ci: set cxx compiler correctly for openapi build

---
 .github/workflows/cmake.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index 6b4c9178..bff5f02d 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -234,12 +234,16 @@ jobs:
         cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_CUDA" -DCMAKE_EXE_LINKER_FLAGS=-L"$CUDA_ROOT/$NVARCH/22.5/cuda/11.7/lib64/stubs" -LA ..
     - name: Run CMake configure (OneAPI 2023.2.0)
       if: matrix.CUDA == '0' && matrix.ONEAPI =='2023.2.0'
+      env:
+        CXX: ${{ matrix.compiler }}
       run: |
         . /opt/intel/oneapi/setvars.sh
         cd build
         cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_ONEAPI" ..
     - name: Run CMake configure (OneAPI 2024.0)
       if: matrix.CUDA == '0' && matrix.ONEAPI =='2024.0'
+      env:
+        CXX: ${{ matrix.compiler }}
       run: |
         . /opt/intel/oneapi/${{ matrix.ONEAPI }}/oneapi-vars.sh
         cd build

From 47fc3269df9f8b5c5a1dfd47fada8c16a2f5065b Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 18 Oct 2024 11:55:21 +0200
Subject: [PATCH 059/167] windowscompat: remove inline

---
 include/firestarter/WindowsCompat.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index 3af72364..e88f642e 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -38,9 +38,9 @@ namespace {
 #pragma GCC diagnostic ignored "-Wunused-function"
 #if defined(__clang__)
 #elif not(defined(__MINGW32__) || defined(__MINGW64__))
-inline void _mm_mfence() noexcept {};
+void _mm_mfence() noexcept {};
 #endif
-inline void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
+void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
 #pragma GCC diagnostic pop
 #if defined(__clang__)
 #pragma clang diagnostic pop
@@ -57,12 +57,12 @@ inline void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
 
 namespace {
 #include <direct.h>
-inline auto get_current_dir_name() -> char* { return _getcwd(nullptr, 0); }
+auto get_current_dir_name() -> char* { return _getcwd(nullptr, 0); }
 } // namespace
 #elif defined(__APPLE__)
 #include <unistd.h>
 namespace {
-inline auto get_current_dir_name() -> char* { return getcwd(nullptr, 0); }
+auto get_current_dir_name() -> char* { return getcwd(nullptr, 0); }
 } // namespace
 #else
 #include <unistd.h>

From 0e4fea0dfb81a8b9b5799ddc007d54b8cc25a616 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 18 Oct 2024 11:55:34 +0200
Subject: [PATCH 060/167] remove compile error

---
 include/firestarter/Cuda/Cuda.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/firestarter/Cuda/Cuda.hpp b/include/firestarter/Cuda/Cuda.hpp
index df16a731..ebf63762 100644
--- a/include/firestarter/Cuda/Cuda.hpp
+++ b/include/firestarter/Cuda/Cuda.hpp
@@ -41,7 +41,7 @@ class Cuda {
       ;
 #else
   {
-    (void)LoadVar;
+    (void)&LoadVar;
     (void)UseFloat;
     (void)UseDouble;
     (void)MatrixSize;

From a7882737bfe8da3d19367eb80f611e3bf0d3115e Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 18 Oct 2024 12:05:05 +0200
Subject: [PATCH 061/167] windows compat: remove anonymous namespace for
 __cpuid and _mm_mfence

---
 include/firestarter/WindowsCompat.hpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index e88f642e..5d1995dd 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -25,7 +25,6 @@
 #ifdef _MSC_VER
 #include <intrin.h>
 #else
-namespace {
 
 /// Define the _mm_mfence and __cpuid function when we are not using MSC to enable the use of if constexpr instead of
 /// ifdefs.
@@ -38,16 +37,15 @@ namespace {
 #pragma GCC diagnostic ignored "-Wunused-function"
 #if defined(__clang__)
 #elif not(defined(__MINGW32__) || defined(__MINGW64__))
-void _mm_mfence() noexcept {};
+static void _mm_mfence() noexcept {};
 #endif
-void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
+static void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
 #pragma GCC diagnostic pop
 #if defined(__clang__)
 #pragma clang diagnostic pop
 #endif
 // NOLINTEND(readability-identifier-naming,cert-dcl37-c,cert-dcl37-cpp,cert-dcl51-cpp,bugprone-reserved-identifier)
 
-} // namespace
 #endif
 // NOLINTEND(cert-dcl59-cpp,google-build-namespaces)
 

From 5912cb48433fce00c9f4c80903d7393b6822c485 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 18 Oct 2024 12:09:11 +0200
Subject: [PATCH 062/167] windows compat: remove anonymous namespaces from
 header

---
 include/firestarter/WindowsCompat.hpp | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index 5d1995dd..cdc1e04c 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -21,7 +21,6 @@
 
 #pragma once
 
-// NOLINTBEGIN(cert-dcl59-cpp,google-build-namespaces)
 #ifdef _MSC_VER
 #include <intrin.h>
 #else
@@ -47,21 +46,16 @@ static void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
 // NOLINTEND(readability-identifier-naming,cert-dcl37-c,cert-dcl37-cpp,cert-dcl51-cpp,bugprone-reserved-identifier)
 
 #endif
-// NOLINTEND(cert-dcl59-cpp,google-build-namespaces)
 
 #ifdef _WIN32
 // SIGALRM is not available on Windows
 #define SIGALRM 0
 
-namespace {
 #include <direct.h>
-auto get_current_dir_name() -> char* { return _getcwd(nullptr, 0); }
-} // namespace
+static auto get_current_dir_name() -> char* { return _getcwd(nullptr, 0); }
 #elif defined(__APPLE__)
 #include <unistd.h>
-namespace {
-auto get_current_dir_name() -> char* { return getcwd(nullptr, 0); }
-} // namespace
+static auto get_current_dir_name() -> char* { return getcwd(nullptr, 0); }
 #else
 #include <unistd.h>
 #endif

From 20b6360b63860a68245dad2f705e495b9fef38dc Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 18 Oct 2024 12:21:48 +0200
Subject: [PATCH 063/167] ci: do not fail fast in linux build

---
 .github/workflows/cmake.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index bff5f02d..8fb7bd51 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -7,7 +7,7 @@ jobs:
   build-linux-hip-rocm:
     strategy:
 #      max-parallel: 1 # Sets the limit of jobs to run concurrently
-      fail-fast: true
+      fail-fast: false
       matrix:
         os: [ubuntu-22.04]
         compiler: [g++-9, g++-10, g++-11, g++-12, clang++-11, clang++-12, clang++-13, clang++-14, clang++-15]
@@ -115,7 +115,7 @@ jobs:
   build-linux:
     strategy:
 #      max-parallel: 1 # Sets the limit of jobs to run concurrently
-      fail-fast: true
+      fail-fast: false
       matrix:
         os: [ubuntu-20.04]
         compiler: [g++-7, g++-8, g++-9, g++-10, clang++-8, clang++-9, clang++-10]

From aefe54c90d0d6cb55a038007c5c12a31291c98d8 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 18 Oct 2024 12:30:56 +0200
Subject: [PATCH 064/167] windowscompat: use static inline

---
 include/firestarter/WindowsCompat.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index cdc1e04c..6ec68647 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -36,9 +36,9 @@
 #pragma GCC diagnostic ignored "-Wunused-function"
 #if defined(__clang__)
 #elif not(defined(__MINGW32__) || defined(__MINGW64__))
-static void _mm_mfence() noexcept {};
+static inline void _mm_mfence() noexcept {};
 #endif
-static void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
+static inline void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
 #pragma GCC diagnostic pop
 #if defined(__clang__)
 #pragma clang diagnostic pop
@@ -52,10 +52,10 @@ static void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
 #define SIGALRM 0
 
 #include <direct.h>
-static auto get_current_dir_name() -> char* { return _getcwd(nullptr, 0); }
+static inline auto get_current_dir_name() -> char* { return _getcwd(nullptr, 0); }
 #elif defined(__APPLE__)
 #include <unistd.h>
-static auto get_current_dir_name() -> char* { return getcwd(nullptr, 0); }
+static inline auto get_current_dir_name() -> char* { return getcwd(nullptr, 0); }
 #else
 #include <unistd.h>
 #endif

From 99c03f9d7ab3e8d3432bb732e036f9744b856a88 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 18 Oct 2024 18:29:15 +0200
Subject: [PATCH 065/167] fix oneapi

---
 include/firestarter/OneAPI/OneAPI.hpp |   9 +-
 src/firestarter/OneAPI/OneAPI.cpp     | 142 +++++++++++++-------------
 2 files changed, 73 insertions(+), 78 deletions(-)

diff --git a/include/firestarter/OneAPI/OneAPI.hpp b/include/firestarter/OneAPI/OneAPI.hpp
index a15a1572..0c8a6e5f 100644
--- a/include/firestarter/OneAPI/OneAPI.hpp
+++ b/include/firestarter/OneAPI/OneAPI.hpp
@@ -23,7 +23,6 @@
 
 #include "firestarter/Constants.hpp"
 #include <condition_variable>
-#include <mutex>
 #include <thread>
 
 namespace firestarter::oneapi {
@@ -31,14 +30,12 @@ namespace firestarter::oneapi {
 class OneAPI {
 private:
   std::thread InitThread;
-  std::condition_variable WaitForInitCv;
-  std::mutex WaitForInitCvMutex;
 
-  static void initGpus(std::condition_variable& Cv, const volatile firestarter::LoadThreadWorkType& LoadVar,
+  static void initGpus(std::condition_variable& WaitForInitCv, const volatile firestarter::LoadThreadWorkType& LoadVar,
                        bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus);
 
 public:
-  OneAPI(volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize,
+  OneAPI(const volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize,
          int Gpus)
 #if defined(FIRESTARTER_BUILD_ONEAPI)
       ;
@@ -59,4 +56,4 @@ class OneAPI {
   }
 };
 
-} // namespace firestarter::oneapi
+} // namespace firestarter::oneapi
\ No newline at end of file
diff --git a/src/firestarter/OneAPI/OneAPI.cpp b/src/firestarter/OneAPI/OneAPI.cpp
index 6ebb2da3..a78efcad 100644
--- a/src/firestarter/OneAPI/OneAPI.cpp
+++ b/src/firestarter/OneAPI/OneAPI.cpp
@@ -33,7 +33,7 @@
 #include <atomic>
 #include <type_traits>
 
-using namespace firestarter::oneapi;
+namespace firestarter::oneapi {
 
 /* Random number generation helpers */
 template <typename T> void generate_random_data(size_t elems, T* v) {
@@ -116,7 +116,8 @@ static int round_up(int num_to_round, int multiple) {
 // GPU index. Used to pin this thread to the GPU.
 template <typename T>
 static void create_load(std::condition_variable& waitForInitCv, std::mutex& waitForInitCvMutex, int device_index,
-                        std::atomic<int>& initCount, volatile uint64_t* loadVar, int matrixSize) {
+                        std::atomic<int>& initCount, const volatile firestarter::LoadThreadWorkType& LoadVar,
+                        unsigned matrixSize) {
   static_assert(std::is_same<T, float>::value || std::is_same<T, double>::value,
                 "create_load<T>: Template argument T must be either float or double");
 
@@ -227,63 +228,64 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait
         return runs;
     };
   */
-  while (*loadVar != LOAD_STOP) {
+  while (LoadVar != firestarter::LoadThreadWorkType::LoadStop) {
     firestarter::log::trace() << "Run gemm on device nr. " << device_index;
-    oneapi::mkl::blas::gemm(device_queue, oneapi::mkl::transpose::N, oneapi::mkl::transpose::N, size_use, size_use,
-                            size_use, 1, A, size_use, B, size_use, 0, C, size_use);
+    ::oneapi::mkl::blas::gemm(device_queue, ::oneapi::mkl::transpose::N, ::oneapi::mkl::transpose::N, size_use,
+                              size_use, size_use, 1, A, size_use, B, size_use, 0, C, size_use);
     firestarter::log::trace() << "wait gemm on device nr. " << device_index;
     device_queue.wait_and_throw();
   }
 }
 
-OneAPI::OneAPI(volatile uint64_t* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) {
-  std::thread t(OneAPI::initGpus, std::ref(_waitForInitCv), loadVar, useFloat, useDouble, matrixSize, gpus);
-  _initThread = std::move(t);
+OneAPI::OneAPI(const volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble,
+               unsigned MatrixSize, int Gpus) {
+  std::condition_variable WaitForInitCv;
+  std::mutex WaitForInitCvMutex;
 
-  std::unique_lock<std::mutex> lk(_waitForInitCvMutex);
+  std::thread T(OneAPI::initGpus, std::ref(WaitForInitCv), std::cref(LoadVar), UseFloat, UseDouble, MatrixSize, Gpus);
+  InitThread = std::move(T);
+
+  std::unique_lock<std::mutex> Lk(WaitForInitCvMutex);
   // wait for gpus to initialize
-  _waitForInitCv.wait(lk);
+  WaitForInitCv.wait(Lk);
 }
 
-void OneAPI::initGpus(std::condition_variable& cv, volatile uint64_t* loadVar, bool useFloat, bool useDouble,
-                      unsigned matrixSize, int gpus) {
-  std::condition_variable waitForInitCv;
-  std::mutex waitForInitCvMutex;
-
-  if (gpus) {
+void OneAPI::initGpus(std::condition_variable& WaitForInitCv, const volatile firestarter::LoadThreadWorkType& LoadVar,
+                      bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus) {
+  std::condition_variable GpuThreadsWaitForInitCv;
+  std::mutex GpuThreadsWaitForInitCvMutex;
+  std::vector<std::thread> GpuThreads;
 
-    auto platforms = sycl::platform::get_platforms();
+  if (Gpus) {
+    auto Platforms = sycl::platform::get_platforms();
 
-    if (platforms.empty()) {
+    if (Platforms.empty()) {
       std::cerr << "No SYCL platforms found." << std::endl;
       return;
     }
 
     // Choose a platform based on specific criteria (e.g., device type)
-    sycl::platform chosenPlatform;
-    auto devCount = 0;
-    for (const auto& platform : platforms) {
-      auto devices = platform.get_devices();
-      devCount = 0;
-      for (const auto& device : devices) {
-        if (device.is_gpu()) { // Choose GPU, you can use other criteria
-          chosenPlatform = platform;
-          devCount++;
+    auto DevCount = 0;
+    for (const auto& Platform : Platforms) {
+      auto Devices = Platform.get_devices();
+      DevCount = 0;
+      for (const auto& Device : Devices) {
+        if (Device.is_gpu()) { // Choose GPU, you can use other criteria
+          DevCount++;
         }
       }
     }
 
-    if (devCount) {
-      std::vector<std::thread> gpuThreads;
-      std::atomic<int> initCount = 0;
-      int use_double;
+    if (DevCount) {
+      std::atomic<int> InitCount = 0;
+      int UseDoubleConverted;
 
-      if (useFloat) {
-        use_double = 0;
-      } else if (useDouble) {
-        use_double = 1;
+      if (UseFloat) {
+        UseDoubleConverted = 0;
+      } else if (UseDouble) {
+        UseDoubleConverted = 1;
       } else {
-        use_double = 2;
+        UseDoubleConverted = 2;
       }
 
       firestarter::log::info()
@@ -293,62 +295,58 @@ void OneAPI::initGpus(std::condition_variable& cv, volatile uint64_t* loadVar, b
           << "\n  graphics processor characteristics:";
 
       // use all GPUs if the user gave no information about use_device
-      if (gpus < 0) {
-        gpus = devCount;
+      if (Gpus < 0) {
+        Gpus = DevCount;
       }
-      if (gpus > devCount) {
-        firestarter::log::warn() << "You requested more OneAPI devices than available.";
-        firestarter::log::warn() << "FIRESTARTER will use " << devCount << " of the requested " << gpus
+
+      if (Gpus > DevCount) {
+        firestarter::log::warn() << "You requested more OneAPI devices than available. "
+                                    "Maybe you set OneAPI_VISIBLE_DEVICES?";
+        firestarter::log::warn() << "FIRESTARTER will use " << DevCount << " of the requested " << Gpus
                                  << " OneAPI device(s)";
-        gpus = devCount;
+        Gpus = DevCount;
       }
 
       {
-        std::lock_guard<std::mutex> lk(waitForInitCvMutex);
+        const std::lock_guard<std::mutex> Lk(GpuThreadsWaitForInitCvMutex);
 
-        for (int i = 0; i < gpus; ++i) {
-          // if there's a GPU in the system without Double Precision support, we
-          // have to correct this.
-          int precision = get_precision(i, use_double);
-          if (precision == -1) {
+        for (int I = 0; I < Gpus; ++I) {
+          const auto Precision = get_precision(I, UseDoubleConverted);
+          if (Precision == -1) {
             firestarter::log::warn() << "This should not have happened. Could not get precision via SYCL.";
           }
+          void (*LoadFunc)(std::condition_variable&, std::mutex&, int, std::atomic<int>&,
+                           const volatile firestarter::LoadThreadWorkType&, unsigned) =
+              Precision ? create_load<double> : create_load<float>;
 
-          if (precision) {
-            firestarter::log::trace() << "Starting OneAPI GPU double workload.";
-            std::thread t(create_load<double>, std::ref(waitForInitCv), std::ref(waitForInitCvMutex), i,
-                          std::ref(initCount), loadVar, (int)matrixSize);
-            gpuThreads.push_back(std::move(t));
-          } else {
-            firestarter::log::trace() << "Starting OneAPI GPU float workload.";
-            std::thread t(create_load<float>, std::ref(waitForInitCv), std::ref(waitForInitCvMutex), i,
-                          std::ref(initCount), loadVar, (int)matrixSize);
-            gpuThreads.push_back(std::move(t));
-          }
+          std::thread T(LoadFunc, std::ref(GpuThreadsWaitForInitCv), std::ref(GpuThreadsWaitForInitCvMutex), I,
+                        std::ref(InitCount), std::cref(LoadVar), MatrixSize);
+          GpuThreads.emplace_back(std::move(T));
         }
       }
 
       {
-        std::unique_lock<std::mutex> lk(waitForInitCvMutex);
+        std::unique_lock<std::mutex> Lk(GpuThreadsWaitForInitCvMutex);
         // wait for all threads to initialize
-        waitForInitCv.wait(lk, [&] { return initCount == gpus; });
-      }
-
-      // notify that init is done
-      cv.notify_all();
-
-      /* join computation threads */
-      for (auto& t : gpuThreads) {
-        t.join();
+        GpuThreadsWaitForInitCv.wait(Lk, [&] { return InitCount == Gpus; });
       }
     } else {
-      firestarter::log::info() << "    - No OneAPI devices. Just stressing CPU(s). Maybe use "
+      firestarter::log::info() << "    - No OneAPI"
+                               << " devices. Just stressing CPU(s). Maybe use "
                                   "FIRESTARTER instead of FIRESTARTER_OneAPI?";
-      cv.notify_all();
     }
   } else {
     firestarter::log::info() << "    --gpus 0 is set. Just stressing CPU(s). Maybe use "
                                 "FIRESTARTER instead of FIRESTARTER_OneAPI?";
-    cv.notify_all();
+  }
+
+  // notify that init is done
+  WaitForInitCv.notify_all();
+
+  /* join computation threads */
+  for (auto& Thread : GpuThreads) {
+    Thread.join();
   }
 }
+
+} // namespace firestarter::oneapi
\ No newline at end of file

From c9d57c19999def49655ac14aeb3a4f414f404ab5 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 18 Oct 2024 18:29:52 +0200
Subject: [PATCH 066/167] windows compat: fix compatibility with icx

---
 include/firestarter/WindowsCompat.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index 6ec68647..b39264e8 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -38,7 +38,9 @@
 #elif not(defined(__MINGW32__) || defined(__MINGW64__))
 static inline void _mm_mfence() noexcept {};
 #endif
+#if not defined(__INTEL_LLVM_COMPILER)
 static inline void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
+#endif
 #pragma GCC diagnostic pop
 #if defined(__clang__)
 #pragma clang diagnostic pop

From 4b652fdf162290579c6df2cf912649d698628f37 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 18 Oct 2024 18:30:01 +0200
Subject: [PATCH 067/167] remove warning

---
 include/firestarter/Environment/X86/Payload/X86Payload.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index 7de87098..e46144c0 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -103,7 +103,7 @@ class X86Payload : public environment::payload::Payload {
                   "IterReg is not of any supported type");
 
     if constexpr (std::is_same_v<asmjit::x86::Mm, IterRegT>) {
-      assert((IterReg == asmjit::x86::mm0, "iter_reg must be mm0"));
+      assert(IterReg == asmjit::x86::mm0 && "iter_reg must be mm0");
     }
 
     assert(IterReg != TempReg && "iter_reg must be != temp_reg");

From def3a9f79cc7f441d627866cb5aad9eb3355f07f Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 18 Oct 2024 18:30:42 +0200
Subject: [PATCH 068/167] Revert "ci: set cxx compiler correctly for openapi
 build"

This reverts commit 73f3d8870cdd7ebcf97ed921f92beb57e09e34bf.
---
 .github/workflows/cmake.yml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index 8fb7bd51..0c874b61 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -234,16 +234,12 @@ jobs:
         cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_CUDA" -DCMAKE_EXE_LINKER_FLAGS=-L"$CUDA_ROOT/$NVARCH/22.5/cuda/11.7/lib64/stubs" -LA ..
     - name: Run CMake configure (OneAPI 2023.2.0)
       if: matrix.CUDA == '0' && matrix.ONEAPI =='2023.2.0'
-      env:
-        CXX: ${{ matrix.compiler }}
       run: |
         . /opt/intel/oneapi/setvars.sh
         cd build
         cmake -DFIRESTARTER_BUILD_TYPE="FIRESTARTER_ONEAPI" ..
     - name: Run CMake configure (OneAPI 2024.0)
       if: matrix.CUDA == '0' && matrix.ONEAPI =='2024.0'
-      env:
-        CXX: ${{ matrix.compiler }}
       run: |
         . /opt/intel/oneapi/${{ matrix.ONEAPI }}/oneapi-vars.sh
         cd build

From 8973ddfa3b51dee54923a7d2bdb398580fc5a04c Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 18 Oct 2024 23:31:04 +0200
Subject: [PATCH 069/167] fix hip/cuda compat layer

---
 include/firestarter/Cuda/CudaHipCompat.hpp | 10 +++++-----
 include/firestarter/WindowsCompat.hpp      |  2 +-
 src/firestarter/Cuda/Cuda.cpp              |  4 +---
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp
index 6c87e914..f2200d95 100644
--- a/include/firestarter/Cuda/CudaHipCompat.hpp
+++ b/include/firestarter/Cuda/CudaHipCompat.hpp
@@ -149,7 +149,7 @@ enum class BlasStatusT : std::underlying_type_t<hipblasStatus_t> {
 
 constexpr const char* AccelleratorString = "HIP";
 
-enum class ErrorT ErrorT : std::underlying_type_t<hipError_t> {
+enum class ErrorT : std::underlying_type_t<hipError_t> {
   Success = hipSuccess,
 };
 
@@ -395,7 +395,7 @@ auto init(unsigned int Flags) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return cuInit(Flags);
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hipInit(Flags);
+  return static_cast<CUResultOrHipErrorT>(hipInit(Flags));
 #else
   (void)Flags;
   static_assert(false, "Tried to call init, but neither building for CUDA nor HIP.");
@@ -409,7 +409,7 @@ auto getDeviceCount(int& DevCount) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return cuDeviceGetCount(&DevCount);
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return hipGetDeviceCount(&DevCount);
+  return static_cast<CUResultOrHipErrorT>(hipGetDeviceCount(&DevCount));
 #else
   (void)DevCount;
   static_assert(false, "Tried to call getDeviceCount, but neither building for CUDA nor HIP.");
@@ -434,8 +434,8 @@ auto createContextOrStream(int DeviceIndex) -> StreamOrContext {
 #elif defined(FIRESTARTER_BUILD_HIP)
   firestarter::log::trace() << "Creating " << AccelleratorString << " Stream for computation on device nr. "
                             << DeviceIndex;
-  accellSafeCall(hipSetDevice(DeviceIndex), __FILE__, __LINE__, DeviceIndex);
-  accellSafeCall(hipStreamCreate(&Soc), __FILE__, __LINE__, DeviceIndex);
+  accellSafeCall(static_cast<ErrorT>(hipSetDevice(DeviceIndex)), __FILE__, __LINE__, DeviceIndex);
+  accellSafeCall(static_cast<ErrorT>(hipStreamCreate(&Soc)), __FILE__, __LINE__, DeviceIndex);
 #else
   (void)DeviceIndex;
   static_assert(false, "Tried to call createContextOrStream, but neither building for CUDA nor HIP.");
diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index b39264e8..a879d6f5 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -38,7 +38,7 @@
 #elif not(defined(__MINGW32__) || defined(__MINGW64__))
 static inline void _mm_mfence() noexcept {};
 #endif
-#if not defined(__INTEL_LLVM_COMPILER)
+#if not(defined(__INTEL_LLVM_COMPILER) || defined(__HIP__))
 static inline void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
 #endif
 #pragma GCC diagnostic pop
diff --git a/src/firestarter/Cuda/Cuda.cpp b/src/firestarter/Cuda/Cuda.cpp
index 23f4dfeb..6804e2a3 100644
--- a/src/firestarter/Cuda/Cuda.cpp
+++ b/src/firestarter/Cuda/Cuda.cpp
@@ -95,10 +95,8 @@ static auto getPrecision(int DeviceIndex, int UseDouble) -> int {
   bool DoubleNotSupported =
 #ifdef FIRESTARTER_BUILD_CUDA
       Properties.major <= 1 && Properties.minor <= 2;
-#elif defined(FIRESTARTER_BUILD_HIP)
-      !Properties.hasDoubles;
 #else
-      true;
+      false;
 #endif
 
   // we check for double precision support on the GPU and print errormsg, when

From b396920b6fdd8faf6aef267a68d0a84acac9fdc3 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 18 Oct 2024 23:56:55 +0200
Subject: [PATCH 070/167] fix problem with metric interface and hip compile

---
 .../Measurement/MeasurementWorker.hpp         |  6 +-
 .../Metric/{IPCEstimate.h => IPCEstimate.hpp} | 37 ++++++--
 include/firestarter/Measurement/Metric/Perf.h | 36 --------
 .../firestarter/Measurement/Metric/Perf.hpp   | 91 +++++++++++++++++++
 include/firestarter/Measurement/Metric/RAPL.h | 34 -------
 .../firestarter/Measurement/Metric/RAPL.hpp   | 73 +++++++++++++++
 src/firestarter/Firestarter.cpp               |  2 +-
 src/firestarter/LoadWorker.cpp                |  2 +-
 .../Measurement/Metric/IPCEstimate.cpp        | 56 +++---------
 src/firestarter/Measurement/Metric/Perf.cpp   | 89 ++++--------------
 src/firestarter/Measurement/Metric/RAPL.cpp   | 85 +++++------------
 11 files changed, 253 insertions(+), 258 deletions(-)
 rename include/firestarter/Measurement/Metric/{IPCEstimate.h => IPCEstimate.hpp} (51%)
 delete mode 100644 include/firestarter/Measurement/Metric/Perf.h
 create mode 100644 include/firestarter/Measurement/Metric/Perf.hpp
 delete mode 100644 include/firestarter/Measurement/Metric/RAPL.h
 create mode 100644 include/firestarter/Measurement/Metric/RAPL.hpp

diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp
index 7396e713..a6f6e0f1 100644
--- a/include/firestarter/Measurement/MeasurementWorker.hpp
+++ b/include/firestarter/Measurement/MeasurementWorker.hpp
@@ -21,9 +21,9 @@
 
 #pragma once
 
-#include "Metric/IPCEstimate.h"
-#include "Metric/Perf.h"
-#include "Metric/RAPL.h"
+#include "Metric/IPCEstimate.hpp"
+#include "Metric/Perf.hpp"
+#include "Metric/RAPL.hpp"
 #include "MetricInterface.h"
 #include "Summary.hpp"
 #include "TimeValue.hpp"
diff --git a/include/firestarter/Measurement/Metric/IPCEstimate.h b/include/firestarter/Measurement/Metric/IPCEstimate.hpp
similarity index 51%
rename from include/firestarter/Measurement/Metric/IPCEstimate.h
rename to include/firestarter/Measurement/Metric/IPCEstimate.hpp
index f5362f93..0c57a34c 100644
--- a/include/firestarter/Measurement/Metric/IPCEstimate.h
+++ b/include/firestarter/Measurement/Metric/IPCEstimate.hpp
@@ -22,15 +22,34 @@
 #pragma once
 
 #include "../MetricInterface.h"
+#include <string>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern const MetricInterface IpcEstimateMetric;
-
-extern void ipcEstimateMetricInsert(double Value);
+struct IpcEstimateMetricData {
+  static std::string ErrorString;
+  static void (*Callback)(void*, const char*, int64_t, double);
+  static void* CallbackArg;
+  static auto fini() -> int32_t;
+  static auto init() -> int32_t;
+  static auto getError() -> const char*;
+  static auto registerInsertCallback(void (*C)(void*, const char*, int64_t, double), void* Arg) -> int32_t;
+};
 
-#ifdef __cplusplus
+const MetricInterface IpcEstimateMetric = {
+    .Name = "ipc-estimate",
+    .Type = {.Absolute = 1,
+             .Accumalative = 0,
+             .DivideByThreadCount = 0,
+             .InsertCallback = 1,
+             .IgnoreStartStopDelta = 1,
+             .Reserved = 0},
+    .Unit = "IPC",
+    .CallbackTime = 0,
+    .Callback = nullptr,
+    .Init = IpcEstimateMetricData::init,
+    .Fini = IpcEstimateMetricData::fini,
+    .GetReading = nullptr,
+    .GetError = IpcEstimateMetricData::getError,
+    .RegisterInsertCallback = IpcEstimateMetricData::registerInsertCallback,
 };
-#endif
\ No newline at end of file
+
+void ipcEstimateMetricInsert(double Value);
\ No newline at end of file
diff --git a/include/firestarter/Measurement/Metric/Perf.h b/include/firestarter/Measurement/Metric/Perf.h
deleted file mode 100644
index 480fb808..00000000
--- a/include/firestarter/Measurement/Metric/Perf.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/******************************************************************************
- * FIRESTARTER - A Processor Stress Test Utility
- * Copyright (C) 2020-2023 TU Dresden, Center for Information Services and High
- * Performance Computing
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
- *
- * Contact: daniel.hackenberg@tu-dresden.de
- *****************************************************************************/
-
-#pragma once
-
-#include "../MetricInterface.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern const MetricInterface PerfIpcMetric;
-
-extern const MetricInterface PerfFreqMetric;
-
-#ifdef __cplusplus
-};
-#endif
\ No newline at end of file
diff --git a/include/firestarter/Measurement/Metric/Perf.hpp b/include/firestarter/Measurement/Metric/Perf.hpp
new file mode 100644
index 00000000..c2646b30
--- /dev/null
+++ b/include/firestarter/Measurement/Metric/Perf.hpp
@@ -0,0 +1,91 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2020-2023 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+#pragma once
+
+#include "../MetricInterface.h"
+#include <string>
+
+struct PerfMetricData {
+  inline static const char* PerfEventParanoidFile = "/proc/sys/kernel/perf_event_paranoid";
+
+  struct ReadFormat {
+    uint64_t Nr;
+    struct {
+      uint64_t Value;
+      uint64_t Id;
+    } Values[2];
+  };
+
+  static std::string ErrorString;
+  inline static int CpuCyclesFd = -1;
+  inline static int InstructionsFd = -1;
+  static uint64_t CpuCyclesId;
+  static uint64_t InstructionsId;
+  inline static bool InitDone = false;
+  static int32_t InitValue;
+  static struct ReadFormat Last;
+  static auto perfEventOpen(struct perf_event_attr* HwEvent, pid_t Pid, int Cpu, int GroupFd, unsigned long Flags)
+      -> long;
+  static auto fini() -> int32_t;
+  static auto init() -> int32_t;
+  static auto valueFromId(struct ReadFormat* Values, uint64_t Id) -> uint64_t;
+  static auto getReading(double* IpcValue, double* FreqValue) -> int32_t;
+  static auto getReadingIpc(double* Value) -> int32_t;
+  static auto getReadingFreq(double* Value) -> int32_t;
+  static auto getError() -> const char*;
+};
+
+const MetricInterface PerfIpcMetric = {
+    .Name = "perf-ipc",
+    .Type = {.Absolute = 1,
+             .Accumalative = 0,
+             .DivideByThreadCount = 0,
+             .InsertCallback = 0,
+             .IgnoreStartStopDelta = 0,
+             .Reserved = 0},
+    .Unit = "IPC",
+    .CallbackTime = 0,
+    .Callback = nullptr,
+    .Init = PerfMetricData::init,
+    .Fini = PerfMetricData::fini,
+    .GetReading = PerfMetricData::getReadingIpc,
+    .GetError = PerfMetricData::getError,
+    .RegisterInsertCallback = nullptr,
+};
+
+const MetricInterface PerfFreqMetric = {
+    .Name = "perf-freq",
+    .Type = {.Absolute = 0,
+             .Accumalative = 1,
+             .DivideByThreadCount = 1,
+             .InsertCallback = 0,
+             .IgnoreStartStopDelta = 0,
+             .Reserved = 0},
+    .Unit = "GHz",
+    .CallbackTime = 0,
+    .Callback = nullptr,
+    .Init = PerfMetricData::init,
+    .Fini = PerfMetricData::fini,
+    .GetReading = PerfMetricData::getReadingFreq,
+    .GetError = PerfMetricData::getError,
+    .RegisterInsertCallback = nullptr,
+};
\ No newline at end of file
diff --git a/include/firestarter/Measurement/Metric/RAPL.h b/include/firestarter/Measurement/Metric/RAPL.h
deleted file mode 100644
index 5076affe..00000000
--- a/include/firestarter/Measurement/Metric/RAPL.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/******************************************************************************
- * FIRESTARTER - A Processor Stress Test Utility
- * Copyright (C) 2020-2023 TU Dresden, Center for Information Services and High
- * Performance Computing
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
- *
- * Contact: daniel.hackenberg@tu-dresden.de
- *****************************************************************************/
-
-#pragma once
-
-#include "../MetricInterface.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern const MetricInterface RaplMetric;
-
-#ifdef __cplusplus
-};
-#endif
\ No newline at end of file
diff --git a/include/firestarter/Measurement/Metric/RAPL.hpp b/include/firestarter/Measurement/Metric/RAPL.hpp
new file mode 100644
index 00000000..0a6903fb
--- /dev/null
+++ b/include/firestarter/Measurement/Metric/RAPL.hpp
@@ -0,0 +1,73 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2020-2023 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+#pragma once
+
+#include "../MetricInterface.h"
+#include <memory>
+#include <string>
+#include <vector>
+
+struct RaplMetricData {
+  inline static const char* RaplPath = "/sys/class/powercap";
+
+  static std::string ErrorString;
+
+  struct ReaderDef {
+    char* Path;
+    long long int LastReading;
+    long long int Overflow;
+    long long int Max;
+  };
+
+  struct ReaderDefFree {
+    void operator()(struct ReaderDef* Def);
+  };
+
+  static std::vector<std::shared_ptr<struct ReaderDef>> Readers;
+
+  static auto fini() -> int32_t;
+  static auto init() -> int32_t;
+
+  static auto getReading(double* Value) -> int32_t;
+
+  static auto getError() -> const char*;
+
+  static void callback();
+};
+
+const MetricInterface RaplMetric = {
+    .Name = "sysfs-powercap-rapl",
+    .Type = {.Absolute = 0,
+             .Accumalative = 1,
+             .DivideByThreadCount = 0,
+             .InsertCallback = 0,
+             .IgnoreStartStopDelta = 0,
+             .Reserved = 0},
+    .Unit = "J",
+    .CallbackTime = 30000000,
+    .Callback = RaplMetricData::callback,
+    .Init = RaplMetricData::init,
+    .Fini = RaplMetricData::fini,
+    .GetReading = RaplMetricData::getReading,
+    .GetError = RaplMetricData::getError,
+    .RegisterInsertCallback = nullptr,
+};
\ No newline at end of file
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 776688b5..6b00d9ee 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -23,7 +23,7 @@
 #include <firestarter/Environment/X86/X86Environment.hpp>
 #include <firestarter/Firestarter.hpp>
 #include <firestarter/Logging/Log.hpp>
-#include <firestarter/Measurement/Metric/IPCEstimate.h>
+#include <firestarter/Measurement/Metric/IPCEstimate.hpp>
 #include <firestarter/Optimizer/Algorithm/NSGA2.hpp>
 #include <firestarter/Optimizer/History.hpp>
 #include <firestarter/Optimizer/Problem/CLIArgumentProblem.hpp>
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index 5a136a40..4c2bf289 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -30,7 +30,7 @@
 #include <limits>
 
 #if defined(linux) || defined(__linux__)
-#include <firestarter/Measurement/Metric/IPCEstimate.h>
+#include <firestarter/Measurement/Metric/IPCEstimate.hpp>
 #endif
 
 #ifdef ENABLE_VTRACING
diff --git a/src/firestarter/Measurement/Metric/IPCEstimate.cpp b/src/firestarter/Measurement/Metric/IPCEstimate.cpp
index 6bd5f7d9..989b492d 100644
--- a/src/firestarter/Measurement/Metric/IPCEstimate.cpp
+++ b/src/firestarter/Measurement/Metric/IPCEstimate.cpp
@@ -21,44 +21,36 @@
 
 #include <chrono>
 #include <cstdlib>
+#include <firestarter/Measurement/Metric/IPCEstimate.hpp>
 #include <string>
 
-extern "C" {
-#include <firestarter/Measurement/Metric/IPCEstimate.h>
-#include <firestarter/Measurement/MetricInterface.h>
-}
-
-static std::string ErrorString;
-
-static void (*Callback)(void*, const char*, int64_t, double) = nullptr;
-static void* CallbackArg = nullptr;
-
-static auto fini() -> int32_t {
-  Callback = nullptr;
-  CallbackArg = nullptr;
+auto IpcEstimateMetricData::fini() -> int32_t {
+  IpcEstimateMetricData::Callback = nullptr;
+  IpcEstimateMetricData::CallbackArg = nullptr;
 
   return EXIT_SUCCESS;
 }
 
-static auto init() -> int32_t {
-  ErrorString = "";
+auto IpcEstimateMetricData::init() -> int32_t {
+  IpcEstimateMetricData::ErrorString = "";
 
   return EXIT_SUCCESS;
 }
 
-static auto getError() -> const char* {
-  const char* ErrorCString = ErrorString.c_str();
+auto IpcEstimateMetricData::getError() -> const char* {
+  const char* ErrorCString = IpcEstimateMetricData::ErrorString.c_str();
   return ErrorCString;
 }
 
-static auto registerInsertCallback(void (*C)(void*, const char*, int64_t, double), void* Arg) -> int32_t {
-  Callback = C;
-  CallbackArg = Arg;
+auto IpcEstimateMetricData::registerInsertCallback(void (*C)(void*, const char*, int64_t, double), void* Arg)
+    -> int32_t {
+  IpcEstimateMetricData::Callback = C;
+  IpcEstimateMetricData::CallbackArg = Arg;
   return EXIT_SUCCESS;
 }
 
 void ipcEstimateMetricInsert(double Value) {
-  if (Callback == nullptr || CallbackArg == nullptr) {
+  if (IpcEstimateMetricData::Callback == nullptr || IpcEstimateMetricData::CallbackArg == nullptr) {
     return;
   }
 
@@ -66,23 +58,5 @@ void ipcEstimateMetricInsert(double Value) {
       std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch())
           .count();
 
-  Callback(CallbackArg, "ipc-estimate", T, Value);
-}
-
-const MetricInterface IpcEstimateMetric = {
-    .Name = "ipc-estimate",
-    .Type = {.Absolute = 1,
-             .Accumalative = 0,
-             .DivideByThreadCount = 0,
-             .InsertCallback = 1,
-             .IgnoreStartStopDelta = 1,
-             .Reserved = 0},
-    .Unit = "IPC",
-    .CallbackTime = 0,
-    .Callback = nullptr,
-    .Init = init,
-    .Fini = fini,
-    .GetReading = nullptr,
-    .GetError = getError,
-    .RegisterInsertCallback = registerInsertCallback,
-};
+  IpcEstimateMetricData::Callback(IpcEstimateMetricData::CallbackArg, "ipc-estimate", T, Value);
+}
\ No newline at end of file
diff --git a/src/firestarter/Measurement/Metric/Perf.cpp b/src/firestarter/Measurement/Metric/Perf.cpp
index 3ce749c0..28640735 100644
--- a/src/firestarter/Measurement/Metric/Perf.cpp
+++ b/src/firestarter/Measurement/Metric/Perf.cpp
@@ -22,42 +22,21 @@
 #include <cstring>
 #include <string>
 
-extern "C" {
-#include <firestarter/Measurement/Metric/Perf.h>
-#include <firestarter/Measurement/MetricInterface.h>
+#include <firestarter/Measurement/Metric/Perf.hpp>
 
+extern "C" {
 #include <linux/perf_event.h>
 #include <sys/ioctl.h>
 #include <sys/syscall.h>
 #include <unistd.h>
+}
 
-static const std::string PerfEventParanoidFile = "/proc/sys/kernel/perf_event_paranoid";
-
-struct ReadFormat {
-  uint64_t Nr;
-  struct {
-    uint64_t Value;
-    uint64_t Id;
-  } Values[2];
-};
-
-static std::string ErrorString;
-
-static int CpuCyclesFd = -1;
-static int InstructionsFd = -1;
-static uint64_t CpuCyclesId;
-static uint64_t InstructionsId;
-static bool InitDone = false;
-static int32_t InitValue;
-
-static struct ReadFormat Last;
-
-static auto perfEventOpen(struct perf_event_attr* HwEvent, pid_t Pid, int Cpu, int GroupFd, unsigned long Flags)
-    -> long {
+auto PerfMetricData::perfEventOpen(struct perf_event_attr* HwEvent, pid_t Pid, int Cpu, int GroupFd,
+                                   unsigned long Flags) -> long {
   return syscall(__NR_perf_event_open, HwEvent, Pid, Cpu, GroupFd, Flags);
 }
 
-static auto fini() -> int32_t {
+auto PerfMetricData::fini() -> int32_t {
   if (!(CpuCyclesFd < 0)) {
     close(CpuCyclesFd);
     CpuCyclesFd = -1;
@@ -70,17 +49,18 @@ static auto fini() -> int32_t {
   return EXIT_SUCCESS;
 }
 
-static auto init() -> int32_t {
+auto PerfMetricData::init() -> int32_t {
   if (InitDone) {
     return InitValue;
   }
 
-  if (access(PerfEventParanoidFile.c_str(), F_OK) == -1) {
+  if (access(PerfEventParanoidFile, F_OK) == -1) {
     // https://man7.org/linux/man-pages/man2/perf_event_open.2.html
     // The official way of knowing if perf_event_open() support is enabled
     // is checking for the existence of the file
     // /proc/sys/kernel/perf_event_paranoid.
-    ErrorString = "syscall perf_event_open not supported or file " + PerfEventParanoidFile + " does not exist";
+    ErrorString =
+        "syscall perf_event_open not supported or file " + std::string(PerfEventParanoidFile) + " does not exist";
     InitValue = EXIT_FAILURE;
     InitDone = true;
     return EXIT_FAILURE;
@@ -179,7 +159,7 @@ static auto init() -> int32_t {
   return EXIT_SUCCESS;
 }
 
-static auto valueFromId(struct ReadFormat* Values, uint64_t Id) -> uint64_t {
+auto PerfMetricData::valueFromId(struct ReadFormat* Values, uint64_t Id) -> uint64_t {
   for (decltype(Values->Nr) I = 0; I < Values->Nr; ++I) {
     if (Id == Values->Values[I].Id) {
       return Values->Values[I].Value;
@@ -189,7 +169,7 @@ static auto valueFromId(struct ReadFormat* Values, uint64_t Id) -> uint64_t {
   return 0;
 }
 
-static auto getReading(double* IpcValue, double* FreqValue) -> int32_t {
+auto PerfMetricData::getReading(double* IpcValue, double* FreqValue) -> int32_t {
 
   if (CpuCyclesFd < 0 || InstructionsFd < 0) {
     fini();
@@ -221,48 +201,11 @@ static auto getReading(double* IpcValue, double* FreqValue) -> int32_t {
   return EXIT_SUCCESS;
 }
 
-static auto getReadingIpc(double* Value) -> int32_t { return getReading(Value, nullptr); }
+auto PerfMetricData::getReadingIpc(double* Value) -> int32_t { return getReading(Value, nullptr); }
 
-static auto getReadingFreq(double* Value) -> int32_t { return getReading(nullptr, Value); }
+auto PerfMetricData::getReadingFreq(double* Value) -> int32_t { return getReading(nullptr, Value); }
 
-static auto getError() -> const char* {
+auto PerfMetricData::getError() -> const char* {
   const char* ErrorCString = ErrorString.c_str();
   return ErrorCString;
-}
-}
-
-const MetricInterface PerfIpcMetric = {
-    .Name = "perf-ipc",
-    .Type = {.Absolute = 1,
-             .Accumalative = 0,
-             .DivideByThreadCount = 0,
-             .InsertCallback = 0,
-             .IgnoreStartStopDelta = 0,
-             .Reserved = 0},
-    .Unit = "IPC",
-    .CallbackTime = 0,
-    .Callback = nullptr,
-    .Init = init,
-    .Fini = fini,
-    .GetReading = getReadingIpc,
-    .GetError = getError,
-    .RegisterInsertCallback = nullptr,
-};
-
-const MetricInterface PerfFreqMetric = {
-    .Name = "perf-freq",
-    .Type = {.Absolute = 0,
-             .Accumalative = 1,
-             .DivideByThreadCount = 1,
-             .InsertCallback = 0,
-             .IgnoreStartStopDelta = 0,
-             .Reserved = 0},
-    .Unit = "GHz",
-    .CallbackTime = 0,
-    .Callback = nullptr,
-    .Init = init,
-    .Fini = fini,
-    .GetReading = getReadingFreq,
-    .GetError = getError,
-    .RegisterInsertCallback = nullptr,
-};
+}
\ No newline at end of file
diff --git a/src/firestarter/Measurement/Metric/RAPL.cpp b/src/firestarter/Measurement/Metric/RAPL.cpp
index b05fa626..a33b7453 100644
--- a/src/firestarter/Measurement/Metric/RAPL.cpp
+++ b/src/firestarter/Measurement/Metric/RAPL.cpp
@@ -21,53 +21,37 @@
 
 #include <cstdio>
 #include <cstring>
+#include <firestarter/Measurement/Metric/RAPL.hpp>
 #include <fstream>
 #include <memory>
 #include <sstream>
 #include <vector>
 
 extern "C" {
-#include <firestarter/Measurement/Metric/RAPL.h>
-#include <firestarter/Measurement/MetricInterface.h>
-
 #include <dirent.h>
+}
 
-static const std::string RaplPath = "/sys/class/powercap";
-
-static std::string errorString;
-
-struct ReaderDef {
-  char* Path;
-  long long int LastReading;
-  long long int Overflow;
-  long long int Max;
-};
-
-struct ReaderDefFree {
-  void operator()(struct ReaderDef* Def) {
-    if (Def != nullptr) {
-      if (((void*)Def->Path) != nullptr) {
-        free((void*)Def->Path);
-      }
-      free((void*)Def);
+void RaplMetricData::ReaderDefFree::operator()(struct ReaderDef* Def) {
+  if (Def != nullptr) {
+    if (((void*)Def->Path) != nullptr) {
+      free((void*)Def->Path);
     }
+    free((void*)Def);
   }
-};
-
-static std::vector<std::shared_ptr<struct ReaderDef>> Readers = {};
+}
 
-static auto fini() -> int32_t {
+auto RaplMetricData::fini() -> int32_t {
   Readers.clear();
 
   return EXIT_SUCCESS;
 }
 
-static auto init() -> int32_t {
-  errorString = "";
+auto RaplMetricData::init() -> int32_t {
+  ErrorString = "";
 
-  DIR* RaplDir = opendir(RaplPath.c_str());
+  DIR* RaplDir = opendir(RaplPath);
   if (RaplDir == nullptr) {
-    errorString = "Could not open " + RaplPath;
+    ErrorString = "Could not open " + std::string(RaplPath);
     return EXIT_FAILURE;
   }
 
@@ -116,7 +100,7 @@ static auto init() -> int32_t {
   // paths now contains all interesting nodes
 
   if (Paths.empty()) {
-    errorString = "No valid entries in " + RaplPath;
+    ErrorString = "No valid entries in " + std::string(RaplPath);
     return EXIT_FAILURE;
   }
 
@@ -125,7 +109,7 @@ static auto init() -> int32_t {
     EnergyUjPath << Path << "/energy_uj";
     std::ifstream EnergyReadingStream(EnergyUjPath.str());
     if (!EnergyReadingStream.good()) {
-      errorString = "Could not read energy_uj";
+      ErrorString = "Could not read energy_uj";
       break;
     }
 
@@ -133,7 +117,7 @@ static auto init() -> int32_t {
     MaxEnergyUjRangePath << Path << "/max_energy_range_uj";
     std::ifstream MaxEnergyReadingStream(MaxEnergyUjRangePath.str());
     if (!MaxEnergyReadingStream.good()) {
-      errorString = "Could not read max_energy_range_uj";
+      ErrorString = "Could not read max_energy_range_uj";
       break;
     }
 
@@ -148,7 +132,7 @@ static auto init() -> int32_t {
     if (Read == 0) {
       std::stringstream Ss;
       Ss << "Contents in file " << EnergyUjPath.str() << " do not conform to mask (uint64_t)";
-      errorString = Ss.str();
+      ErrorString = Ss.str();
       break;
     }
 
@@ -158,11 +142,11 @@ static auto init() -> int32_t {
     if (Read == 0) {
       std::stringstream Ss;
       Ss << "Contents in file " << MaxEnergyUjRangePath.str() << " do not conform to mask (uint64_t)";
-      errorString = Ss.str();
+      ErrorString = Ss.str();
       break;
     }
 
-    std::shared_ptr<struct ReaderDef> Def(reinterpret_cast<struct ReaderDef*>(malloc(sizeof(struct ReaderDef))),
+    std::shared_ptr<struct ReaderDef> Def(static_cast<struct ReaderDef*>(malloc(sizeof(struct ReaderDef))),
                                           ReaderDefFree());
     const auto* PathName = Path.c_str();
     size_t Size = (strlen(PathName) + 1) * sizeof(char);
@@ -176,7 +160,7 @@ static auto init() -> int32_t {
     Readers.push_back(Def);
   }
 
-  if (!errorString.empty()) {
+  if (!ErrorString.empty()) {
     fini();
     return EXIT_FAILURE;
   }
@@ -184,7 +168,7 @@ static auto init() -> int32_t {
   return EXIT_SUCCESS;
 }
 
-static auto getReading(double* Value) -> int32_t {
+auto RaplMetricData::getReading(double* Value) -> int32_t {
   double FinalReading = 0.0;
 
   for (auto& Def : Readers) {
@@ -203,7 +187,7 @@ static auto getReading(double* Value) -> int32_t {
 
     Def->LastReading = Reading;
 
-    FinalReading += 1.0E-6 * (double)((Def->Overflow * Def->Max) + Def->LastReading);
+    FinalReading += 1.0E-6 * static_cast<double>((Def->Overflow * Def->Max) + Def->LastReading);
   }
 
   if (Value != nullptr) {
@@ -213,30 +197,11 @@ static auto getReading(double* Value) -> int32_t {
   return EXIT_SUCCESS;
 }
 
-static auto getError() -> const char* {
-  const char* ErrorCString = errorString.c_str();
+auto RaplMetricData::getError() -> const char* {
+  const char* ErrorCString = ErrorString.c_str();
   return ErrorCString;
 }
 
 // this function will be called periodically to make sure we do not miss an
 // overflow of the counter
-static void callback() { getReading(nullptr); }
-}
-
-const MetricInterface RaplMetric = {
-    .Name = "sysfs-powercap-rapl",
-    .Type = {.Absolute = 0,
-             .Accumalative = 1,
-             .DivideByThreadCount = 0,
-             .InsertCallback = 0,
-             .IgnoreStartStopDelta = 0,
-             .Reserved = 0},
-    .Unit = "J",
-    .CallbackTime = 30000000,
-    .Callback = callback,
-    .Init = init,
-    .Fini = fini,
-    .GetReading = getReading,
-    .GetError = getError,
-    .RegisterInsertCallback = nullptr,
-};
+void RaplMetricData::callback() { getReading(nullptr); }
\ No newline at end of file

From e3b01db46723a3d59737317984642ddabf69abcc Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 19 Oct 2024 00:05:51 +0200
Subject: [PATCH 071/167] windows compat: fix hip build

---
 include/firestarter/WindowsCompat.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index a879d6f5..6c31066e 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -36,10 +36,10 @@
 #pragma GCC diagnostic ignored "-Wunused-function"
 #if defined(__clang__)
 #elif not(defined(__MINGW32__) || defined(__MINGW64__))
-static inline void _mm_mfence() noexcept {};
+void _mm_mfence() noexcept;
 #endif
-#if not(defined(__INTEL_LLVM_COMPILER) || defined(__HIP__))
-static inline void __cpuid(int* /*unused*/, int /*unused*/) noexcept {};
+#if not(defined(__INTEL_LLVM_COMPILER))
+void __cpuid(int* /*unused*/, int /*unused*/) noexcept;
 #endif
 #pragma GCC diagnostic pop
 #if defined(__clang__)

From a2f603326917f20940a455fc943871a4fc12a60a Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 19 Oct 2024 00:10:46 +0200
Subject: [PATCH 072/167] fix link errors with metric interface restructure

---
 .../firestarter/Measurement/Metric/IPCEstimate.hpp   |  6 +++---
 include/firestarter/Measurement/Metric/Perf.hpp      | 10 +++++-----
 include/firestarter/Measurement/Metric/RAPL.hpp      |  4 ++--
 src/firestarter/Measurement/Metric/IPCEstimate.cpp   | 12 ++++++------
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/include/firestarter/Measurement/Metric/IPCEstimate.hpp b/include/firestarter/Measurement/Metric/IPCEstimate.hpp
index 0c57a34c..2a4f7dfa 100644
--- a/include/firestarter/Measurement/Metric/IPCEstimate.hpp
+++ b/include/firestarter/Measurement/Metric/IPCEstimate.hpp
@@ -25,9 +25,9 @@
 #include <string>
 
 struct IpcEstimateMetricData {
-  static std::string ErrorString;
-  static void (*Callback)(void*, const char*, int64_t, double);
-  static void* CallbackArg;
+  inline static std::string ErrorString;
+  inline static void (*Callback)(void*, const char*, int64_t, double);
+  inline static void* CallbackArg;
   static auto fini() -> int32_t;
   static auto init() -> int32_t;
   static auto getError() -> const char*;
diff --git a/include/firestarter/Measurement/Metric/Perf.hpp b/include/firestarter/Measurement/Metric/Perf.hpp
index c2646b30..24624d77 100644
--- a/include/firestarter/Measurement/Metric/Perf.hpp
+++ b/include/firestarter/Measurement/Metric/Perf.hpp
@@ -35,14 +35,14 @@ struct PerfMetricData {
     } Values[2];
   };
 
-  static std::string ErrorString;
+  inline static std::string ErrorString;
   inline static int CpuCyclesFd = -1;
   inline static int InstructionsFd = -1;
-  static uint64_t CpuCyclesId;
-  static uint64_t InstructionsId;
+  inline static uint64_t CpuCyclesId;
+  inline static uint64_t InstructionsId;
   inline static bool InitDone = false;
-  static int32_t InitValue;
-  static struct ReadFormat Last;
+  inline static int32_t InitValue;
+  inline static struct ReadFormat Last;
   static auto perfEventOpen(struct perf_event_attr* HwEvent, pid_t Pid, int Cpu, int GroupFd, unsigned long Flags)
       -> long;
   static auto fini() -> int32_t;
diff --git a/include/firestarter/Measurement/Metric/RAPL.hpp b/include/firestarter/Measurement/Metric/RAPL.hpp
index 0a6903fb..c98e1e4b 100644
--- a/include/firestarter/Measurement/Metric/RAPL.hpp
+++ b/include/firestarter/Measurement/Metric/RAPL.hpp
@@ -29,7 +29,7 @@
 struct RaplMetricData {
   inline static const char* RaplPath = "/sys/class/powercap";
 
-  static std::string ErrorString;
+  inline static std::string ErrorString;
 
   struct ReaderDef {
     char* Path;
@@ -42,7 +42,7 @@ struct RaplMetricData {
     void operator()(struct ReaderDef* Def);
   };
 
-  static std::vector<std::shared_ptr<struct ReaderDef>> Readers;
+  inline static std::vector<std::shared_ptr<struct ReaderDef>> Readers;
 
   static auto fini() -> int32_t;
   static auto init() -> int32_t;
diff --git a/src/firestarter/Measurement/Metric/IPCEstimate.cpp b/src/firestarter/Measurement/Metric/IPCEstimate.cpp
index 989b492d..dcbc379e 100644
--- a/src/firestarter/Measurement/Metric/IPCEstimate.cpp
+++ b/src/firestarter/Measurement/Metric/IPCEstimate.cpp
@@ -25,27 +25,27 @@
 #include <string>
 
 auto IpcEstimateMetricData::fini() -> int32_t {
-  IpcEstimateMetricData::Callback = nullptr;
-  IpcEstimateMetricData::CallbackArg = nullptr;
+  Callback = nullptr;
+  CallbackArg = nullptr;
 
   return EXIT_SUCCESS;
 }
 
 auto IpcEstimateMetricData::init() -> int32_t {
-  IpcEstimateMetricData::ErrorString = "";
+  ErrorString = "";
 
   return EXIT_SUCCESS;
 }
 
 auto IpcEstimateMetricData::getError() -> const char* {
-  const char* ErrorCString = IpcEstimateMetricData::ErrorString.c_str();
+  const char* ErrorCString = ErrorString.c_str();
   return ErrorCString;
 }
 
 auto IpcEstimateMetricData::registerInsertCallback(void (*C)(void*, const char*, int64_t, double), void* Arg)
     -> int32_t {
-  IpcEstimateMetricData::Callback = C;
-  IpcEstimateMetricData::CallbackArg = Arg;
+  Callback = C;
+  CallbackArg = Arg;
   return EXIT_SUCCESS;
 }
 

From ddd4b5e5245ce59158d37040f204e4a75147d680 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 19 Oct 2024 00:27:42 +0200
Subject: [PATCH 073/167] fix windows msc build

---
 .../Measurement/Metric/IPCEstimate.hpp        | 29 +++++-----
 .../firestarter/Measurement/Metric/Perf.hpp   | 58 +++++++++----------
 .../firestarter/Measurement/Metric/RAPL.hpp   | 29 +++++-----
 3 files changed, 52 insertions(+), 64 deletions(-)

diff --git a/include/firestarter/Measurement/Metric/IPCEstimate.hpp b/include/firestarter/Measurement/Metric/IPCEstimate.hpp
index 2a4f7dfa..a65263d9 100644
--- a/include/firestarter/Measurement/Metric/IPCEstimate.hpp
+++ b/include/firestarter/Measurement/Metric/IPCEstimate.hpp
@@ -34,22 +34,19 @@ struct IpcEstimateMetricData {
   static auto registerInsertCallback(void (*C)(void*, const char*, int64_t, double), void* Arg) -> int32_t;
 };
 
-const MetricInterface IpcEstimateMetric = {
-    .Name = "ipc-estimate",
-    .Type = {.Absolute = 1,
-             .Accumalative = 0,
-             .DivideByThreadCount = 0,
-             .InsertCallback = 1,
-             .IgnoreStartStopDelta = 1,
-             .Reserved = 0},
-    .Unit = "IPC",
-    .CallbackTime = 0,
-    .Callback = nullptr,
-    .Init = IpcEstimateMetricData::init,
-    .Fini = IpcEstimateMetricData::fini,
-    .GetReading = nullptr,
-    .GetError = IpcEstimateMetricData::getError,
-    .RegisterInsertCallback = IpcEstimateMetricData::registerInsertCallback,
+static constexpr const MetricInterface IpcEstimateMetric{
+    /*Name=*/"ipc-estimate",
+    /*Type=*/
+    {/*Absolute=*/1, /*Accumalative=*/0, /*DivideByThreadCount=*/0, /*InsertCallback=*/1, /*IgnoreStartStopDelta=*/1,
+     /*Reserved=*/0},
+    /*Unit=*/"IPC",
+    /*CallbackTime=*/0,
+    /*Callback=*/nullptr,
+    /*Init=*/IpcEstimateMetricData::init,
+    /*Fini=*/IpcEstimateMetricData::fini,
+    /*GetReading=*/nullptr,
+    /*GetError=*/IpcEstimateMetricData::getError,
+    /*RegisterInsertCallback=*/IpcEstimateMetricData::registerInsertCallback,
 };
 
 void ipcEstimateMetricInsert(double Value);
\ No newline at end of file
diff --git a/include/firestarter/Measurement/Metric/Perf.hpp b/include/firestarter/Measurement/Metric/Perf.hpp
index 24624d77..fd2be6b0 100644
--- a/include/firestarter/Measurement/Metric/Perf.hpp
+++ b/include/firestarter/Measurement/Metric/Perf.hpp
@@ -54,38 +54,32 @@ struct PerfMetricData {
   static auto getError() -> const char*;
 };
 
-const MetricInterface PerfIpcMetric = {
-    .Name = "perf-ipc",
-    .Type = {.Absolute = 1,
-             .Accumalative = 0,
-             .DivideByThreadCount = 0,
-             .InsertCallback = 0,
-             .IgnoreStartStopDelta = 0,
-             .Reserved = 0},
-    .Unit = "IPC",
-    .CallbackTime = 0,
-    .Callback = nullptr,
-    .Init = PerfMetricData::init,
-    .Fini = PerfMetricData::fini,
-    .GetReading = PerfMetricData::getReadingIpc,
-    .GetError = PerfMetricData::getError,
-    .RegisterInsertCallback = nullptr,
+static constexpr const MetricInterface PerfIpcMetric{
+    /*Name=*/"perf-ipc",
+    /*Type=*/
+    {/*Absolute=*/1, /*Accumalative=*/0, /*DivideByThreadCount=*/0, /*InsertCallback=*/0, /*IgnoreStartStopDelta=*/0,
+     /*Reserved=*/0},
+    /*Unit=*/"IPC",
+    /*CallbackTime=*/0,
+    /*Callback=*/nullptr,
+    /*Init=*/PerfMetricData::init,
+    /*Fini=*/PerfMetricData::fini,
+    /*GetReading=*/PerfMetricData::getReadingIpc,
+    /*GetError=*/PerfMetricData::getError,
+    /*RegisterInsertCallback=*/nullptr,
 };
 
-const MetricInterface PerfFreqMetric = {
-    .Name = "perf-freq",
-    .Type = {.Absolute = 0,
-             .Accumalative = 1,
-             .DivideByThreadCount = 1,
-             .InsertCallback = 0,
-             .IgnoreStartStopDelta = 0,
-             .Reserved = 0},
-    .Unit = "GHz",
-    .CallbackTime = 0,
-    .Callback = nullptr,
-    .Init = PerfMetricData::init,
-    .Fini = PerfMetricData::fini,
-    .GetReading = PerfMetricData::getReadingFreq,
-    .GetError = PerfMetricData::getError,
-    .RegisterInsertCallback = nullptr,
+static constexpr const MetricInterface PerfFreqMetric{
+    /*Name=*/"perf-freq",
+    /*Type=*/
+    {/*Absolute=*/0, /*Accumalative=*/1, /*DivideByThreadCount=*/1, /*InsertCallback=*/0, /*IgnoreStartStopDelta=*/0,
+     /*Reserved=*/0},
+    /*Unit=*/"GHz",
+    /*CallbackTime=*/0,
+    /*Callback=*/nullptr,
+    /*Init=*/PerfMetricData::init,
+    /*Fini=*/PerfMetricData::fini,
+    /*GetReading=*/PerfMetricData::getReadingFreq,
+    /*GetError=*/PerfMetricData::getError,
+    /*RegisterInsertCallback=*/nullptr,
 };
\ No newline at end of file
diff --git a/include/firestarter/Measurement/Metric/RAPL.hpp b/include/firestarter/Measurement/Metric/RAPL.hpp
index c98e1e4b..6ee7bf94 100644
--- a/include/firestarter/Measurement/Metric/RAPL.hpp
+++ b/include/firestarter/Measurement/Metric/RAPL.hpp
@@ -54,20 +54,17 @@ struct RaplMetricData {
   static void callback();
 };
 
-const MetricInterface RaplMetric = {
-    .Name = "sysfs-powercap-rapl",
-    .Type = {.Absolute = 0,
-             .Accumalative = 1,
-             .DivideByThreadCount = 0,
-             .InsertCallback = 0,
-             .IgnoreStartStopDelta = 0,
-             .Reserved = 0},
-    .Unit = "J",
-    .CallbackTime = 30000000,
-    .Callback = RaplMetricData::callback,
-    .Init = RaplMetricData::init,
-    .Fini = RaplMetricData::fini,
-    .GetReading = RaplMetricData::getReading,
-    .GetError = RaplMetricData::getError,
-    .RegisterInsertCallback = nullptr,
+static constexpr const MetricInterface RaplMetric{
+    /*Name=*/"sysfs-powercap-rapl",
+    /*Type=*/
+    {/*Absolute=*/0, /*Accumalative=*/1, /*DivideByThreadCount=*/0, /*InsertCallback=*/0, /*IgnoreStartStopDelta=*/0,
+     /*Reserved=*/0},
+    /*Unit=*/"J",
+    /*CallbackTime=*/30000000,
+    /*Callback=*/nullptr,
+    /*Init=*/RaplMetricData::init,
+    /*Fini=*/RaplMetricData::fini,
+    /*GetReading=*/RaplMetricData::getReading,
+    /*GetError=*/RaplMetricData::getError,
+    /*RegisterInsertCallback=*/nullptr,
 };
\ No newline at end of file

From f8a3bc5e91cb25d8a813736fdcfaa24691c7c861 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 19 Oct 2024 00:34:13 +0200
Subject: [PATCH 074/167] fix windows msc build

---
 include/firestarter/Measurement/Metric/Perf.hpp | 2 --
 src/firestarter/Measurement/Metric/Perf.cpp     | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/firestarter/Measurement/Metric/Perf.hpp b/include/firestarter/Measurement/Metric/Perf.hpp
index fd2be6b0..4fdc943a 100644
--- a/include/firestarter/Measurement/Metric/Perf.hpp
+++ b/include/firestarter/Measurement/Metric/Perf.hpp
@@ -43,8 +43,6 @@ struct PerfMetricData {
   inline static bool InitDone = false;
   inline static int32_t InitValue;
   inline static struct ReadFormat Last;
-  static auto perfEventOpen(struct perf_event_attr* HwEvent, pid_t Pid, int Cpu, int GroupFd, unsigned long Flags)
-      -> long;
   static auto fini() -> int32_t;
   static auto init() -> int32_t;
   static auto valueFromId(struct ReadFormat* Values, uint64_t Id) -> uint64_t;
diff --git a/src/firestarter/Measurement/Metric/Perf.cpp b/src/firestarter/Measurement/Metric/Perf.cpp
index 28640735..6cac1708 100644
--- a/src/firestarter/Measurement/Metric/Perf.cpp
+++ b/src/firestarter/Measurement/Metric/Perf.cpp
@@ -31,8 +31,8 @@ extern "C" {
 #include <unistd.h>
 }
 
-auto PerfMetricData::perfEventOpen(struct perf_event_attr* HwEvent, pid_t Pid, int Cpu, int GroupFd,
-                                   unsigned long Flags) -> long {
+static auto perfEventOpen(struct perf_event_attr* HwEvent, pid_t Pid, int Cpu, int GroupFd, unsigned long Flags)
+    -> long {
   return syscall(__NR_perf_event_open, HwEvent, Pid, Cpu, GroupFd, Flags);
 }
 

From e2cba76a64aec7a8c4a1ed3812a252e11df90266 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 19 Oct 2024 14:17:49 +0200
Subject: [PATCH 075/167] windows compat: add #pragma message to debug
 _mm_mfence compatibility

---
 include/firestarter/WindowsCompat.hpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index 6c31066e..5c53a22e 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -35,8 +35,12 @@
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-function"
 #if defined(__clang__)
+#pragma message("Compiling with clang. Not defining _mm_mfence.")
 #elif not(defined(__MINGW32__) || defined(__MINGW64__))
+#pragma message("Not compiling with clang or mingw. Defining _mm_mfence.")
 void _mm_mfence() noexcept;
+#else
+#pragma message("Compiling with mingw or not clang. Not defining _mm_mfence.")
 #endif
 #if not(defined(__INTEL_LLVM_COMPILER))
 void __cpuid(int* /*unused*/, int /*unused*/) noexcept;

From 78272adf6633d635cc5878aa2b639acab26337d5 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 19 Oct 2024 14:22:11 +0200
Subject: [PATCH 076/167] windows compat: add emmintrin.h header when compiling
 with clang.

---
 include/firestarter/WindowsCompat.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index 5c53a22e..8a601a35 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -36,6 +36,7 @@
 #pragma GCC diagnostic ignored "-Wunused-function"
 #if defined(__clang__)
 #pragma message("Compiling with clang. Not defining _mm_mfence.")
+#include <emmintrin.h>
 #elif not(defined(__MINGW32__) || defined(__MINGW64__))
 #pragma message("Not compiling with clang or mingw. Defining _mm_mfence.")
 void _mm_mfence() noexcept;

From 30aceefefa95db9db0bc0c69d16a88e8f36158c1 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 19 Oct 2024 16:35:04 +0200
Subject: [PATCH 077/167] Revert "windows compat: add #pragma message to debug
 _mm_mfence compatibility"

This reverts commit e2cba76a64aec7a8c4a1ed3812a252e11df90266.
---
 include/firestarter/WindowsCompat.hpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index 8a601a35..f2a56721 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -35,13 +35,9 @@
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-function"
 #if defined(__clang__)
-#pragma message("Compiling with clang. Not defining _mm_mfence.")
 #include <emmintrin.h>
 #elif not(defined(__MINGW32__) || defined(__MINGW64__))
-#pragma message("Not compiling with clang or mingw. Defining _mm_mfence.")
 void _mm_mfence() noexcept;
-#else
-#pragma message("Compiling with mingw or not clang. Not defining _mm_mfence.")
 #endif
 #if not(defined(__INTEL_LLVM_COMPILER))
 void __cpuid(int* /*unused*/, int /*unused*/) noexcept;

From d62dc1aa109a4e93d10b831d8bcf243fb06ab99b Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 19 Oct 2024 16:38:15 +0200
Subject: [PATCH 078/167] windows compat: less compiler warnings

---
 include/firestarter/WindowsCompat.hpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/firestarter/WindowsCompat.hpp b/include/firestarter/WindowsCompat.hpp
index f2a56721..11ef1329 100644
--- a/include/firestarter/WindowsCompat.hpp
+++ b/include/firestarter/WindowsCompat.hpp
@@ -31,9 +31,10 @@
 #if defined(__clang__)
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wunused-function"
-#endif
+#elif defined(__GNUC__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-function"
+#endif
 #if defined(__clang__)
 #include <emmintrin.h>
 #elif not(defined(__MINGW32__) || defined(__MINGW64__))
@@ -42,9 +43,10 @@ void _mm_mfence() noexcept;
 #if not(defined(__INTEL_LLVM_COMPILER))
 void __cpuid(int* /*unused*/, int /*unused*/) noexcept;
 #endif
-#pragma GCC diagnostic pop
 #if defined(__clang__)
 #pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#pragma GCC diagnostic pop
 #endif
 // NOLINTEND(readability-identifier-naming,cert-dcl37-c,cert-dcl37-cpp,cert-dcl51-cpp,bugprone-reserved-identifier)
 

From ddfc1ae18c983d26f0493a3c3308443da6878273 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 19 Oct 2024 16:38:33 +0200
Subject: [PATCH 079/167] Revert "ci: do not fail fast in linux build"

This reverts commit 20b6360b63860a68245dad2f705e495b9fef38dc.
---
 .github/workflows/cmake.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index 0c874b61..6b4c9178 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -7,7 +7,7 @@ jobs:
   build-linux-hip-rocm:
     strategy:
 #      max-parallel: 1 # Sets the limit of jobs to run concurrently
-      fail-fast: false
+      fail-fast: true
       matrix:
         os: [ubuntu-22.04]
         compiler: [g++-9, g++-10, g++-11, g++-12, clang++-11, clang++-12, clang++-13, clang++-14, clang++-15]
@@ -115,7 +115,7 @@ jobs:
   build-linux:
     strategy:
 #      max-parallel: 1 # Sets the limit of jobs to run concurrently
-      fail-fast: false
+      fail-fast: true
       matrix:
         os: [ubuntu-20.04]
         compiler: [g++-7, g++-8, g++-9, g++-10, clang++-8, clang++-9, clang++-10]

From 1fd90f4142c269133ca26788e0c83b8c05c602be Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 19 Oct 2024 17:13:56 +0200
Subject: [PATCH 080/167] restructure rapl: add callback back in again

---
 include/firestarter/Measurement/Metric/RAPL.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/firestarter/Measurement/Metric/RAPL.hpp b/include/firestarter/Measurement/Metric/RAPL.hpp
index 6ee7bf94..4e0d4781 100644
--- a/include/firestarter/Measurement/Metric/RAPL.hpp
+++ b/include/firestarter/Measurement/Metric/RAPL.hpp
@@ -61,7 +61,7 @@ static constexpr const MetricInterface RaplMetric{
      /*Reserved=*/0},
     /*Unit=*/"J",
     /*CallbackTime=*/30000000,
-    /*Callback=*/nullptr,
+    /*Callback=*/RaplMetricData::callback,
     /*Init=*/RaplMetricData::init,
     /*Fini=*/RaplMetricData::fini,
     /*GetReading=*/RaplMetricData::getReading,

From 669681796dce043aac79d929f2f37f66f834967b Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 24 Oct 2024 15:51:57 +0200
Subject: [PATCH 081/167] cleanup cuda/hip and oneapi code. add todo comments
 with links to their respective issues

---
 include/firestarter/Cuda/CudaHipCompat.hpp    |  87 +++++------
 .../firestarter/Environment/Environment.hpp   |   2 +-
 src/firestarter/Cuda/Cuda.cpp                 | 136 +++++++++---------
 src/firestarter/OneAPI/OneAPI.cpp             |  27 ++--
 4 files changed, 135 insertions(+), 117 deletions(-)

diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp
index f2200d95..c3715640 100644
--- a/include/firestarter/Cuda/CudaHipCompat.hpp
+++ b/include/firestarter/Cuda/CudaHipCompat.hpp
@@ -1,6 +1,6 @@
 /******************************************************************************
  * FIRESTARTER - A Processor Stress Test Utility
- * Copyright (C) 2020-2023 TU Dresden, Center for Information Services and High
+ * Copyright (C) 2024 TU Dresden, Center for Information Services and High
  * Performance Computing
  *
  * This program is free software: you can redistribute it and/or modify
@@ -72,6 +72,7 @@ template <typename T> void accellSafeCall(T TVal, const char* File, int Line, st
 #ifdef FIRESTARTER_BUILD_CUDA
 // Start of CUDA compatibility types
 
+// NOLINTNEXTLINE(performance-enum-size)
 enum class BlasStatusT : std::underlying_type_t<cublasStatus_t> {
   BLAS_STATUS_SUCCESS = CUBLAS_STATUS_SUCCESS,
   BLAS_STATUS_NOT_INITIALIZED = CUBLAS_STATUS_NOT_INITIALIZED,
@@ -87,10 +88,12 @@ enum class BlasStatusT : std::underlying_type_t<cublasStatus_t> {
 
 constexpr const char* AccelleratorString = "CUDA";
 
+// NOLINTNEXTLINE(performance-enum-size)
 enum class ErrorT : std::underlying_type_t<cudaError_t> {
   Success = cudaSuccess,
 };
 
+// NOLINTNEXTLINE(performance-enum-size)
 enum class RandStatusT : std::underlying_type_t<curandStatus_t> {
   RAND_STATUS_SUCCESS = CURAND_STATUS_SUCCESS,
   RAND_STATUS_VERSION_MISMATCH = CURAND_STATUS_VERSION_MISMATCH,
@@ -109,8 +112,6 @@ enum class RandStatusT : std::underlying_type_t<curandStatus_t> {
 
 using StreamOrContext = CUcontext;
 
-template <typename FloatingPointType> using DevicePtr = CUdeviceptr;
-
 using DeviceProperties = cudaDeviceProp;
 
 using RandGenerator = curandGenerator_t;
@@ -119,6 +120,7 @@ using BlasHandle = cublasHandle_t;
 
 using BlasStatus = cublasStatus_t;
 
+// NOLINTNEXTLINE(performance-enum-size)
 enum class BlasOperation : std::underlying_type_t<cublasOperation_t> {
   BLAS_OP_N = CUBLAS_OP_N,
   BLAS_OP_T = CUBLAS_OP_T,
@@ -132,6 +134,7 @@ using CUResultOrHipErrorT = CUresult;
 #elif defined(FIRESTARTER_BUILD_HIP)
 // Start of HIP compatibility types
 
+// NOLINTNEXTLINE(performance-enum-size)
 enum class BlasStatusT : std::underlying_type_t<hipblasStatus_t> {
   BLAS_STATUS_SUCCESS = HIPBLAS_STATUS_SUCCESS,
   BLAS_STATUS_NOT_INITIALIZED = HIPBLAS_STATUS_NOT_INITIALIZED,
@@ -149,10 +152,12 @@ enum class BlasStatusT : std::underlying_type_t<hipblasStatus_t> {
 
 constexpr const char* AccelleratorString = "HIP";
 
+// NOLINTNEXTLINE(performance-enum-size)
 enum class ErrorT : std::underlying_type_t<hipError_t> {
   Success = hipSuccess,
 };
 
+// NOLINTNEXTLINE(performance-enum-size)
 enum class RandStatusT : std::underlying_type_t<hiprandStatus_t> {
   RAND_STATUS_SUCCESS = HIPRAND_STATUS_SUCCESS,
   RAND_STATUS_VERSION_MISMATCH = HIPRAND_STATUS_VERSION_MISMATCH,
@@ -172,8 +177,6 @@ enum class RandStatusT : std::underlying_type_t<hiprandStatus_t> {
 
 using StreamOrContext = hipStream_t;
 
-template <typename FloatingPointType> using DevicePtr = FloatingPointType*;
-
 using DeviceProperties = hipDeviceProp_t;
 
 using RandGenerator = hiprandGenerator_t;
@@ -182,6 +185,7 @@ using BlasHandle = hipblasHandle_t;
 
 using BlasStatus = hipblasStatus_t;
 
+// NOLINTNEXTLINE(performance-enum-size)
 enum class BlasOperation : std::underlying_type_t<hipblasOperation_t> {
   BLAS_OP_N = HIPBLAS_OP_N,
   BLAS_OP_T = HIPBLAS_OP_T,
@@ -196,24 +200,25 @@ using CUResultOrHipErrorT = ErrorT;
 
 // Start of compatibility types for clangd
 
+// NOLINTNEXTLINE(performance-enum-size)
 enum class BlasStatusT {
   BLAS_STATUS_SUCCESS = 0,
 };
 
 constexpr const char* AccelleratorString = "unknown";
 
+// NOLINTNEXTLINE(performance-enum-size)
 enum class ErrorT {
   Success = 0,
 };
 
+// NOLINTNEXTLINE(performance-enum-size)
 enum class RandStatusT {
   RAND_STATUS_SUCCESS = 0,
 };
 
 using StreamOrContext = void*;
 
-template <typename FloatingPointType> using DevicePtr = std::size_t;
-
 using DeviceProperties = void*;
 
 using RandGenerator = void*;
@@ -222,6 +227,7 @@ using BlasHandle = void*;
 
 using BlasStatus = void*;
 
+// NOLINTNEXTLINE(performance-enum-size)
 enum class BlasOperation {
   BLAS_OP_N,
   BLAS_OP_T,
@@ -239,7 +245,7 @@ using CUResultOrHipErrorT = void*;
 /// Get the error string from a call to CUDA of HIP libraries.
 /// \arg Status The status code that is returned by these calls.
 /// \return The error as a string.
-auto getErrorString(ErrorT Error) -> const char* {
+inline auto getErrorString(ErrorT Error) -> const char* {
 #ifdef FIRESTARTER_BUILD_CUDA
   return cudaGetErrorString(static_cast<cudaError_t>(Error));
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -391,7 +397,7 @@ template <typename T> void accellSafeCall(T TVal, const char* File, const int Li
 /// Wrapper to cuInit or hipInit.
 /// \arg Flags The Flags forwarded to cuInit or hipInit.
 /// \returns The Error code returned from these calls.
-auto init(unsigned int Flags) -> CUResultOrHipErrorT {
+inline auto init(unsigned int Flags) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return cuInit(Flags);
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -405,7 +411,7 @@ auto init(unsigned int Flags) -> CUResultOrHipErrorT {
 /// Get the number GPU devices. Wrapper to cuDeviceGetCount or hipGetDeviceCount.
 /// \arg DevCount The reference to where the number of GPU devices will be written.
 /// \returns The Error code returned from these calls.
-auto getDeviceCount(int& DevCount) -> CUResultOrHipErrorT {
+inline auto getDeviceCount(int& DevCount) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return cuDeviceGetCount(&DevCount);
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -420,8 +426,8 @@ auto getDeviceCount(int& DevCount) -> CUResultOrHipErrorT {
 /// destroyContextOrStream.
 /// \arg DeviceIndex The device on which to create the context or stream.
 /// \return The created context or stream.
-auto createContextOrStream(int DeviceIndex) -> StreamOrContext {
-  StreamOrContext Soc;
+inline auto createContextOrStream(int DeviceIndex) -> StreamOrContext {
+  StreamOrContext Soc{};
 #ifdef FIRESTARTER_BUILD_CUDA
   firestarter::log::trace() << "Creating " << AccelleratorString << " context for computation on device nr. "
                             << DeviceIndex;
@@ -446,7 +452,7 @@ auto createContextOrStream(int DeviceIndex) -> StreamOrContext {
 /// Destroy the context (CUDA) or stream (HIP) with cuCtxDestroy and hipStreamDestroy respectively.
 /// \arg Soc The reference to the context or stream.
 /// \returns The Error code returned from these calls.
-auto destroyContextOrStream(StreamOrContext& Soc) -> CUResultOrHipErrorT {
+inline auto destroyContextOrStream(StreamOrContext& Soc) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return static_cast<CUResultOrHipErrorT>(cuCtxDestroy(Soc));
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -460,7 +466,7 @@ auto destroyContextOrStream(StreamOrContext& Soc) -> CUResultOrHipErrorT {
 /// Create a blas handle. Wrapper to cublasCreate or hipblasCreate.
 /// \arg BlasHandle The reference to a BlasHandle object which will be initialized.
 /// \returns The Error code returned from these calls.
-auto blasCreate(BlasHandle& BlasHandle) -> BlasStatusT {
+inline auto blasCreate(BlasHandle& BlasHandle) -> BlasStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return static_cast<BlasStatusT>(cublasCreate(&BlasHandle));
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -474,7 +480,7 @@ auto blasCreate(BlasHandle& BlasHandle) -> BlasStatusT {
 /// Destory a blas handle. Wrapper to cublasDestroy or hipblasDestroy.
 /// \arg BlasHandle The reference to a BlasHandle object which will be destroyed.
 /// \returns The Error code returned from these calls.
-auto blasDestroy(BlasHandle& BlasHandle) -> BlasStatusT {
+inline auto blasDestroy(BlasHandle& BlasHandle) -> BlasStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return static_cast<BlasStatusT>(cublasDestroy(BlasHandle));
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -489,7 +495,7 @@ auto blasDestroy(BlasHandle& BlasHandle) -> BlasStatusT {
 /// \arg Property The reference to the properties that are retrived.
 /// \arg DeviceIndex The index of the GPU device for which to retrive the device properties.s
 /// \returns The Error code returned from these calls.
-auto getDeviceProperties(DeviceProperties& Property, int DeviceIndex) -> ErrorT {
+inline auto getDeviceProperties(DeviceProperties& Property, int DeviceIndex) -> ErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return static_cast<ErrorT>(cudaGetDeviceProperties(&Property, DeviceIndex));
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -506,7 +512,7 @@ auto getDeviceProperties(DeviceProperties& Property, int DeviceIndex) -> ErrorT
 /// \arg MemoryAvail The reference to the available memory that is retrived.
 /// \arg MemoryTotal The reference to the total memory that is retrived.
 /// \returns The Error code returned from these calls.
-auto memGetInfo(std::size_t& MemoryAvail, std::size_t& MemoryTotal) -> CUResultOrHipErrorT {
+inline auto memGetInfo(std::size_t& MemoryAvail, std::size_t& MemoryTotal) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return static_cast<CUResultOrHipErrorT>(cuMemGetInfo(&MemoryAvail, &MemoryTotal));
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -525,11 +531,11 @@ auto memGetInfo(std::size_t& MemoryAvail, std::size_t& MemoryTotal) -> CUResultO
 /// \arg MemorySize The memory that is allocated on the device in bytes.
 /// \returns The Error code returned from these calls.
 template <typename FloatingPointType>
-auto malloc(DevicePtr<FloatingPointType>& Ptr, std::size_t MemorySize) -> CUResultOrHipErrorT {
+auto malloc(FloatingPointType** Ptr, std::size_t MemorySize) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return static_cast<CUResultOrHipErrorT>(cuMemAlloc(&Ptr, MemorySize));
+  return static_cast<CUResultOrHipErrorT>(cuMemAlloc(static_cast<void**>(Ptr), MemorySize));
 #elif defined(FIRESTARTER_BUILD_HIP)
-  return static_cast<CUResultOrHipErrorT>(hipMalloc(&Ptr, MemorySize));
+  return static_cast<CUResultOrHipErrorT>(hipMalloc(Ptr, MemorySize));
 #else
   (void)Ptr;
   (void)MemorySize;
@@ -542,9 +548,9 @@ auto malloc(DevicePtr<FloatingPointType>& Ptr, std::size_t MemorySize) -> CUResu
 /// \tparam FloatingPointType The type of the floating point used. Either float or double.
 /// \arg Ptr The reference to the device pointer which is used in the free call.
 /// \returns The Error code returned from these calls.
-template <typename FloatingPointType> auto free(DevicePtr<FloatingPointType>& Ptr) -> CUResultOrHipErrorT {
+template <typename FloatingPointType> auto free(FloatingPointType* Ptr) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return static_cast<CUResultOrHipErrorT>(cuMemFree(Ptr));
+  return static_cast<CUResultOrHipErrorT>(cuMemFree(static_cast<void**>(Ptr)));
 #elif defined(FIRESTARTER_BUILD_HIP)
   return static_cast<CUResultOrHipErrorT>(hipFree(Ptr));
 #else
@@ -557,7 +563,7 @@ template <typename FloatingPointType> auto free(DevicePtr<FloatingPointType>& Pt
 /// hiprandCreateGenerator.
 /// \arg RandomGen The reference to the random generation which is retrived by the calls.
 /// \returns The Error code returned from these calls.
-auto randCreateGeneratorPseudoRandom(RandGenerator& RandomGen) -> RandStatusT {
+inline auto randCreateGeneratorPseudoRandom(RandGenerator& RandomGen) -> RandStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return static_cast<RandStatusT>(curandCreateGenerator(&RandomGen, CURAND_RNG_PSEUDO_DEFAULT));
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -573,7 +579,7 @@ auto randCreateGeneratorPseudoRandom(RandGenerator& RandomGen) -> RandStatusT {
 /// \arg RandomGen The reference to the random generator.
 /// \arg Seed The seed used to initialize the pseudo random generator.
 /// \returns The Error code returned from these calls.
-auto randSetPseudoRandomGeneratorSeed(RandGenerator& RandomGen, int Seed) -> RandStatusT {
+inline auto randSetPseudoRandomGeneratorSeed(RandGenerator& RandomGen, int Seed) -> RandStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return static_cast<RandStatusT>(curandSetPseudoRandomGeneratorSeed(RandomGen, Seed));
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -591,7 +597,7 @@ auto randSetPseudoRandomGeneratorSeed(RandGenerator& RandomGen, int Seed) -> Ran
 /// \arg OutputPtr The device pointer on which is initialized with specific number of uniform random floats.
 /// \arg Num The number of unifrom random floats.
 /// \returns The Error code returned from these calls.
-auto randGenerateUniform(RandGenerator& RandomGen, DevicePtr<float> OutputPtr, std::size_t Num) -> RandStatusT {
+inline auto randGenerateUniform(RandGenerator& RandomGen, float* OutputPtr, std::size_t Num) -> RandStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return static_cast<RandStatusT>(curandGenerateUniform(RandomGen, reinterpret_cast<float*>(OutputPtr), Num));
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -610,9 +616,9 @@ auto randGenerateUniform(RandGenerator& RandomGen, DevicePtr<float> OutputPtr, s
 /// \arg OutputPtr The device pointer on which is initialized with specific number of uniform random floats.
 /// \arg Num The number of unifrom random doubles.
 /// \returns The Error code returned from these calls.
-auto randGenerateUniformDouble(RandGenerator& RandomGen, DevicePtr<double> OutputPtr, std::size_t Num) -> RandStatusT {
+inline auto randGenerateUniformDouble(RandGenerator& RandomGen, double* OutputPtr, std::size_t Num) -> RandStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return static_cast<RandStatusT>(curandGenerateUniformDouble(RandomGen, reinterpret_cast<double*>(OutputPtr), Num));
+  return static_cast<RandStatusT>(curandGenerateUniformDouble(RandomGen, OutputPtr, Num));
 #elif defined(FIRESTARTER_BUILD_HIP)
   return static_cast<RandStatusT>(hiprandGenerateUniformDouble(RandomGen, OutputPtr, Num));
 #else
@@ -631,7 +637,7 @@ auto randGenerateUniformDouble(RandGenerator& RandomGen, DevicePtr<double> Outpu
 /// \arg Num The number of unifrom random doubles.
 /// \returns The Error code returned from these calls.
 template <typename FloatPointType>
-auto generateUniform(RandGenerator& Generator, DevicePtr<FloatPointType> OutputPtr, size_t Num) -> RandStatusT {
+auto generateUniform(RandGenerator& Generator, FloatPointType* OutputPtr, size_t Num) -> RandStatusT {
   if constexpr (std::is_same_v<FloatPointType, float>) {
     return randGenerateUniform(Generator, OutputPtr, Num);
   } else if constexpr (std::is_same_v<FloatPointType, double>) {
@@ -645,7 +651,7 @@ auto generateUniform(RandGenerator& Generator, DevicePtr<FloatPointType> OutputP
 /// hiprandDestroyGenerator.
 /// \arg RandomGen The reference to the random generation which shoule be destroyed.
 /// \returns The Error code returned from these calls.
-auto randDestroyGenerator(RandGenerator& RandomGen) -> RandStatusT {
+inline auto randDestroyGenerator(RandGenerator& RandomGen) -> RandStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return static_cast<RandStatusT>(curandDestroyGenerator(RandomGen));
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -662,8 +668,7 @@ auto randDestroyGenerator(RandGenerator& RandomGen) -> RandStatusT {
 /// \arg Size The number of bytes to copy.
 /// \returns The Error code returned from these calls.
 template <typename FloatPointType>
-auto memcpyDtoD(DevicePtr<FloatPointType> DestinationPtr, DevicePtr<FloatPointType> SourcePtr, std::size_t Size)
-    -> CUResultOrHipErrorT {
+auto memcpyDtoD(FloatPointType* DestinationPtr, FloatPointType* SourcePtr, std::size_t Size) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return static_cast<CUResultOrHipErrorT>(cuMemcpyDtoD(DestinationPtr, SourcePtr, Size));
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -678,7 +683,7 @@ auto memcpyDtoD(DevicePtr<FloatPointType> DestinationPtr, DevicePtr<FloatPointTy
 
 /// Block until the current device finished. Wrapper to cudaDeviceSynchronize or hipcudaDeviceSynchronize.
 /// \returns The Error code returned from these calls.
-auto deviceSynchronize() -> ErrorT {
+inline auto deviceSynchronize() -> ErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
   return static_cast<ErrorT>(cudaDeviceSynchronize());
 #elif defined(FIRESTARTER_BUILD_HIP)
@@ -710,14 +715,13 @@ auto deviceSynchronize() -> ErrorT {
 /// \returns The Error code returned from these calls.
 template <typename FloatPointType>
 auto gemm(BlasHandle Handle, BlasOperation Transa, BlasOperation Transb, int M, int N, int K,
-          const FloatPointType& Alpha, const DevicePtr<FloatPointType> A, int Lda, const DevicePtr<FloatPointType> B,
-          int Ldb, const FloatPointType& Beta, DevicePtr<FloatPointType> C, int Ldc) -> BlasStatusT {
+          const FloatPointType& Alpha, const FloatPointType* A, int Lda, const FloatPointType* B, int Ldb,
+          const FloatPointType& Beta, FloatPointType* C, int Ldc) -> BlasStatusT {
   if constexpr (std::is_same_v<FloatPointType, float>) {
 #ifdef FIRESTARTER_BUILD_CUDA
-    return static_cast<BlasStatusT>(
-        cublasSgemm(Handle, static_cast<BlasOperationT>(Transa), static_cast<BlasOperationT>(Transb), M, N, K, &Alpha,
-                    reinterpret_cast<const float*>(A), Lda, reinterpret_cast<const float*>(B), Ldb, &Beta,
-                    reinterpret_cast<float*>(C), Ldc));
+    return static_cast<BlasStatusT>(cublasSgemm(Handle, static_cast<BlasOperationT>(Transa),
+                                                static_cast<BlasOperationT>(Transb), M, N, K, &Alpha, A, Lda, B, Ldb,
+                                                &Beta, C, Ldc));
 #elif defined(FIRESTARTER_BUILD_HIP)
     return static_cast<BlasStatusT>(hipblasSgemm(Handle, static_cast<BlasOperationT>(Transa),
                                                  static_cast<BlasOperationT>(Transb), M, N, K, &Alpha, A, Lda, B, Ldb,
@@ -725,10 +729,9 @@ auto gemm(BlasHandle Handle, BlasOperation Transa, BlasOperation Transb, int M,
 #endif
   } else if constexpr (std::is_same_v<FloatPointType, double>) {
 #ifdef FIRESTARTER_BUILD_CUDA
-    return static_cast<BlasStatusT>(
-        cublasDgemm(Handle, static_cast<BlasOperationT>(Transa), static_cast<BlasOperationT>(Transb), M, N, K, &Alpha,
-                    reinterpret_cast<const double*>(A), Lda, reinterpret_cast<const double*>(B), Ldb, &Beta,
-                    reinterpret_cast<double*>(C), Ldc));
+    return static_cast<BlasStatusT>(cublasDgemm(Handle, static_cast<BlasOperationT>(Transa),
+                                                static_cast<BlasOperationT>(Transb), M, N, K, &Alpha, A, Lda, B, Ldb,
+                                                &Beta, C, Ldc));
 #elif defined(FIRESTARTER_BUILD_HIP)
     return static_cast<BlasStatusT>(hipblasDgemm(Handle, static_cast<BlasOperationT>(Transa),
                                                  static_cast<BlasOperationT>(Transb), M, N, K, &Alpha, A, Lda, B, Ldb,
diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index 60e2f338..89cff2cf 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -67,8 +67,8 @@ class Environment {
 private:
   uint64_t RequestedNumThreads = 0;
 
+  // TODO(Issue #74): Use hwloc for cpu thread affinity.
 #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
-  // TODO: replace these functions with the builtins one from hwlocom hwloc
   static auto cpuAllowed(unsigned Id) -> int;
   static auto cpuSet(unsigned Id) -> int;
   void addCpuSet(unsigned Cpu, cpu_set_t& Mask) const;
diff --git a/src/firestarter/Cuda/Cuda.cpp b/src/firestarter/Cuda/Cuda.cpp
index 6804e2a3..6ce974f1 100644
--- a/src/firestarter/Cuda/Cuda.cpp
+++ b/src/firestarter/Cuda/Cuda.cpp
@@ -1,6 +1,6 @@
 /******************************************************************************
  * FIRESTARTER - A Processor Stress Test Utility
- * Copyright (C) 2020-2023 TU Dresden, Center for Information Services and High
+ * Copyright (C) 2020-2024 TU Dresden, Center for Information Services and High
  * Performance Computing
  *
  * This program is free software: you can redistribute it and/or modify
@@ -19,14 +19,13 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-/* CUDA error checking based on CudaWrapper.h
- * https://github.com/ashwin/gDel3D/blob/master/GDelFlipping/src/gDel3D/GPU/CudaWrapper.h
- *
+/******************************************************************************
  * inspired by gpu_burn
  * http://wili.cc/blog/gpu-burn.html
  *****************************************************************************/
 
 #include <atomic>
+#include <cstddef>
 #include <firestarter/Cuda/Cuda.hpp>
 #include <firestarter/Cuda/CudaHipCompat.hpp>
 #include <firestarter/LoadWorkerData.hpp>
@@ -37,10 +36,10 @@ namespace firestarter::cuda {
 
 constexpr const int Seed = 123;
 
-static auto roundUp(int NumToRound, int Multiple) -> int {
-  if (Multiple == 0) {
-    return NumToRound;
-  }
+namespace {
+
+template <std::size_t Multiple> auto roundUp(int NumToRound) -> int {
+  static_assert(Multiple != 0, "Multiple may not be zero.");
 
   const int Remainder = NumToRound % Multiple;
   if (Remainder == 0) {
@@ -57,7 +56,7 @@ static auto roundUp(int NumToRound, int Multiple) -> int {
 /// \arg UseDouble The input that specifies either single precision, double precision or automatic selection.
 /// \arg Properties The device properties.
 /// \return The selected precision, either 0 or 1 for float or double respectively.
-static auto getPrecision(int UseDouble, const compat::DeviceProperties& Properties) -> int {
+auto getPrecision(int UseDouble, const compat::DeviceProperties& Properties) -> int {
 #if (CUDART_VERSION >= 8000)
   // read precision ratio (dp/sp) of GPU to choose the right variant for maximum
   // workload
@@ -80,11 +79,12 @@ static auto getPrecision(int UseDouble, const compat::DeviceProperties& Properti
 #endif
 }
 
-static auto getPrecision(int DeviceIndex, int UseDouble) -> int {
-  size_t MemoryAvail;
-  size_t MemoryTotal;
+auto getPrecision(int DeviceIndex, int UseDouble) -> int {
+  std::size_t MemoryAvail{};
+  std::size_t MemoryTotal{};
   compat::DeviceProperties Properties;
 
+  // NOLINTNEXTLINE(readability-qualified-auto)
   auto StreamOrContext = compat::createContextOrStream(DeviceIndex);
 
   compat::accellSafeCall(compat::memGetInfo(MemoryAvail, MemoryTotal), __FILE__, __LINE__, DeviceIndex);
@@ -92,7 +92,7 @@ static auto getPrecision(int DeviceIndex, int UseDouble) -> int {
 
   UseDouble = getPrecision(UseDouble, Properties);
 
-  bool DoubleNotSupported =
+  const bool DoubleNotSupported =
 #ifdef FIRESTARTER_BUILD_CUDA
       Properties.major <= 1 && Properties.minor <= 2;
 #else
@@ -119,26 +119,21 @@ static auto getPrecision(int DeviceIndex, int UseDouble) -> int {
 }
 
 // GPU index. Used to pin this thread to the GPU.
+// MatrixSize is the dimension (number of rows and columns) of one square matrix.
 template <typename FloatingPointType>
-static void createLoad(std::condition_variable& WaitForInitCv, std::mutex& WaitForInitCvMutex, int DeviceIndex,
-                       std::atomic<int>& InitCount, const volatile firestarter::LoadThreadWorkType& LoadVar,
-                       unsigned MatrixSize) {
+void createLoad(std::condition_variable& WaitForInitCv, std::mutex& WaitForInitCvMutex, int DeviceIndex,
+                std::atomic<int>& InitCount, const volatile firestarter::LoadThreadWorkType& LoadVar,
+                unsigned MatrixSize) {
   static_assert(std::is_same_v<FloatingPointType, float> || std::is_same_v<FloatingPointType, double>,
                 "create_load<FloatingPointType>: Template argument must be either float or double");
 
   firestarter::log::trace() << "Starting " << compat::AccelleratorString << " with given matrix size " << MatrixSize;
 
-  size_t SizeUse = 0;
-  if (MatrixSize > 0) {
-    SizeUse = MatrixSize;
-  }
-
-  size_t UseBytes;
-  size_t MemorySize;
   compat::DeviceProperties Properties;
-  compat::BlasHandle Blas;
+  compat::BlasHandle Blas{};
   // reserving the GPU and initializing cublas
 
+  // NOLINTNEXTLINE(readability-qualified-auto)
   auto StreamOrContext = compat::createContextOrStream(DeviceIndex);
 
   firestarter::log::trace() << "Create " << compat::AccelleratorString << " Blas on device nr. " << DeviceIndex;
@@ -149,66 +144,72 @@ static void createLoad(std::condition_variable& WaitForInitCv, std::mutex& WaitF
   compat::accellSafeCall(compat::getDeviceProperties(Properties, DeviceIndex), __FILE__, __LINE__, DeviceIndex);
 
   // getting information about the GPU memory
-  size_t MemoryAvail;
-  size_t MemoryTotal;
+  std::size_t MemoryAvail{};
+  std::size_t MemoryTotal{};
   compat::accellSafeCall(compat::memGetInfo(MemoryAvail, MemoryTotal), __FILE__, __LINE__, DeviceIndex);
   firestarter::log::trace() << "Get " << compat::AccelleratorString << " emory info on device nr. " << DeviceIndex
                             << ": " << MemoryAvail << " B avail. from " << MemoryTotal << " B total";
 
-  // defining memory pointers
-  compat::DevicePtr<FloatingPointType> ADataPtr;
-  compat::DevicePtr<FloatingPointType> BDataPtr;
-  compat::DevicePtr<FloatingPointType> CDataPtr;
-
-  // check if the user has not set a matrix OR has set a too big matrixsite and
-  // if this is true: set a good matrixsize
-  if (!SizeUse || ((SizeUse * SizeUse * sizeof(FloatingPointType) * 3 > MemoryAvail))) {
-    SizeUse = roundUp((int)(0.8 * sqrt(((MemoryAvail) / (sizeof(FloatingPointType) * 3)))),
-                      1024); // a multiple of 1024 works always well
+  // Defining memory pointers. ADataPtr and BDataPtr will point to a square matrix. CDataPtr may be one or multiple
+  // square matrices.
+  FloatingPointType* ADataPtr{};
+  FloatingPointType* BDataPtr{};
+  FloatingPointType* CDataPtr{};
+
+  // If the matrix size is not set or three square matrices with dim size of MatrixSize do not fit into the available
+  // memory, select the size so that 3 square matrices will fit into the available device memory where the dim size
+  // is a multiple of 1024. There may be edge cases with small device memory that result in matrices that are not
+  // multiples of 1024.
+  std::size_t MemorySize = sizeof(FloatingPointType) * MatrixSize * MatrixSize;
+  if (!MatrixSize || (MemorySize * 3 > MemoryAvail)) {
+    // a multiple of 1024 works always well
+    MatrixSize = roundUp<1024>(0.8 * std::sqrt(MemoryAvail / sizeof(FloatingPointType) / 3));
+    MemorySize = sizeof(FloatingPointType) * MatrixSize * MatrixSize;
   }
+
   firestarter::log::trace() << "Set " << compat::AccelleratorString << " matrix size: " << MatrixSize;
-  UseBytes = (size_t)((FloatingPointType)MemoryAvail);
-  MemorySize = sizeof(FloatingPointType) * SizeUse * SizeUse;
-  int Iterations = (UseBytes - 2 * MemorySize) / MemorySize; // = 1;
+  // Calculate the number of C matrices based on the available memory and the matrix size in bytes.
+  const auto Iterations = (MemoryAvail - 2 * MemorySize) / MemorySize;
+  // The used memory in bytes is one matrix size each for the matrices A and B plus one for every matrix in C.
+  const auto UseBytes = (2 + Iterations) * MemorySize;
 
   firestarter::log::trace() << "Allocating " << compat::AccelleratorString << " memory on device nr. " << DeviceIndex;
 
   // allocating memory on the GPU
-  compat::accellSafeCall(compat::malloc<FloatingPointType>(ADataPtr, MemorySize), __FILE__, __LINE__, DeviceIndex);
-  compat::accellSafeCall(compat::malloc<FloatingPointType>(BDataPtr, MemorySize), __FILE__, __LINE__, DeviceIndex);
-  compat::accellSafeCall(compat::malloc<FloatingPointType>(CDataPtr, Iterations * MemorySize), __FILE__, __LINE__,
+  compat::accellSafeCall(compat::malloc<FloatingPointType>(&ADataPtr, MemorySize), __FILE__, __LINE__, DeviceIndex);
+  compat::accellSafeCall(compat::malloc<FloatingPointType>(&BDataPtr, MemorySize), __FILE__, __LINE__, DeviceIndex);
+  compat::accellSafeCall(compat::malloc<FloatingPointType>(&CDataPtr, Iterations * MemorySize), __FILE__, __LINE__,
                          DeviceIndex);
 
   firestarter::log::trace() << "Allocated " << compat::AccelleratorString << " memory on device nr. " << DeviceIndex
-                            << ". A: " << ADataPtr << "(Size: " << MemorySize << "B)"
+                            << ". A: " << ADataPtr << " (Size: " << MemorySize << "B)"
                             << "\n";
-
   firestarter::log::trace() << "Allocated " << compat::AccelleratorString << " memory on device nr. " << DeviceIndex
-                            << ". B: " << BDataPtr << "(Size: " << MemorySize << "B)"
+                            << ". B: " << BDataPtr << " (Size: " << MemorySize << "B)"
                             << "\n";
   firestarter::log::trace() << "Allocated " << compat::AccelleratorString << " memory on device nr. " << DeviceIndex
-                            << ". C: " << CDataPtr << "(Size: " << Iterations * MemorySize << "B)"
+                            << ". C: " << CDataPtr << " (Size: " << Iterations * MemorySize << "B)"
                             << "\n";
 
   firestarter::log::trace() << "Initializing " << compat::AccelleratorString << " matrices a, b on device nr. "
-                            << DeviceIndex << ". Using " << SizeUse * SizeUse << " elements of size "
+                            << DeviceIndex << ". Using " << MatrixSize * MatrixSize << " elements of size "
                             << sizeof(FloatingPointType) << " Byte";
   // initialize matrix A and B on the GPU with random values
   {
-    compat::RandGenerator RandomGen;
+    compat::RandGenerator RandomGen{};
     compat::accellSafeCall(compat::randCreateGeneratorPseudoRandom(RandomGen), __FILE__, __LINE__, DeviceIndex);
     compat::accellSafeCall(compat::randSetPseudoRandomGeneratorSeed(RandomGen, Seed), __FILE__, __LINE__, DeviceIndex);
-    compat::accellSafeCall(compat::generateUniform<FloatingPointType>(RandomGen, ADataPtr, SizeUse * SizeUse), __FILE__,
-                           __LINE__, DeviceIndex);
-    compat::accellSafeCall(compat::generateUniform<FloatingPointType>(RandomGen, BDataPtr, SizeUse * SizeUse), __FILE__,
-                           __LINE__, DeviceIndex);
+    compat::accellSafeCall(compat::generateUniform<FloatingPointType>(RandomGen, ADataPtr, MatrixSize * MatrixSize),
+                           __FILE__, __LINE__, DeviceIndex);
+    compat::accellSafeCall(compat::generateUniform<FloatingPointType>(RandomGen, BDataPtr, MatrixSize * MatrixSize),
+                           __FILE__, __LINE__, DeviceIndex);
     compat::accellSafeCall(compat::randDestroyGenerator(RandomGen), __FILE__, __LINE__, DeviceIndex);
   }
 
   // initialize c_data_ptr with copies of A
-  for (int I = 0; I < Iterations; I++) {
-    auto DestinationPtr =
-        CDataPtr + (size_t)(I * SizeUse * SizeUse * (float)sizeof(FloatingPointType) / (float)sizeof(CDataPtr));
+  for (std::size_t I = 0; I < Iterations; I++) {
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+    auto DestinationPtr = CDataPtr + (I * MatrixSize * MatrixSize);
     firestarter::log::trace() << "Initializing " << compat::AccelleratorString << " matrix c-" << I << " by copying "
                               << MemorySize << " byte from " << ADataPtr << " to " << DestinationPtr << "\n";
     compat::accellSafeCall(compat::memcpyDtoD<FloatingPointType>(DestinationPtr, ADataPtr, MemorySize), __FILE__,
@@ -219,12 +220,12 @@ static void createLoad(std::condition_variable& WaitForInitCv, std::mutex& WaitF
   {
     const std::lock_guard<std::mutex> Lk(WaitForInitCvMutex);
 
-    auto ToiB = [](const size_t Val) { return Val / 1024 / 1024; };
+    auto ToMiB = [](const size_t Val) { return Val / 1024 / 1024; };
     firestarter::log::info() << "   GPU " << DeviceIndex << "\n"
                              << "    name:           " << Properties.name << "\n"
-                             << "    memory:         " << ToiB(MemoryAvail) << "/" << ToiB(MemoryTotal)
-                             << " iB available (using " << ToiB(UseBytes) << " iB)\n"
-                             << "    matrix size:    " << SizeUse << "\n"
+                             << "    memory:         " << ToMiB(MemoryAvail) << "/" << ToMiB(MemoryTotal)
+                             << " iB available (using " << ToMiB(UseBytes) << " iB)\n"
+                             << "    matrix size:    " << MatrixSize << "\n"
                              << "    used precision: "
                              << ((sizeof(FloatingPointType) == sizeof(double)) ? "double" : "single");
 
@@ -235,14 +236,15 @@ static void createLoad(std::condition_variable& WaitForInitCv, std::mutex& WaitF
   const FloatingPointType Alpha = 1.0;
   const FloatingPointType Beta = 0.0;
 
-  const int SizeUseI = SizeUse;
   // actual stress begins here
   while (LoadVar != firestarter::LoadThreadWorkType::LoadStop) {
-    for (int I = 0; I < Iterations; I++) {
+    for (std::size_t I = 0; I < Iterations; I++) {
+      // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
+      auto CSectionPtr = CDataPtr + (I * MatrixSize * MatrixSize);
       compat::accellSafeCall(compat::gemm<FloatingPointType>(Blas, compat::BlasOperation::BLAS_OP_N,
-                                                             compat::BlasOperation::BLAS_OP_N, SizeUseI, SizeUseI,
-                                                             SizeUseI, Alpha, ADataPtr, SizeUseI, BDataPtr, SizeUseI,
-                                                             Beta, CDataPtr + (I * SizeUse * SizeUse), SizeUseI),
+                                                             compat::BlasOperation::BLAS_OP_N, MatrixSize, MatrixSize,
+                                                             MatrixSize, Alpha, ADataPtr, MatrixSize, BDataPtr,
+                                                             MatrixSize, Beta, CSectionPtr, MatrixSize),
                              __FILE__, __LINE__, DeviceIndex);
       compat::accellSafeCall(compat::deviceSynchronize(), __FILE__, __LINE__, DeviceIndex);
     }
@@ -257,6 +259,8 @@ static void createLoad(std::condition_variable& WaitForInitCv, std::mutex& WaitF
   compat::accellSafeCall(compat::destroyContextOrStream(StreamOrContext), __FILE__, __LINE__, DeviceIndex);
 }
 
+}; // namespace
+
 Cuda::Cuda(const volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize,
            int Gpus) {
   std::condition_variable WaitForInitCv;
@@ -279,12 +283,12 @@ void Cuda::initGpus(std::condition_variable& WaitForInitCv, const volatile fires
   if (Gpus) {
     compat::accellSafeCall(compat::init(0), __FILE__, __LINE__);
 
-    int DevCount;
+    int DevCount{};
     compat::accellSafeCall(compat::getDeviceCount(DevCount), __FILE__, __LINE__);
 
     if (DevCount) {
       std::atomic<int> InitCount = 0;
-      int UseDoubleConverted;
+      int UseDoubleConverted{};
 
       if (UseFloat) {
         UseDoubleConverted = 0;
diff --git a/src/firestarter/OneAPI/OneAPI.cpp b/src/firestarter/OneAPI/OneAPI.cpp
index a78efcad..9e38843b 100644
--- a/src/firestarter/OneAPI/OneAPI.cpp
+++ b/src/firestarter/OneAPI/OneAPI.cpp
@@ -35,10 +35,17 @@
 
 namespace firestarter::oneapi {
 
-/* Random number generation helpers */
-template <typename T> void generate_random_data(size_t elems, T* v) {
-  for (size_t i = 0; i < elems; i++)
-    v[i] = double(std::rand()) / RAND_MAX;
+/// Helper function to generate random floating point values between 0 and 1 in an array.
+/// \tparam FloatingPointType The type of floating point value of the array. Either float or double.
+/// \arg NumberOfElems The number of elements of the array.
+/// \arg Array The array of floating point values which should be initialized with random data between 0 and 1.
+template <typename FloatingPointType> void fillArrayWithRandomFloats(size_t NumberOfElems, FloatingPointType& Array) {
+  static_assert(std::is_same_v<FloatingPointType, float> || std::is_same_v<FloatingPointType, double>,
+                "fillArrayWithRandomFloats<FloatingPointType>: Template argument must be either float or double");
+
+  for (size_t i = 0; i < NumberOfElems; i++) {
+    Array[i] = static_cast<FloatingPointType>(std::rand()) / RAND_MAX;
+  }
 }
 
 template <typename T> void replicate_data(sycl::queue& Q, T* dst, size_t dst_elems, const T* src, size_t src_elems) {
@@ -69,6 +76,7 @@ static int get_precision(int device_index, int useDouble) {
     return -1;
   }
   // Choose a platform based on specific criteria (e.g., device type)
+  // TODO(Issue #75): We may select the incorrect platform with gpu devices of the wrong vendor/type.
   sycl::platform chosenPlatform;
   auto nr_gpus = 0;
   for (const auto& platform : platforms) {
@@ -114,6 +122,8 @@ static int round_up(int num_to_round, int multiple) {
 }
 
 // GPU index. Used to pin this thread to the GPU.
+// The main difference to the CUDA/HIP version is that we do not run multiple iterations of C=A*B, just one single
+// iteration.
 template <typename T>
 static void create_load(std::condition_variable& waitForInitCv, std::mutex& waitForInitCvMutex, int device_index,
                         std::atomic<int>& initCount, const volatile firestarter::LoadThreadWorkType& LoadVar,
@@ -188,14 +198,14 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait
   /* Allocate A/B/C matrices */
 
   firestarter::log::trace() << "Allocating memory on device nr. " << device_index;
-  auto A = malloc_device<T>(size_use * size_use, device_queue);
-  auto B = malloc_device<T>(size_use * size_use, device_queue);
-  auto C = malloc_device<T>(size_use * size_use, device_queue);
+  auto* A = sycl::malloc_device<T>(size_use * size_use, device_queue);
+  auto* B = sycl::malloc_device<T>(size_use * size_use, device_queue);
+  auto* C = sycl::malloc_device<T>(size_use * size_use, device_queue);
 
   /* Create 64 MB random data on Host */
   constexpr int rd_size = 1024 * 1024 * 64;
   auto random_data = malloc_host<T>(rd_size, device_queue);
-  generate_random_data(rd_size, random_data);
+  fillArrayWithRandomFloats(rd_size, *random_data);
 
   firestarter::log::trace() << "Copy memory to device nr. " << device_index;
   /* fill A and B with random data */
@@ -265,6 +275,7 @@ void OneAPI::initGpus(std::condition_variable& WaitForInitCv, const volatile fir
     }
 
     // Choose a platform based on specific criteria (e.g., device type)
+    // TODO(Issue #75): We may select the incorrect platform with gpu devices of the wrong vendor/type.
     auto DevCount = 0;
     for (const auto& Platform : Platforms) {
       auto Devices = Platform.get_devices();

From 6b50a282c8b7042388486949106dbd18a15b720b Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 24 Oct 2024 17:08:10 +0200
Subject: [PATCH 082/167] fix cuda compat layer

---
 include/firestarter/Cuda/CudaHipCompat.hpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp
index c3715640..a82632bd 100644
--- a/include/firestarter/Cuda/CudaHipCompat.hpp
+++ b/include/firestarter/Cuda/CudaHipCompat.hpp
@@ -533,7 +533,7 @@ inline auto memGetInfo(std::size_t& MemoryAvail, std::size_t& MemoryTotal) -> CU
 template <typename FloatingPointType>
 auto malloc(FloatingPointType** Ptr, std::size_t MemorySize) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return static_cast<CUResultOrHipErrorT>(cuMemAlloc(static_cast<void**>(Ptr), MemorySize));
+  return static_cast<CUResultOrHipErrorT>(cuMemAlloc(&static_cast<void*>(*Ptr), MemorySize));
 #elif defined(FIRESTARTER_BUILD_HIP)
   return static_cast<CUResultOrHipErrorT>(hipMalloc(Ptr, MemorySize));
 #else
@@ -550,7 +550,7 @@ auto malloc(FloatingPointType** Ptr, std::size_t MemorySize) -> CUResultOrHipErr
 /// \returns The Error code returned from these calls.
 template <typename FloatingPointType> auto free(FloatingPointType* Ptr) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return static_cast<CUResultOrHipErrorT>(cuMemFree(static_cast<void**>(Ptr)));
+  return static_cast<CUResultOrHipErrorT>(cuMemFree(static_cast<void*>(Ptr)));
 #elif defined(FIRESTARTER_BUILD_HIP)
   return static_cast<CUResultOrHipErrorT>(hipFree(Ptr));
 #else
@@ -670,7 +670,8 @@ inline auto randDestroyGenerator(RandGenerator& RandomGen) -> RandStatusT {
 template <typename FloatPointType>
 auto memcpyDtoD(FloatPointType* DestinationPtr, FloatPointType* SourcePtr, std::size_t Size) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return static_cast<CUResultOrHipErrorT>(cuMemcpyDtoD(DestinationPtr, SourcePtr, Size));
+  return static_cast<CUResultOrHipErrorT>(
+      cuMemcpyDtoD(reinterpret_cast<CUdeviceptr>(DestinationPtr), reinterpret_cast<CUdeviceptr>(SourcePtr), Size));
 #elif defined(FIRESTARTER_BUILD_HIP)
   return static_cast<CUResultOrHipErrorT>(hipMemcpyDtoD(DestinationPtr, SourcePtr, Size));
 #else

From 3e13ca0d83a8d9e82d9a762ef566c7d8ae6ba3fb Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 24 Oct 2024 17:15:17 +0200
Subject: [PATCH 083/167] remove redundant reinterpret cast

---
 include/firestarter/Cuda/CudaHipCompat.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp
index a82632bd..b0adec76 100644
--- a/include/firestarter/Cuda/CudaHipCompat.hpp
+++ b/include/firestarter/Cuda/CudaHipCompat.hpp
@@ -599,7 +599,7 @@ inline auto randSetPseudoRandomGeneratorSeed(RandGenerator& RandomGen, int Seed)
 /// \returns The Error code returned from these calls.
 inline auto randGenerateUniform(RandGenerator& RandomGen, float* OutputPtr, std::size_t Num) -> RandStatusT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return static_cast<RandStatusT>(curandGenerateUniform(RandomGen, reinterpret_cast<float*>(OutputPtr), Num));
+  return static_cast<RandStatusT>(curandGenerateUniform(RandomGen, OutputPtr, Num));
 #elif defined(FIRESTARTER_BUILD_HIP)
   return static_cast<RandStatusT>(hiprandGenerateUniform(RandomGen, OutputPtr, Num));
 #else

From 6e574604ff5a7b240cc874b304244e07052ad9e2 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 24 Oct 2024 17:18:22 +0200
Subject: [PATCH 084/167] fix cast in cuda compat malloc function

---
 include/firestarter/Cuda/CudaHipCompat.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp
index b0adec76..8ffa98d3 100644
--- a/include/firestarter/Cuda/CudaHipCompat.hpp
+++ b/include/firestarter/Cuda/CudaHipCompat.hpp
@@ -533,7 +533,7 @@ inline auto memGetInfo(std::size_t& MemoryAvail, std::size_t& MemoryTotal) -> CU
 template <typename FloatingPointType>
 auto malloc(FloatingPointType** Ptr, std::size_t MemorySize) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return static_cast<CUResultOrHipErrorT>(cuMemAlloc(&static_cast<void*>(*Ptr), MemorySize));
+  return static_cast<CUResultOrHipErrorT>(cuMemAlloc(reinterpret_cast<void**>(Ptr), MemorySize));
 #elif defined(FIRESTARTER_BUILD_HIP)
   return static_cast<CUResultOrHipErrorT>(hipMalloc(Ptr, MemorySize));
 #else

From a0591db445fb97dfbaf24870a9b374b8c04d9b4c Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 24 Oct 2024 17:26:49 +0200
Subject: [PATCH 085/167] fix cast in cuda compat malloc/free functions

---
 include/firestarter/Cuda/CudaHipCompat.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp
index 8ffa98d3..a12dd56a 100644
--- a/include/firestarter/Cuda/CudaHipCompat.hpp
+++ b/include/firestarter/Cuda/CudaHipCompat.hpp
@@ -533,7 +533,7 @@ inline auto memGetInfo(std::size_t& MemoryAvail, std::size_t& MemoryTotal) -> CU
 template <typename FloatingPointType>
 auto malloc(FloatingPointType** Ptr, std::size_t MemorySize) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return static_cast<CUResultOrHipErrorT>(cuMemAlloc(reinterpret_cast<void**>(Ptr), MemorySize));
+  return static_cast<CUResultOrHipErrorT>(cuMemAlloc(reinterpret_cast<CUdeviceptr*>(Ptr), MemorySize));
 #elif defined(FIRESTARTER_BUILD_HIP)
   return static_cast<CUResultOrHipErrorT>(hipMalloc(Ptr, MemorySize));
 #else
@@ -550,7 +550,7 @@ auto malloc(FloatingPointType** Ptr, std::size_t MemorySize) -> CUResultOrHipErr
 /// \returns The Error code returned from these calls.
 template <typename FloatingPointType> auto free(FloatingPointType* Ptr) -> CUResultOrHipErrorT {
 #ifdef FIRESTARTER_BUILD_CUDA
-  return static_cast<CUResultOrHipErrorT>(cuMemFree(static_cast<void*>(Ptr)));
+  return static_cast<CUResultOrHipErrorT>(cuMemFree(reinterpret_cast<CUdeviceptr>(Ptr)));
 #elif defined(FIRESTARTER_BUILD_HIP)
   return static_cast<CUResultOrHipErrorT>(hipFree(Ptr));
 #else

From 0921c1fa5ba9a7dd09e57c24bfe2b9577e795f67 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 24 Oct 2024 19:29:19 +0200
Subject: [PATCH 086/167] fix oneapi refactor

---
 src/firestarter/OneAPI/OneAPI.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/firestarter/OneAPI/OneAPI.cpp b/src/firestarter/OneAPI/OneAPI.cpp
index 9e38843b..bc27e18a 100644
--- a/src/firestarter/OneAPI/OneAPI.cpp
+++ b/src/firestarter/OneAPI/OneAPI.cpp
@@ -39,7 +39,7 @@ namespace firestarter::oneapi {
 /// \targ FloatingPointType The type of floating point value of the array. Either float or double.
 /// \arg NumberOfElems The number of elements of the array.
 /// \arg Array The array of floating point values which should be initilized with random data between 0 and 1.
-template <typename FloatingPointType> void fillArrayWithRandomFloats(size_t NumberOfElems, FloatingPointType& Array) {
+template <typename FloatingPointType> void fillArrayWithRandomFloats(size_t NumberOfElems, FloatingPointType* Array) {
   static_assert(std::is_same_v<FloatingPointType, float> || std::is_same_v<FloatingPointType, double>,
                 "fillArrayWithRandomFloats<FloatingPointType>: Template argument must be either float or double");
 
@@ -204,8 +204,8 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait
 
   /* Create 64 MB random data on Host */
   constexpr int rd_size = 1024 * 1024 * 64;
-  auto random_data = malloc_host<T>(rd_size, device_queue);
-  fillArrayWithRandomFloats(rd_size, *random_data);
+  auto* random_data = malloc_host<T>(rd_size, device_queue);
+  fillArrayWithRandomFloats(rd_size, random_data);
 
   firestarter::log::trace() << "Copy memory to device nr. " << device_index;
   /* fill A and B with random data */

From a77e4297427edf796b2de37c073aaa7423d89524 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 24 Oct 2024 22:47:59 +0200
Subject: [PATCH 087/167] refactor oneapi code

---
 src/firestarter/OneAPI/OneAPI.cpp | 215 +++++++++++++++---------------
 1 file changed, 104 insertions(+), 111 deletions(-)

diff --git a/src/firestarter/OneAPI/OneAPI.cpp b/src/firestarter/OneAPI/OneAPI.cpp
index bc27e18a..ea156e69 100644
--- a/src/firestarter/OneAPI/OneAPI.cpp
+++ b/src/firestarter/OneAPI/OneAPI.cpp
@@ -35,6 +35,8 @@
 
 namespace firestarter::oneapi {
 
+namespace {
+
 /// Helper function to generate random floating point values between 0 and 1 in an array.
 /// \targ FloatingPointType The type of floating point value of the array. Either float or double.
 /// \arg NumberOfElems The number of elements of the array.
@@ -48,205 +50,196 @@ template <typename FloatingPointType> void fillArrayWithRandomFloats(size_t Numb
   }
 }
 
-template <typename T> void replicate_data(sycl::queue& Q, T* dst, size_t dst_elems, const T* src, size_t src_elems) {
-  firestarter::log::trace() << "replicate_data " << dst_elems << " elements from " << src << " to " << dst;
-  while (dst_elems > 0) {
-    auto copy_elems = std::min(dst_elems, src_elems);
-    Q.copy(src, dst, copy_elems);
-    dst += copy_elems;
-    dst_elems -= copy_elems;
+template <typename FloatingPointType>
+void replicateData(sycl::queue& Q, FloatingPointType* Dst, size_t DstElems, const FloatingPointType* Src,
+                   size_t SrcElems) {
+  static_assert(std::is_same_v<FloatingPointType, float> || std::is_same_v<FloatingPointType, double>,
+                "fillArrayWithRandomFloats<FloatingPointType>: Template argument must be either float or double");
+
+  firestarter::log::trace() << "replicateData<FloatingPointType> " << DstElems << " elements from " << Src << " to "
+                            << Dst;
+  while (DstElems > 0) {
+    auto copy_elems = std::min(DstElems, SrcElems);
+    Q.copy(Src, Dst, copy_elems);
+    Dst += copy_elems;
+    DstElems -= copy_elems;
   }
   Q.wait();
 }
 
-static int get_precision(int device_index, int useDouble) {
-
-  firestarter::log::trace() << "Checking useDouble " << useDouble;
+int getPrecision(int DeviceIndex, int UseDouble) {
+  firestarter::log::trace() << "Checking UseDouble " << UseDouble;
 
-  if (!useDouble) {
+  if (!UseDouble) {
     return 0;
   }
 
-  int supports_double = 0;
+  int SupportsDouble = 0;
 
-  auto platforms = sycl::platform::get_platforms();
+  auto Platforms = sycl::platform::get_platforms();
 
-  if (platforms.empty()) {
+  if (Platforms.empty()) {
     firestarter::log::warn() << "No SYCL platforms found.";
     return -1;
   }
   // Choose a platform based on specific criteria (e.g., device type)
   // TODO(Issue #75): We may select the incorrect platform with gpu devices of the wrong vendor/type.
-  sycl::platform chosenPlatform;
-  auto nr_gpus = 0;
-  for (const auto& platform : platforms) {
+  sycl::platform ChosenPlatform;
+  auto NbGpus = 0;
+  for (const auto& Platform : Platforms) {
     firestarter::log::trace() << "Checking SYCL platform " << platform.get_info<sycl::info::platform::name>();
-    auto devices = platform.get_devices();
-    nr_gpus = 0;
+    auto devices = Platform.get_devices();
+    NbGpus = 0;
     for (const auto& device : devices) {
       firestarter::log::trace() << "Checking SYCL device " << device.get_info<sycl::info::device::name>();
       if (device.is_gpu()) { // Choose GPU, you can use other criteria
         firestarter::log::trace() << " ... is GPU";
-        chosenPlatform = platform;
-        nr_gpus++;
+        ChosenPlatform = Platform;
+        NbGpus++;
       }
     }
   }
 
-  if (!nr_gpus) {
+  if (!NbGpus) {
     firestarter::log::warn() << "No suitable platform with GPU found.";
     return -1;
   }
   // Get a list of devices for the chosen platform
 
   firestarter::log::trace() << "Get support for double"
-                            << " on device nr. " << device_index;
-  auto devices = chosenPlatform.get_devices();
-  if (devices[device_index].has(sycl::aspect::fp64))
-    supports_double = 1;
+                            << " on device nr. " << DeviceIndex;
+  auto Devices = ChosenPlatform.get_devices();
+  if (Devices[DeviceIndex].has(sycl::aspect::fp64))
+    SupportsDouble = 1;
 
-  return supports_double;
+  return SupportsDouble;
 }
 
-static int round_up(int num_to_round, int multiple) {
-  if (multiple == 0) {
-    return num_to_round;
-  }
+template <std::size_t Multiple> auto roundUp(int NumToRound) -> int {
+  static_assert(Multiple != 0, "Multiple may not be zero.");
 
-  int remainder = num_to_round % multiple;
-  if (remainder == 0) {
-    return num_to_round;
+  const int Remainder = NumToRound % Multiple;
+  if (Remainder == 0) {
+    return NumToRound;
   }
 
-  return num_to_round + multiple - remainder;
+  return NumToRound + Multiple - Remainder;
 }
 
 // GPU index. Used to pin this thread to the GPU.
 // The main difference to the CUDA/HIP version is that we do not run multiple iterations of C=A*B, just one single
 // iteration.
-template <typename T>
-static void create_load(std::condition_variable& waitForInitCv, std::mutex& waitForInitCvMutex, int device_index,
-                        std::atomic<int>& initCount, const volatile firestarter::LoadThreadWorkType& LoadVar,
-                        unsigned matrixSize) {
-  static_assert(std::is_same<T, float>::value || std::is_same<T, double>::value,
-                "create_load<T>: Template argument T must be either float or double");
-
-  firestarter::log::trace() << "Starting OneAPI with given matrix size " << matrixSize;
-
-  size_t size_use = 0;
-  if (matrixSize > 0) {
-    size_use = matrixSize;
-  }
+template <typename FloatingPointType>
+void createLoad(std::condition_variable& WaitForInitCv, std::mutex& WaitForInitCvMutex, int DeviceIndex,
+                std::atomic<int>& InitCount, const volatile firestarter::LoadThreadWorkType& LoadVar,
+                unsigned MatrixSize) {
+  static_assert(std::is_same<FloatingPointType, float>::value || std::is_same<FloatingPointType, double>::value,
+                "createLoad<T>: Template argument T must be either float or double");
 
-  size_t use_bytes;
+  firestarter::log::trace() << "Starting OneAPI with given matrix size " << MatrixSize;
 
   // reserving the GPU and initializing
 
-  firestarter::log::trace() << "Getting device nr. " << device_index;
+  firestarter::log::trace() << "Getting device nr. " << DeviceIndex;
 
-  auto platforms = sycl::platform::get_platforms();
+  auto Platforms = sycl::platform::get_platforms();
 
-  if (platforms.empty()) {
+  if (Platforms.empty()) {
     firestarter::log::warn() << "No SYCL platforms found.";
     return;
   }
 
   // Choose a platform based on specific criteria (e.g., device type)
-  sycl::platform chosenPlatform;
-  auto nr_gpus = 0;
-  for (const auto& platform : platforms) {
-    auto devices = platform.get_devices();
-    nr_gpus = 0;
-    for (const auto& device : devices) {
-      if (device.is_gpu()) { // Choose GPU, you can use other criteria
-        chosenPlatform = platform;
-        nr_gpus++;
+  sycl::platform ChosenPlatform;
+  auto NbGpus = 0;
+  for (const auto& Platform : Platforms) {
+    auto Devices = Platform.get_devices();
+    NbGpus = 0;
+    for (const auto& Device : Devices) {
+      if (Device.is_gpu()) { // Choose GPU, you can use other criteria
+        ChosenPlatform = Platform;
+        NbGpus++;
       }
     }
   }
 
-  if (!nr_gpus) {
+  if (!NbGpus) {
     firestarter::log::warn() << "No suitable platform with GPU found.";
     return;
   }
 
   // Get a list of devices for the chosen platform
-  auto devices = chosenPlatform.get_devices();
+  auto Devices = ChosenPlatform.get_devices();
 
-  firestarter::log::trace() << "Creating SYCL queue for computation on device nr. " << device_index;
-  auto chosenDevice = devices[device_index];
-  sycl::queue device_queue(chosenDevice);
+  firestarter::log::trace() << "Creating SYCL queue for computation on device nr. " << DeviceIndex;
+  auto ChosenDevice = Devices[DeviceIndex];
+  auto DeviceQueue = sycl::queue(chosenDevice);
 
-  firestarter::log::trace() << "Get memory size on device nr. " << device_index;
+  firestarter::log::trace() << "Get memory size on device nr. " << DeviceIndex;
 
   // getting information about the GPU memory
-  size_t memory_total = devices[device_index].get_info<sycl::info::device::global_mem_size>();
+  size_t MemoryTotal = Devices[DeviceIndex].get_info<sycl::info::device::global_mem_size>();
 
-  firestarter::log::trace() << "Get Memory info on device nr. " << device_index << ": has " << memory_total
+  firestarter::log::trace() << "Get Memory info on device nr. " << DeviceIndex << ": has " << MemoryTotal
                             << " B global memory";
 
-  // check if the user has not set a matrix OR has set a too big matrixsite and
-  // if this is true: set a good matrixsize
-  if (!size_use || ((size_use * size_use * sizeof(T) * 3 > memory_total))) {
-    size_use = round_up((int)(0.8 * sqrt(((memory_total) / (sizeof(T) * 3)))),
-                        1024); // a multiple of 1024 works always well
+  // If the matrix size is not set or three square matricies with dim size of SizeUse do not fit into the available
+  // memory, select the size so that 3 square matricies will fit into the available device memory where the dim size
+  // is a multiple of 1024.
+  std::size_t MemorySize = sizeof(FloatingPointType) * MatrixSize * MatrixSize;
+  if (!MatrixSize || (MemorySize * 3 > MemoryTotal)) {
+    // a multiple of 1024 works always well
+    MatrixSize = roundUp<1024>(0.8 * std::sqrt(MemoryTotal / sizeof(FloatingPointType) / 3));
+    MemorySize = sizeof(FloatingPointType) * MatrixSize * MatrixSize;
   }
 
-  firestarter::log::trace() << "Set OneAPI matrix size in B: " << size_use;
-  use_bytes = sizeof(T) * size_use * size_use * 3;
+  firestarter::log::trace() << "Set OneAPI matrix size in B: " << MatrixSize;
 
   /* Allocate A/B/C matrices */
 
-  firestarter::log::trace() << "Allocating memory on device nr. " << device_index;
-  auto* A = sycl::malloc_device<T>(size_use * size_use, device_queue);
-  auto* B = sycl::malloc_device<T>(size_use * size_use, device_queue);
-  auto* C = sycl::malloc_device<T>(size_use * size_use, device_queue);
+  firestarter::log::trace() << "Allocating memory on device nr. " << DeviceIndex;
+  auto* A = sycl::malloc_device<FloatingPointType>(MatrixSize * MatrixSize, DeviceQueue);
+  auto* B = sycl::malloc_device<FloatingPointType>(MatrixSize * MatrixSize, DeviceQueue);
+  auto* C = sycl::malloc_device<FloatingPointType>(MatrixSize * MatrixSize, DeviceQueue);
 
   /* Create 64 MB random data on Host */
-  constexpr int rd_size = 1024 * 1024 * 64;
-  auto* random_data = malloc_host<T>(rd_size, device_queue);
-  fillArrayWithRandomFloats(rd_size, random_data);
+  constexpr int RandomSize = 1024 * 1024 * 64;
+  auto* RandomData = malloc_host<FloatingPointType>(RandomSize, DeviceQueue);
+  fillArrayWithRandomFloats<FloatingPointType>(RandomSize, RandomData);
 
-  firestarter::log::trace() << "Copy memory to device nr. " << device_index;
+  firestarter::log::trace() << "Copy memory to device nr. " << DeviceIndex;
   /* fill A and B with random data */
-  replicate_data(device_queue, A, size_use * size_use, random_data, rd_size);
-  replicate_data(device_queue, B, size_use * size_use, random_data, rd_size);
+  replicateData(DeviceQueue, A, MatrixSize * MatrixSize, RandomData, RandomSize);
+  replicateData(DeviceQueue, B, MatrixSize * MatrixSize, RandomData, RandomSize);
 
   {
-    std::lock_guard<std::mutex> lk(waitForInitCvMutex);
+    std::lock_guard<std::mutex> lk(WaitForInitCvMutex);
 
-#define TO_MB(x) (unsigned long)(x / 1024 / 1024)
+    auto ToMiB = [](const size_t Val) { return Val / 1024 / 1024; };
     firestarter::log::info() << "   GPU " << device_index << "\n"
-                             << "    name:           " << devices[device_index].get_info<sycl::info::device::name>()
+                             << "    name:           " << Devices[DeviceIndex].get_info<sycl::info::device::name>()
                              << "\n"
-                             << "    memory:         " << TO_MB(memory_total) << " MiB total (using "
-                             << TO_MB(use_bytes) << " MiB)\n"
-                             << "    matrix size:    " << size_use << "\n"
+                             << "    memory:         " << ToMiB(MemoryTotal) << " MiB total (using "
+                             << ToMiB(MemorySize) << " MiB)\n"
+                             << "    matrix size:    " << MatrixSize << "\n"
                              << "    used precision: " << ((sizeof(T) == sizeof(double)) ? "double" : "single");
-#undef TO_MB
 
-    initCount++;
+    InitCount++;
   }
-  waitForInitCv.notify_all();
-
-  firestarter::log::trace() << "Run gemm on device nr. " << device_index;
-  /* With this, we could run multiple gemms ...*/
-  /*  auto run_gemms = [=, &device_queue](int runs) -> double {
-        using namespace oneapi::mkl;
-        for (int i = 0; i < runs; i++)
+  WaitForInitCv.notify_all();
 
-        return runs;
-    };
-  */
+  firestarter::log::trace() << "Run gemm on device nr. " << DeviceIndex;
   while (LoadVar != firestarter::LoadThreadWorkType::LoadStop) {
-    firestarter::log::trace() << "Run gemm on device nr. " << device_index;
-    ::oneapi::mkl::blas::gemm(device_queue, ::oneapi::mkl::transpose::N, ::oneapi::mkl::transpose::N, size_use,
-                              size_use, size_use, 1, A, size_use, B, size_use, 0, C, size_use);
-    firestarter::log::trace() << "wait gemm on device nr. " << device_index;
-    device_queue.wait_and_throw();
+    firestarter::log::trace() << "Run gemm on device nr. " << DeviceIndex;
+    ::oneapi::mkl::blas::gemm(DeviceQueue, ::oneapi::mkl::transpose::N, ::oneapi::mkl::transpose::N, MatrixSize,
+                              MatrixSize, MatrixSize, 1, A, MatrixSize, B, MatrixSize, 0, C, MatrixSize);
+    firestarter::log::trace() << "wait gemm on device nr. " << DeviceIndex;
+    DeviceQueue.wait_and_throw();
   }
 }
 
+} // namespace
+
 OneAPI::OneAPI(const volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble,
                unsigned MatrixSize, int Gpus) {
   std::condition_variable WaitForInitCv;
@@ -322,13 +315,13 @@ void OneAPI::initGpus(std::condition_variable& WaitForInitCv, const volatile fir
         const std::lock_guard<std::mutex> Lk(GpuThreadsWaitForInitCvMutex);
 
         for (int I = 0; I < Gpus; ++I) {
-          const auto Precision = get_precision(I, UseDoubleConverted);
+          const auto Precision = getPrecision(I, UseDoubleConverted);
           if (Precision == -1) {
             firestarter::log::warn() << "This should not have happened. Could not get precision via SYCL.";
           }
           void (*LoadFunc)(std::condition_variable&, std::mutex&, int, std::atomic<int>&,
                            const volatile firestarter::LoadThreadWorkType&, unsigned) =
-              Precision ? create_load<double> : create_load<float>;
+              Precision ? createLoad<double> : createLoad<float>;
 
           std::thread T(LoadFunc, std::ref(GpuThreadsWaitForInitCv), std::ref(GpuThreadsWaitForInitCvMutex), I,
                         std::ref(InitCount), std::cref(LoadVar), MatrixSize);

From 77fec86ff01c19f0a8bf0dddaac96abea387c6ba Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 24 Oct 2024 23:08:38 +0200
Subject: [PATCH 088/167] refactor oneapi code

---
 src/firestarter/OneAPI/OneAPI.cpp | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/firestarter/OneAPI/OneAPI.cpp b/src/firestarter/OneAPI/OneAPI.cpp
index ea156e69..fcff8cf4 100644
--- a/src/firestarter/OneAPI/OneAPI.cpp
+++ b/src/firestarter/OneAPI/OneAPI.cpp
@@ -46,7 +46,7 @@ template <typename FloatingPointType> void fillArrayWithRandomFloats(size_t Numb
                 "fillArrayWithRandomFloats<FloatingPointType>: Template argument must be either float or double");
 
   for (size_t i = 0; i < NumberOfElems; i++) {
-    Array[i] = static_cast<FloatingPointType>(std::rand()) / RAND_MAX;
+    Array[i] = static_cast<FloatingPointType>(std::rand()) / static_cast<FloatingPointType>(RAND_MAX);
   }
 }
 
@@ -87,12 +87,12 @@ int getPrecision(int DeviceIndex, int UseDouble) {
   sycl::platform ChosenPlatform;
   auto NbGpus = 0;
   for (const auto& Platform : Platforms) {
-    firestarter::log::trace() << "Checking SYCL platform " << platform.get_info<sycl::info::platform::name>();
-    auto devices = Platform.get_devices();
+    firestarter::log::trace() << "Checking SYCL platform " << Platform.get_info<sycl::info::platform::name>();
+    auto Devices = Platform.get_devices();
     NbGpus = 0;
-    for (const auto& device : devices) {
-      firestarter::log::trace() << "Checking SYCL device " << device.get_info<sycl::info::device::name>();
-      if (device.is_gpu()) { // Choose GPU, you can use other criteria
+    for (const auto& Device : Devices) {
+      firestarter::log::trace() << "Checking SYCL device " << Device.get_info<sycl::info::device::name>();
+      if (Device.is_gpu()) { // Choose GPU, you can use other criteria
         firestarter::log::trace() << " ... is GPU";
         ChosenPlatform = Platform;
         NbGpus++;
@@ -173,7 +173,7 @@ void createLoad(std::condition_variable& WaitForInitCv, std::mutex& WaitForInitC
 
   firestarter::log::trace() << "Creating SYCL queue for computation on device nr. " << DeviceIndex;
   auto ChosenDevice = Devices[DeviceIndex];
-  auto DeviceQueue = sycl::queue(chosenDevice);
+  auto DeviceQueue = sycl::queue(ChosenDevice);
 
   firestarter::log::trace() << "Get memory size on device nr. " << DeviceIndex;
 
@@ -204,7 +204,7 @@ void createLoad(std::condition_variable& WaitForInitCv, std::mutex& WaitForInitC
 
   /* Create 64 MB random data on Host */
   constexpr int RandomSize = 1024 * 1024 * 64;
-  auto* RandomData = malloc_host<FloatingPointType>(RandomSize, DeviceQueue);
+  auto* RandomData = sycl::malloc_host<FloatingPointType>(RandomSize, DeviceQueue);
   fillArrayWithRandomFloats<FloatingPointType>(RandomSize, RandomData);
 
   firestarter::log::trace() << "Copy memory to device nr. " << DeviceIndex;
@@ -216,13 +216,14 @@ void createLoad(std::condition_variable& WaitForInitCv, std::mutex& WaitForInitC
     std::lock_guard<std::mutex> lk(WaitForInitCvMutex);
 
     auto ToMiB = [](const size_t Val) { return Val / 1024 / 1024; };
-    firestarter::log::info() << "   GPU " << device_index << "\n"
+    firestarter::log::info() << "   GPU " << DeviceIndex << "\n"
                              << "    name:           " << Devices[DeviceIndex].get_info<sycl::info::device::name>()
                              << "\n"
                              << "    memory:         " << ToMiB(MemoryTotal) << " MiB total (using "
                              << ToMiB(MemorySize) << " MiB)\n"
                              << "    matrix size:    " << MatrixSize << "\n"
-                             << "    used precision: " << ((sizeof(T) == sizeof(double)) ? "double" : "single");
+                             << "    used precision: "
+                             << ((sizeof(FloatingPointType) == sizeof(double)) ? "double" : "single");
 
     InitCount++;
   }

From badd0fece7865f962808a5035e27a75a1c37049c Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 25 Oct 2024 13:29:08 +0200
Subject: [PATCH 089/167] add script to start clang-tidy

---
 tooling/clang-tidy.py | 77 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 tooling/clang-tidy.py

diff --git a/tooling/clang-tidy.py b/tooling/clang-tidy.py
new file mode 100644
index 00000000..ae939306
--- /dev/null
+++ b/tooling/clang-tidy.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import glob
+from pathlib import Path
+import subprocess
+import click
+import multiprocessing
+import sys
+import typing
+
+# Find all source and header files in the project root that belong to FIRESTARTER
+def find_source_and_header_files(project_root: Path) -> typing.List[ Path ]:
+    src_path = project_root / Path('src')
+    include_path = project_root / Path('include')
+    files = glob.glob(f'{src_path}/**/*.cpp', recursive=True)
+    files += glob.glob(f'{include_path}/**/*.hpp', recursive=True)
+    files += glob.glob(f'{include_path}/**/*.h', recursive=True)
+    return files
+
+# Split a list of paths into multiple list of paths
+def split_in_chunks(chunk_size: int, input: typing.List[Path]) -> typing.List[typing.List[Path]]:
+    length = len(input) // chunk_size
+    if length * chunk_size < len(input):
+        length += 1
+    
+    return [ input[i:i+length] for i in range(0, len(input), length)]
+
+@click.command()
+@click.option('--project-root', default=Path(__file__).parent.parent.absolute(), help='The folder where the git repository is located.')
+@click.option('--build-root', help='The folder where the compile_commands.json is located.', required=True)
+@click.option('--cores', default=multiprocessing.cpu_count(), help='The number of clang-tidy processes to spawn.')
+def clang_tidy_report(project_root, build_root, cores):
+    project_root_path = Path(project_root).absolute()
+    build_root_path = Path(build_root).absolute()
+
+    print(f'Looking for compile_commands.json in {build_root_path}')
+    compile_commands_path = build_root_path / Path('compile_commands.json')
+    if compile_commands_path.exists():
+        print(f'Found {compile_commands_path}')
+    else:
+        sys.exit("Dind't find compile_commands.json. Aborting.")
+        
+    print(f'Looking for .clang-tidy in {project_root_path}')
+    clang_tidy_file_path = project_root_path / Path('.clang-tidy')
+    if clang_tidy_file_path.exists():
+        print(f'Found {clang_tidy_file_path}')
+    else:
+        sys.exit("Dind't find .clang-tidy. Aborting.")
+
+    files = find_source_and_header_files(project_root_path)
+    print(f'Found {len(files)} source and header files.')
+    
+    print(f'Lanching {cores} instances of clang-tidy in project root: {project_root_path}')
+
+    processes = set()
+    for chunck in split_in_chunks(cores, files):
+        command_args = ['clang-tidy', '-extra-arg=-std=c++17', f'-p={build_root_path}', f'--config-file={clang_tidy_file_path}', '--header-filter=include/firestarter/*', '--format-style=file']
+        command_args += chunck
+        print(f'Starting {command_args}')
+        processes.add(subprocess.Popen(command_args, stdout=subprocess.PIPE, cwd=project_root_path))
+
+    # Wait for clang-tidy instances to terminate
+    complete_stdout = b''
+    for p in processes:
+        if p.poll() is None:
+            p.wait()
+            stdout, _ = p.communicate()
+            complete_stdout += stdout + b'\n'
+
+    clang_tidy_report_file = build_root_path / Path('clang-tidy-report.txt')
+    print(f'Writing report to {clang_tidy_report_file}')
+    with open(clang_tidy_report_file, 'wb') as fp:
+        fp.write(complete_stdout)
+
+if __name__ == '__main__':
+    clang_tidy_report()
\ No newline at end of file

From 5c8f6c3fbfdee016ccf9f833ad5d891863b1cc8a Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 25 Oct 2024 13:37:21 +0200
Subject: [PATCH 090/167] update clang-tidy ci action

---
 .github/workflows/clang-tidy.yml | 34 +++++++++++++++++++++-----------
 tooling/clang-tidy.py            |  0
 2 files changed, 22 insertions(+), 12 deletions(-)
 mode change 100644 => 100755 tooling/clang-tidy.py

diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml
index a697042a..4c378583 100644
--- a/.github/workflows/clang-tidy.yml
+++ b/.github/workflows/clang-tidy.yml
@@ -1,6 +1,6 @@
 name: clang-tidy-review
 
-on: [pull_request]
+on: [push, pull_request]
 
 jobs:
   build:
@@ -13,16 +13,26 @@ jobs:
 
     # Ideally we would want to run the clang-tidy for every kind of build.
     # This would make shure that we will check all platform dependent code parts.
-    # Here we only test the standard linux build.
-    - uses: ZedThree/clang-tidy-review@v0.14.0
-      id: review
-      with:
-        split_workflow: true
-        config_file: '.clang-tidy'
-        cmake_command: 'cmake . && make -j2'
+    # Here we only test the standard linux build.    
+    - name: Install python3 and libraries
+      run: |
+        sudo apt update
+        sudo apt install python3 python3-pip
+        pip install click
+
+    - name: Create build directory
+      run: |
+          mkdir build
+
+    - name: Run CMake configure (default)
+      run: |
+        cd build
+        cmake ..
 
-    - uses: ZedThree/clang-tidy-review/upload@v0.14.0
-      id: upload-review
+    - name: Run clang-tidy
+      run: |
+          ./tooling/clang-tidy.py --build-root build
 
-    - if: steps.review.outputs.total_comments > 0
-      run: exit 1
+    - name: Print report
+      run: |
+          cat build/clang-tidy-report.txt
\ No newline at end of file
diff --git a/tooling/clang-tidy.py b/tooling/clang-tidy.py
old mode 100644
new mode 100755

From 93512a5e98cb63bb177e2f6f1831e47f1eb86530 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 25 Oct 2024 13:37:37 +0200
Subject: [PATCH 091/167] update todos

---
 include/firestarter/Optimizer/History.hpp | 2 +-
 src/firestarter/Firestarter.cpp           | 2 +-
 src/firestarter/Measurement/Summary.cpp   | 4 +---
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/include/firestarter/Optimizer/History.hpp b/include/firestarter/Optimizer/History.hpp
index b6acc566..0e694bb2 100644
--- a/include/firestarter/Optimizer/History.hpp
+++ b/include/firestarter/Optimizer/History.hpp
@@ -81,7 +81,7 @@ struct History {
 
   static void printBest(std::vector<std::string> const& OptimizationMetrics,
                         std::vector<std::string> const& PayloadItems) {
-    // TODO: print paretto front
+    // TODO(Issue #76): print paretto front
 
     // print the best 20 individuals for each metric in a format
     // where the user can give it to --run-instruction-groups directly
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 6b00d9ee..281bbb86 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -296,7 +296,7 @@ void Firestarter::mainThread() {
   if constexpr (firestarter::OptionalFeatures.OptimizationEnabled) {
     // if measurment is enabled, stop it here
     if (Measurement) {
-      // TODO: clear this up
+      // TODO(Issue #77): clear this up
       log::info() << "metric,num_timepoints,duration_ms,average,stddev";
       for (auto const& [name, sum] : MeasurementWorker->getValues(StartDelta, StopDelta)) {
         log::info() << std::quoted(name) << "," << sum.NumTimepoints << "," << sum.Duration.count() << ","
diff --git a/src/firestarter/Measurement/Summary.cpp b/src/firestarter/Measurement/Summary.cpp
index da626e9e..90f770bd 100644
--- a/src/firestarter/Measurement/Summary.cpp
+++ b/src/firestarter/Measurement/Summary.cpp
@@ -30,10 +30,8 @@ namespace firestarter::measurement {
 // https://github.com/metricq/metricq-cpp/blob/master/tools/metricq-summary/src/summary.cpp
 auto Summary::calculate(std::vector<TimeValue>::iterator Begin, std::vector<TimeValue>::iterator End,
                         MetricType MetricType, uint64_t NumThreads) -> Summary {
-  std::vector<TimeValue> Values = {};
+  std::vector<TimeValue> Values;
 
-  // TODO: i would really like to make this code a bit more readable, but i
-  // could not find a way yet.
   if (MetricType.Accumalative) {
     TimeValue Prev;
 

From 049d1834c817a16656a90b91714ff3732d643b21 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 25 Oct 2024 16:36:35 +0200
Subject: [PATCH 092/167] run clang-tidy using 2 cores

---
 .github/workflows/clang-tidy.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml
index 4c378583..efa52f2c 100644
--- a/.github/workflows/clang-tidy.yml
+++ b/.github/workflows/clang-tidy.yml
@@ -22,7 +22,7 @@ jobs:
 
     - name: Create build directory
       run: |
-          mkdir build
+        mkdir build
 
     - name: Run CMake configure (default)
       run: |
@@ -31,8 +31,8 @@ jobs:
 
     - name: Run clang-tidy
       run: |
-          ./tooling/clang-tidy.py --build-root build
+        ./tooling/clang-tidy.py --build-root build --cores 2
 
     - name: Print report
       run: |
-          cat build/clang-tidy-report.txt
\ No newline at end of file
+        cat build/clang-tidy-report.txt
\ No newline at end of file

From dc5c9dcf3166798b7059f3926389936689cecebd Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 25 Oct 2024 16:52:02 +0200
Subject: [PATCH 093/167] clang-tidy: scramble files to reduce the runtime

---
 tooling/clang-tidy.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tooling/clang-tidy.py b/tooling/clang-tidy.py
index ae939306..038d61ed 100755
--- a/tooling/clang-tidy.py
+++ b/tooling/clang-tidy.py
@@ -8,6 +8,7 @@
 import multiprocessing
 import sys
 import typing
+import random
 
 # Find all source and header files in the project root that belong to FIRESTARTER
 def find_source_and_header_files(project_root: Path) -> typing.List[ Path ]:
@@ -53,8 +54,12 @@ def clang_tidy_report(project_root, build_root, cores):
     
     print(f'Lanching {cores} instances of clang-tidy in project root: {project_root_path}')
 
+    # Scramble files to improve runtime performance
+    files_scrambled = files.copy()
+    random.shuffle(files_scrambled)
+
     processes = set()
-    for chunck in split_in_chunks(cores, files):
+    for chunck in split_in_chunks(cores, files_scrambled):
         command_args = ['clang-tidy', '-extra-arg=-std=c++17', f'-p={build_root_path}', f'--config-file={clang_tidy_file_path}', '--header-filter=include/firestarter/*', '--format-style=file']
         command_args += chunck
         print(f'Starting {command_args}')

From 4db1a5bc80f75937ac9a699d760ffd1b570750bf Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 25 Oct 2024 16:54:39 +0200
Subject: [PATCH 094/167] ci: run build before executing clang-tidy

---
 .github/workflows/clang-tidy.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml
index efa52f2c..446f4b22 100644
--- a/.github/workflows/clang-tidy.yml
+++ b/.github/workflows/clang-tidy.yml
@@ -29,6 +29,11 @@ jobs:
         cd build
         cmake ..
 
+    - name: Build
+      run: |
+        cd build
+        make -j4
+
     - name: Run clang-tidy
       run: |
         ./tooling/clang-tidy.py --build-root build --cores 2

From e6ae34d62472170ce310e71fc127e7772bfb1823 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 25 Oct 2024 17:08:50 +0200
Subject: [PATCH 095/167] clang-tidy start with shell

---
 tooling/clang-tidy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tooling/clang-tidy.py b/tooling/clang-tidy.py
index 038d61ed..8d7bbb5f 100755
--- a/tooling/clang-tidy.py
+++ b/tooling/clang-tidy.py
@@ -63,7 +63,7 @@ def clang_tidy_report(project_root, build_root, cores):
         command_args = ['clang-tidy', '-extra-arg=-std=c++17', f'-p={build_root_path}', f'--config-file={clang_tidy_file_path}', '--header-filter=include/firestarter/*', '--format-style=file']
         command_args += chunck
         print(f'Starting {command_args}')
-        processes.add(subprocess.Popen(command_args, stdout=subprocess.PIPE, cwd=project_root_path))
+        processes.add(subprocess.Popen(' '.join(command_args), shell=True, stdout=subprocess.PIPE, cwd=project_root_path))
 
     # Wait for clang-tidy instances to terminate
     complete_stdout = b''

From 4ae98e3ed14a42686cc9225aabb225941ec687f7 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 25 Oct 2024 17:36:45 +0200
Subject: [PATCH 096/167] clang-tidy: launch processes in new python thread

---
 tooling/clang-tidy.py | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/tooling/clang-tidy.py b/tooling/clang-tidy.py
index 8d7bbb5f..1c8b7c73 100755
--- a/tooling/clang-tidy.py
+++ b/tooling/clang-tidy.py
@@ -9,6 +9,8 @@
 import sys
 import typing
 import random
+from multiprocessing import Pool
+from functools import partial
 
 # Find all source and header files in the project root that belong to FIRESTARTER
 def find_source_and_header_files(project_root: Path) -> typing.List[ Path ]:
@@ -27,6 +29,21 @@ def split_in_chunks(chunk_size: int, input: typing.List[Path]) -> typing.List[ty
     
     return [ input[i:i+length] for i in range(0, len(input), length)]
 
+# Run clang-tidy on a set of input files and return the stdout
+def run_clang_tidy(files: typing.List[Path], project_root_path: Path, build_root_path: Path, clang_tidy_file_path: Path) -> bytes:
+    command_args = ['clang-tidy', '-extra-arg=-std=c++17', f'-p={build_root_path}', f'--config-file={clang_tidy_file_path}', '--header-filter=include/firestarter/*', '--format-style=file']
+    command_args += files
+    print(f'Starting {command_args}')
+    p = subprocess.Popen(command_args, stdout=subprocess.PIPE, cwd=project_root_path)
+
+    # communicate() drains stdout while waiting for clang-tidy to exit; a bare wait() on a
+    # PIPE can deadlock once the pipe buffer fills, and poll() could drop a finished run's output.
+    stdout, _ = p.communicate()
+    if stdout:
+        return stdout + b'\n'
+    
+    return b''
+
 @click.command()
 @click.option('--project-root', default=Path(__file__).parent.parent.absolute(), help='The folder where the git repository is located.')
 @click.option('--build-root', help='The folder where the compile_commands.json is located.', required=True)
@@ -58,25 +75,13 @@ def clang_tidy_report(project_root, build_root, cores):
     files_scrambled = files.copy()
     random.shuffle(files_scrambled)
 
-    processes = set()
-    for chunck in split_in_chunks(cores, files_scrambled):
-        command_args = ['clang-tidy', '-extra-arg=-std=c++17', f'-p={build_root_path}', f'--config-file={clang_tidy_file_path}', '--header-filter=include/firestarter/*', '--format-style=file']
-        command_args += chunck
-        print(f'Starting {command_args}')
-        processes.add(subprocess.Popen(' '.join(command_args), shell=True, stdout=subprocess.PIPE, cwd=project_root_path))
-
-    # Wait for clang-tidy instances to terminate
-    complete_stdout = b''
-    for p in processes:
-        if p.poll() is None:
-            p.wait()
-            stdout, _ = p.communicate()
-            complete_stdout += stdout + b'\n'
+    with Pool(cores) as p:
+        stdout = p.map(partial(run_clang_tidy, project_root_path=project_root_path, build_root_path=build_root_path, clang_tidy_file_path=clang_tidy_file_path), split_in_chunks(cores, files_scrambled))
 
     clang_tidy_report_file = build_root_path / Path('clang-tidy-report.txt')
     print(f'Writing report to {clang_tidy_report_file}')
     with open(clang_tidy_report_file, 'wb') as fp:
-        fp.write(complete_stdout)
+        fp.write(b''.join(stdout))
 
 if __name__ == '__main__':
     clang_tidy_report()
\ No newline at end of file

From 2c26deb392fefe45aea23cc6e88f3c288363e97b Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 25 Oct 2024 17:59:23 +0200
Subject: [PATCH 097/167] run clang-tidy ci with 4 cores

---
 .github/workflows/clang-tidy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml
index 446f4b22..ce4d514c 100644
--- a/.github/workflows/clang-tidy.yml
+++ b/.github/workflows/clang-tidy.yml
@@ -36,7 +36,7 @@ jobs:
 
     - name: Run clang-tidy
       run: |
-        ./tooling/clang-tidy.py --build-root build --cores 2
+        ./tooling/clang-tidy.py --build-root build --cores 4
 
     - name: Print report
       run: |

From 077662a2877799ee1cac944a3b809d75243d74e3 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 25 Oct 2024 21:23:31 +0200
Subject: [PATCH 098/167] clang-tidy: find the correct source files for the
 current build

---
 tooling/clang-tidy.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/tooling/clang-tidy.py b/tooling/clang-tidy.py
index 1c8b7c73..6fca57be 100755
--- a/tooling/clang-tidy.py
+++ b/tooling/clang-tidy.py
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 
 import glob
+import json
 from pathlib import Path
 import subprocess
 import click
@@ -12,13 +13,27 @@
 from multiprocessing import Pool
 from functools import partial
 
+# Find all source files from the compile commands database that are in a specific directory
+def find_source_files_from_compile_commands(compile_commands_path: Path, sources_dir: Path) -> typing.List[Path]:
+    with open(compile_commands_path, 'r') as fp:
+        compile_commands = json.loads(fp.read())
+        sources = [ entry['file'] for entry in compile_commands ]
+        sources = list(filter(lambda file: str(file).startswith(str(sources_dir)), sources))
+        return sources
+
 # Find all source and header files in the project root that belong to FIRESTARTER
-def find_source_and_header_files(project_root: Path) -> typing.List[ Path ]:
+def find_source_and_header_files(project_root: Path, build_root: Path) -> typing.List[Path]:
     src_path = project_root / Path('src')
     include_path = project_root / Path('include')
-    files = glob.glob(f'{src_path}/**/*.cpp', recursive=True)
+
+    # find all cpp file from the compile commands database
+    compile_commands_path = build_root / Path('compile_commands.json')
+    files = find_source_files_from_compile_commands(compile_commands_path, src_path)
+
+    # find all headers based on glob
     files += glob.glob(f'{include_path}/**/*.hpp', recursive=True)
     files += glob.glob(f'{include_path}/**/*.h', recursive=True)
+
     return files
 
 # Split a list of paths into multiple list of paths
@@ -66,7 +81,7 @@ def clang_tidy_report(project_root, build_root, cores):
     else:
         sys.exit("Dind't find .clang-tidy. Aborting.")
 
-    files = find_source_and_header_files(project_root_path)
+    files = find_source_and_header_files(project_root_path, build_root_path)
     print(f'Found {len(files)} source and header files.')
     
     print(f'Lanching {cores} instances of clang-tidy in project root: {project_root_path}')

From ae95982e0c0b4ee97751efc4214f5d772af4fbae Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 25 Oct 2024 21:25:49 +0200
Subject: [PATCH 099/167] remove error message from cuda/hip compat header

---
 include/firestarter/Cuda/CudaHipCompat.hpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp
index a12dd56a..c2c009f4 100644
--- a/include/firestarter/Cuda/CudaHipCompat.hpp
+++ b/include/firestarter/Cuda/CudaHipCompat.hpp
@@ -50,10 +50,6 @@
 #include <hipblas/hipblas.h>
 #include <hiprand_kernel.h>
 
-#else
-
-#error "Attempting to compile file but neither CUDA nor HIP is used"
-
 #endif
 
 namespace firestarter::cuda::compat {

From 25571c1cc794e7aefe60b2f9b409f783dcb39834 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 25 Oct 2024 21:42:09 +0200
Subject: [PATCH 100/167] reformat clang-tidy

---
 tooling/clang-tidy.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tooling/clang-tidy.py b/tooling/clang-tidy.py
index 6fca57be..ed1a5b91 100755
--- a/tooling/clang-tidy.py
+++ b/tooling/clang-tidy.py
@@ -10,7 +10,6 @@
 import sys
 import typing
 import random
-from multiprocessing import Pool
 from functools import partial
 
 # Find all source files from the compile commands database that are in a specific directory
@@ -42,7 +41,7 @@ def split_in_chunks(chunk_size: int, input: typing.List[Path]) -> typing.List[ty
     if length * chunk_size < len(input):
         length += 1
     
-    return [ input[i:i+length] for i in range(0, len(input), length)]
+    return [ input[i:i+length] for i in range(0, len(input), length) ]
 
 # Run clang-tidy on a set of input files and return the stdout
 def run_clang_tidy(files: typing.List[Path], project_root_path: Path, build_root_path: Path, clang_tidy_file_path: Path) -> bytes:
@@ -90,7 +89,7 @@ def clang_tidy_report(project_root, build_root, cores):
     files_scrambled = files.copy()
     random.shuffle(files_scrambled)
 
-    with Pool(cores) as p:
+    with multiprocessing.Pool(cores) as p:
         stdout = p.map(partial(run_clang_tidy, project_root_path=project_root_path, build_root_path=build_root_path, clang_tidy_file_path=clang_tidy_file_path), split_in_chunks(cores, files_scrambled))
 
     clang_tidy_report_file = build_root_path / Path('clang-tidy-report.txt')

From b9b468ece4f65d00a33e2ac8dc6b7387224e1447 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 25 Oct 2024 22:12:18 +0200
Subject: [PATCH 101/167] clang-tidy: use seed for shuffled files

---
 tooling/clang-tidy.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tooling/clang-tidy.py b/tooling/clang-tidy.py
index ed1a5b91..772a71ee 100755
--- a/tooling/clang-tidy.py
+++ b/tooling/clang-tidy.py
@@ -85,12 +85,12 @@ def clang_tidy_report(project_root, build_root, cores):
     
     print(f'Lanching {cores} instances of clang-tidy in project root: {project_root_path}')
 
-    # Scramble files to improve runtime performance
-    files_scrambled = files.copy()
-    random.shuffle(files_scrambled)
+    # Shuffle files to improve runtime performance. Use seed 123 to keep it the same across runs.
+    files_shuffled = files.copy()
+    random.Random(123).shuffle(files_shuffled)
 
     with multiprocessing.Pool(cores) as p:
-        stdout = p.map(partial(run_clang_tidy, project_root_path=project_root_path, build_root_path=build_root_path, clang_tidy_file_path=clang_tidy_file_path), split_in_chunks(cores, files_scrambled))
+        stdout = p.map(partial(run_clang_tidy, project_root_path=project_root_path, build_root_path=build_root_path, clang_tidy_file_path=clang_tidy_file_path), split_in_chunks(cores, files_shuffled))
 
     clang_tidy_report_file = build_root_path / Path('clang-tidy-report.txt')
     print(f'Writing report to {clang_tidy_report_file}')

From 219a262d5e5a82b3c3b1569cdab6bf5864f5f12c Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 25 Oct 2024 23:46:20 +0200
Subject: [PATCH 102/167] clang-tidy: add check to ci action

---
 .github/workflows/clang-tidy.yml |  8 ++++++--
 tooling/clang-tidy.py            | 27 +++++++++++++++++++++++++--
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml
index ce4d514c..69e84dc3 100644
--- a/.github/workflows/clang-tidy.yml
+++ b/.github/workflows/clang-tidy.yml
@@ -36,8 +36,12 @@ jobs:
 
     - name: Run clang-tidy
       run: |
-        ./tooling/clang-tidy.py --build-root build --cores 4
+        ./tooling/clang-tidy.py clang-tidy-report --build-root build --cores 4
 
     - name: Print report
       run: |
-        cat build/clang-tidy-report.txt
\ No newline at end of file
+        cat build/clang-tidy-report.txt
+
+    - name: Check if report is empty
+      run: |
+        ./tooling/clang-tidy.py check --build-root build
\ No newline at end of file
diff --git a/tooling/clang-tidy.py b/tooling/clang-tidy.py
index 772a71ee..1e8bded1 100755
--- a/tooling/clang-tidy.py
+++ b/tooling/clang-tidy.py
@@ -58,7 +58,30 @@ def run_clang_tidy(files: typing.List[Path], project_root_path: Path, build_root
     
     return b''
 
-@click.command()
+@click.group()
+def cli():
+    pass
+
+@cli.command(help='Exsits successfully if the report is empty')
+@click.option('--build-root', help='The folder where the clang-tidy-report.txt is located.', required=True)
+def check(build_root):
+    build_root_path = Path(build_root).absolute()
+
+    print(f'Looking for clang-tidy-report.txt in {build_root_path}')
+    clang_tidy_report_path = build_root_path / Path('clang-tidy-report.txt')
+    if clang_tidy_report_path.exists():
+        print(f'Found {clang_tidy_report_path}')
+    else:
+        sys.exit("Dind't find clang-tidy-report.txt. Aborting.")
+
+    with open(clang_tidy_report_path, 'r') as fp:
+        content = fp.read().rstrip()
+        if len(content) == 0:
+            print('No content in clang-tidy-report.txt')
+        else:
+            sys.exit('Found content in clang-tidy-report.txt')
+
+@cli.command(help='Create the clang-tidy report')
 @click.option('--project-root', default=Path(__file__).parent.parent.absolute(), help='The folder where the git repository is located.')
 @click.option('--build-root', help='The folder where the compile_commands.json is located.', required=True)
 @click.option('--cores', default=multiprocessing.cpu_count(), help='The number of clang-tidy processes to spawn.')
@@ -98,4 +121,4 @@ def clang_tidy_report(project_root, build_root, cores):
         fp.write(b''.join(stdout))
 
 if __name__ == '__main__':
-    clang_tidy_report()
\ No newline at end of file
+    cli()
\ No newline at end of file

From 5c4cb4c8f1ed1ff97eab5dc1901941635ac8674e Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 26 Oct 2024 00:00:32 +0200
Subject: [PATCH 103/167] clang-tidy: set python unbuffered in ci action

---
 .github/workflows/clang-tidy.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml
index 69e84dc3..051d72f9 100644
--- a/.github/workflows/clang-tidy.yml
+++ b/.github/workflows/clang-tidy.yml
@@ -2,6 +2,9 @@ name: clang-tidy-review
 
 on: [push, pull_request]
 
+env:
+  PYTHONUNBUFFERED: 1
+
 jobs:
   build:
     runs-on: ubuntu-latest

From d978ca97ad21082b34721703035c566ff9663315 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 26 Oct 2024 14:50:14 +0200
Subject: [PATCH 104/167] refactor config parser

---
 include/firestarter/Config.hpp         |  83 +++++
 include/firestarter/Firestarter.hpp    |  80 ++---
 src/CMakeLists.txt                     |   1 +
 src/firestarter/Config.cpp             | 392 ++++++++++++++++++++++
 src/firestarter/DumpRegisterWorker.cpp |   5 +-
 src/firestarter/Firestarter.cpp        | 130 +++-----
 src/firestarter/LoadWorker.cpp         |  15 +-
 src/firestarter/Main.cpp               | 430 +------------------------
 8 files changed, 552 insertions(+), 584 deletions(-)
 create mode 100644 include/firestarter/Config.hpp
 create mode 100644 src/firestarter/Config.cpp

diff --git a/include/firestarter/Config.hpp b/include/firestarter/Config.hpp
new file mode 100644
index 00000000..ce88008d
--- /dev/null
+++ b/include/firestarter/Config.hpp
@@ -0,0 +1,83 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2024 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+#pragma once
+
+#include <chrono>
+#include <string>
+#include <vector>
+
+namespace firestarter {
+
+struct Config {
+  const char** Argv;
+  int Argc;
+
+  // default parameters
+  std::chrono::seconds Timeout{};
+  std::chrono::microseconds Period{};
+  std::chrono::microseconds Load{};
+  unsigned RequestedNumThreads;
+  std::string CpuBind;
+  bool PrintFunctionSummary;
+  unsigned FunctionId;
+  bool ListInstructionGroups;
+  std::string InstructionGroups;
+  unsigned LineCount = 0;
+  // debug features
+  bool AllowUnavailablePayload = false;
+  bool DumpRegisters = false;
+  std::chrono::seconds DumpRegistersTimeDelta = std::chrono::seconds(0);
+  std::string DumpRegistersOutpath;
+  bool ErrorDetection = false;
+  // CUDA parameters
+  int Gpus = 0;
+  unsigned GpuMatrixSize = 0;
+  bool GpuUseFloat = false;
+  bool GpuUseDouble = false;
+  // linux features
+  bool ListMetrics = false;
+  bool Measurement = false;
+  std::chrono::milliseconds StartDelta = std::chrono::milliseconds(0);
+  std::chrono::milliseconds StopDelta = std::chrono::milliseconds(0);
+  std::chrono::milliseconds MeasurementInterval = std::chrono::milliseconds(0);
+  std::vector<std::string> StdinMetrics;
+  // linux and dynamic linked binary
+  std::vector<std::string> MetricPaths;
+
+  // optimization
+  bool Optimize = false;
+  std::chrono::seconds Preheat{};
+  std::string OptimizationAlgorithm;
+  std::vector<std::string> OptimizationMetrics;
+  std::chrono::seconds EvaluationDuration{};
+  unsigned Individuals;
+  std::string OptimizeOutfile;
+  unsigned Generations;
+  double Nsga2Cr;
+  double Nsga2M;
+
+  Config() = delete;
+
+  Config(int Argc, const char** Argv);
+};
+
+} // namespace firestarter
\ No newline at end of file
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 22de1d33..1e509d05 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -21,6 +21,7 @@
 
 #pragma once
 
+#include "Config.hpp"
 #include "Constants.hpp"
 #include "Cuda/Cuda.hpp"
 #include "DumpRegisterWorkerData.hpp"
@@ -50,64 +51,39 @@ class Firestarter {
 public:
   Firestarter() = delete;
 
-  Firestarter(int Argc, const char** Argv, std::chrono::seconds const& Timeout, unsigned LoadPercent,
-              std::chrono::microseconds const& Period, unsigned RequestedNumThreads, std::string const& CpuBind,
-              bool PrintFunctionSummary, unsigned FunctionId, bool ListInstructionGroups,
-              std::string const& InstructionGroups, unsigned LineCount, bool AllowUnavailablePayload,
-              bool DumpRegisters, std::chrono::seconds const& DumpRegistersTimeDelta, std::string DumpRegistersOutpath,
-              bool ErrorDetection, int Gpus, unsigned GpuMatrixSize, bool GpuUseFloat, bool GpuUseDouble,
-              bool ListMetrics, bool Measurement, std::chrono::milliseconds const& StartDelta,
-              std::chrono::milliseconds const& StopDelta, std::chrono::milliseconds const& MeasurementInterval,
-              std::vector<std::string> const& MetricPaths, std::vector<std::string> const& StdinMetrics, bool Optimize,
-              std::chrono::seconds const& Preheat, std::string const& OptimizationAlgorithm,
-              std::vector<std::string> const& OptimizationMetrics, std::chrono::seconds const& EvaluationDuration,
-              unsigned Individuals, std::string OptimizeOutfile, unsigned Generations, double Nsga2Cr, double Nsga2M);
+  explicit Firestarter(Config&& Cfg);
 
   ~Firestarter() = default;
 
   void mainThread();
 
 private:
-  const int Argc;
-  const char** Argv;
-  const std::chrono::seconds Timeout;
-  const unsigned LoadPercent;
-  std::chrono::microseconds Load{};
-  std::chrono::microseconds Period;
-  const bool DumpRegisters;
-  const std::chrono::seconds DumpRegistersTimeDelta;
-  const std::string DumpRegistersOutpath;
-  const bool ErrorDetection;
-  const int Gpus;
-  const unsigned GpuMatrixSize;
-  const bool GpuUseFloat;
-  const bool GpuUseDouble;
-  const std::chrono::milliseconds StartDelta;
-  const std::chrono::milliseconds StopDelta;
-  const bool Measurement;
-  const bool Optimize;
-  const std::chrono::seconds Preheat;
-  const std::string OptimizationAlgorithm;
-  const std::vector<std::string> OptimizationMetrics;
-  const std::chrono::seconds EvaluationDuration;
-  const unsigned Individuals;
-  const std::string OptimizeOutfile;
-  const unsigned Generations;
-  const double Nsga2Cr;
-  const double Nsga2M;
+  const Config Cfg;
 
   std::unique_ptr<environment::Environment> Environment;
-
   std::unique_ptr<cuda::Cuda> Cuda;
   std::unique_ptr<oneapi::OneAPI> Oneapi;
-
-  inline static std::unique_ptr<optimizer::OptimizerWorker> Optimizer;
-  std::shared_ptr<measurement::MeasurementWorker> MeasurementWorker;
   std::unique_ptr<firestarter::optimizer::Algorithm> Algorithm;
+  std::thread DumpRegisterWorkerThread;
+  std::shared_ptr<measurement::MeasurementWorker> MeasurementWorker;
+
+  std::vector<std::pair<std::thread, std::shared_ptr<LoadWorkerData>>> LoadThreads;
+  std::vector<std::shared_ptr<uint64_t>> ErrorCommunication;
+
   firestarter::optimizer::Population Population;
 
+  inline static std::unique_ptr<optimizer::OptimizerWorker> Optimizer;
+
+  // variables to control the termination of the watchdog
+  inline static bool WatchdogTerminate = false;
+  inline static std::condition_variable WatchdogTerminateAlert;
+  inline static std::mutex WatchdogTerminateMutex;
+
+  // variable to control the load of the threads
+  inline static volatile LoadThreadWorkType LoadVar = LoadThreadWorkType::LoadLow;
+
   // LoadThreadWorker.cpp
-  void initLoadWorkers(bool LowLoad, std::chrono::microseconds Period);
+  void initLoadWorkers();
   void joinLoadWorkers();
   void printThreadErrorReport();
   void printPerformanceReport();
@@ -149,28 +125,14 @@ class Firestarter {
                              std::chrono::seconds Timeout) -> int;
 
   // DumpRegisterWorker.cpp
-  void initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, const std::string& DumpFilePath);
+  void initDumpRegisterWorker();
   void joinDumpRegisterWorker();
   static void dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Data);
 
-  std::thread DumpRegisterWorkerThread;
-
   static void setLoad(LoadThreadWorkType Value);
 
   static void sigalrmHandler(int Signum);
   static void sigtermHandler(int Signum);
-
-  // variables to control the termination of the watchdog
-  inline static bool WatchdogTerminate = false;
-  inline static std::condition_variable WatchdogTerminateAlert;
-  inline static std::mutex WatchdogTerminateMutex;
-
-  // variable to control the load of the threads
-  inline static volatile LoadThreadWorkType LoadVar = LoadThreadWorkType::LoadLow;
-
-  std::vector<std::pair<std::thread, std::shared_ptr<LoadWorkerData>>> LoadThreads;
-
-  std::vector<std::shared_ptr<uint64_t>> ErrorCommunication;
 };
 
 } // namespace firestarter
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7d405dd9..47e01cca 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,4 +1,5 @@
 SET(FIRESTARTER_FILES
+	firestarter/Config.cpp
 	firestarter/Main.cpp
 	firestarter/Firestarter.cpp
 	firestarter/LoadWorker.cpp
diff --git a/src/firestarter/Config.cpp b/src/firestarter/Config.cpp
new file mode 100644
index 00000000..1c1f0677
--- /dev/null
+++ b/src/firestarter/Config.cpp
@@ -0,0 +1,392 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2024 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+#include <cxxopts.hpp>
+#include <firestarter/Config.hpp>
+#include <firestarter/Constants.hpp>
+#include <firestarter/Logging/Log.hpp>
+
+namespace {
+
+/// Logs the GPL redistribution notice at info level.
+/// Config::Config calls this when the copyright option is given.
+void printCopyright() {
+  firestarter::log::info()
+      << "This program is free software: you can redistribute it and/or modify\n"
+         "it under the terms of the GNU General Public License as published by\n"
+         "the Free Software Foundation, either version 3 of the License, or\n"
+         "(at your option) any later version.\n"
+         "\n"
+         "You should have received a copy of the GNU General Public License\n"
+         "along with this program.  If not, see <http://www.gnu.org/licenses/>.\n";
+}
+
+/// Logs the GPL warranty disclaimer at info level; Config::Config calls this when the warranty option is given.
+void printWarranty() {
+  firestarter::log::info()
+      << "This program is distributed in the hope that it will be useful,\n"
+         "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+         "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
+         "GNU General Public License for more details.\n\n"
+         "You should have received a copy of the GNU General Public License\n"
+         "along with this program.  If not, see <http://www.gnu.org/licenses/>.\n";
+}
+
+/// Logs the option help and usage examples; a non-empty Section limits the help to that group (throws if unknown).
+void printHelp(cxxopts::Options const& Parser, std::string const& Section = "") {
+  std::vector<std::pair<std::string, std::string>> Options = {
+    {"information", "Information Options:\n"},
+    {"general", "General Options:\n"},
+    {"specialized-workloads", "Specialized workloads:\n"},
+#ifdef FIRESTARTER_DEBUG_FEATURES
+    {"debug", "Debugging:\n"},
+#endif
+#if defined(linux) || defined(__linux__)
+    {"measurement", "Measurement:\n"},
+    {"optimization", "Optimization:\n"}
+#endif
+  };
+
+  // Restrict the output to the requested section; an unknown section name is reported as std::invalid_argument.
+  if (!Section.empty()) {
+    auto FindSection = [&Section](std::pair<std::string, std::string> const& Pair) { return Pair.first == Section; };
+    auto SectionsIt = std::find_if(Options.begin(), Options.end(), FindSection);
+    if (SectionsIt == Options.end()) {
+      throw std::invalid_argument("Section \"" + Section + "\" not found in help.");
+    }
+    Options = {*SectionsIt};
+  }
+
+  // clang-format off
+  firestarter::log::info()
+    << Parser.help(Options)
+    << "Examples:\n"
+    << "  ./FIRESTARTER                 starts FIRESTARTER without timeout\n"
+    << "  ./FIRESTARTER -t 300          starts a 5 minute run of FIRESTARTER\n"
+    << "  ./FIRESTARTER -l 50 -t 600    starts a 10 minute run of FIRESTARTER with\n"
+    << "                                50% high load and 50% idle time\n"
+    << (firestarter::OptionalFeatures.gpuEnabled() ?
+       "                                on CPUs and full load on GPUs\n"
+     : "")
+    << "  ./FIRESTARTER -l 75 -p 2000000\n"
+    << "                                starts FIRESTARTER with an interval length\n"
+    << "                                of 2 sec, 1.5s high load"
+    << (firestarter::OptionalFeatures.gpuEnabled() ?
+       " on CPUs and full load on GPUs\n"
+     : "\n")
+    << (firestarter::OptionalFeatures.OptimizationEnabled ?
+       "  ./FIRESTARTER --measurement --start-delta=300000 -t 900\n"
+       "                                starts FIRESTARTER measuring all available\n"
+       "                                metrics for 15 minutes disregarding the first\n"
+       "                                5 minutes and last two seconds (default to `--stop-delta`)\n"
+       "  ./FIRESTARTER -t 20 --optimize=NSGA2 --optimization-metric sysfs-powercap-rapl,perf-ipc\n"
+       "                                starts FIRESTARTER optimizing with the sysfs-powercap-rapl\n"
+       "                                and perf-ipc metric. The duration is 20s long. The default\n"
+       "                                instruction groups for the current platform will be used.\n"
+     : "")
+    ;
+  // clang-format on
+}
+
+} // namespace
+
+namespace firestarter {
+
+/// Parse the command line into this Config.
+/// Informational options (--version, --copyright, --warranty, --help) end the process with EXIT_SUCCESS. Any
+/// exception raised while parsing or validating is logged together with the help text and ends it with EXIT_FAILURE.
+/// \param Argc Number of entries in Argv.
+/// \param Argv Command line arguments; the first entry is used as the executable name in messages.
+Config::Config(int Argc, const char** Argv)
+    : Argv(Argv)
+    , Argc(Argc) {
+  const auto* ExecutableName = *Argv;
+
+  cxxopts::Options Parser(ExecutableName);
+
+  const auto HelpDescription =
+      std::string("Display usage information. SECTION can be any of: information | general | specialized-workloads") +
+      (firestarter::OptionalFeatures.DebugFeatureEnabled ? " | debug" : "") +
+      (firestarter::OptionalFeatures.OptimizationEnabled ? "\n| measurement | optimization" : "");
+
+  const auto LoadDescription =
+      std::string("Set the percentage of high CPU load to LOAD\n(%) default: 100, valid values: 0 <= LOAD <=\n100, "
+                  "threads will be idle in the remaining time,\nfrequency of load changes is determined by -p.") +
+      (firestarter::OptionalFeatures.gpuEnabled() ? " This option does NOT influence the GPU\nworkload!" : "");
+
+  // clang-format off
+  Parser.add_options("information")
+    ("h,help", HelpDescription,
+      cxxopts::value<std::string>()->implicit_value(""), "SECTION")
+    ("v,version", "Display version information")
+    ("c,copyright", "Display copyright information")
+    ("w,warranty", "Display warranty information")
+    ("q,quiet", "Set log level to Warning")
+    ("r,report", "Display additional information (overridden by -q)")
+    ("debug", "Print debug output")
+    ("a,avail", "List available functions");
+
+  Parser.add_options("general")
+    ("i,function", "Specify integer ID of the load-function to be\nused (as listed by --avail)",
+      cxxopts::value<unsigned>()->default_value("0"), "ID");
+
+  if (firestarter::OptionalFeatures.gpuEnabled()) {
+    Parser.add_options("general")
+      ("f,usegpufloat", "Use single precision matrix multiplications\ninstead of default")
+      ("d,usegpudouble", "Use double precision matrix multiplications\ninstead of default")
+      ("g,gpus", "Number of gpus to use, default: -1 (all)",
+        cxxopts::value<int>()->default_value("-1"))
+      ("m,matrixsize", "Size of the matrix to calculate, default: 0 (maximum)",
+        cxxopts::value<unsigned>()->default_value("0"));
+  }
+
+  Parser.add_options("general")
+    ("t,timeout", "Set the timeout (seconds) after which FIRESTARTER\nterminates itself, default: 0 (no timeout)",
+      cxxopts::value<unsigned>()->default_value("0"), "TIMEOUT")
+    ("l,load", LoadDescription,
+      cxxopts::value<unsigned>()->default_value("100"), "LOAD")
+    ("p,period", "Set the interval length for CPUs to PERIOD\n(usec), default: 100000, each interval contains\na high load and an idle phase, the percentage\nof high load is defined by -l.",
+      cxxopts::value<unsigned>()->default_value("100000"), "PERIOD")
+    ("n,threads", "Specify the number of threads. Cannot be\ncombined with -b | --bind, which impicitly\nspecifies the number of threads.",
+      cxxopts::value<unsigned>()->default_value("0"), "COUNT")
+#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
+    ("b,bind", "Select certain CPUs. CPULIST format: \"x,y,z\",\n\"x-y\", \"x-y/step\", and any combination of the\nabove. Cannot be combined with -n | --threads.",
+      cxxopts::value<std::string>()->default_value(""), "CPULIST")
+#endif
+    ("error-detection", "Enable error detection. This aborts execution when the calculated data is corruped by errors. FIRESTARTER must run with 2 or more threads for this feature. Cannot be used with -l | --load and --optimize.");
+
+  Parser.add_options("specialized-workloads")
+    ("list-instruction-groups", "List the available instruction groups for the\npayload of the current platform.")
+    ("run-instruction-groups", "Run the payload with the specified\ninstruction groups. GROUPS format: multiple INST:VAL\npairs comma-seperated.",
+      cxxopts::value<std::string>()->default_value(""), "GROUPS")
+    ("set-line-count", "Set the number of lines for a payload.",
+      cxxopts::value<unsigned>());
+
+  if (firestarter::OptionalFeatures.DebugFeatureEnabled) {
+    Parser.add_options("debug")
+      ("allow-unavailable-payload", "")
+      ("dump-registers", "Dump the working registers on the first\nthread. Depending on the payload these are mm, xmm,\nymm or zmm. Only use it without a timeout and\n100 percent load. DELAY between dumps in secs. Cannot be used with --error-detection.",
+        cxxopts::value<unsigned>()->implicit_value("10"), "DELAY")
+      ("dump-registers-outpath", "Path for the dump of the output files. If\nPATH is not given, current working directory will\nbe used.",
+        cxxopts::value<std::string>()->default_value(""), "PATH");
+  }
+
+  if (firestarter::OptionalFeatures.OptimizationEnabled) {
+    Parser.add_options("measurement")
+      ("list-metrics", "List the available metrics.")
+#ifndef FIRESTARTER_LINK_STATIC
+      ("metric-path", "Add a path to a shared library representing an interface for a metric. This option can be specified multiple times.",
+        cxxopts::value<std::vector<std::string>>()->default_value(""))
+#endif
+      ("metric-from-stdin", "Add a metric NAME with values from stdin.\nFormat of input: \"NAME TIME_SINCE_EPOCH VALUE\\n\".\nTIME_SINCE_EPOCH is a int64 in nanoseconds. VALUE is a double. (Do not forget to flush\nlines!)",
+        cxxopts::value<std::vector<std::string>>(), "NAME")
+      ("measurement", "Start a measurement for the time specified by\n-t | --timeout. (The timeout must be greater\nthan the start and stop deltas.) Cannot be\ncombined with --optimize.")
+      ("measurement-interval", "Interval of measurements in milliseconds, default: 100",
+        cxxopts::value<unsigned>()->default_value("100"))
+      ("start-delta", "Cut of first N milliseconds of measurement, default: 5000",
+        cxxopts::value<unsigned>()->default_value("5000"), "N")
+      ("stop-delta", "Cut of last N milliseconds of measurement, default: 2000",
+        cxxopts::value<unsigned>()->default_value("2000"), "N")
+      ("preheat", "Preheat for N seconds, default: 240",
+        cxxopts::value<unsigned>()->default_value("240"), "N");
+
+    Parser.add_options("optimization")
+      ("optimize", "Run the optimization with one of these algorithms: NSGA2.\nCannot be combined with --measurement.",
+        cxxopts::value<std::string>())
+      ("optimize-outfile", "Dump the output of the optimization into this\nfile, default: $PWD/$HOSTNAME_$DATE.json",
+        cxxopts::value<std::string>())
+      ("optimization-metric", "Use a metric for optimization. Metrics listed\nwith cli argument --list-metrics or specified\nwith --metric-from-stdin are valid.",
+        cxxopts::value<std::vector<std::string>>())
+      ("individuals", "Number of individuals for the population. For\nNSGA2 specify at least 5 and a multiple of 4,\ndefault: 20",
+        cxxopts::value<unsigned>()->default_value("20"))
+      ("generations", "Number of generations, default: 20",
+        cxxopts::value<unsigned>()->default_value("20"))
+      ("nsga2-cr", "Crossover probability. Must be in range [0,1[\ndefault: 0.6",
+        cxxopts::value<double>()->default_value("0.6"))
+      ("nsga2-m", "Mutation probability. Must be in range [0,1]\ndefault: 0.4",
+        cxxopts::value<double>()->default_value("0.4"));
+  }
+  // clang-format on
+
+  try {
+    auto Options = Parser.parse(Argc, Argv);
+
+    if (static_cast<bool>(Options.count("quiet"))) {
+      firestarter::logging::Filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::warn);
+    } else if (static_cast<bool>(Options.count("report"))) {
+      firestarter::logging::Filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::debug);
+    } else if (static_cast<bool>(Options.count("debug"))) {
+      firestarter::logging::Filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::trace);
+    } else {
+      firestarter::logging::Filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::info);
+    }
+
+    if (static_cast<bool>(Options.count("version"))) {
+      std::exit(EXIT_SUCCESS);
+    }
+
+    if (static_cast<bool>(Options.count("copyright"))) {
+      printCopyright();
+      std::exit(EXIT_SUCCESS);
+    }
+
+    if (static_cast<bool>(Options.count("warranty"))) {
+      printWarranty();
+      std::exit(EXIT_SUCCESS);
+    }
+
+    firestarter::log::info() << "This program comes with ABSOLUTELY NO WARRANTY; for details run `" << ExecutableName
+                             << " -w`.\n"
+                             << "This is free software, and you are welcome to redistribute it\n"
+                             << "under certain conditions; run `" << ExecutableName << " -c` for details.\n";
+
+    if (static_cast<bool>(Options.count("help"))) {
+      auto Section = Options["help"].as<std::string>();
+
+      printHelp(Parser, Section);
+      std::exit(EXIT_SUCCESS);
+    }
+
+    Timeout = std::chrono::seconds(Options["timeout"].as<unsigned>());
+    const auto LoadPercent = Options["load"].as<unsigned>();
+    Period = std::chrono::microseconds(Options["period"].as<unsigned>());
+
+    if (LoadPercent > 100) {
+      throw std::invalid_argument("Option -l/--load may not be above 100.");
+    }
+
+    Load = (Period * LoadPercent) / 100;
+    if (LoadPercent == 100 || Load == std::chrono::microseconds::zero()) {
+      Period = std::chrono::microseconds::zero();
+    }
+
+    ErrorDetection = static_cast<bool>(Options.count("error-detection"));
+    if (ErrorDetection && LoadPercent != 100) {
+      throw std::invalid_argument("Option --error-detection may only be used with -l/--load equal 100.");
+    }
+
+    if (firestarter::OptionalFeatures.DebugFeatureEnabled) {
+      AllowUnavailablePayload = static_cast<bool>(Options.count("allow-unavailable-payload"));
+      DumpRegisters = static_cast<bool>(Options.count("dump-registers"));
+      if (DumpRegisters) {
+        DumpRegistersTimeDelta = std::chrono::seconds(Options["dump-registers"].as<unsigned>());
+        DumpRegistersOutpath = Options["dump-registers-outpath"].as<std::string>();
+        // NOTE(review): the help text asks for "without a timeout and 100 percent load", but this only rejects the
+        // combination of a timeout with reduced load - confirm whether '||' was intended.
+        if (Timeout != std::chrono::microseconds::zero() && LoadPercent != 100) {
+          throw std::invalid_argument("Option --dump-registers may only be used without a timeout and full load.");
+        }
+        if (ErrorDetection) {
+          throw std::invalid_argument("Options --dump-registers and --error-detection cannot be used together.");
+        }
+      }
+    }
+
+    RequestedNumThreads = Options["threads"].as<unsigned>();
+
+#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
+    CpuBind = Options["bind"].as<std::string>();
+    if (!CpuBind.empty() && RequestedNumThreads != 0) {
+      throw std::invalid_argument("Options -b/--bind and -n/--threads cannot be used together.");
+    }
+#endif
+
+    if (firestarter::OptionalFeatures.gpuEnabled()) {
+      GpuUseFloat = static_cast<bool>(Options.count("usegpufloat"));
+      GpuUseDouble = static_cast<bool>(Options.count("usegpudouble"));
+
+      if (GpuUseFloat && GpuUseDouble) {
+        throw std::invalid_argument("Options -f/--usegpufloat and -d/--usegpudouble cannot be used together.");
+      }
+
+      GpuMatrixSize = Options["matrixsize"].as<unsigned>();
+      if (GpuMatrixSize > 0 && GpuMatrixSize < 64) {
+        throw std::invalid_argument("Option -m/--matrixsize may not be below 64.");
+      }
+
+      Gpus = Options["gpus"].as<int>();
+    }
+
+    PrintFunctionSummary = static_cast<bool>(Options.count("avail"));
+
+    FunctionId = Options["function"].as<unsigned>();
+
+    ListInstructionGroups = static_cast<bool>(Options.count("list-instruction-groups"));
+    InstructionGroups = Options["run-instruction-groups"].as<std::string>();
+    if (static_cast<bool>(Options.count("set-line-count"))) {
+      LineCount = Options["set-line-count"].as<unsigned>();
+    }
+
+    if (firestarter::OptionalFeatures.OptimizationEnabled) {
+      StartDelta = std::chrono::milliseconds(Options["start-delta"].as<unsigned>());
+      StopDelta = std::chrono::milliseconds(Options["stop-delta"].as<unsigned>());
+      MeasurementInterval = std::chrono::milliseconds(Options["measurement-interval"].as<unsigned>());
+#ifndef FIRESTARTER_LINK_STATIC
+      MetricPaths = Options["metric-path"].as<std::vector<std::string>>();
+#endif
+      if (static_cast<bool>(Options.count("metric-from-stdin"))) {
+        StdinMetrics = Options["metric-from-stdin"].as<std::vector<std::string>>();
+      }
+      Measurement = static_cast<bool>(Options.count("measurement"));
+      ListMetrics = static_cast<bool>(Options.count("list-metrics"));
+      Optimize = static_cast<bool>(Options.count("optimize"));
+
+      if (Optimize) {
+        if (ErrorDetection) {
+          throw std::invalid_argument("Options --error-detection and --optimize cannot be used together.");
+        }
+        if (Measurement) {
+          throw std::invalid_argument("Options --measurement and --optimize cannot be used together.");
+        }
+        Preheat = std::chrono::seconds(Options["preheat"].as<unsigned>());
+        OptimizationAlgorithm = Options["optimize"].as<std::string>();
+        if (static_cast<bool>(Options.count("optimization-metric"))) {
+          OptimizationMetrics = Options["optimization-metric"].as<std::vector<std::string>>();
+        }
+        if (LoadPercent != 100) {
+          throw std::invalid_argument("Options -p | --period and -l | --load are not compatible with --optimize.");
+        }
+        if (Timeout == std::chrono::seconds::zero()) {
+          throw std::invalid_argument("Option -t | --timeout must be specified for optimization.");
+        }
+        EvaluationDuration = Timeout;
+        // this will deactivate the watchdog worker
+        Timeout = std::chrono::seconds::zero();
+        Individuals = Options["individuals"].as<unsigned>();
+        if (static_cast<bool>(Options.count("optimize-outfile"))) {
+          OptimizeOutfile = Options["optimize-outfile"].as<std::string>();
+        }
+        Generations = Options["generations"].as<unsigned>();
+        Nsga2Cr = Options["nsga2-cr"].as<double>();
+        Nsga2M = Options["nsga2-m"].as<double>();
+
+        if (OptimizationAlgorithm != "NSGA2") {
+          throw std::invalid_argument("Option --optimize must be any of: NSGA2");
+        }
+      }
+    }
+  } catch (const std::exception& E) {
+    firestarter::log::error() << E.what() << "\n";
+    printHelp(Parser);
+    std::exit(EXIT_FAILURE);
+  }
+}
+} // namespace firestarter
\ No newline at end of file
diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp
index 37b7bb67..7b3d935b 100644
--- a/src/firestarter/DumpRegisterWorker.cpp
+++ b/src/firestarter/DumpRegisterWorker.cpp
@@ -53,10 +53,11 @@ auto registerNameBySize(unsigned RegisterSize) -> std::string {
 
 namespace firestarter {
 
-void Firestarter::initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, const std::string& DumpFilePath) {
+void Firestarter::initDumpRegisterWorker() {
   // Create the data for the worker thread. The thread will dump the register contents periodically and calculate the
   // hamming distance between dumps.
-  auto Data = std::make_unique<DumpRegisterWorkerData>(this->LoadThreads.begin()->second, DumpTimeDelta, DumpFilePath);
+  auto Data = std::make_unique<DumpRegisterWorkerData>(this->LoadThreads.begin()->second, Cfg.DumpRegistersTimeDelta,
+                                                       Cfg.DumpRegistersOutpath);
 
   // Spawn the thread.
   DumpRegisterWorkerThread = std::thread(Firestarter::dumpRegisterWorker, std::move(Data));
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 281bbb86..af18f906 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -31,68 +31,17 @@
 
 namespace firestarter {
 
-Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds const& Timeout, unsigned LoadPercent,
-                         std::chrono::microseconds const& Period, unsigned RequestedNumThreads,
-                         std::string const& CpuBind, bool PrintFunctionSummary, unsigned FunctionId,
-                         bool ListInstructionGroups, std::string const& InstructionGroups, unsigned LineCount,
-                         bool AllowUnavailablePayload, bool DumpRegisters,
-                         std::chrono::seconds const& DumpRegistersTimeDelta, std::string DumpRegistersOutpath,
-                         bool ErrorDetection, int Gpus, unsigned GpuMatrixSize, bool GpuUseFloat, bool GpuUseDouble,
-                         bool ListMetrics, bool Measurement, std::chrono::milliseconds const& StartDelta,
-                         std::chrono::milliseconds const& StopDelta,
-                         std::chrono::milliseconds const& MeasurementInterval,
-                         std::vector<std::string> const& MetricPaths, std::vector<std::string> const& StdinMetrics,
-                         bool Optimize, std::chrono::seconds const& Preheat, std::string const& OptimizationAlgorithm,
-                         std::vector<std::string> const& OptimizationMetrics,
-                         std::chrono::seconds const& EvaluationDuration, unsigned Individuals,
-                         std::string OptimizeOutfile, unsigned Generations, double Nsga2Cr, double Nsga2M)
-    : Argc(Argc)
-    , Argv(Argv)
-    , Timeout(Timeout)
-    , LoadPercent(LoadPercent)
-    , Period(Period)
-    , DumpRegisters(DumpRegisters)
-    , DumpRegistersTimeDelta(DumpRegistersTimeDelta)
-    , DumpRegistersOutpath(std::move(DumpRegistersOutpath))
-    , ErrorDetection(ErrorDetection)
-    , Gpus(Gpus)
-    , GpuMatrixSize(GpuMatrixSize)
-    , GpuUseFloat(GpuUseFloat)
-    , GpuUseDouble(GpuUseDouble)
-    , StartDelta(StartDelta)
-    , StopDelta(StopDelta)
-    , Measurement(Measurement)
-    , Optimize(Optimize)
-    , Preheat(Preheat)
-    , OptimizationAlgorithm(OptimizationAlgorithm)
-    , OptimizationMetrics(OptimizationMetrics)
-    , EvaluationDuration(EvaluationDuration)
-    , Individuals(Individuals)
-    , OptimizeOutfile(std::move(OptimizeOutfile))
-    , Generations(Generations)
-    , Nsga2Cr(Nsga2Cr)
-    , Nsga2M(Nsga2M) {
-  Load = (Period * LoadPercent) / 100;
-  if (LoadPercent == 100 || Load == std::chrono::microseconds::zero()) {
-    this->Period = std::chrono::microseconds::zero();
-  }
-
-  if constexpr (!firestarter::OptionalFeatures.OptimizationEnabled) {
-    (void)ListMetrics;
-    (void)MeasurementInterval;
-    (void)MetricPaths;
-    (void)StdinMetrics;
-  }
-
+Firestarter::Firestarter(Config&& ProvidedConfig) // named differently from member Cfg, which the body must read
+    : Cfg(std::move(ProvidedConfig)) {
   if constexpr (firestarter::OptionalFeatures.IsX86) {
     Environment = std::make_unique<environment::x86::X86Environment>();
   }
 
-  Environment->evaluateCpuAffinity(RequestedNumThreads, CpuBind);
+  Environment->evaluateCpuAffinity(Cfg.RequestedNumThreads, Cfg.CpuBind);
 
   if constexpr (firestarter::OptionalFeatures.IsX86) {
     // Error detection uses crc32 instruction added by the SSE4.2 extension to x86
-    if (ErrorDetection) {
+    if (Cfg.ErrorDetection) {
       const auto& X86Env = *dynamic_cast<environment::x86::X86Environment*>(Environment.get());
       if (!X86Env.topology().featuresAsmjit().has(asmjit::CpuFeatures::X86::kSSE4_2)) {
         throw std::invalid_argument("Option --error-detection requires the crc32 "
@@ -101,7 +50,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
     }
   }
 
-  if (ErrorDetection && Environment->requestedNumThreads() < 2) {
+  if (Cfg.ErrorDetection && Environment->requestedNumThreads() < 2) {
     throw std::invalid_argument("Option --error-detection must run with 2 or more threads. Number of "
                                 "threads is " +
                                 std::to_string(Environment->requestedNumThreads()) + "\n");
@@ -109,32 +58,32 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
 
   Environment->evaluateFunctions();
 
-  if (PrintFunctionSummary) {
+  if (Cfg.PrintFunctionSummary) {
     Environment->printFunctionSummary();
     std::exit(EXIT_SUCCESS);
   }
 
-  Environment->selectFunction(FunctionId, AllowUnavailablePayload);
+  Environment->selectFunction(Cfg.FunctionId, Cfg.AllowUnavailablePayload);
 
-  if (ListInstructionGroups) {
+  if (Cfg.ListInstructionGroups) {
     Environment->printAvailableInstructionGroups();
     std::exit(EXIT_SUCCESS);
   }
 
-  if (!InstructionGroups.empty()) {
-    Environment->selectInstructionGroups(InstructionGroups);
+  if (!Cfg.InstructionGroups.empty()) {
+    Environment->selectInstructionGroups(Cfg.InstructionGroups);
   }
 
-  if (LineCount != 0) {
-    Environment->setLineCount(LineCount);
+  if (Cfg.LineCount != 0) {
+    Environment->setLineCount(Cfg.LineCount);
   }
 
   if constexpr (firestarter::OptionalFeatures.OptimizationEnabled) {
-    if (Measurement || ListMetrics || Optimize) {
+    if (Cfg.Measurement || Cfg.ListMetrics || Cfg.Optimize) {
       MeasurementWorker = std::make_shared<measurement::MeasurementWorker>(
-          MeasurementInterval, Environment->requestedNumThreads(), MetricPaths, StdinMetrics);
+          Cfg.MeasurementInterval, Environment->requestedNumThreads(), Cfg.MetricPaths, Cfg.StdinMetrics);
 
-      if (ListMetrics) {
+      if (Cfg.ListMetrics) {
         log::info() << MeasurementWorker->availableMetrics();
         std::exit(EXIT_SUCCESS);
       }
@@ -148,7 +97,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
       }
 
       // check if selected metrics are initialized
-      for (auto const& OptimizationMetric : OptimizationMetrics) {
+      for (auto const& OptimizationMetric : Cfg.OptimizationMetrics) {
         auto NameEqual = [OptimizationMetric](auto const& Name) {
           auto InvertedName = "-" + Name;
           return Name == OptimizationMetric || InvertedName == OptimizationMetric;
@@ -164,7 +113,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
       }
     }
 
-    if (Optimize) {
+    if (Cfg.Optimize) {
       auto ApplySettings = std::bind(
           [this](std::vector<std::pair<std::string, unsigned>> const& Setting) {
             using Clock = std::chrono::high_resolution_clock;
@@ -202,18 +151,19 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
           std::placeholders::_1);
 
       auto Prob = std::make_shared<firestarter::optimizer::problem::CLIArgumentProblem>(
-          std::move(ApplySettings), MeasurementWorker, OptimizationMetrics, EvaluationDuration, StartDelta, StopDelta,
-          Environment->selectedConfig().payloadItems());
+          std::move(ApplySettings), MeasurementWorker, Cfg.OptimizationMetrics, Cfg.EvaluationDuration, Cfg.StartDelta,
+          Cfg.StopDelta, Environment->selectedConfig().payloadItems());
 
       Population = firestarter::optimizer::Population(std::move(Prob));
 
-      if (OptimizationAlgorithm == "NSGA2") {
-        Algorithm = std::make_unique<firestarter::optimizer::algorithm::NSGA2>(Generations, Nsga2Cr, Nsga2M);
+      if (Cfg.OptimizationAlgorithm == "NSGA2") {
+        Algorithm =
+            std::make_unique<firestarter::optimizer::algorithm::NSGA2>(Cfg.Generations, Cfg.Nsga2Cr, Cfg.Nsga2M);
       } else {
-        throw std::invalid_argument("Algorithm " + OptimizationAlgorithm + " unknown.");
+        throw std::invalid_argument("Algorithm " + Cfg.OptimizationAlgorithm + " unknown.");
       }
 
-      Algorithm->checkPopulation(static_cast<firestarter::optimizer::Population const&>(Population), Individuals);
+      Algorithm->checkPopulation(static_cast<firestarter::optimizer::Population const&>(Population), Cfg.Individuals);
     }
   }
 
@@ -223,7 +173,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
 
   // setup thread with either high or low load configured at the start
   // low loads has to know the length of the period
-  initLoadWorkers((LoadPercent == 0), Period);
+  initLoadWorkers();
 
   // add some signal handler for aborting FIRESTARTER
   if constexpr (!firestarter::OptionalFeatures.IsWin32) {
@@ -237,12 +187,12 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds
 void Firestarter::mainThread() {
   Environment->printThreadSummary();
 
-  Cuda = std::make_unique<cuda::Cuda>(LoadVar, GpuUseFloat, GpuUseDouble, GpuMatrixSize, Gpus);
-  Oneapi = std::make_unique<oneapi::OneAPI>(LoadVar, GpuUseFloat, GpuUseDouble, GpuMatrixSize, Gpus);
+  Cuda = std::make_unique<cuda::Cuda>(LoadVar, Cfg.GpuUseFloat, Cfg.GpuUseDouble, Cfg.GpuMatrixSize, Cfg.Gpus);
+  Oneapi = std::make_unique<oneapi::OneAPI>(LoadVar, Cfg.GpuUseFloat, Cfg.GpuUseDouble, Cfg.GpuMatrixSize, Cfg.Gpus);
 
   if constexpr (firestarter::OptionalFeatures.OptimizationEnabled) {
     // if measurement is enabled, start it here
-    if (Measurement) {
+    if (Cfg.Measurement) {
       MeasurementWorker->startMeasurement();
     }
   }
@@ -250,31 +200,31 @@ void Firestarter::mainThread() {
   signalWork();
 
   if constexpr (firestarter::OptionalFeatures.DumpRegisterEnabled) {
-    if (DumpRegisters) {
-      initDumpRegisterWorker(DumpRegistersTimeDelta, DumpRegistersOutpath);
+    if (Cfg.DumpRegisters) {
+      initDumpRegisterWorker();
     }
   }
 
   // worker thread for load control
-  watchdogWorker(Period, Load, Timeout);
+  watchdogWorker(Cfg.Period, Cfg.Load, Cfg.Timeout);
 
   if constexpr (firestarter::OptionalFeatures.OptimizationEnabled) {
     // check if optimization is selected
-    if (Optimize) {
+    if (Cfg.Optimize) {
       auto StartTime = optimizer::History::getTime();
 
       Firestarter::Optimizer = std::make_unique<optimizer::OptimizerWorker>(
-          std::move(Algorithm), Population, OptimizationAlgorithm, Individuals, Preheat);
+          std::move(Algorithm), Population, Cfg.OptimizationAlgorithm, Cfg.Individuals, Cfg.Preheat);
 
       // wait here until optimizer thread terminates
       Firestarter::Optimizer->join();
 
       auto PayloadItems = Environment->selectedConfig().payloadItems();
 
-      firestarter::optimizer::History::save(OptimizeOutfile, StartTime, PayloadItems, Argc, Argv);
+      firestarter::optimizer::History::save(Cfg.OptimizeOutfile, StartTime, PayloadItems, Cfg.Argc, Cfg.Argv);
 
       // print the best 20 according to each metric
-      firestarter::optimizer::History::printBest(OptimizationMetrics, PayloadItems);
+      firestarter::optimizer::History::printBest(Cfg.OptimizationMetrics, PayloadItems);
 
       // stop all the load threads
       std::raise(SIGTERM);
@@ -284,28 +234,28 @@ void Firestarter::mainThread() {
   // wait for watchdog to timeout or until user terminates
   joinLoadWorkers();
   if constexpr (firestarter::OptionalFeatures.DumpRegisterEnabled) {
-    if (DumpRegisters) {
+    if (Cfg.DumpRegisters) {
       joinDumpRegisterWorker();
     }
   }
 
-  if (!Optimize) {
+  if (!Cfg.Optimize) {
     printPerformanceReport();
   }
 
   if constexpr (firestarter::OptionalFeatures.OptimizationEnabled) {
     // if measurment is enabled, stop it here
-    if (Measurement) {
+    if (Cfg.Measurement) {
       // TODO(Issue #77): clear this up
       log::info() << "metric,num_timepoints,duration_ms,average,stddev";
-      for (auto const& [name, sum] : MeasurementWorker->getValues(StartDelta, StopDelta)) {
+      for (auto const& [name, sum] : MeasurementWorker->getValues(Cfg.StartDelta, Cfg.StopDelta)) {
         log::info() << std::quoted(name) << "," << sum.NumTimepoints << "," << sum.Duration.count() << ","
                     << sum.Average << "," << sum.Stddev;
       }
     }
   }
 
-  if (ErrorDetection) {
+  if (Cfg.ErrorDetection) {
     printThreadErrorReport();
   }
 }
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index 4c2bf289..a1231286 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -46,18 +46,18 @@
 
 namespace firestarter {
 
-void Firestarter::initLoadWorkers(bool LowLoad, std::chrono::microseconds Period) {
+void Firestarter::initLoadWorkers() {
   Environment->setCpuAffinity(0);
 
   // setup load variable to execute low or high load once the threads switch to
   // work.
-  LoadVar = LowLoad ? LoadThreadWorkType::LoadLow : LoadThreadWorkType::LoadHigh;
+  LoadVar = Cfg.Load == std::chrono::microseconds::zero() ? LoadThreadWorkType::LoadLow : LoadThreadWorkType::LoadHigh;
 
   auto NumThreads = Environment->requestedNumThreads();
 
   // create a std::vector<std::shared_ptr<>> of requestenNumThreads()
   // communication pointers and add these to the threaddata
-  if (ErrorDetection) {
+  if (Cfg.ErrorDetection) {
     for (uint64_t I = 0; I < NumThreads; I++) {
       auto* CommPtr = static_cast<uint64_t*>(AlignedAlloc::malloc(2 * sizeof(uint64_t)));
       assert(CommPtr);
@@ -69,9 +69,10 @@ void Firestarter::initLoadWorkers(bool LowLoad, std::chrono::microseconds Period
   }
 
   for (uint64_t I = 0; I < NumThreads; I++) {
-    auto Td = std::make_shared<LoadWorkerData>(I, *Environment, LoadVar, Period, DumpRegisters, ErrorDetection);
+    auto Td =
+        std::make_shared<LoadWorkerData>(I, *Environment, LoadVar, Cfg.Period, Cfg.DumpRegisters, Cfg.ErrorDetection);
 
-    if (ErrorDetection) {
+    if (Cfg.ErrorDetection) {
       // distribute pointers for error deteciton. (set threads in a ring)
       // give this thread the left pointer i and right pointer (i+1) %
       // requestedNumThreads().
@@ -148,7 +149,7 @@ void Firestarter::joinLoadWorkers() {
 }
 
 void Firestarter::printThreadErrorReport() {
-  if (ErrorDetection) {
+  if (Cfg.ErrorDetection) {
     auto MaxSize = LoadThreads.size();
 
     std::vector<bool> Errors(MaxSize, false);
@@ -204,7 +205,7 @@ void Firestarter::printPerformanceReport() {
   // insert values for ipc-estimate metric
   // if we are on linux
 #if defined(linux) || defined(__linux__)
-  if (Measurement) {
+  if (Cfg.Measurement) {
     for (auto const& Thread : LoadThreads) {
       auto Td = Thread.second;
       ipcEstimateMetricInsert(static_cast<double>(Td->LastRun.Iterations) *
diff --git a/src/firestarter/Main.cpp b/src/firestarter/Main.cpp
index 04b17a28..8158c609 100644
--- a/src/firestarter/Main.cpp
+++ b/src/firestarter/Main.cpp
@@ -19,426 +19,11 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <firestarter/Config.hpp>
 #include <firestarter/Firestarter.hpp>
 #include <firestarter/Logging/Log.hpp>
 
-#include <cxxopts.hpp>
-
-#include <string>
-
-struct Config {
-  inline static const std::vector<std::pair<std::string, std::string>> OptionsMap = {
-      {"information", "Information Options:\n"},
-      {"general", "General Options:\n"},
-      {"specialized-workloads", "Specialized workloads:\n"},
-#ifdef FIRESTARTER_DEBUG_FEATURES
-      {"debug", "Debugging:\n"},
-#endif
-#if defined(linux) || defined(__linux__)
-      {"measurement", "Measurement:\n"},
-      {"optimization", "Optimization:\n"}
-#endif
-  };
-
-  // default parameters
-  std::chrono::seconds Timeout{};
-  unsigned LoadPercent;
-  std::chrono::microseconds Period{};
-  unsigned RequestedNumThreads;
-  std::string CpuBind;
-  bool PrintFunctionSummary;
-  unsigned FunctionId;
-  bool ListInstructionGroups;
-  std::string InstructionGroups;
-  unsigned LineCount = 0;
-  // debug features
-  bool AllowUnavailablePayload = false;
-  bool DumpRegisters = false;
-  std::chrono::seconds DumpRegistersTimeDelta = std::chrono::seconds(0);
-  std::string DumpRegistersOutpath;
-  bool ErrorDetection = false;
-  // CUDA parameters
-  int Gpus = 0;
-  unsigned GpuMatrixSize = 0;
-  bool GpuUseFloat = false;
-  bool GpuUseDouble = false;
-  // linux features
-  bool ListMetrics = false;
-  bool Measurement = false;
-  std::chrono::milliseconds StartDelta = std::chrono::milliseconds(0);
-  std::chrono::milliseconds StopDelta = std::chrono::milliseconds(0);
-  std::chrono::milliseconds MeasurementInterval = std::chrono::milliseconds(0);
-  std::vector<std::string> StdinMetrics;
-  // linux and dynamic linked binary
-  std::vector<std::string> MetricPaths;
-
-  // optimization
-  bool Optimize = false;
-  std::chrono::seconds Preheat{};
-  std::string OptimizationAlgorithm;
-  std::vector<std::string> OptimizationMetrics;
-  std::chrono::seconds EvaluationDuration{};
-  unsigned Individuals;
-  std::string OptimizeOutfile;
-  unsigned Generations;
-  double Nsga2Cr;
-  double Nsga2M;
-
-  Config(int Argc, const char** Argv);
-};
-
-namespace {
-
-void printCopyright() {
-  firestarter::log::info() << "This program is free software: you can redistribute it and/or "
-                              "modify\n"
-                           << "it under the terms of the GNU General Public License as published "
-                              "by\n"
-                           << "the Free Software Foundation, either version 3 of the License, or\n"
-                           << "(at your option) any later version.\n"
-                           << "\n"
-                           << "You should have received a copy of the GNU General Public License\n"
-                           << "along with this program.  If not, see "
-                              "<http://www.gnu.org/licenses/>.\n";
-}
-
-void printWarranty() {
-  firestarter::log::info() << "This program is distributed in the hope that it will be useful,\n"
-                           << "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
-                           << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
-                           << "GNU General Public License for more details.\n"
-                           << "\n"
-                           << "You should have received a copy of the GNU General Public License\n"
-                           << "along with this program.  If not, see "
-                              "<http://www.gnu.org/licenses/>.\n";
-}
-
-void printHelp(cxxopts::Options const& Parser, std::string const& Section) {
-  std::vector<std::pair<std::string, std::string>> Options(Config::OptionsMap.size());
-
-  if (Section.empty()) {
-    std::copy(Config::OptionsMap.begin(), Config::OptionsMap.end(), Options.begin());
-  } else {
-    auto FindSection = [&](std::pair<std::string, std::string> const& Pair) { return Pair.first == Section; };
-    auto It = std::copy_if(Config::OptionsMap.begin(), Config::OptionsMap.end(), Options.begin(), FindSection);
-    Options.resize(std::distance(Options.begin(), It));
-  }
-
-  // clang-format off
-  firestarter::log::info()
-    << Parser.help(Options)
-    << "Examples:\n"
-    << "  ./FIRESTARTER                 starts FIRESTARTER without timeout\n"
-    << "  ./FIRESTARTER -t 300          starts a 5 minute run of FIRESTARTER\n"
-    << "  ./FIRESTARTER -l 50 -t 600    starts a 10 minute run of FIRESTARTER with\n"
-    << "                                50\% high load and 50\% idle time\n"
-    << (firestarter::OptionalFeatures.gpuEnabled() ? 
-       "                                on CPUs and full load on GPUs\n"
-     : "")
-    << "  ./FIRESTARTER -l 75 -p 20000000\n"
-    << "                                starts FIRESTARTER with an interval length\n"
-    << "                                of 2 sec, 1.5s high load"
-    << (firestarter::OptionalFeatures.gpuEnabled() ? 
-       "                                on CPUs and full load on GPUs\n"
-     : "\n")
-    << (firestarter::OptionalFeatures.OptimizationEnabled ?
-       "  ./FIRESTARTER --measurement --start-delta=300000 -t 900\n"
-       "                                starts FIRESTARTER measuring all available\n"
-       "                                metrics for 15 minutes disregarding the first\n"
-       "                                5 minutes and last two seconds (default to `--stop-delta`)\n"
-       "  ./FIRESTARTER -t 20 --optimize=NSGA2 --optimization-metric sysfs-powercap-rapl,perf-ipc\n"
-       "                                starts FIRESTARTER optimizing with the sysfs-powercap-rapl\n"
-       "                                and perf-ipc metric. The duration is 20s long. The default\n"
-       "                                instruction groups for the current platform will be used.\n"
-     : "")
-    ;
-  // clang-format on
-}
-
-} // namespace
-
-Config::Config(int Argc, const char** Argv) {
-  const auto* ExecutableName = *Argv;
-
-  cxxopts::Options Parser(ExecutableName);
-
-  const auto HelpDescription =
-      std::string("Display usage information. SECTION can be any of: information | general | specialized-workloads") +
-      (firestarter::OptionalFeatures.DebugFeatureEnabled ? " | debug" : "") +
-      (firestarter::OptionalFeatures.OptimizationEnabled ? "\n| measurement | optimization" : "");
-
-  const auto LoadDescription =
-      std::string("Set the percentage of high CPU load to LOAD\n(%) default: 100, valid values: 0 <= LOAD <=\n100, "
-                  "threads will be idle in the remaining time,\nfrequency of load changes is determined by -p.") +
-      (firestarter::OptionalFeatures.gpuEnabled() ? " This option does NOT influence the GPU\nworkload!" : "");
-
-  // clang-format off
-  Parser.add_options("information")
-    ("h,help", HelpDescription,
-      cxxopts::value<std::string>()->implicit_value(""), "SECTION")
-    ("v,version", "Display version information")
-    ("c,copyright", "Display copyright information")
-    ("w,warranty", "Display warranty information")
-    ("q,quiet", "Set log level to Warning")
-    ("r,report", "Display additional information (overridden by -q)")
-    ("debug", "Print debug output")
-    ("a,avail", "List available functions");
-
-  Parser.add_options("general")
-    ("i,function", "Specify integer ID of the load-function to be\nused (as listed by --avail)",
-      cxxopts::value<unsigned>()->default_value("0"), "ID");
-
-  if (firestarter::OptionalFeatures.gpuEnabled()) {
-    Parser.add_options("general")
-      ("f,usegpufloat", "Use single precision matrix multiplications\ninstead of default")
-      ("d,usegpudouble", "Use double precision matrix multiplications\ninstead of default")
-      ("g,gpus", "Number of gpus to use, default: -1 (all)",
-        cxxopts::value<int>()->default_value("-1"))
-      ("m,matrixsize", "Size of the matrix to calculate, default: 0 (maximum)",
-        cxxopts::value<unsigned>()->default_value("0"));
-  }
-
-  Parser.add_options("general")
-    ("t,timeout", "Set the timeout (seconds) after which FIRESTARTER\nterminates itself, default: 0 (no timeout)",
-      cxxopts::value<unsigned>()->default_value("0"), "TIMEOUT")
-    ("l,load", LoadDescription,
-      cxxopts::value<unsigned>()->default_value("100"), "LOAD")
-    ("p,period", "Set the interval length for CPUs to PERIOD\n(usec), default: 100000, each interval contains\na high load and an idle phase, the percentage\nof high load is defined by -l.",
-      cxxopts::value<unsigned>()->default_value("100000"), "PERIOD")
-    ("n,threads", "Specify the number of threads. Cannot be\ncombined with -b | --bind, which impicitly\nspecifies the number of threads.",
-      cxxopts::value<unsigned>()->default_value("0"), "COUNT")
-#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
-    ("b,bind", "Select certain CPUs. CPULIST format: \"x,y,z\",\n\"x-y\", \"x-y/step\", and any combination of the\nabove. Cannot be combined with -n | --threads.",
-      cxxopts::value<std::string>()->default_value(""), "CPULIST")
-#endif
-    ("error-detection", "Enable error detection. This aborts execution when the calculated data is corruped by errors. FIRESTARTER must run with 2 or more threads for this feature. Cannot be used with -l | --load and --optimize.");
-
-  Parser.add_options("specialized-workloads")
-    ("list-instruction-groups", "List the available instruction groups for the\npayload of the current platform.")
-    ("run-instruction-groups", "Run the payload with the specified\ninstruction groups. GROUPS format: multiple INST:VAL\npairs comma-seperated.",
-      cxxopts::value<std::string>()->default_value(""), "GROUPS")
-    ("set-line-count", "Set the number of lines for a payload.",
-      cxxopts::value<unsigned>());
-
-  if (firestarter::OptionalFeatures.DebugFeatureEnabled) {
-    Parser.add_options("debug")
-      ("allow-unavailable-payload", "")
-      ("dump-registers", "Dump the working registers on the first\nthread. Depending on the payload these are mm, xmm,\nymm or zmm. Only use it without a timeout and\n100 percent load. DELAY between dumps in secs. Cannot be used with --error-detection.",
-        cxxopts::value<unsigned>()->implicit_value("10"), "DELAY")
-      ("dump-registers-outpath", "Path for the dump of the output files. If\nPATH is not given, current working directory will\nbe used.",
-        cxxopts::value<std::string>()->default_value(""), "PATH");
-  }
-
-  if (firestarter::OptionalFeatures.OptimizationEnabled) {
-    Parser.add_options("measurement")
-      ("list-metrics", "List the available metrics.")
-#ifndef FIRESTARTER_LINK_STATIC
-      ("metric-path", "Add a path to a shared library representing an interface for a metric. This option can be specified multiple times.",
-        cxxopts::value<std::vector<std::string>>()->default_value(""))
-#endif
-      ("metric-from-stdin", "Add a metric NAME with values from stdin.\nFormat of input: \"NAME TIME_SINCE_EPOCH VALUE\\n\".\nTIME_SINCE_EPOCH is a int64 in nanoseconds. VALUE is a double. (Do not forget to flush\nlines!)",
-        cxxopts::value<std::vector<std::string>>(), "NAME")
-      ("measurement", "Start a measurement for the time specified by\n-t | --timeout. (The timeout must be greater\nthan the start and stop deltas.) Cannot be\ncombined with --optimize.")
-      ("measurement-interval", "Interval of measurements in milliseconds, default: 100",
-        cxxopts::value<unsigned>()->default_value("100"))
-      ("start-delta", "Cut of first N milliseconds of measurement, default: 5000",
-        cxxopts::value<unsigned>()->default_value("5000"), "N")
-      ("stop-delta", "Cut of last N milliseconds of measurement, default: 2000",
-        cxxopts::value<unsigned>()->default_value("2000"), "N")
-      ("preheat", "Preheat for N seconds, default: 240",
-        cxxopts::value<unsigned>()->default_value("240"), "N");
-  
-    Parser.add_options("optimization")
-      ("optimize", "Run the optimization with one of these algorithms: NSGA2.\nCannot be combined with --measurement.",
-        cxxopts::value<std::string>())
-      ("optimize-outfile", "Dump the output of the optimization into this\nfile, default: $PWD/$HOSTNAME_$DATE.json",
-        cxxopts::value<std::string>())
-      ("optimization-metric", "Use a metric for optimization. Metrics listed\nwith cli argument --list-metrics or specified\nwith --metric-from-stdin are valid.",
-        cxxopts::value<std::vector<std::string>>())
-      ("individuals", "Number of individuals for the population. For\nNSGA2 specify at least 5 and a multiple of 4,\ndefault: 20",
-        cxxopts::value<unsigned>()->default_value("20"))
-      ("generations", "Number of generations, default: 20",
-        cxxopts::value<unsigned>()->default_value("20"))
-      ("nsga2-cr", "Crossover probability. Must be in range [0,1[\ndefault: 0.6",
-        cxxopts::value<double>()->default_value("0.6"))
-      ("nsga2-m", "Mutation probability. Must be in range [0,1]\ndefault: 0.4",
-        cxxopts::value<double>()->default_value("0.4"));
-  }
-  // clang-format on
-
-  try {
-    auto Options = Parser.parse(Argc, Argv);
-
-    if (static_cast<bool>(Options.count("quiet"))) {
-      firestarter::logging::Filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::warn);
-    } else if (static_cast<bool>(Options.count("report"))) {
-      firestarter::logging::Filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::debug);
-    } else if (static_cast<bool>(Options.count("debug"))) {
-      firestarter::logging::Filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::trace);
-    } else {
-      firestarter::logging::Filter<firestarter::logging::record>::set_severity(nitro::log::severity_level::info);
-    }
-
-    if (static_cast<bool>(Options.count("version"))) {
-      std::exit(EXIT_SUCCESS);
-    }
-
-    if (static_cast<bool>(Options.count("copyright"))) {
-      printCopyright();
-      std::exit(EXIT_SUCCESS);
-    }
-
-    if (static_cast<bool>(Options.count("warranty"))) {
-      printWarranty();
-      std::exit(EXIT_SUCCESS);
-    }
-
-    firestarter::log::info() << "This program comes with ABSOLUTELY NO WARRANTY; for details run `" << ExecutableName
-                             << " -w`.\n"
-                             << "This is free software, and you are welcome to redistribute it\n"
-                             << "under certain conditions; run `" << ExecutableName << " -c` for details.\n";
-
-    if (static_cast<bool>(Options.count("help"))) {
-      auto Section = Options["help"].as<std::string>();
-
-      // section not found
-      auto FindSection = [&](std::pair<std::string, std::string> const& Pair) { return Pair.first == Section; };
-      if (std::find_if(OptionsMap.begin(), OptionsMap.end(), FindSection) == OptionsMap.end() && !Section.empty()) {
-        throw std::invalid_argument("Section \"" + Section + "\" not found in help.");
-      }
-
-      printHelp(Parser, Section);
-      std::exit(EXIT_SUCCESS);
-    }
-
-    Timeout = std::chrono::seconds(Options["timeout"].as<unsigned>());
-    LoadPercent = Options["load"].as<unsigned>();
-    Period = std::chrono::microseconds(Options["period"].as<unsigned>());
-
-    if (LoadPercent > 100) {
-      throw std::invalid_argument("Option -l/--load may not be above 100.");
-    }
-
-    ErrorDetection = static_cast<bool>(Options.count("error-detection"));
-    if (ErrorDetection && LoadPercent != 100) {
-      throw std::invalid_argument("Option --error-detection may only be used "
-                                  "with -l/--load equal 100.");
-    }
-
-    if (firestarter::OptionalFeatures.DebugFeatureEnabled) {
-      AllowUnavailablePayload = static_cast<bool>(Options.count("allow-unavailable-payload"));
-      DumpRegisters = static_cast<bool>(Options.count("dump-registers"));
-      if (DumpRegisters) {
-        DumpRegistersTimeDelta = std::chrono::seconds(Options["dump-registers"].as<unsigned>());
-        if (Timeout != std::chrono::microseconds::zero() && LoadPercent != 100) {
-          throw std::invalid_argument("Option --dump-registers may only be used "
-                                      "without a timeout and full load.");
-        }
-        if (ErrorDetection) {
-          throw std::invalid_argument("Options --dump-registers and --error-detection cannot be used "
-                                      "together.");
-        }
-      }
-    }
-
-    RequestedNumThreads = Options["threads"].as<unsigned>();
-
-#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
-    CpuBind = Options["bind"].as<std::string>();
-    if (!CpuBind.empty()) {
-      if (RequestedNumThreads != 0) {
-        throw std::invalid_argument("Options -b/--bind and -n/--threads cannot be used together.");
-      }
-    }
-#endif
-
-    if (firestarter::OptionalFeatures.gpuEnabled()) {
-      GpuUseFloat = static_cast<bool>(Options.count("usegpufloat"));
-      GpuUseDouble = static_cast<bool>(Options.count("usegpudouble"));
-
-      if (GpuUseFloat && GpuUseDouble) {
-        throw std::invalid_argument("Options -f/--usegpufloat and "
-                                    "-d/--usegpudouble cannot be used together.");
-      }
-
-      GpuMatrixSize = Options["matrixsize"].as<unsigned>();
-      if (GpuMatrixSize > 0 && GpuMatrixSize < 64) {
-        throw std::invalid_argument("Option -m/--matrixsize may not be below 64.");
-      }
-
-      Gpus = Options["gpus"].as<int>();
-    }
-
-    PrintFunctionSummary = static_cast<bool>(Options.count("avail"));
-
-    FunctionId = Options["function"].as<unsigned>();
-
-    ListInstructionGroups = static_cast<bool>(Options.count("list-instruction-groups"));
-    InstructionGroups = Options["run-instruction-groups"].as<std::string>();
-    if (static_cast<bool>(Options.count("set-line-count"))) {
-      LineCount = Options["set-line-count"].as<unsigned>();
-    }
-
-    if (firestarter::OptionalFeatures.OptimizationEnabled) {
-      StartDelta = std::chrono::milliseconds(Options["start-delta"].as<unsigned>());
-      StopDelta = std::chrono::milliseconds(Options["stop-delta"].as<unsigned>());
-      MeasurementInterval = std::chrono::milliseconds(Options["measurement-interval"].as<unsigned>());
-#ifndef FIRESTARTER_LINK_STATIC
-      MetricPaths = Options["metric-path"].as<std::vector<std::string>>();
-#endif
-      if (static_cast<bool>(Options.count("metric-from-stdin"))) {
-        StdinMetrics = Options["metric-from-stdin"].as<std::vector<std::string>>();
-      }
-      Measurement = static_cast<bool>(Options.count("measurement"));
-      ListMetrics = static_cast<bool>(Options.count("list-metrics"));
-      Optimize = static_cast<bool>(Options.count("optimize"));
-
-      if (Optimize) {
-        if (ErrorDetection) {
-          throw std::invalid_argument("Options --error-detection and --optimize "
-                                      "cannot be used together.");
-        }
-        if (Measurement) {
-          throw std::invalid_argument("Options --measurement and --optimize cannot be used together.");
-        }
-        Preheat = std::chrono::seconds(Options["preheat"].as<unsigned>());
-        OptimizationAlgorithm = Options["optimize"].as<std::string>();
-        if (static_cast<bool>(Options.count("optimization-metric"))) {
-          OptimizationMetrics = Options["optimization-metric"].as<std::vector<std::string>>();
-        }
-        if (LoadPercent != 100) {
-          throw std::invalid_argument("Options -p | --period and -l | --load are "
-                                      "not compatible with --optimize.");
-        }
-        if (Timeout == std::chrono::seconds::zero()) {
-          throw std::invalid_argument("Option -t | --timeout must be specified for optimization.");
-        }
-        EvaluationDuration = Timeout;
-        // this will deactivate the watchdog worker
-        Timeout = std::chrono::seconds::zero();
-        Individuals = Options["individuals"].as<unsigned>();
-        if (static_cast<bool>(Options.count("optimize-outfile"))) {
-          OptimizeOutfile = Options["optimize-outfile"].as<std::string>();
-        }
-        Generations = Options["generations"].as<unsigned>();
-        Nsga2Cr = Options["nsga2-cr"].as<double>();
-        Nsga2M = Options["nsga2-m"].as<double>();
-
-        if (OptimizationAlgorithm != "NSGA2") {
-          throw std::invalid_argument("Option --optimize must be any of: NSGA2");
-        }
-      }
-    }
-
-  } catch (std::exception& E) {
-    firestarter::log::error() << E.what() << "\n";
-    printHelp(Parser, "");
-    std::exit(EXIT_FAILURE);
-  }
-}
-
 auto main(int argc, const char** argv) -> int {
-
   firestarter::log::info() << "FIRESTARTER - A Processor Stress Test Utility, Version " << _FIRESTARTER_VERSION_STRING
                            << "\n"
                            << "Copyright (C) " << _FIRESTARTER_BUILD_YEAR
@@ -452,17 +37,10 @@ auto main(int argc, const char** argv) -> int {
                            << "\n";
 #endif
 
-  Config const Cfg{argc, argv};
-
   try {
-    firestarter::Firestarter Firestarter(
-        argc, argv, Cfg.Timeout, Cfg.LoadPercent, Cfg.Period, Cfg.RequestedNumThreads, Cfg.CpuBind,
-        Cfg.PrintFunctionSummary, Cfg.FunctionId, Cfg.ListInstructionGroups, Cfg.InstructionGroups, Cfg.LineCount,
-        Cfg.AllowUnavailablePayload, Cfg.DumpRegisters, Cfg.DumpRegistersTimeDelta, Cfg.DumpRegistersOutpath,
-        Cfg.ErrorDetection, Cfg.Gpus, Cfg.GpuMatrixSize, Cfg.GpuUseFloat, Cfg.GpuUseDouble, Cfg.ListMetrics,
-        Cfg.Measurement, Cfg.StartDelta, Cfg.StopDelta, Cfg.MeasurementInterval, Cfg.MetricPaths, Cfg.StdinMetrics,
-        Cfg.Optimize, Cfg.Preheat, Cfg.OptimizationAlgorithm, Cfg.OptimizationMetrics, Cfg.EvaluationDuration,
-        Cfg.Individuals, Cfg.OptimizeOutfile, Cfg.Generations, Cfg.Nsga2Cr, Cfg.Nsga2M);
+    firestarter::Config Cfg{argc, argv};
+
+    firestarter::Firestarter Firestarter(std::move(Cfg));
 
     Firestarter.mainThread();
 

From f77483345db1d4da395d4869b7e9bc9c03f274dd Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 26 Oct 2024 15:03:50 +0200
Subject: [PATCH 105/167] reorder config fields

---
 include/firestarter/Config.hpp | 59 +++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 30 deletions(-)

diff --git a/include/firestarter/Config.hpp b/include/firestarter/Config.hpp
index ce88008d..31f89062 100644
--- a/include/firestarter/Config.hpp
+++ b/include/firestarter/Config.hpp
@@ -29,51 +29,50 @@ namespace firestarter {
 
 struct Config {
   const char** Argv;
-  int Argc;
 
-  // default parameters
   std::chrono::seconds Timeout{};
   std::chrono::microseconds Period{};
   std::chrono::microseconds Load{};
-  unsigned RequestedNumThreads;
+
+  std::chrono::seconds DumpRegistersTimeDelta = std::chrono::seconds(0);
+  std::chrono::milliseconds StartDelta = std::chrono::milliseconds(0);
+  std::chrono::milliseconds StopDelta = std::chrono::milliseconds(0);
+  std::chrono::milliseconds MeasurementInterval = std::chrono::milliseconds(0);
+  std::chrono::seconds Preheat{};
+  std::chrono::seconds EvaluationDuration{};
+
+  double Nsga2Cr;
+  double Nsga2M;
+
+  std::vector<std::string> StdinMetrics;
+  std::vector<std::string> MetricPaths;
+  std::vector<std::string> OptimizationMetrics;
+
   std::string CpuBind;
-  bool PrintFunctionSummary;
-  unsigned FunctionId;
-  bool ListInstructionGroups;
   std::string InstructionGroups;
+  std::string DumpRegistersOutpath;
+  std::string OptimizationAlgorithm;
+  std::string OptimizeOutfile;
+
+  int Argc;
+  unsigned RequestedNumThreads;
+  unsigned FunctionId;
   unsigned LineCount = 0;
-  // debug features
+  int Gpus = 0;
+  unsigned GpuMatrixSize = 0;
+  unsigned Individuals;
+  unsigned Generations;
+
+  bool PrintFunctionSummary;
+  bool ListInstructionGroups;
   bool AllowUnavailablePayload = false;
   bool DumpRegisters = false;
-  std::chrono::seconds DumpRegistersTimeDelta = std::chrono::seconds(0);
-  std::string DumpRegistersOutpath;
   bool ErrorDetection = false;
-  // CUDA parameters
-  int Gpus = 0;
-  unsigned GpuMatrixSize = 0;
   bool GpuUseFloat = false;
   bool GpuUseDouble = false;
-  // linux features
   bool ListMetrics = false;
   bool Measurement = false;
-  std::chrono::milliseconds StartDelta = std::chrono::milliseconds(0);
-  std::chrono::milliseconds StopDelta = std::chrono::milliseconds(0);
-  std::chrono::milliseconds MeasurementInterval = std::chrono::milliseconds(0);
-  std::vector<std::string> StdinMetrics;
-  // linux and dynamic linked binary
-  std::vector<std::string> MetricPaths;
-
-  // optimization
   bool Optimize = false;
-  std::chrono::seconds Preheat{};
-  std::string OptimizationAlgorithm;
-  std::vector<std::string> OptimizationMetrics;
-  std::chrono::seconds EvaluationDuration{};
-  unsigned Individuals;
-  std::string OptimizeOutfile;
-  unsigned Generations;
-  double Nsga2Cr;
-  double Nsga2M;
 
   Config() = delete;
 

From 954bb3068cd2727302810bd7b2e0b7d97c93fac7 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 26 Oct 2024 15:39:05 +0200
Subject: [PATCH 106/167] do not access moved object

---
 include/firestarter/Firestarter.hpp | 2 +-
 src/firestarter/Firestarter.cpp     | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 1e509d05..238c3ec9 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -51,7 +51,7 @@ class Firestarter {
 public:
   Firestarter() = delete;
 
-  explicit Firestarter(Config&& Cfg);
+  explicit Firestarter(Config&& ProvidedConfig);
 
   ~Firestarter() = default;
 
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index af18f906..5336c3b1 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -31,8 +31,8 @@
 
 namespace firestarter {
 
-Firestarter::Firestarter(Config&& Cfg)
-    : Cfg(std::move(Cfg)) {
+Firestarter::Firestarter(Config&& ProvidedConfig)
+    : Cfg(std::move(ProvidedConfig)) {
   if constexpr (firestarter::OptionalFeatures.IsX86) {
     Environment = std::make_unique<environment::x86::X86Environment>();
   }

From f1f882858305bf33303ea843fedcda2e3976c171 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 26 Oct 2024 15:33:41 +0200
Subject: [PATCH 107/167] X86 payloads: remove using. fix deprecation warning.

---
 .../Environment/X86/Payload/AVX512Payload.cpp | 73 +++++++-------
 .../Environment/X86/Payload/AVXPayload.cpp    | 67 +++++++------
 .../Environment/X86/Payload/FMA4Payload.cpp   | 98 ++++++++++---------
 .../Environment/X86/Payload/FMAPayload.cpp    | 74 +++++++-------
 .../Environment/X86/Payload/SSE2Payload.cpp   | 64 ++++++------
 .../Environment/X86/Payload/ZENFMAPayload.cpp | 84 ++++++++--------
 6 files changed, 250 insertions(+), 210 deletions(-)

diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index 3230e334..18d2f2d1 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -27,8 +27,14 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
                                    bool ErrorDetection) -> int {
-  using namespace asmjit;
-  using namespace asmjit::x86;
+  using Imm = asmjit::Imm;
+  using Zmm = asmjit::x86::Zmm;
+  // NOLINTBEGIN(readability-identifier-naming)
+  constexpr asmjit::x86::Mem (*zmmword_ptr)(const asmjit::x86::Gp&, int32_t) = asmjit::x86::zmmword_ptr;
+  constexpr auto zmm0 = asmjit::x86::zmm0;
+  constexpr auto zmm1 = asmjit::x86::zmm1;
+  constexpr auto zmm2 = asmjit::x86::zmm2;
+  // NOLINTEND(readability-identifier-naming)
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
@@ -75,51 +81,52 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
   const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
-  CodeHolder Code;
+  asmjit::CodeHolder Code;
   Code.init(Rt.environment());
 
   if (nullptr != LoadFunction) {
-    Rt.release(&LoadFunction);
+    Rt.release(LoadFunction);
   }
 
-  Builder Cb(&Code);
+  asmjit::x86::Builder Cb(&Code);
   Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
                           asmjit::DiagnosticOptions::kValidateIntermediate);
 
-  const auto PointerReg = rax;
-  const auto L1Addr = rbx;
-  const auto L2Addr = rcx;
-  const auto L3Addr = r8;
-  const auto RamAddr = r9;
-  const auto L2CountReg = r10;
-  const auto L3CountReg = r11;
-  const auto RamCountReg = r12;
-  const auto TempReg = r13;
-  const auto TempReg2 = rbp;
-  const auto OffsetReg = r14;
-  const auto AddrHighReg = r15;
-  const auto IterReg = mm0;
-  const auto ShiftReg = std::vector<Gp>({rdi, rsi, rdx});
-  const auto ShiftReg32 = std::vector<Gp>({edi, esi, edx});
+  const auto PointerReg = asmjit::x86::rax;
+  const auto L1Addr = asmjit::x86::rbx;
+  const auto L2Addr = asmjit::x86::rcx;
+  const auto L3Addr = asmjit::x86::r8;
+  const auto RamAddr = asmjit::x86::r9;
+  const auto L2CountReg = asmjit::x86::r10;
+  const auto L3CountReg = asmjit::x86::r11;
+  const auto RamCountReg = asmjit::x86::r12;
+  const auto TempReg = asmjit::x86::r13;
+  const auto TempReg2 = asmjit::x86::rbp;
+  const auto OffsetReg = asmjit::x86::r14;
+  const auto AddrHighReg = asmjit::x86::r15;
+  const auto IterReg = asmjit::x86::mm0;
+  const auto ShiftReg = std::vector<asmjit::x86::Gp>({asmjit::x86::rdi, asmjit::x86::rsi, asmjit::x86::rdx});
+  const auto ShiftReg32 = std::vector<asmjit::x86::Gp>({asmjit::x86::edi, asmjit::x86::esi, asmjit::x86::edx});
   const auto NrShiftRegs = 3;
   const auto MulRegs = 3;
   const auto AddRegs = 22;
   const auto AltDstRegs = 5;
-  const auto RamReg = zmm30;
+  const auto RamReg = asmjit::x86::zmm30;
 
-  FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+  asmjit::FuncDetail Func;
+  Func.init(asmjit::FuncSignature::build<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(
+                asmjit::CallConvId::kCDecl),
             Rt.environment());
 
-  FuncFrame Frame;
+  asmjit::FuncFrame Frame;
   Frame.init(Func);
 
   // make zmm registers dirty
-  for (int I = 0; I < 32; I++) {
+  for (auto I = 0U; I < 32U; I++) {
     Frame.addDirtyRegs(Zmm(I));
   }
-  for (int I = 0; I < 8; I++) {
-    Frame.addDirtyRegs(Mm(I));
+  for (auto I = 0U; I < 8U; I++) {
+    Frame.addDirtyRegs(asmjit::x86::Mm(I));
   }
   // make all other used registers dirty except RAX
   Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg,
@@ -128,7 +135,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     Frame.addDirtyRegs(Reg);
   }
 
-  FuncArgsAssignment Args(&Func);
+  asmjit::FuncArgsAssignment Args(&Func);
   // FIXME: asmjit assigment to mm0 does not seem to be supported
   Args.assignAll(PointerReg, AddrHighReg, TempReg);
   Args.updateFuncFrame(Frame);
@@ -154,14 +161,14 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     Cb.mov(Reg, Imm(0xAAAAAAAA));
   }
   // Initialize AVX512-Registers for FMA Operations
-  Cb.vmovapd(zmm0, zmmword_ptr(PointerReg));
+  Cb.vmovapd(zmm0, zmmword_ptr(PointerReg, 0));
   Cb.vmovapd(zmm1, zmmword_ptr(PointerReg, 64));
   Cb.vmovapd(zmm2, zmmword_ptr(PointerReg, 128));
   auto AddStart = MulRegs;
   auto AddEnd = MulRegs + AddRegs - 1;
   auto TransStart = AddRegs + MulRegs;
   auto TransEnd = AddRegs + MulRegs + AltDstRegs - 1;
-  for (int I = AddStart; I <= TransEnd; I++) {
+  for (auto I = AddStart; I <= TransEnd; I++) {
     Cb.vmovapd(Zmm(I), zmmword_ptr(PointerReg, 256 + (I * 64)));
   }
   Cb.mov(L1Addr, PointerReg); // address for L1-buffer
@@ -181,7 +188,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop ("
                      << RamSize / 1024 << ") KiB";
 
-  Cb.align(AlignMode::kCode, 64);
+  Cb.align(asmjit::AlignMode::kCode, 64);
 
   auto Loop = Cb.newLabel();
   Cb.bind(Loop);
@@ -205,7 +212,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); };
   const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); };
 
-  for (unsigned Count = 0; Count < Repetitions; Count++) {
+  for (auto Count = 0U; Count < Repetitions; Count++) {
     for (const auto& Item : Sequence) {
       if (Item == "REG") {
         Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2);
@@ -354,7 +361,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
 
   Cb.bind(FunctionExit);
 
-  Cb.movq(rax, IterReg);
+  Cb.movq(asmjit::x86::rax, IterReg);
 
   Cb.emitEpilog(Frame);
 
diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
index 65e78885..74dc8322 100644
--- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
@@ -27,8 +27,12 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
                                 unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
                                 unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
                                 bool ErrorDetection) -> int {
-  using namespace asmjit;
-  using namespace asmjit::x86;
+  using Imm = asmjit::Imm;
+  using Mm = asmjit::x86::Mm;
+  using Xmm = asmjit::x86::Xmm;
+  using Ymm = asmjit::x86::Ymm;
+  // NOLINTNEXTLINE(readability-identifier-naming)
+  constexpr asmjit::x86::Mem (*xmmword_ptr)(const asmjit::x86::Gp&, int32_t) = asmjit::x86::xmmword_ptr;
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
@@ -75,54 +79,55 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
   const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
-  CodeHolder Code;
+  asmjit::CodeHolder Code;
   Code.init(Rt.environment());
 
   if (nullptr != LoadFunction) {
-    Rt.release(&LoadFunction);
+    Rt.release(LoadFunction);
   }
 
-  Builder Cb(&Code);
+  asmjit::x86::Builder Cb(&Code);
   Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
                           asmjit::DiagnosticOptions::kValidateIntermediate);
 
-  const auto PointerReg = rax;
-  const auto L1Addr = rbx;
-  const auto L2Addr = rcx;
-  const auto L3Addr = rdx;
-  const auto RamAddr = rdi;
-  const auto L2CountReg = r8;
-  const auto L3CountReg = r9;
-  const auto RamCountReg = r10;
-  const auto TempReg = r11;
-  const auto TempReg2 = rbp;
-  const auto OffsetReg = r12;
-  const auto AddrHighReg = r13;
-  const auto IterReg = r14;
+  const auto PointerReg = asmjit::x86::rax;
+  const auto L1Addr = asmjit::x86::rbx;
+  const auto L2Addr = asmjit::x86::rcx;
+  const auto L3Addr = asmjit::x86::rdx;
+  const auto RamAddr = asmjit::x86::rdi;
+  const auto L2CountReg = asmjit::x86::r8;
+  const auto L3CountReg = asmjit::x86::r9;
+  const auto RamCountReg = asmjit::x86::r10;
+  const auto TempReg = asmjit::x86::r11;
+  const auto TempReg2 = asmjit::x86::rbp;
+  const auto OffsetReg = asmjit::x86::r12;
+  const auto AddrHighReg = asmjit::x86::r13;
+  const auto IterReg = asmjit::x86::r14;
   const auto ShiftRegs = 6;
   const auto AddRegs = 10;
   const auto TransRegs = 6;
 
-  FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+  asmjit::FuncDetail Func;
+  Func.init(asmjit::FuncSignature::build<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(
+                asmjit::CallConvId::kCDecl),
             Rt.environment());
 
-  FuncFrame Frame;
+  asmjit::FuncFrame Frame;
   Frame.init(Func);
 
   // make xmm registers dirty
-  for (int I = 0; I < 16; I++) {
+  for (auto I = 0U; I < 16U; I++) {
     Frame.addDirtyRegs(Ymm(I));
   }
   // make mmx registers dirty
-  for (int I = 0; I < 8; I++) {
+  for (auto I = 0U; I < 8U; I++) {
     Frame.addDirtyRegs(Mm(I));
   }
   // make all other used registers dirty except RAX
   Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg,
                      AddrHighReg, IterReg);
 
-  FuncArgsAssignment Args(&Func);
+  asmjit::FuncArgsAssignment Args(&Func);
   Args.assignAll(PointerReg, AddrHighReg, IterReg);
   Args.updateFuncFrame(Frame);
   Frame.finalize();
@@ -146,7 +151,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   auto TransStart = AddRegs;
   auto TransEnd = AddRegs + TransRegs - 1;
   if (AddRegs > 0) {
-    for (int I = AddStart; I <= AddEnd; I++) {
+    for (auto I = AddStart; I <= AddEnd; I++) {
       Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 32 * I));
     }
   }
@@ -157,7 +162,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   if (ShiftRegs > 1) {
     Cb.mov(TempReg, Imm(0x5555555555555555));
     Cb.movq(Mm(ShiftStart), TempReg);
-    for (int I = ShiftStart + 1; I <= ShiftEnd; I++) {
+    for (auto I = ShiftStart + 1; I <= ShiftEnd; I++) {
       Cb.movq(Mm(I), Mm(ShiftStart));
     }
   }
@@ -172,7 +177,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     Cb.pinsrq(Xmm(TransStart), TempReg, Imm(0));
     Cb.pinsrq(Xmm(TransStart), TempReg, Imm(1));
     Cb.vinsertf128(Ymm(TransStart), Ymm(TransStart), Xmm(TransStart), Imm(1));
-    for (int I = TransStart + 1; I <= TransEnd; I++) {
+    for (auto I = TransStart + 1; I <= TransEnd; I++) {
       if (I % 2 == 0) {
         Cb.shr(TempReg, Imm(4));
       } else {
@@ -201,7 +206,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop ("
                      << RamSize / 1024 << ") KiB";
 
-  Cb.align(AlignMode::kCode, 64);
+  Cb.align(asmjit::AlignMode::kCode, 64);
 
   auto Loop = Cb.newLabel();
   Cb.bind(Loop);
@@ -226,7 +231,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); };
   const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); };
 
-  for (unsigned Count = 0; Count < Repetitions; Count++) {
+  for (auto Count = 0U; Count < Repetitions; Count++) {
     for (const auto& Item : Sequence) {
       if (Item == "REG") {
         Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
@@ -374,7 +379,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   Cb.mov(L1Addr, PointerReg);
 
   if (DumpRegisters) {
-    emitDumpRegisterCode<Ymm>(Cb, PointerReg, ymmword_ptr);
+    emitDumpRegisterCode<Ymm>(Cb, PointerReg, asmjit::x86::ymmword_ptr);
   }
 
   if (ErrorDetection) {
@@ -386,7 +391,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
 
   Cb.bind(FunctionExit);
 
-  Cb.mov(rax, IterReg); // restore iteration counter
+  Cb.mov(asmjit::x86::rax, IterReg); // restore iteration counter
 
   Cb.emitEpilog(Frame);
 
diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
index ca416516..8c503696 100644
--- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
@@ -27,8 +27,13 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
                                  unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
                                  unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
                                  bool ErrorDetection) -> int {
-  using namespace asmjit;
-  using namespace asmjit::x86;
+  using Imm = asmjit::Imm;
+  using Xmm = asmjit::x86::Xmm;
+  // NOLINTBEGIN(readability-identifier-naming)
+  constexpr asmjit::x86::Mem (*xmmword_ptr)(const asmjit::x86::Gp&, int32_t) = asmjit::x86::xmmword_ptr;
+  constexpr auto xmm0 = asmjit::x86::xmm0;
+  constexpr auto xmm1 = asmjit::x86::xmm1;
+  // NOLINTEND(readability-identifier-naming)
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
@@ -75,51 +80,52 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
   const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
-  CodeHolder Code;
+  asmjit::CodeHolder Code;
   Code.init(Rt.environment());
 
   if (nullptr != LoadFunction) {
-    Rt.release(&LoadFunction);
+    Rt.release(LoadFunction);
   }
 
-  Builder Cb(&Code);
+  asmjit::x86::Builder Cb(&Code);
   Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
                           asmjit::DiagnosticOptions::kValidateIntermediate);
 
-  const auto PointerReg = rax;
-  const auto L1Addr = rbx;
-  const auto L2Addr = rcx;
-  const auto L3Addr = r8;
-  const auto RamAddr = r9;
-  const auto L2CountReg = r10;
-  const auto L3CountReg = r11;
-  const auto RamCountReg = r12;
-  const auto TempReg = r13;
-  const auto TempReg2 = rbp;
-  const auto OffsetReg = r14;
-  const auto AddrHighReg = r15;
-  const auto IterReg = mm0;
-  const auto ShiftReg = std::vector<Gp>({rdi, rsi, rdx});
-  const auto ShiftReg32 = std::vector<Gp>({edi, esi, edx});
+  const auto PointerReg = asmjit::x86::rax;
+  const auto L1Addr = asmjit::x86::rbx;
+  const auto L2Addr = asmjit::x86::rcx;
+  const auto L3Addr = asmjit::x86::r8;
+  const auto RamAddr = asmjit::x86::r9;
+  const auto L2CountReg = asmjit::x86::r10;
+  const auto L3CountReg = asmjit::x86::r11;
+  const auto RamCountReg = asmjit::x86::r12;
+  const auto TempReg = asmjit::x86::r13;
+  const auto TempReg2 = asmjit::x86::rbp;
+  const auto OffsetReg = asmjit::x86::r14;
+  const auto AddrHighReg = asmjit::x86::r15;
+  const auto IterReg = asmjit::x86::mm0;
+  const auto ShiftReg = std::vector<asmjit::x86::Gp>({asmjit::x86::rdi, asmjit::x86::rsi, asmjit::x86::rdx});
+  const auto ShiftReg32 = std::vector<asmjit::x86::Gp>({asmjit::x86::edi, asmjit::x86::esi, asmjit::x86::edx});
   const auto NbShiftRegs = 3;
   const auto MulRegs = 2;
   const auto AddRegs = 9;
   const auto AltDestRegs = 3;
-  const auto RamReg = xmm15;
+  const auto RamReg = asmjit::x86::xmm15;
 
-  FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+  asmjit::FuncDetail Func;
+  Func.init(asmjit::FuncSignature::build<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(
+                asmjit::CallConvId::kCDecl),
             Rt.environment());
 
-  FuncFrame Frame;
+  asmjit::FuncFrame Frame;
   Frame.init(Func);
 
   // make (x|y)mm registers dirty
-  for (int I = 0; I < 16; I++) {
-    Frame.addDirtyRegs(Ymm(I));
+  for (auto I = 0U; I < 16U; I++) {
+    Frame.addDirtyRegs(asmjit::x86::Ymm(I));
   }
-  for (int I = 0; I < 8; I++) {
-    Frame.addDirtyRegs(Mm(I));
+  for (auto I = 0U; I < 8U; I++) {
+    Frame.addDirtyRegs(asmjit::x86::Mm(I));
   }
   // make all other used registers dirty except RAX
   Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg,
@@ -128,7 +134,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
     Frame.addDirtyRegs(Reg);
   }
 
-  FuncArgsAssignment Args(&Func);
+  asmjit::FuncArgsAssignment Args(&Func);
   // FIXME: asmjit assigment to mm0 does not seem to be supported
   Args.assignAll(PointerReg, AddrHighReg, TempReg);
   Args.updateFuncFrame(Frame);
@@ -154,14 +160,14 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
     Cb.mov(Reg, Imm(0xAAAAAAAA));
   }
   // Initialize AVX-Registers for FMA4 Operations
-  Cb.vmovapd(ymm0, ymmword_ptr(PointerReg));
-  Cb.vmovapd(ymm1, ymmword_ptr(PointerReg));
+  Cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::ymmword_ptr(PointerReg));
+  Cb.vmovapd(asmjit::x86::ymm1, asmjit::x86::ymmword_ptr(PointerReg));
   auto AddStart = MulRegs;
   auto AddEnd = MulRegs + AddRegs - 1;
   auto TransStart = AddRegs + MulRegs;
   auto TransEnd = AddRegs + MulRegs + AltDestRegs - 1;
-  for (int I = AddStart; I <= TransEnd; I++) {
-    Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 256 + (I * 32)));
+  for (auto I = AddStart; I <= TransEnd; I++) {
+    Cb.vmovapd(asmjit::x86::Ymm(I), asmjit::x86::ymmword_ptr(PointerReg, 256 + (I * 32)));
   }
   Cb.mov(L1Addr, PointerReg); // address for L1-buffer
   Cb.mov(L2Addr, PointerReg);
@@ -180,7 +186,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop ("
                      << RamSize / 1024 << ") KiB";
 
-  Cb.align(AlignMode::kCode, 64);
+  Cb.align(asmjit::AlignMode::kCode, 64);
 
   auto Loop = Cb.newLabel();
   Cb.bind(Loop);
@@ -205,7 +211,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); };
   const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); };
 
-  for (unsigned Count = 0; Count < Repetitions; Count++) {
+  for (auto Count = 0U; Count < Repetitions; Count++) {
     for (const auto& Item : Sequence) {
       if (Item == "REG") {
         Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
@@ -214,15 +220,18 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
         MovDest++;
       } else if (Item == "L1_L") {
         Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
-        Cb.vfmaddpd(Ymm(AddDest), Ymm(AddDest), ymm1, ymmword_ptr(L1Addr, 32));
+        Cb.vfmaddpd(asmjit::x86::Ymm(AddDest), asmjit::x86::Ymm(AddDest), asmjit::x86::ymm1,
+                    asmjit::x86::ymmword_ptr(L1Addr, 32));
         L1Increment();
       } else if (Item == "L1_S") {
         Cb.vmovapd(xmmword_ptr(L1Addr, 32), Xmm(AddDest));
-        Cb.vfmaddpd(Ymm(AddDest), Ymm(AddDest), ymm0, Ymm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
+        Cb.vfmaddpd(asmjit::x86::Ymm(AddDest), asmjit::x86::Ymm(AddDest), asmjit::x86::ymm0,
+                    asmjit::x86::Ymm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
         L1Increment();
       } else if (Item == "L1_LS") {
         Cb.vmovapd(xmmword_ptr(L1Addr, 64), Xmm(AddDest));
-        Cb.vfmaddpd(Ymm(AddDest), Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 32));
+        Cb.vfmaddpd(asmjit::x86::Ymm(AddDest), asmjit::x86::Ymm(AddDest), asmjit::x86::ymm0,
+                    asmjit::x86::ymmword_ptr(L1Addr, 32));
         L1Increment();
       } else if (Item == "L2_L") {
         Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
@@ -250,7 +259,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
         L3Increment();
       } else if (Item == "L3_P") {
         Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, xmmword_ptr(L1Addr, 32));
-        Cb.prefetcht2(ptr(L3Addr));
+        Cb.prefetcht2(asmjit::x86::ptr(L3Addr));
         L3Increment();
       } else if (Item == "RAM_L") {
         Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
@@ -266,7 +275,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
         RamIncrement();
       } else if (Item == "RAM_P") {
         Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, xmmword_ptr(L1Addr, 32));
-        Cb.prefetcht2(ptr(RamAddr));
+        Cb.prefetcht2(asmjit::x86::ptr(RamAddr));
         RamIncrement();
       } else {
         workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
@@ -342,19 +351,20 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   Cb.mov(L1Addr, PointerReg);
 
   if (DumpRegisters) {
-    emitDumpRegisterCode<Ymm>(Cb, PointerReg, ymmword_ptr);
+    emitDumpRegisterCode<asmjit::x86::Ymm>(Cb, PointerReg, asmjit::x86::ymmword_ptr);
   }
 
   if (ErrorDetection) {
-    emitErrorDetectionCode<decltype(IterReg), Ymm>(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2);
+    emitErrorDetectionCode<decltype(IterReg), asmjit::x86::Ymm>(Cb, IterReg, AddrHighReg, PointerReg, TempReg,
+                                                                TempReg2);
   }
 
-  Cb.test(ptr_64(AddrHighReg), Imm(LoadThreadWorkType::LoadHigh));
+  Cb.test(asmjit::x86::ptr_64(AddrHighReg), Imm(LoadThreadWorkType::LoadHigh));
   Cb.jnz(Loop);
 
   Cb.bind(FunctionExit);
 
-  Cb.movq(rax, IterReg);
+  Cb.movq(asmjit::x86::rax, IterReg);
 
   Cb.emitEpilog(Frame);
 
diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
index 451a37ad..8d7951ec 100644
--- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
@@ -27,8 +27,15 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
                                 unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
                                 unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
                                 bool ErrorDetection) -> int {
-  using namespace asmjit;
-  using namespace asmjit::x86;
+  using Imm = asmjit::Imm;
+  using Xmm = asmjit::x86::Xmm;
+  using Ymm = asmjit::x86::Ymm;
+  // NOLINTBEGIN(readability-identifier-naming)
+  constexpr asmjit::x86::Mem (*ymmword_ptr)(const asmjit::x86::Gp&, int32_t) = asmjit::x86::ymmword_ptr;
+  constexpr auto ymm0 = asmjit::x86::ymm0;
+  constexpr auto ymm1 = asmjit::x86::ymm1;
+  constexpr auto ymm2 = asmjit::x86::ymm2;
+  // NOLINTEND(readability-identifier-naming)
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
@@ -75,51 +82,52 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
   const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
-  CodeHolder Code;
+  asmjit::CodeHolder Code;
   Code.init(Rt.environment());
 
   if (nullptr != LoadFunction) {
-    Rt.release(&LoadFunction);
+    Rt.release(LoadFunction);
   }
 
-  Builder Cb(&Code);
+  asmjit::x86::Builder Cb(&Code);
   Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
                           asmjit::DiagnosticOptions::kValidateIntermediate);
 
-  const auto PointerReg = rax;
-  const auto L1Addr = rbx;
-  const auto L2Addr = rcx;
-  const auto L3Addr = r8;
-  const auto RamAddr = r9;
-  const auto L2CountReg = r10;
-  const auto L3CountReg = r11;
-  const auto RamCountReg = r12;
-  const auto TempReg = r13;
-  const auto TempReg2 = rbp;
-  const auto OffsetReg = r14;
-  const auto AddrHighReg = r15;
-  const auto IterReg = mm0;
-  const auto ShiftRegs = std::vector<Gp>({rdi, rsi, rdx});
-  const auto ShiftRegs32 = std::vector<Gp>({edi, esi, edx});
+  const auto PointerReg = asmjit::x86::rax;
+  const auto L1Addr = asmjit::x86::rbx;
+  const auto L2Addr = asmjit::x86::rcx;
+  const auto L3Addr = asmjit::x86::r8;
+  const auto RamAddr = asmjit::x86::r9;
+  const auto L2CountReg = asmjit::x86::r10;
+  const auto L3CountReg = asmjit::x86::r11;
+  const auto RamCountReg = asmjit::x86::r12;
+  const auto TempReg = asmjit::x86::r13;
+  const auto TempReg2 = asmjit::x86::rbp;
+  const auto OffsetReg = asmjit::x86::r14;
+  const auto AddrHighReg = asmjit::x86::r15;
+  const auto IterReg = asmjit::x86::mm0;
+  const auto ShiftRegs = std::vector<asmjit::x86::Gp>({asmjit::x86::rdi, asmjit::x86::rsi, asmjit::x86::rdx});
+  const auto ShiftRegs32 = std::vector<asmjit::x86::Gp>({asmjit::x86::edi, asmjit::x86::esi, asmjit::x86::edx});
   const auto NbShiftRegs = 3;
   const auto MulRegs = 3;
   const auto AddRegs = 9;
   const auto AltDestRegs = 3;
-  const auto RamReg = ymm15;
+  const auto RamReg = asmjit::x86::ymm15;
 
-  FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+  asmjit::FuncDetail Func;
+  Func.init(asmjit::FuncSignature::build<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(
+                asmjit::CallConvId::kCDecl),
             Rt.environment());
 
-  FuncFrame Frame;
+  asmjit::FuncFrame Frame;
   Frame.init(Func);
 
   // make (x|y)mm registers dirty
-  for (int I = 0; I < 16; I++) {
+  for (auto I = 0U; I < 16U; I++) {
     Frame.addDirtyRegs(Ymm(I));
   }
-  for (int I = 0; I < 8; I++) {
-    Frame.addDirtyRegs(Mm(I));
+  for (auto I = 0U; I < 8U; I++) {
+    Frame.addDirtyRegs(asmjit::x86::Mm(I));
   }
   // make all other used registers dirty except RAX
   Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg,
@@ -128,7 +136,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     Frame.addDirtyRegs(Reg);
   }
 
-  FuncArgsAssignment Args(&Func);
+  asmjit::FuncArgsAssignment Args(&Func);
   // FIXME: asmjit assigment to mm0 does not seem to be supported
   Args.assignAll(PointerReg, AddrHighReg, TempReg);
   Args.updateFuncFrame(Frame);
@@ -154,14 +162,14 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     Cb.mov(Reg, Imm(0xAAAAAAAA));
   }
   // Initialize AVX-Registers for FMA Operations
-  Cb.vmovapd(ymm0, ymmword_ptr(PointerReg));
+  Cb.vmovapd(ymm0, ymmword_ptr(PointerReg, 0));
   Cb.vmovapd(ymm1, ymmword_ptr(PointerReg, 32));
   Cb.vmovapd(ymm2, ymmword_ptr(PointerReg, 64));
   auto AddStart = MulRegs;
   auto AddEnd = MulRegs + AddRegs - 1;
   auto TransStart = AddRegs + MulRegs;
   auto TransEnd = AddRegs + MulRegs + AltDestRegs - 1;
-  for (int I = AddStart; I <= TransEnd; I++) {
+  for (auto I = AddStart; I <= TransEnd; I++) {
     Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 256 + (I * 32)));
   }
   Cb.mov(L1Addr, PointerReg); // address for L1-buffer
@@ -181,7 +189,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop ("
                      << RamSize / 1024 << ") KiB";
 
-  Cb.align(AlignMode::kCode, 64);
+  Cb.align(asmjit::AlignMode::kCode, 64);
 
   auto Loop = Cb.newLabel();
   Cb.bind(Loop);
@@ -214,7 +222,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); };
   const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); };
 
-  for (unsigned Count = 0; Count < Repetitions; Count++) {
+  for (auto Count = 0U; Count < Repetitions; Count++) {
     for (const auto& Item : Sequence) {
       if (Item == "REG") {
         Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2);
@@ -391,7 +399,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
 
   Cb.bind(FunctionExit);
 
-  Cb.movq(rax, IterReg);
+  Cb.movq(asmjit::x86::rax, IterReg);
 
   Cb.emitEpilog(Frame);
 
diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index bd2aaba9..9b782597 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -27,8 +27,11 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
                                  unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
                                  unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
                                  bool ErrorDetection) -> int {
-  using namespace asmjit;
-  using namespace asmjit::x86;
+  using Imm = asmjit::Imm;
+  using Mm = asmjit::x86::Mm;
+  using Xmm = asmjit::x86::Xmm;
+  // NOLINTNEXTLINE(readability-identifier-naming)
+  constexpr asmjit::x86::Mem (*xmmword_ptr)(const asmjit::x86::Gp&, int32_t) = asmjit::x86::xmmword_ptr;
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
@@ -75,54 +78,55 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
   const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
-  CodeHolder Code;
+  asmjit::CodeHolder Code;
   Code.init(Rt.environment());
 
   if (nullptr != LoadFunction) {
-    Rt.release(&LoadFunction);
+    Rt.release(LoadFunction);
   }
 
-  Builder Cb(&Code);
+  asmjit::x86::Builder Cb(&Code);
   Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
                           asmjit::DiagnosticOptions::kValidateIntermediate);
 
-  const auto PointerReg = rax;
-  const auto L1Addr = rbx;
-  const auto L2Addr = rcx;
-  const auto L3Addr = rdx;
-  const auto RamAddr = rdi;
-  const auto L2CountReg = r8;
-  const auto L3CountReg = r9;
-  const auto RamCountReg = r10;
-  const auto TempReg = r11;
-  const auto TempReg2 = rbp;
-  const auto OffsetReg = r12;
-  const auto AddrHighReg = r13;
-  const auto IterReg = r14;
+  const auto PointerReg = asmjit::x86::rax;
+  const auto L1Addr = asmjit::x86::rbx;
+  const auto L2Addr = asmjit::x86::rcx;
+  const auto L3Addr = asmjit::x86::rdx;
+  const auto RamAddr = asmjit::x86::rdi;
+  const auto L2CountReg = asmjit::x86::r8;
+  const auto L3CountReg = asmjit::x86::r9;
+  const auto RamCountReg = asmjit::x86::r10;
+  const auto TempReg = asmjit::x86::r11;
+  const auto TempReg2 = asmjit::x86::rbp;
+  const auto OffsetReg = asmjit::x86::r12;
+  const auto AddrHighReg = asmjit::x86::r13;
+  const auto IterReg = asmjit::x86::r14;
   constexpr const auto MovRegs = 0;
   const auto AddRegs = 14;
   const auto TransRegs = 2;
 
-  FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+  asmjit::FuncDetail Func;
+  Func.init(asmjit::FuncSignature::build<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(
+                asmjit::CallConvId::kCDecl),
             Rt.environment());
 
-  FuncFrame Frame;
+  asmjit::FuncFrame Frame;
   Frame.init(Func);
 
   // make xmm registers dirty
-  for (int I = 0; I < 16; I++) {
+  for (auto I = 0U; I < 16U; I++) {
     Frame.addDirtyRegs(Xmm(I));
   }
   // make mmx registers dirty
-  for (int I = 0; I < 8; I++) {
+  for (auto I = 0U; I < 8U; I++) {
     Frame.addDirtyRegs(Mm(I));
   }
   // make all other used registers dirty except RAX
   Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg,
                      AddrHighReg, IterReg);
 
-  FuncArgsAssignment Args(&Func);
+  asmjit::FuncArgsAssignment Args(&Func);
   Args.assignAll(PointerReg, AddrHighReg, IterReg);
   Args.updateFuncFrame(Frame);
   Frame.finalize();
@@ -146,7 +150,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   const auto TransStart = AddRegs;
   const auto TransEnd = AddRegs + TransRegs - 1;
   if (AddRegs > 0) {
-    for (int I = AddStart; I <= AddEnd; I++) {
+    for (auto I = AddStart; I <= AddEnd; I++) {
       Cb.movapd(Xmm(I), xmmword_ptr(PointerReg, 32 * I));
     }
   }
@@ -157,7 +161,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   if (MovRegs > 0) {
     Cb.mov(TempReg, Imm(0x5555555555555555));
     Cb.movq(Mm(MovStart), TempReg);
-    for (int I = MovStart + 1; I <= MovEnd; I++) {
+    for (auto I = MovStart + 1; I <= MovEnd; I++) {
       Cb.movq(Mm(I), Mm(MovStart));
     }
   }
@@ -171,7 +175,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
     }
     Cb.pinsrq(Xmm(TransStart), TempReg, Imm(0));
     Cb.pinsrq(Xmm(TransStart), TempReg, Imm(1));
-    for (int I = TransStart + 1; I <= TransEnd; I++) {
+    for (auto I = TransStart + 1; I <= TransEnd; I++) {
       if (I % 2 == 0) {
         Cb.shr(TempReg, Imm(4));
       } else {
@@ -199,7 +203,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop ("
                      << RamSize / 1024 << ") KiB";
 
-  Cb.align(AlignMode::kCode, 64);
+  Cb.align(asmjit::AlignMode::kCode, 64);
 
   auto Loop = Cb.newLabel();
   Cb.bind(Loop);
@@ -223,7 +227,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); };
   const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); };
 
-  for (unsigned Count = 0; Count < Repetitions; Count++) {
+  for (auto Count = 0U; Count < Repetitions; Count++) {
     for (const auto& Item : Sequence) {
       if (Item == "REG") {
         Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs)));
@@ -378,7 +382,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
 
   Cb.bind(FunctionExit);
 
-  Cb.mov(rax, IterReg); // restore iteration counter
+  Cb.mov(asmjit::x86::rax, IterReg); // restore iteration counter
 
   Cb.emitEpilog(Frame);
 
diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
index e102bb1a..57361860 100644
--- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
@@ -27,8 +27,11 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
                                    bool ErrorDetection) -> int {
-  using namespace asmjit;
-  using namespace asmjit::x86;
+  using Imm = asmjit::Imm;
+  using Xmm = asmjit::x86::Xmm;
+  using Ymm = asmjit::x86::Ymm;
+  // NOLINTNEXTLINE(readability-identifier-naming)
+  constexpr asmjit::x86::Mem (*xmmword_ptr)(const asmjit::x86::Gp&, int32_t) = asmjit::x86::xmmword_ptr;
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
@@ -75,48 +78,49 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
   auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
-  CodeHolder Code;
+  asmjit::CodeHolder Code;
   Code.init(Rt.environment());
 
   if (nullptr != LoadFunction) {
-    Rt.release(&LoadFunction);
+    Rt.release(LoadFunction);
   }
 
-  Builder Cb(&Code);
+  asmjit::x86::Builder Cb(&Code);
   Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
                           asmjit::DiagnosticOptions::kValidateIntermediate);
 
-  auto PointerReg = rax;
-  auto L1Addr = rbx;
-  auto L2Addr = rcx;
-  auto L3Addr = r8;
-  auto RamAddr = r9;
-  auto L2CountReg = r10;
-  auto L3CountReg = r11;
-  auto RamCountReg = r12;
-  auto TempReg = r13;
-  auto TempReg2 = rbp;
-  auto OffsetReg = r14;
-  auto AddrHighReg = r15;
-  auto IterReg = mm0;
-  auto ShiftRegs = std::vector<Gp>({rdi, rsi, rdx});
+  auto PointerReg = asmjit::x86::rax;
+  auto L1Addr = asmjit::x86::rbx;
+  auto L2Addr = asmjit::x86::rcx;
+  auto L3Addr = asmjit::x86::r8;
+  auto RamAddr = asmjit::x86::r9;
+  auto L2CountReg = asmjit::x86::r10;
+  auto L3CountReg = asmjit::x86::r11;
+  auto RamCountReg = asmjit::x86::r12;
+  auto TempReg = asmjit::x86::r13;
+  auto TempReg2 = asmjit::x86::rbp;
+  auto OffsetReg = asmjit::x86::r14;
+  auto AddrHighReg = asmjit::x86::r15;
+  auto IterReg = asmjit::x86::mm0;
+  auto ShiftRegs = std::vector<asmjit::x86::Gp>({asmjit::x86::rdi, asmjit::x86::rsi, asmjit::x86::rdx});
   auto NbShiftRegs = 3;
   auto NbAddRegs = 11;
-  auto RamReg = ymm15;
+  auto RamReg = asmjit::x86::ymm15;
 
-  FuncDetail Func;
-  Func.init(FuncSignatureT<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(CallConvId::kCDecl),
+  asmjit::FuncDetail Func;
+  Func.init(asmjit::FuncSignature::build<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(
+                asmjit::CallConvId::kCDecl),
             Rt.environment());
 
-  FuncFrame Frame;
+  asmjit::FuncFrame Frame;
   Frame.init(Func);
 
   // make (x|y)mm registers dirty
-  for (int I = 0; I < 16; I++) {
-    Frame.addDirtyRegs(Ymm(I));
+  for (auto I = 0U; I < 16U; I++) {
+    Frame.addDirtyRegs(asmjit::x86::Ymm(I));
   }
-  for (int I = 0; I < 8; I++) {
-    Frame.addDirtyRegs(Mm(I));
+  for (auto I = 0U; I < 8U; I++) {
+    Frame.addDirtyRegs(asmjit::x86::Mm(I));
   }
   // make all other used registers dirty except RAX
   Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg,
@@ -125,7 +129,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     Frame.addDirtyRegs(Reg);
   }
 
-  FuncArgsAssignment Args(&Func);
+  asmjit::FuncArgsAssignment Args(&Func);
   // FIXME: asmjit assigment to mm0 does not seem to be supported
   Args.assignAll(PointerReg, AddrHighReg, TempReg);
   Args.updateFuncFrame(Frame);
@@ -151,12 +155,12 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     Cb.mov(Reg, Imm(0xAAAAAAAAAAAAAAAA));
   }
   // Initialize AVX-Registers for FMA Operations
-  Cb.vmovapd(ymm0, ymmword_ptr(PointerReg));
-  Cb.vmovapd(ymm1, ymmword_ptr(PointerReg, 32));
+  Cb.vmovapd(asmjit::x86::ymm0, ymmword_ptr(PointerReg));
+  Cb.vmovapd(asmjit::x86::ymm1, ymmword_ptr(PointerReg, 32));
 
   auto AddRegsStart = 2;
   auto AddRegsEnd = AddRegsStart + NbAddRegs - 1;
-  for (int I = AddRegsStart; I <= AddRegsEnd; I++) {
+  for (auto I = AddRegsStart; I <= AddRegsEnd; I++) {
     Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 256 + (I * 32)));
   }
 
@@ -185,7 +189,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop ("
                      << RamSize / 1024 << ") KiB";
 
-  Cb.align(AlignMode::kCode, 64);
+  Cb.align(asmjit::AlignMode::kCode, 64);
 
   auto Loop = Cb.newLabel();
   Cb.bind(Loop);
@@ -209,7 +213,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); };
   const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); };
 
-  for (unsigned Count = 0; Count < Repetitions; Count++) {
+  for (auto Count = 0U; Count < Repetitions; Count++) {
     for (const auto& Item : Sequence) {
 
       // swap second and third param of fma instruction to force bitchanges on
@@ -217,11 +221,11 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
       Ymm SecondParam;
       Ymm ThirdParam;
       if (0 == ItemCount % 2) {
-        SecondParam = ymm0;
-        ThirdParam = ymm1;
+        SecondParam = asmjit::x86::ymm0;
+        ThirdParam = asmjit::x86::ymm1;
       } else {
-        SecondParam = ymm1;
-        ThirdParam = ymm0;
+        SecondParam = asmjit::x86::ymm1;
+        ThirdParam = asmjit::x86::ymm0;
       }
 
       if (Item == "REG") {
@@ -255,6 +259,8 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
 
       // make sure the shifts do could end up shifting out the data one end.
       if (ItemCount < (Sequence.size() * Repetitions) - ((Sequence.size() * Repetitions) % 4)) {
+        // all cases are covered
+        // NOLINTNEXTLINE(bugprone-switch-missing-default-case)
         switch (ItemCount % 4) {
         case 0:
           Cb.vpsrlq(Xmm(13), Xmm(13), Imm(1));
@@ -331,7 +337,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   Cb.mov(L1Addr, PointerReg);
 
   if (DumpRegisters) {
-    emitDumpRegisterCode<Ymm>(Cb, PointerReg, ymmword_ptr);
+    emitDumpRegisterCode<Ymm>(Cb, PointerReg, asmjit::x86::ymmword_ptr);
   }
 
   if (ErrorDetection) {
@@ -343,7 +349,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
 
   Cb.bind(FunctionExit);
 
-  Cb.movq(rax, IterReg);
+  Cb.movq(asmjit::x86::rax, IterReg);
 
   Cb.emitEpilog(Frame);
 

From 6a6e2619cf471ecb14ea3a83a182ba8ee960d47f Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 26 Oct 2024 23:41:57 +0200
Subject: [PATCH 108/167] remove unused variable

---
 .../Environment/X86/Platform/BulldozerConfig.hpp     |  4 ++--
 .../Environment/X86/Platform/HaswellConfig.hpp       |  4 ++--
 .../Environment/X86/Platform/HaswellEPConfig.hpp     |  4 ++--
 .../X86/Platform/KnightsLandingConfig.hpp            |  4 ++--
 .../Environment/X86/Platform/NaplesConfig.hpp        |  4 ++--
 .../Environment/X86/Platform/NehalemConfig.hpp       |  4 ++--
 .../Environment/X86/Platform/NehalemEPConfig.hpp     |  4 ++--
 .../Environment/X86/Platform/RomeConfig.hpp          |  4 ++--
 .../Environment/X86/Platform/SandyBridgeConfig.hpp   |  4 ++--
 .../Environment/X86/Platform/SandyBridgeEPConfig.hpp |  4 ++--
 .../Environment/X86/Platform/SkylakeConfig.hpp       |  4 ++--
 .../Environment/X86/Platform/SkylakeSPConfig.hpp     |  4 ++--
 .../Environment/X86/Platform/X86PlatformConfig.hpp   |  7 ++-----
 .../firestarter/Environment/X86/X86Environment.hpp   | 12 ++++++------
 src/firestarter/Environment/X86/X86Environment.cpp   |  9 ++++-----
 15 files changed, 36 insertions(+), 40 deletions(-)

diff --git a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
index fc7e5337..59c54ee5 100644
--- a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class BulldozerConfig final : public X86PlatformConfig {
 public:
-  BulldozerConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+  BulldozerConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
       : X86PlatformConfig("BLD_OPTERON", 21, {1, 2, 3}, {1}, 0, {16384, 1048576, 786432}, 104857600, 1536, Family,
-                          Model, Threads, std::make_unique<payload::FMA4Payload>(SupportedFeatures)) {}
+                          Model, std::make_unique<payload::FMA4Payload>(SupportedFeatures)) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
index 00bf3199..bfeb1c25 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class HaswellConfig final : public X86PlatformConfig {
 public:
-  HaswellConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+  HaswellConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
       : X86PlatformConfig("HSW_COREI", 6, {60, 61, 69, 70, 71}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
-                          Family, Model, Threads, std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
+                          Family, Model, std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
index d5df3341..a65fb4b0 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class HaswellEPConfig final : public X86PlatformConfig {
 public:
-  HaswellEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+  HaswellEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
       : X86PlatformConfig("HSW_XEONEP", 6, {63, 79}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, Family,
-                          Model, Threads, std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
+                          Model, std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
index cbc7d976..ad07148e 100644
--- a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class KnightsLandingConfig final : public X86PlatformConfig {
 public:
-  KnightsLandingConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+  KnightsLandingConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
       : X86PlatformConfig("KNL_XEONPHI", 6, {87}, {4}, 0, {32768, 524288, 236279125}, 26214400, 1536, Family, Model,
-                          Threads, std::make_unique<payload::AVX512Payload>(SupportedFeatures)) {}
+                          std::make_unique<payload::AVX512Payload>(SupportedFeatures)) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 3}, {"L2_S", 8}, {"L1_L", 40}, {"REG", 10}});
diff --git a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
index 56f5bdc6..da31bac5 100644
--- a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class NaplesConfig final : public X86PlatformConfig {
 public:
-  NaplesConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+  NaplesConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
       : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24}, {1, 2}, 0, {65536, 524288, 2097152}, 104857600, 1536, Family,
-                          Model, Threads, std::make_unique<payload::ZENFMAPayload>(SupportedFeatures)) {}
+                          Model, std::make_unique<payload::ZENFMAPayload>(SupportedFeatures)) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
index 320d29f4..f762425d 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class NehalemConfig final : public X86PlatformConfig {
 public:
-  NehalemConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+  NehalemConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
       : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, Family,
-                          Model, Threads, std::make_unique<payload::SSE2Payload>(SupportedFeatures)) {}
+                          Model, std::make_unique<payload::SSE2Payload>(SupportedFeatures)) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 1}, {"L1_LS", 70}, {"REG", 2}});
diff --git a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
index c9d032da..1b2ccfd3 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class NehalemEPConfig final : public X86PlatformConfig {
 public:
-  NehalemEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+  NehalemEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
       : X86PlatformConfig("NHM_XEONEP", 6, {26, 44}, {1, 2}, 0, {32768, 262144, 2097152}, 104857600, 1536, Family,
-                          Model, Threads, std::make_unique<payload::SSE2Payload>(SupportedFeatures)) {}
+                          Model, std::make_unique<payload::SSE2Payload>(SupportedFeatures)) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 1}, {"L1_LS", 60}, {"REG", 2}});
diff --git a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
index c8c1ea73..5d6ec9cc 100644
--- a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class RomeConfig final : public X86PlatformConfig {
 public:
-  RomeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+  RomeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
       : X86PlatformConfig("ZEN_2_EPYC", 23, {49}, {1, 2}, 0, {32768, 524288, 2097152}, 104857600, 1536, Family, Model,
-                          Threads, std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
+                          std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
index 5ceef9ff..b9fff84e 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class SandyBridgeConfig final : public X86PlatformConfig {
 public:
-  SandyBridgeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+  SandyBridgeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
       : X86PlatformConfig("SNB_COREI", 6, {42, 58}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, Family, Model,
-                          Threads, std::make_unique<payload::AVXPayload>(SupportedFeatures)) {}
+                          std::make_unique<payload::AVXPayload>(SupportedFeatures)) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
index 8449bee1..d32020a4 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
@@ -28,9 +28,9 @@
 namespace firestarter::environment::x86::platform {
 class SandyBridgeEPConfig final : public X86PlatformConfig {
 public:
-  SandyBridgeEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+  SandyBridgeEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
       : X86PlatformConfig("SNB_XEONEP", 6, {45, 62}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, Family,
-                          Model, Threads, std::make_unique<payload::AVXPayload>(SupportedFeatures)) {}
+                          Model, std::make_unique<payload::AVXPayload>(SupportedFeatures)) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
index 1437ce98..0ad4e98d 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
@@ -28,9 +28,9 @@
 namespace firestarter::environment::x86::platform {
 class SkylakeConfig final : public X86PlatformConfig {
 public:
-  SkylakeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+  SkylakeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
       : X86PlatformConfig("SKL_COREI", 6, {78, 94}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, Family, Model,
-                          Threads, std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
+                          std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
index 8a91b6de..ab72b5f5 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class SkylakeSPConfig final : public X86PlatformConfig {
 public:
-  SkylakeSPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads)
+  SkylakeSPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
       : X86PlatformConfig("SKL_XEONEP", 6, {85}, {1, 2}, 0, {32768, 1048576, 1441792}, 1048576000, 1536, Family, Model,
-                          Threads, std::make_unique<payload::AVX512Payload>(SupportedFeatures)) {}
+                          std::make_unique<payload::AVX512Payload>(SupportedFeatures)) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_S", 3},
diff --git a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
index 3d33756c..6f5eb6ff 100644
--- a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
@@ -36,21 +36,18 @@ class X86PlatformConfig : public environment::platform::PlatformConfig {
   std::list<unsigned> Models;
   unsigned CurrentFamily;
   unsigned CurrentModel;
-  unsigned CurrentThreads;
 
 public:
   X86PlatformConfig(std::string Name, unsigned Family, std::initializer_list<unsigned> Models,
                     std::initializer_list<unsigned> Threads, unsigned InstructionCacheSize,
                     std::initializer_list<unsigned> DataCacheBufferSize, unsigned RamBuffersize, unsigned Lines,
-                    unsigned CurrentFamily, unsigned CurrentModel, unsigned CurrentThreads,
-                    std::unique_ptr<payload::X86Payload>&& Payload)
+                    unsigned CurrentFamily, unsigned CurrentModel, std::unique_ptr<payload::X86Payload>&& Payload)
       : PlatformConfig(std::move(Name), Threads, InstructionCacheSize, DataCacheBufferSize, RamBuffersize, Lines,
                        std::move(Payload))
       , Family(Family)
       , Models(Models)
       , CurrentFamily(CurrentFamily)
-      , CurrentModel(CurrentModel)
-      , CurrentThreads(CurrentThreads) {}
+      , CurrentModel(CurrentModel) {}
 
   [[nodiscard]] auto isDefault() const -> bool override {
     return Family == CurrentFamily && (std::find(Models.begin(), Models.end(), CurrentModel) != Models.end()) &&
diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp
index fec0143f..8d544ee2 100644
--- a/include/firestarter/Environment/X86/X86Environment.hpp
+++ b/include/firestarter/Environment/X86/X86Environment.hpp
@@ -40,9 +40,9 @@
 #include "X86CPUTopology.hpp"
 
 #define REGISTER(NAME)                                                                                                 \
-  [](asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model,                                    \
-     unsigned threads) -> std::unique_ptr<platform::X86PlatformConfig> {                                               \
-    return std::make_unique<platform::NAME>(supportedFeatures, family, model, threads);                                \
+  [](asmjit::CpuFeatures const& supportedFeatures, unsigned family,                                                    \
+     unsigned model) -> std::unique_ptr<platform::X86PlatformConfig> {                                                 \
+    return std::make_unique<platform::NAME>(supportedFeatures, family, model);                                         \
   }
 
 namespace firestarter::environment::x86 {
@@ -70,8 +70,8 @@ class X86Environment final : public Environment {
   // The available function IDs are generated by iterating through this list
   // of PlatformConfig. Add new PlatformConfig at the bottom to maintain
   // stable IDs.
-  const std::list<std::function<std::unique_ptr<platform::X86PlatformConfig>(asmjit::CpuFeatures const&, unsigned,
-                                                                             unsigned, unsigned)>>
+  const std::list<
+      std::function<std::unique_ptr<platform::X86PlatformConfig>(asmjit::CpuFeatures const&, unsigned, unsigned)>>
       PlatformConfigsCtor = {REGISTER(KnightsLandingConfig), REGISTER(SkylakeConfig),   REGISTER(SkylakeSPConfig),
                              REGISTER(HaswellConfig),        REGISTER(HaswellEPConfig), REGISTER(SandyBridgeConfig),
                              REGISTER(SandyBridgeEPConfig),  REGISTER(NehalemConfig),   REGISTER(NehalemEPConfig),
@@ -81,7 +81,7 @@ class X86Environment final : public Environment {
 
   // List of fallback PlatformConfig. Add one for each x86 extension.
   const std::list<std::function<std::unique_ptr<platform::X86PlatformConfig>(asmjit::CpuFeatures const&, unsigned,
-                                                                             unsigned, unsigned)>>
+                                                                             unsigned)>>
       FallbackPlatformConfigsCtor = {
           REGISTER(SkylakeSPConfig),   // AVX512
           REGISTER(BulldozerConfig),   // FMA4
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index 951c5d05..11617318 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -31,13 +31,12 @@ namespace firestarter::environment::x86 {
 void X86Environment::evaluateFunctions() {
   for (const auto& Ctor : PlatformConfigsCtor) {
     // add asmjit for model and family detection
-    PlatformConfigs.emplace_back(
-        Ctor(topology().featuresAsmjit(), topology().familyId(), topology().modelId(), topology().numThreadsPerCore()));
+    PlatformConfigs.emplace_back(Ctor(topology().featuresAsmjit(), topology().familyId(), topology().modelId()));
   }
 
   for (const auto& Ctor : FallbackPlatformConfigsCtor) {
     FallbackPlatformConfigs.emplace_back(
-        Ctor(topology().featuresAsmjit(), topology().familyId(), topology().modelId(), topology().numThreadsPerCore()));
+        Ctor(topology().featuresAsmjit(), topology().familyId(), topology().modelId()));
   }
 }
 
@@ -94,7 +93,7 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
     // fallback
     for (const auto& Config : FallbackPlatformConfigs) {
       if (Config->isAvailable()) {
-        auto SelectedThread = 0;
+        auto SelectedThread = 0U;
         auto SelectedFunctionName = std::string("");
         for (auto const& [Thread, FunctionName] : Config->getThreadMap()) {
           if (Thread == topology().numThreadsPerCore()) {
@@ -192,7 +191,7 @@ void X86Environment::printFunctionSummary() {
                  "-------------------------------------------------------------"
                  "-----------------------------";
 
-  unsigned Id = 1;
+  auto Id = 1U;
 
   for (auto const& Config : PlatformConfigs) {
     for (auto const& [thread, functionName] : Config->getThreadMap()) {

From 244346538efc2c5725f2815e23ab46ba16f2494f Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 27 Oct 2024 00:39:46 +0200
Subject: [PATCH 109/167] thin out payload class

---
 .../Environment/Payload/Payload.hpp           |  3 +-
 .../Environment/Platform/PlatformConfig.hpp   |  5 +--
 .../Environment/X86/Payload/AVX512Payload.hpp |  8 ++---
 .../Environment/X86/Payload/AVXPayload.hpp    |  8 ++---
 .../Environment/X86/Payload/FMA4Payload.hpp   |  9 ++---
 .../Environment/X86/Payload/FMAPayload.hpp    |  8 ++---
 .../Environment/X86/Payload/SSE2Payload.hpp   |  8 ++---
 .../Environment/X86/Payload/X86Payload.hpp    | 36 ++++++++++---------
 .../Environment/X86/Payload/ZENFMAPayload.hpp |  9 ++---
 .../X86/Platform/BulldozerConfig.hpp          |  6 ++--
 .../X86/Platform/HaswellConfig.hpp            |  4 +--
 .../X86/Platform/HaswellEPConfig.hpp          |  6 ++--
 .../X86/Platform/KnightsLandingConfig.hpp     |  6 ++--
 .../Environment/X86/Platform/NaplesConfig.hpp |  6 ++--
 .../X86/Platform/NehalemConfig.hpp            |  6 ++--
 .../X86/Platform/NehalemEPConfig.hpp          |  6 ++--
 .../Environment/X86/Platform/RomeConfig.hpp   |  6 ++--
 .../X86/Platform/SandyBridgeConfig.hpp        |  6 ++--
 .../X86/Platform/SandyBridgeEPConfig.hpp      |  6 ++--
 .../X86/Platform/SkylakeConfig.hpp            |  6 ++--
 .../X86/Platform/SkylakeSPConfig.hpp          |  6 ++--
 .../X86/Platform/X86PlatformConfig.hpp        | 22 +++++++-----
 .../Environment/X86/X86Environment.hpp        | 32 +++++++----------
 .../Environment/X86/X86Environment.cpp        | 13 ++++---
 24 files changed, 109 insertions(+), 122 deletions(-)

diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index a4611ed0..13a4d956 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include "firestarter/Constants.hpp"
+#include "firestarter/Environment/CPUTopology.hpp"
 #include <chrono>
 #include <list>
 #include <memory>
@@ -92,7 +93,7 @@ class Payload {
   /// The number of SIMD registers used by the payload
   [[nodiscard]] auto registerCount() const -> unsigned { return RegisterCount; }
 
-  [[nodiscard]] virtual auto isAvailable() const -> bool = 0;
+  [[nodiscard]] virtual auto isAvailable(const CPUTopology*) const -> bool = 0;
 
   virtual void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) = 0;
 
diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp
index 88450cd8..e4ae1e69 100644
--- a/include/firestarter/Environment/Platform/PlatformConfig.hpp
+++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include "../Payload/Payload.hpp"
+#include "firestarter/Environment/CPUTopology.hpp"
 #include <initializer_list>
 #include <map>
 #include <sstream>
@@ -76,9 +77,9 @@ class PlatformConfig {
     return ThreadMap;
   }
 
-  [[nodiscard]] auto isAvailable() const -> bool { return payload().isAvailable(); }
+  [[nodiscard]] auto isAvailable(const CPUTopology* Topology) const -> bool { return payload().isAvailable(Topology); }
 
-  [[nodiscard]] virtual auto isDefault() const -> bool = 0;
+  [[nodiscard]] virtual auto isDefault(const CPUTopology*) const -> bool = 0;
 
   [[nodiscard]] virtual auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> = 0;
 
diff --git a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
index fef49a12..5e845ed6 100644
--- a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
@@ -26,10 +26,8 @@
 namespace firestarter::environment::x86::payload {
 class AVX512Payload final : public X86Payload {
 public:
-  AVX512Payload() = delete;
-
-  explicit AVX512Payload(asmjit::CpuFeatures const& SupportedFeatures)
-      : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kAVX512_F}, "AVX512", 8, 32) {}
+  AVX512Payload()
+      : X86Payload({asmjit::CpuFeatures::X86::kAVX512_F}, "AVX512", 8, 32) {}
 
   auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
                       std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
@@ -38,7 +36,7 @@ class AVX512Payload final : public X86Payload {
   void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
-    return std::make_unique<AVX512Payload>(this->supportedFeatures());
+    return std::make_unique<AVX512Payload>();
   };
 
 private:
diff --git a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
index c249fd36..d9daa544 100644
--- a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
@@ -26,10 +26,8 @@
 namespace firestarter::environment::x86::payload {
 class AVXPayload final : public X86Payload {
 public:
-  AVXPayload() = delete;
-
-  explicit AVXPayload(asmjit::CpuFeatures const& SupportedFeatures)
-      : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kAVX}, "AVX", 4, 16) {}
+  AVXPayload()
+      : X86Payload({asmjit::CpuFeatures::X86::kAVX}, "AVX", 4, 16) {}
 
   auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
                       std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
@@ -38,7 +36,7 @@ class AVXPayload final : public X86Payload {
   void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
-    return std::make_unique<AVXPayload>(this->supportedFeatures());
+    return std::make_unique<AVXPayload>();
   };
 
 private:
diff --git a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
index 1dee4a63..7aa3f208 100644
--- a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
@@ -27,11 +27,8 @@ namespace firestarter::environment::x86::payload {
 
 class FMA4Payload final : public X86Payload {
 public:
-  FMA4Payload() = delete;
-
-  explicit FMA4Payload(asmjit::CpuFeatures const& SupportedFeatures)
-      : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4}, "FMA4", 4,
-                   16) {}
+  FMA4Payload()
+      : X86Payload({asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4}, "FMA4", 4, 16) {}
 
   auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
                       std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
@@ -40,7 +37,7 @@ class FMA4Payload final : public X86Payload {
   void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
-    return std::make_unique<FMA4Payload>(this->supportedFeatures());
+    return std::make_unique<FMA4Payload>();
   };
 
 private:
diff --git a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
index f4181dd8..b5d9f884 100644
--- a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
@@ -26,10 +26,8 @@
 namespace firestarter::environment::x86::payload {
 class FMAPayload final : public X86Payload {
 public:
-  FMAPayload() = delete;
-
-  explicit FMAPayload(asmjit::CpuFeatures const& SupportedFeatures)
-      : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA}, "FMA", 4, 16) {}
+  FMAPayload()
+      : X86Payload({asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA}, "FMA", 4, 16) {}
 
   auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
                       std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
@@ -38,7 +36,7 @@ class FMAPayload final : public X86Payload {
   void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
-    return std::make_unique<FMAPayload>(this->supportedFeatures());
+    return std::make_unique<FMAPayload>();
   };
 
 private:
diff --git a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
index 13df4abf..23a3b1a6 100644
--- a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
@@ -26,10 +26,8 @@
 namespace firestarter::environment::x86::payload {
 class SSE2Payload final : public X86Payload {
 public:
-  SSE2Payload() = delete;
-
-  explicit SSE2Payload(asmjit::CpuFeatures const& SupportedFeatures)
-      : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kSSE2}, "SSE2", 2, 16) {}
+  SSE2Payload()
+      : X86Payload({asmjit::CpuFeatures::X86::kSSE2}, "SSE2", 2, 16) {}
 
   auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
                       std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
@@ -38,7 +36,7 @@ class SSE2Payload final : public X86Payload {
   void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
-    return std::make_unique<SSE2Payload>(this->supportedFeatures());
+    return std::make_unique<SSE2Payload>();
   };
 
 private:
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index e46144c0..a0955415 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -26,6 +26,7 @@
 #include "../../../LoadWorkerData.hpp"
 #include "../../../Logging/Log.hpp" // IWYU pragma: keep
 #include "../../Payload/Payload.hpp"
+#include "../X86CPUTopology.hpp"
 #include <asmjit/x86.h>
 #include <cassert>
 #include <cstdint>
@@ -40,7 +41,6 @@ namespace firestarter::environment::x86::payload {
 class X86Payload : public environment::payload::Payload {
 private:
   // we can use this to check, if our platform support this payload
-  asmjit::CpuFeatures const& SupportedFeatures;
   std::list<asmjit::CpuFeatures::X86::Id> FeatureRequests;
 
 protected:
@@ -48,8 +48,6 @@ class X86Payload : public environment::payload::Payload {
   using LoadFunctionType = uint64_t (*)(double*, volatile LoadThreadWorkType*, uint64_t);
   LoadFunctionType LoadFunction = nullptr;
 
-  [[nodiscard]] auto supportedFeatures() const -> asmjit::CpuFeatures const& { return this->SupportedFeatures; }
-
   /// Emit the code to dump the xmm, ymm or zmm registers into memory for the dump registers feature.
   /// \arg Vec the type of the vector register used.
   /// \arg Cb The asmjit code builder that is used to emit the assembler code.
@@ -468,24 +466,12 @@ class X86Payload : public environment::payload::Payload {
   }
 
 public:
-  X86Payload(asmjit::CpuFeatures const& SupportedFeatures,
-             std::initializer_list<asmjit::CpuFeatures::X86::Id> FeatureRequests, std::string Name,
+  X86Payload(std::initializer_list<asmjit::CpuFeatures::X86::Id> FeatureRequests, std::string Name,
              unsigned RegisterSize, unsigned RegisterCount)
       : Payload(std::move(Name), RegisterSize, RegisterCount)
-      , SupportedFeatures(SupportedFeatures)
       , FeatureRequests(FeatureRequests) {}
 
-  [[nodiscard]] auto isAvailable() const -> bool override {
-    bool Available = true;
-
-    for (auto const& Feature : FeatureRequests) {
-      Available &= this->SupportedFeatures.has(Feature);
-    }
-
-    return Available;
-  };
-
-    // A generic implemenation for all x86 payloads
+  // A generic implemenation for all x86 payloads
 #if defined(__clang__)
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Woverloaded-virtual"
@@ -502,6 +488,22 @@ class X86Payload : public environment::payload::Payload {
 
   auto highLoadFunction(double* AddrMem, volatile LoadThreadWorkType& LoadVar, uint64_t Iterations)
       -> uint64_t override;
+
+  [[nodiscard]] auto isAvailable(const X86CPUTopology& Topology) const -> bool { return isAvailable(&Topology); }
+
+private:
+  [[nodiscard]] auto isAvailable(const CPUTopology* Topology) const -> bool final {
+    const auto* FinalTopology = dynamic_cast<const X86CPUTopology*>(Topology);
+    assert(FinalTopology && "isAvailable not called with const X86CPUTopology*");
+
+    bool Available = true;
+
+    for (auto const& Feature : FeatureRequests) {
+      Available &= FinalTopology->featuresAsmjit().has(Feature);
+    }
+
+    return Available;
+  };
 };
 
 } // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
index 0b02742f..b76b767e 100644
--- a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
@@ -26,11 +26,8 @@
 namespace firestarter::environment::x86::payload {
 class ZENFMAPayload final : public X86Payload {
 public:
-  ZENFMAPayload() = delete;
-
-  explicit ZENFMAPayload(asmjit::CpuFeatures const& SupportedFeatures)
-      : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA},
-                   "ZENFMA", 4, 16) {}
+  ZENFMAPayload()
+      : X86Payload({asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA}, "ZENFMA", 4, 16) {}
 
   auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
                       std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
@@ -39,7 +36,7 @@ class ZENFMAPayload final : public X86Payload {
   void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
-    return std::make_unique<ZENFMAPayload>(this->supportedFeatures());
+    return std::make_unique<ZENFMAPayload>();
   };
 
 private:
diff --git a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
index 59c54ee5..bb7c4145 100644
--- a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class BulldozerConfig final : public X86PlatformConfig {
 public:
-  BulldozerConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
-      : X86PlatformConfig("BLD_OPTERON", 21, {1, 2, 3}, {1}, 0, {16384, 1048576, 786432}, 104857600, 1536, Family,
-                          Model, std::make_unique<payload::FMA4Payload>(SupportedFeatures)) {}
+  BulldozerConfig()
+      : X86PlatformConfig("BLD_OPTERON", 21, {1, 2, 3}, {1}, 0, {16384, 1048576, 786432}, 104857600, 1536,
+                          std::make_unique<payload::FMA4Payload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
index bfeb1c25..3f8bfaf7 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class HaswellConfig final : public X86PlatformConfig {
 public:
-  HaswellConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
+  HaswellConfig()
       : X86PlatformConfig("HSW_COREI", 6, {60, 61, 69, 70, 71}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
-                          Family, Model, std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
+                          std::make_unique<payload::FMAPayload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
index a65fb4b0..ce8a19b9 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class HaswellEPConfig final : public X86PlatformConfig {
 public:
-  HaswellEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
-      : X86PlatformConfig("HSW_XEONEP", 6, {63, 79}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, Family,
-                          Model, std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
+  HaswellEPConfig()
+      : X86PlatformConfig("HSW_XEONEP", 6, {63, 79}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536,
+                          std::make_unique<payload::FMAPayload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
index ad07148e..b1d627ea 100644
--- a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class KnightsLandingConfig final : public X86PlatformConfig {
 public:
-  KnightsLandingConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
-      : X86PlatformConfig("KNL_XEONPHI", 6, {87}, {4}, 0, {32768, 524288, 236279125}, 26214400, 1536, Family, Model,
-                          std::make_unique<payload::AVX512Payload>(SupportedFeatures)) {}
+  KnightsLandingConfig()
+      : X86PlatformConfig("KNL_XEONPHI", 6, {87}, {4}, 0, {32768, 524288, 236279125}, 26214400, 1536,
+                          std::make_unique<payload::AVX512Payload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 3}, {"L2_S", 8}, {"L1_L", 40}, {"REG", 10}});
diff --git a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
index da31bac5..f33ced0f 100644
--- a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class NaplesConfig final : public X86PlatformConfig {
 public:
-  NaplesConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
-      : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24}, {1, 2}, 0, {65536, 524288, 2097152}, 104857600, 1536, Family,
-                          Model, std::make_unique<payload::ZENFMAPayload>(SupportedFeatures)) {}
+  NaplesConfig()
+      : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24}, {1, 2}, 0, {65536, 524288, 2097152}, 104857600, 1536,
+                          std::make_unique<payload::ZENFMAPayload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
index f762425d..0c39409f 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class NehalemConfig final : public X86PlatformConfig {
 public:
-  NehalemConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
-      : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, Family,
-                          Model, std::make_unique<payload::SSE2Payload>(SupportedFeatures)) {}
+  NehalemConfig()
+      : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
+                          std::make_unique<payload::SSE2Payload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 1}, {"L1_LS", 70}, {"REG", 2}});
diff --git a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
index 1b2ccfd3..cbb5b18f 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class NehalemEPConfig final : public X86PlatformConfig {
 public:
-  NehalemEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
-      : X86PlatformConfig("NHM_XEONEP", 6, {26, 44}, {1, 2}, 0, {32768, 262144, 2097152}, 104857600, 1536, Family,
-                          Model, std::make_unique<payload::SSE2Payload>(SupportedFeatures)) {}
+  NehalemEPConfig()
+      : X86PlatformConfig("NHM_XEONEP", 6, {26, 44}, {1, 2}, 0, {32768, 262144, 2097152}, 104857600, 1536,
+                          std::make_unique<payload::SSE2Payload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 1}, {"L1_LS", 60}, {"REG", 2}});
diff --git a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
index 5d6ec9cc..bc723d03 100644
--- a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class RomeConfig final : public X86PlatformConfig {
 public:
-  RomeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
-      : X86PlatformConfig("ZEN_2_EPYC", 23, {49}, {1, 2}, 0, {32768, 524288, 2097152}, 104857600, 1536, Family, Model,
-                          std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
+  RomeConfig()
+      : X86PlatformConfig("ZEN_2_EPYC", 23, {49}, {1, 2}, 0, {32768, 524288, 2097152}, 104857600, 1536,
+                          std::make_unique<payload::FMAPayload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
index b9fff84e..62a620c9 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class SandyBridgeConfig final : public X86PlatformConfig {
 public:
-  SandyBridgeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
-      : X86PlatformConfig("SNB_COREI", 6, {42, 58}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, Family, Model,
-                          std::make_unique<payload::AVXPayload>(SupportedFeatures)) {}
+  SandyBridgeConfig()
+      : X86PlatformConfig("SNB_COREI", 6, {42, 58}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
+                          std::make_unique<payload::AVXPayload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
index d32020a4..7571efb5 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
@@ -28,9 +28,9 @@
 namespace firestarter::environment::x86::platform {
 class SandyBridgeEPConfig final : public X86PlatformConfig {
 public:
-  SandyBridgeEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
-      : X86PlatformConfig("SNB_XEONEP", 6, {45, 62}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, Family,
-                          Model, std::make_unique<payload::AVXPayload>(SupportedFeatures)) {}
+  SandyBridgeEPConfig()
+      : X86PlatformConfig("SNB_XEONEP", 6, {45, 62}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536,
+                          std::make_unique<payload::AVXPayload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
index 0ad4e98d..de7d9f62 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
@@ -28,9 +28,9 @@
 namespace firestarter::environment::x86::platform {
 class SkylakeConfig final : public X86PlatformConfig {
 public:
-  SkylakeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
-      : X86PlatformConfig("SKL_COREI", 6, {78, 94}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, Family, Model,
-                          std::make_unique<payload::FMAPayload>(SupportedFeatures)) {}
+  SkylakeConfig()
+      : X86PlatformConfig("SKL_COREI", 6, {78, 94}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
+                          std::make_unique<payload::FMAPayload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
index ab72b5f5..9bcfd069 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class SkylakeSPConfig final : public X86PlatformConfig {
 public:
-  SkylakeSPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model)
-      : X86PlatformConfig("SKL_XEONEP", 6, {85}, {1, 2}, 0, {32768, 1048576, 1441792}, 1048576000, 1536, Family, Model,
-                          std::make_unique<payload::AVX512Payload>(SupportedFeatures)) {}
+  SkylakeSPConfig()
+      : X86PlatformConfig("SKL_XEONEP", 6, {85}, {1, 2}, 0, {32768, 1048576, 1441792}, 1048576000, 1536,
+                          std::make_unique<payload::AVX512Payload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_S", 3},
diff --git a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
index 6f5eb6ff..b1dc8ae7 100644
--- a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
@@ -23,6 +23,8 @@
 
 #include "../../Platform/PlatformConfig.hpp"
 #include "../Payload/X86Payload.hpp"
+#include "firestarter/Environment/CPUTopology.hpp"
+#include "firestarter/Environment/X86/X86CPUTopology.hpp"
 #include <algorithm>
 #include <memory>
 #include <string>
@@ -34,24 +36,26 @@ class X86PlatformConfig : public environment::platform::PlatformConfig {
 private:
   unsigned Family;
   std::list<unsigned> Models;
-  unsigned CurrentFamily;
-  unsigned CurrentModel;
 
 public:
   X86PlatformConfig(std::string Name, unsigned Family, std::initializer_list<unsigned> Models,
                     std::initializer_list<unsigned> Threads, unsigned InstructionCacheSize,
                     std::initializer_list<unsigned> DataCacheBufferSize, unsigned RamBuffersize, unsigned Lines,
-                    unsigned CurrentFamily, unsigned CurrentModel, std::unique_ptr<payload::X86Payload>&& Payload)
+                    std::unique_ptr<payload::X86Payload>&& Payload)
       : PlatformConfig(std::move(Name), Threads, InstructionCacheSize, DataCacheBufferSize, RamBuffersize, Lines,
                        std::move(Payload))
       , Family(Family)
-      , Models(Models)
-      , CurrentFamily(CurrentFamily)
-      , CurrentModel(CurrentModel) {}
+      , Models(Models) {}
 
-  [[nodiscard]] auto isDefault() const -> bool override {
-    return Family == CurrentFamily && (std::find(Models.begin(), Models.end(), CurrentModel) != Models.end()) &&
-           isAvailable();
+  [[nodiscard]] auto isDefault(const X86CPUTopology& Topology) const -> bool { return isDefault(&Topology); }
+
+private:
+  [[nodiscard]] auto isDefault(const CPUTopology* Topology) const -> bool final {
+    const auto* FinalTopology = dynamic_cast<const X86CPUTopology*>(Topology);
+    assert(FinalTopology && "isDefault not called with const X86CPUTopology*");
+
+    return Family == FinalTopology->familyId() &&
+           (std::find(Models.begin(), Models.end(), FinalTopology->modelId()) != Models.end()) && isAvailable(Topology);
   }
 };
 
diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp
index 8d544ee2..91ff68e2 100644
--- a/include/firestarter/Environment/X86/X86Environment.hpp
+++ b/include/firestarter/Environment/X86/X86Environment.hpp
@@ -40,10 +40,7 @@
 #include "X86CPUTopology.hpp"
 
 #define REGISTER(NAME)                                                                                                 \
-  [](asmjit::CpuFeatures const& supportedFeatures, unsigned family,                                                    \
-     unsigned model) -> std::unique_ptr<platform::X86PlatformConfig> {                                                 \
-    return std::make_unique<platform::NAME>(supportedFeatures, family, model);                                         \
-  }
+  []() -> std::unique_ptr<platform::X86PlatformConfig> { return std::make_unique<platform::NAME>(); }
 
 namespace firestarter::environment::x86 {
 
@@ -70,25 +67,22 @@ class X86Environment final : public Environment {
   // The available function IDs are generated by iterating through this list
   // of PlatformConfig. Add new PlatformConfig at the bottom to maintain
   // stable IDs.
-  const std::list<
-      std::function<std::unique_ptr<platform::X86PlatformConfig>(asmjit::CpuFeatures const&, unsigned, unsigned)>>
-      PlatformConfigsCtor = {REGISTER(KnightsLandingConfig), REGISTER(SkylakeConfig),   REGISTER(SkylakeSPConfig),
-                             REGISTER(HaswellConfig),        REGISTER(HaswellEPConfig), REGISTER(SandyBridgeConfig),
-                             REGISTER(SandyBridgeEPConfig),  REGISTER(NehalemConfig),   REGISTER(NehalemEPConfig),
-                             REGISTER(BulldozerConfig),      REGISTER(NaplesConfig),    REGISTER(RomeConfig)};
+  const std::list<std::function<std::unique_ptr<platform::X86PlatformConfig>()>> PlatformConfigsCtor = {
+      REGISTER(KnightsLandingConfig), REGISTER(SkylakeConfig),   REGISTER(SkylakeSPConfig),
+      REGISTER(HaswellConfig),        REGISTER(HaswellEPConfig), REGISTER(SandyBridgeConfig),
+      REGISTER(SandyBridgeEPConfig),  REGISTER(NehalemConfig),   REGISTER(NehalemEPConfig),
+      REGISTER(BulldozerConfig),      REGISTER(NaplesConfig),    REGISTER(RomeConfig)};
 
   std::list<std::unique_ptr<platform::X86PlatformConfig>> PlatformConfigs;
 
   // List of fallback PlatformConfig. Add one for each x86 extension.
-  const std::list<std::function<std::unique_ptr<platform::X86PlatformConfig>(asmjit::CpuFeatures const&, unsigned,
-                                                                             unsigned)>>
-      FallbackPlatformConfigsCtor = {
-          REGISTER(SkylakeSPConfig),   // AVX512
-          REGISTER(BulldozerConfig),   // FMA4
-          REGISTER(HaswellConfig),     // FMA
-          REGISTER(SandyBridgeConfig), // AVX
-          REGISTER(NehalemConfig)      // SSE2
-      };
+  const std::list<std::function<std::unique_ptr<platform::X86PlatformConfig>()>> FallbackPlatformConfigsCtor = {
+      REGISTER(SkylakeSPConfig),   // AVX512
+      REGISTER(BulldozerConfig),   // FMA4
+      REGISTER(HaswellConfig),     // FMA
+      REGISTER(SandyBridgeConfig), // AVX
+      REGISTER(NehalemConfig)      // SSE2
+  };
 
   std::list<std::unique_ptr<platform::X86PlatformConfig>> FallbackPlatformConfigs;
 
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index 11617318..ad055c6e 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -31,12 +31,11 @@ namespace firestarter::environment::x86 {
 void X86Environment::evaluateFunctions() {
   for (const auto& Ctor : PlatformConfigsCtor) {
     // add asmjit for model and family detection
-    PlatformConfigs.emplace_back(Ctor(topology().featuresAsmjit(), topology().familyId(), topology().modelId()));
+    PlatformConfigs.emplace_back(Ctor());
   }
 
   for (const auto& Ctor : FallbackPlatformConfigsCtor) {
-    FallbackPlatformConfigs.emplace_back(
-        Ctor(topology().featuresAsmjit(), topology().familyId(), topology().modelId()));
+    FallbackPlatformConfigs.emplace_back(Ctor());
   }
 }
 
@@ -49,7 +48,7 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
     for (auto const& [thread, functionName] : Config->getThreadMap()) {
       // the selected function
       if (Id == FunctionId) {
-        if (!Config->isAvailable()) {
+        if (!Config->isAvailable(Topology.get())) {
           const auto ErrorString = "Function " + std::to_string(FunctionId) + " (\"" + functionName + "\") requires " +
                                    Config->payload().name() + ", which is not supported by the processor.";
           if (AllowUnavailablePayload) {
@@ -64,7 +63,7 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
         return;
       }
       // default function
-      if (0 == FunctionId && Config->isDefault()) {
+      if (0 == FunctionId && Config->isDefault(topology())) {
         if (thread == topology().numThreadsPerCore()) {
           SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(*Config, thread,
                                                                                    topology().instructionCacheSize());
@@ -92,7 +91,7 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
     // loop over available implementation and check if they are marked as
     // fallback
     for (const auto& Config : FallbackPlatformConfigs) {
-      if (Config->isAvailable()) {
+      if (Config->isAvailable(Topology.get())) {
         auto SelectedThread = 0U;
         auto SelectedFunctionName = std::string("");
         for (auto const& [Thread, FunctionName] : Config->getThreadMap()) {
@@ -195,7 +194,7 @@ void X86Environment::printFunctionSummary() {
 
   for (auto const& Config : PlatformConfigs) {
     for (auto const& [thread, functionName] : Config->getThreadMap()) {
-      const char* Available = Config->isAvailable() ? "yes" : "no";
+      const char* Available = Config->isAvailable(Topology.get()) ? "yes" : "no";
       const char* Fmt = "  %4u | %-30s | %-24s | %s";
       int Sz = std::snprintf(nullptr, 0, Fmt, Id, functionName.c_str(), Available,
                              Config->getDefaultPayloadSettingsString().c_str());

From 48cc7a81e4afe3c178d9ec743a53aa14a458506e Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 27 Oct 2024 00:50:08 +0200
Subject: [PATCH 110/167] remove protected member

---
 include/firestarter/Environment/CPUTopology.hpp        | 4 +++-
 include/firestarter/Environment/X86/X86CPUTopology.hpp | 8 ++++++++
 src/firestarter/Environment/X86/X86CPUTopology.cpp     | 5 ++---
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/include/firestarter/Environment/CPUTopology.hpp b/include/firestarter/Environment/CPUTopology.hpp
index e50a0215..d05e4ac8 100644
--- a/include/firestarter/Environment/CPUTopology.hpp
+++ b/include/firestarter/Environment/CPUTopology.hpp
@@ -69,10 +69,12 @@ class CPUTopology {
   [[nodiscard]] static auto scalingGovernor() -> std::string;
   [[nodiscard]] auto print(std::ostream& Stream) const -> std::ostream&;
 
+private:
+  /// The Vendor name of the CPU.
   std::string Vendor;
+  /// The Model name of the CPU.
   std::string Model;
 
-private:
   [[nodiscard]] static auto getFileAsStream(std::string const& FilePath) -> std::stringstream;
 
   unsigned NumThreadsPerCore;
diff --git a/include/firestarter/Environment/X86/X86CPUTopology.hpp b/include/firestarter/Environment/X86/X86CPUTopology.hpp
index 44875e03..c7706a77 100644
--- a/include/firestarter/Environment/X86/X86CPUTopology.hpp
+++ b/include/firestarter/Environment/X86/X86CPUTopology.hpp
@@ -44,6 +44,9 @@ class X86CPUTopology final : public CPUTopology {
   [[nodiscard]] auto modelId() const -> unsigned { return this->CpuInfo.modelId(); }
   [[nodiscard]] auto stepping() const -> unsigned { return this->CpuInfo.stepping(); }
 
+  [[nodiscard]] auto vendor() const -> std::string const& final { return Vendor; }
+  [[nodiscard]] auto model() const -> std::string const& final { return Model; }
+
 private:
   [[nodiscard]] auto hasRdtsc() const -> bool { return this->HasRdtsc; }
   [[nodiscard]] auto hasInvariantRdtsc() const -> bool { return this->HasInvariantRdtsc; }
@@ -54,6 +57,11 @@ class X86CPUTopology final : public CPUTopology {
 
   bool HasRdtsc;
   bool HasInvariantRdtsc;
+
+  /// The Vendor name of the CPU.
+  std::string Vendor;
+  /// The Model name of the CPU.
+  std::string Model;
 };
 
 inline auto operator<<(std::ostream& Stream, X86CPUTopology const& CpuTopology) -> std::ostream& {
diff --git a/src/firestarter/Environment/X86/X86CPUTopology.cpp b/src/firestarter/Environment/X86/X86CPUTopology.cpp
index 153fc9df..a8eeda93 100644
--- a/src/firestarter/Environment/X86/X86CPUTopology.cpp
+++ b/src/firestarter/Environment/X86/X86CPUTopology.cpp
@@ -35,9 +35,8 @@ namespace firestarter::environment::x86 {
 
 X86CPUTopology::X86CPUTopology()
     : CPUTopology("x86_64")
-    , CpuInfo(asmjit::CpuInfo::host()) {
-
-  Vendor = CpuInfo.vendor();
+    , CpuInfo(asmjit::CpuInfo::host())
+    , Vendor(CpuInfo.vendor()) {
 
   {
     std::stringstream Ss;

From 0742466dc37a78c5e77114e0221d984d2e12a5e8 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 27 Oct 2024 01:17:50 +0200
Subject: [PATCH 111/167] remove protected data members. fix clang-tidy
 warnings

---
 include/firestarter/Environment/Environment.hpp     | 2 +-
 include/firestarter/Environment/Payload/Payload.hpp | 9 +++++----
 include/firestarter/LoadWorkerData.hpp              | 9 ++++-----
 src/firestarter/Environment/Environment.cpp         | 2 +-
 src/firestarter/Environment/X86/X86CPUTopology.cpp  | 2 +-
 src/firestarter/LoadWorker.cpp                      | 4 ++--
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index 89cff2cf..6ebad72e 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -37,7 +37,7 @@ class Environment {
   virtual ~Environment() { delete SelectedConfig; }
 
   void evaluateCpuAffinity(unsigned RequestedNumThreads, const std::string& CpuBind);
-  void setCpuAffinity(unsigned Thread);
+  void setCpuAffinity(unsigned Thread) const;
   void printThreadSummary();
 
   virtual void evaluateFunctions() = 0;
diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index 13a4d956..db9c5ec7 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -38,15 +38,16 @@ class Payload {
   [[nodiscard]] static auto getSequenceStartCount(const std::vector<std::string>& Sequence, const std::string& Start)
       -> unsigned;
 
+  /// The size of the SIMD registers in units of doubles (8B)
+  unsigned RegisterSize = 0;
+  /// The number of SIMD registers used by the payload
+  unsigned RegisterCount = 0;
+
 protected:
   unsigned Flops = 0;
   unsigned Bytes = 0;
   // number of instructions in load loop
   unsigned Instructions = 0;
-  /// The size of the SIMD registers in units of doubles (8B)
-  unsigned RegisterSize = 0;
-  /// The number of SIMD registers used by the payload
-  unsigned RegisterCount = 0;
 
   [[nodiscard]] static auto generateSequence(const std::vector<std::pair<std::string, unsigned>>& Proportion)
       -> std::vector<std::string>;
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index 65c3286d..d9177576 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -107,7 +107,7 @@ class LoadWorkerData {
     }
   };
 
-  LoadWorkerData(uint64_t Id, environment::Environment& Environment, volatile LoadThreadWorkType& LoadVar,
+  LoadWorkerData(uint64_t Id, const environment::Environment& Environment, volatile LoadThreadWorkType& LoadVar,
                  std::chrono::microseconds Period, bool DumpRegisters, bool ErrorDetection)
       : LoadVar(LoadVar)
       , Period(Period)
@@ -126,7 +126,7 @@ class LoadWorkerData {
   }
 
   [[nodiscard]] auto id() const -> uint64_t { return Id; }
-  [[nodiscard]] auto environment() const -> environment::Environment& { return Environment; }
+  [[nodiscard]] auto environment() const -> const environment::Environment& { return Environment; }
   [[nodiscard]] auto config() const -> environment::platform::RuntimeConfig& { return *Config; }
 
   /// Access the DumpRegisterStruct. Asserts when dumping registers is not enabled.
@@ -173,9 +173,8 @@ class LoadWorkerData {
   std::shared_ptr<uint64_t> CommunicationLeft;
   std::shared_ptr<uint64_t> CommunicationRight;
 
-private:
-  uint64_t Id;
-  environment::Environment& Environment;
+  const uint64_t Id;
+  const environment::Environment& Environment;
   environment::platform::RuntimeConfig* Config;
 };
 
diff --git a/src/firestarter/Environment/Environment.cpp b/src/firestarter/Environment/Environment.cpp
index 88664044..c30e0fb1 100644
--- a/src/firestarter/Environment/Environment.cpp
+++ b/src/firestarter/Environment/Environment.cpp
@@ -208,7 +208,7 @@ void Environment::printThreadSummary() {
 #endif
 }
 
-void Environment::setCpuAffinity(unsigned Thread) {
+void Environment::setCpuAffinity(unsigned Thread) const {
   if (Thread >= requestedNumThreads()) {
     throw std::invalid_argument("Trying to set more CPUs than available.");
   }
diff --git a/src/firestarter/Environment/X86/X86CPUTopology.cpp b/src/firestarter/Environment/X86/X86CPUTopology.cpp
index a8eeda93..02a84218 100644
--- a/src/firestarter/Environment/X86/X86CPUTopology.cpp
+++ b/src/firestarter/Environment/X86/X86CPUTopology.cpp
@@ -153,7 +153,7 @@ auto X86CPUTopology::clockrate() const -> uint64_t {
   }
 
   /* non invariant TSCs can be used if CPUs run at fixed frequency */
-  if (!hasInvariantRdtsc() && Governor.compare("performance") && Governor.compare("powersave")) {
+  if (!hasInvariantRdtsc() && Governor != "performance" && Governor != "powersave") {
     return CPUTopology::clockrate();
   }
 
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index a1231286..9cef4d0b 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -69,8 +69,8 @@ void Firestarter::initLoadWorkers() {
   }
 
   for (uint64_t I = 0; I < NumThreads; I++) {
-    auto Td =
-        std::make_shared<LoadWorkerData>(I, *Environment, LoadVar, Cfg.Period, Cfg.DumpRegisters, Cfg.ErrorDetection);
+    auto Td = std::make_shared<LoadWorkerData>(I, std::cref(*Environment), std::ref(LoadVar), Cfg.Period,
+                                               Cfg.DumpRegisters, Cfg.ErrorDetection);
 
     if (Cfg.ErrorDetection) {
       // distribute pointers for error deteciton. (set threads in a ring)

From 73ee17b917d967c57e2a0a1e771765ff7d7372ab Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 27 Oct 2024 01:18:02 +0200
Subject: [PATCH 112/167] fix clang-tidy warnings

---
 src/firestarter/Optimizer/Util/MultiObjective.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/firestarter/Optimizer/Util/MultiObjective.cpp b/src/firestarter/Optimizer/Util/MultiObjective.cpp
index 3c0d1c8f..95554dd9 100644
--- a/src/firestarter/Optimizer/Util/MultiObjective.cpp
+++ b/src/firestarter/Optimizer/Util/MultiObjective.cpp
@@ -39,10 +39,8 @@ auto lessThanF(double A, double B) -> bool {
     }
     return true; // a < nan
   }
-  if (!std::isnan(B)) {
-    return false; // nan < b
-  }
-  return false; // nan < nan
+  // nan < b or nan < nan
+  return false;
 }
 
 // Greater than compares floating point types placing nans after inf or before

From bcd588b757986c45d87c8f6cd0bd2f7ff6548258 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 27 Oct 2024 10:36:54 +0100
Subject: [PATCH 113/167] remove warnings

---
 .../firestarter/Environment/Platform/PlatformConfig.hpp   | 2 --
 src/firestarter/Measurement/Summary.cpp                   | 8 ++++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp
index e4ae1e69..f0213036 100644
--- a/include/firestarter/Environment/Platform/PlatformConfig.hpp
+++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp
@@ -36,8 +36,6 @@ class PlatformConfig {
   std::string Name;
   std::list<unsigned> Threads;
   std::unique_ptr<payload::Payload> Payload;
-
-protected:
   unsigned InstructionCacheSize;
   std::list<unsigned> DataCacheBufferSize;
   unsigned RamBufferSize;
diff --git a/src/firestarter/Measurement/Summary.cpp b/src/firestarter/Measurement/Summary.cpp
index 90f770bd..1f54f721 100644
--- a/src/firestarter/Measurement/Summary.cpp
+++ b/src/firestarter/Measurement/Summary.cpp
@@ -45,7 +45,7 @@ auto Summary::calculate(std::vector<TimeValue>::iterator Begin, std::vector<Time
         double Value = ValueDiff / TimeDiff;
 
         if (MetricType.DivideByThreadCount) {
-          Value /= NumThreads;
+          Value /= static_cast<double>(NumThreads);
         }
 
         Values.emplace_back(Prev.Time, Value);
@@ -57,7 +57,7 @@ auto Summary::calculate(std::vector<TimeValue>::iterator Begin, std::vector<Time
       double Value = It->Value;
 
       if (MetricType.DivideByThreadCount) {
-        Value /= NumThreads;
+        Value /= static_cast<double>(NumThreads);
       }
 
       Values.emplace_back(It->Time, Value);
@@ -84,12 +84,12 @@ auto Summary::calculate(std::vector<TimeValue>::iterator Begin, std::vector<Time
       for (auto It = Begin; It != End; ++It) {
         Acc += Fn(It->Value);
       }
-      return Acc / SummaryVal.NumTimepoints;
+      return Acc / static_cast<double>(SummaryVal.NumTimepoints);
     };
 
     SummaryVal.Average = SumOverNths([](double V) { return V; });
     SummaryVal.Stddev = std::sqrt(SumOverNths([&SummaryVal](double V) {
-      double Centered = V - SummaryVal.Average;
+      const auto Centered = V - SummaryVal.Average;
       return Centered * Centered;
     }));
   }

From 295e5381d9915a0d63aa4caee83dc6aa0bd26387 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 27 Oct 2024 10:37:25 +0100
Subject: [PATCH 114/167] fix string compare in default cpu frequency lookup

---
 src/firestarter/Environment/CPUTopology.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/firestarter/Environment/CPUTopology.cpp b/src/firestarter/Environment/CPUTopology.cpp
index 4283c34b..b37b2900 100644
--- a/src/firestarter/Environment/CPUTopology.cpp
+++ b/src/firestarter/Environment/CPUTopology.cpp
@@ -233,7 +233,7 @@ CPUTopology::CPUTopology(std::string Architecture)
       auto ScalingMaxFreq = getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq").str();
       auto CpuinfoMaxFreq = getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq").str();
 
-      if (Governor.compare("performance") || Governor.compare("powersave")) {
+      if (Governor == "performance" || Governor == "powersave") {
         if (ScalingCurFreq.empty()) {
           if (!CpuinfoCurFreq.empty()) {
             ClockrateStr = CpuinfoCurFreq;

From e7222dba7271b445e5dc36fec614f7d203b53af5 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 27 Oct 2024 10:55:56 +0100
Subject: [PATCH 115/167] fix more warnings

---
 include/firestarter/Environment/CPUTopology.hpp | 11 ++++++-----
 include/firestarter/ErrorDetectionStruct.hpp    |  2 ++
 include/firestarter/LoadWorkerData.hpp          |  5 ++++-
 src/firestarter/Environment/CPUTopology.cpp     | 12 ++++++------
 src/firestarter/Environment/Environment.cpp     |  5 +++--
 5 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/include/firestarter/Environment/CPUTopology.hpp b/include/firestarter/Environment/CPUTopology.hpp
index d05e4ac8..cb6e3675 100644
--- a/include/firestarter/Environment/CPUTopology.hpp
+++ b/include/firestarter/Environment/CPUTopology.hpp
@@ -23,6 +23,7 @@
 
 #include <cstdint>
 #include <list>
+#include <optional>
 #include <ostream>
 #include <sstream>
 #include <string>
@@ -38,7 +39,7 @@ class CPUTopology {
   explicit CPUTopology(std::string Architecture);
   virtual ~CPUTopology();
 
-  friend auto operator<<(std::ostream& Stream, CPUTopology const& CpuTopology) -> std::ostream&;
+  friend auto operator<<(std::ostream& Stream, CPUTopology const& CpuTopologyRef) -> std::ostream&;
 
   [[nodiscard]] auto numThreads() const -> unsigned { return NumThreadsPerCore * NumCoresTotal; }
   [[nodiscard]] auto maxNumThreads() const -> unsigned;
@@ -62,8 +63,8 @@ class CPUTopology {
   // get a timestamp
   [[nodiscard]] virtual auto timestamp() const -> uint64_t = 0;
 
-  [[nodiscard]] auto getPkgIdFromPU(unsigned Pu) const -> int;
-  [[nodiscard]] auto getCoreIdFromPU(unsigned Pu) const -> int;
+  [[nodiscard]] auto getPkgIdFromPU(unsigned Pu) const -> std::optional<unsigned>;
+  [[nodiscard]] auto getCoreIdFromPU(unsigned Pu) const -> std::optional<unsigned>;
 
 protected:
   [[nodiscard]] static auto scalingGovernor() -> std::string;
@@ -87,8 +88,8 @@ class CPUTopology {
   hwloc_topology_t Topology{};
 };
 
-inline auto operator<<(std::ostream& Stream, CPUTopology const& CpuTopology) -> std::ostream& {
-  return CpuTopology.print(Stream);
+inline auto operator<<(std::ostream& Stream, CPUTopology const& CpuTopologyRef) -> std::ostream& {
+  return CpuTopologyRef.print(Stream);
 }
 
 } // namespace firestarter::environment
diff --git a/include/firestarter/ErrorDetectionStruct.hpp b/include/firestarter/ErrorDetectionStruct.hpp
index dc4d43ce..38a696c4 100644
--- a/include/firestarter/ErrorDetectionStruct.hpp
+++ b/include/firestarter/ErrorDetectionStruct.hpp
@@ -28,10 +28,12 @@ struct ErrorDetectionStruct {
   struct OneSide {
     // the pointer to 16B of communication
     volatile uint64_t* Communication;
+    // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
     volatile uint64_t Locals[4];
     // if this variable is not 0, an error occured in the comparison with the
     // left thread.
     volatile uint64_t Error;
+    // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
     volatile uint64_t Padding[2];
   };
 
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index d9177576..0e5088f9 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -72,7 +72,10 @@ struct LoadWorkerMemory {
 
   /// Get the pointer to the start of the memory use for computations.
   /// \returns the pointer to the memory.
-  [[nodiscard]] auto getMemoryAddress() -> auto{ return reinterpret_cast<double*>(&DoNotUseAddrMem); }
+  [[nodiscard]] auto getMemoryAddress() -> auto{
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+    return reinterpret_cast<double*>(&DoNotUseAddrMem);
+  }
 
   /// Get the offset to the memory which is used by the high-load functions
   /// \returns the offset to the memory
diff --git a/src/firestarter/Environment/CPUTopology.cpp b/src/firestarter/Environment/CPUTopology.cpp
index b37b2900..b0fabc52 100644
--- a/src/firestarter/Environment/CPUTopology.cpp
+++ b/src/firestarter/Environment/CPUTopology.cpp
@@ -222,7 +222,7 @@ CPUTopology::CPUTopology(std::string Architecture)
       firestarter::log::warn() << "Can't determine clockrate from /proc/cpuinfo";
     } else {
       firestarter::log::trace() << "Clockrate from /proc/cpuinfo is " << ClockrateStr;
-      Clockrate = 1e6 * std::stoi(ClockrateStr);
+      Clockrate = static_cast<uint64_t>(1000000U) * std::stoi(ClockrateStr);
     }
 
     auto Governor = scalingGovernor();
@@ -251,7 +251,7 @@ CPUTopology::CPUTopology(std::string Architecture)
         }
       }
 
-      Clockrate = 1e3 * std::stoi(ClockrateStr);
+      Clockrate = static_cast<uint64_t>(1000U) * std::stoi(ClockrateStr);
     }
   }
 #endif
@@ -327,7 +327,7 @@ auto CPUTopology::scalingGovernor() -> std::string {
   return getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor").str();
 }
 
-auto CPUTopology::getCoreIdFromPU(unsigned Pu) const -> int {
+auto CPUTopology::getCoreIdFromPU(unsigned Pu) const -> std::optional<unsigned> {
   auto Width = hwloc_get_nbobjs_by_type(Topology, HWLOC_OBJ_PU);
 
   if (Width >= 1) {
@@ -343,10 +343,10 @@ auto CPUTopology::getCoreIdFromPU(unsigned Pu) const -> int {
     }
   }
 
-  return -1;
+  return {};
 }
 
-auto CPUTopology::getPkgIdFromPU(unsigned Pu) const -> int {
+auto CPUTopology::getPkgIdFromPU(unsigned Pu) const -> std::optional<unsigned> {
   auto Width = hwloc_get_nbobjs_by_type(Topology, HWLOC_OBJ_PU);
 
   if (Width >= 1) {
@@ -362,7 +362,7 @@ auto CPUTopology::getPkgIdFromPU(unsigned Pu) const -> int {
     }
   }
 
-  return -1;
+  return {};
 }
 
 auto CPUTopology::maxNumThreads() const -> unsigned {
diff --git a/src/firestarter/Environment/Environment.cpp b/src/firestarter/Environment/Environment.cpp
index c30e0fb1..fec4ed48 100644
--- a/src/firestarter/Environment/Environment.cpp
+++ b/src/firestarter/Environment/Environment.cpp
@@ -194,8 +194,9 @@ void Environment::printThreadSummary() {
     const auto CoreId = topology().getCoreIdFromPU(Bind);
     const auto PkgId = topology().getPkgIdFromPU(Bind);
 
-    if (CoreId != -1 && PkgId != -1) {
-      log::info() << "    - Thread " << I << " run on CPU " << Bind << ", core " << CoreId << " in package: " << PkgId;
+    if (CoreId && PkgId) {
+      log::info() << "    - Thread " << I << " run on CPU " << Bind << ", core " << *CoreId
+                  << " in package: " << *PkgId;
       PrintCoreIdInfo = true;
     }
 

From 6f52af8d12a5208442f4b0e8000cda54620d5ec5 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 27 Oct 2024 11:51:08 +0100
Subject: [PATCH 116/167] Perf: fix warnings

---
 .../firestarter/Measurement/Metric/Perf.hpp   |  2 +-
 src/firestarter/Measurement/Metric/Perf.cpp   | 78 +++++++++++--------
 2 files changed, 47 insertions(+), 33 deletions(-)

diff --git a/include/firestarter/Measurement/Metric/Perf.hpp b/include/firestarter/Measurement/Metric/Perf.hpp
index 4fdc943a..2dd9b76d 100644
--- a/include/firestarter/Measurement/Metric/Perf.hpp
+++ b/include/firestarter/Measurement/Metric/Perf.hpp
@@ -45,7 +45,7 @@ struct PerfMetricData {
   inline static struct ReadFormat Last;
   static auto fini() -> int32_t;
   static auto init() -> int32_t;
-  static auto valueFromId(struct ReadFormat* Values, uint64_t Id) -> uint64_t;
+  static auto valueFromId(struct ReadFormat* Reader, uint64_t Id) -> uint64_t;
   static auto getReading(double* IpcValue, double* FreqValue) -> int32_t;
   static auto getReadingIpc(double* Value) -> int32_t;
   static auto getReadingFreq(double* Value) -> int32_t;
diff --git a/src/firestarter/Measurement/Metric/Perf.cpp b/src/firestarter/Measurement/Metric/Perf.cpp
index 6cac1708..3240d969 100644
--- a/src/firestarter/Measurement/Metric/Perf.cpp
+++ b/src/firestarter/Measurement/Metric/Perf.cpp
@@ -19,6 +19,8 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <array>
+#include <cassert>
 #include <cstring>
 #include <string>
 
@@ -31,10 +33,12 @@ extern "C" {
 #include <unistd.h>
 }
 
-static auto perfEventOpen(struct perf_event_attr* HwEvent, pid_t Pid, int Cpu, int GroupFd, unsigned long Flags)
-    -> long {
-  return syscall(__NR_perf_event_open, HwEvent, Pid, Cpu, GroupFd, Flags);
+namespace {
+auto perfEventOpen(struct perf_event_attr* HwEvent, pid_t Pid, int Cpu, int GroupFd, uint64_t Flags) -> int {
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
+  return static_cast<int>(syscall(__NR_perf_event_open, HwEvent, Pid, Cpu, GroupFd, Flags));
 }
+} // namespace
 
 auto PerfMetricData::fini() -> int32_t {
   if (!(CpuCyclesFd < 0)) {
@@ -95,16 +99,18 @@ auto PerfMetricData::init() -> int32_t {
   CpuCyclesAttr.exclude_kernel = 1;
   CpuCyclesAttr.exclude_hv = 1;
 
-  if ((CpuCyclesFd = perfEventOpen(&CpuCyclesAttr,
-                                   // pid == 0 and cpu == -1
-                                   // This measures the calling process/thread on any CPU.
-                                   0, -1,
-                                   // The group_fd argument allows event groups to be created.  An event
-                                   // group has one event which is the group leader.  The leader is
-                                   // created first, with group_fd = -1.  The rest of the group members
-                                   // are created with subsequent perf_event_open() calls with group_fd
-                                   // being set to the file descriptor of the group leader.
-                                   -1, 0)) < 0) {
+  CpuCyclesFd = perfEventOpen(&CpuCyclesAttr,
+                              // pid == 0 and cpu == -1
+                              // This measures the calling process/thread on any CPU.
+                              0, -1,
+                              // The group_fd argument allows event groups to be created.  An event
+                              // group has one event which is the group leader.  The leader is
+                              // created first, with group_fd = -1.  The rest of the group members
+                              // are created with subsequent perf_event_open() calls with group_fd
+                              // being set to the file descriptor of the group leader.
+                              -1, 0);
+
+  if (CpuCyclesFd < 0) {
     fini();
     ErrorString = "perf_event_open failed for PERF_COUNT_HW_CPU_CYCLES";
     InitValue = EXIT_FAILURE;
@@ -112,6 +118,7 @@ auto PerfMetricData::init() -> int32_t {
     return EXIT_FAILURE;
   }
 
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
   ioctl(CpuCyclesFd, PERF_EVENT_IOC_ID, &CpuCyclesId);
 
   struct perf_event_attr InstructionsAttr {};
@@ -124,16 +131,18 @@ auto PerfMetricData::init() -> int32_t {
   InstructionsAttr.exclude_kernel = 1;
   InstructionsAttr.exclude_hv = 1;
 
-  if ((InstructionsFd = perfEventOpen(&InstructionsAttr,
-                                      // pid == 0 and cpu == -1
-                                      // This measures the calling process/thread on any CPU.
-                                      0, -1,
-                                      // The group_fd argument allows event groups to be created.  An event
-                                      // group has one event which is the group leader.  The leader is
-                                      // created first, with group_fd = -1.  The rest of the group members
-                                      // are created with subsequent perf_event_open() calls with group_fd
-                                      // being set to the file descriptor of the group leader.
-                                      CpuCyclesFd, 0)) < 0) {
+  InstructionsFd = perfEventOpen(&InstructionsAttr,
+                                 // pid == 0 and cpu == -1
+                                 // This measures the calling process/thread on any CPU.
+                                 0, -1,
+                                 // The group_fd argument allows event groups to be created.  An event
+                                 // group has one event which is the group leader.  The leader is
+                                 // created first, with group_fd = -1.  The rest of the group members
+                                 // are created with subsequent perf_event_open() calls with group_fd
+                                 // being set to the file descriptor of the group leader.
+                                 CpuCyclesFd, 0);
+
+  if (InstructionsFd < 0) {
     fini();
     ErrorString = "perf_event_open failed for PERF_COUNT_HW_INSTRUCTIONS";
     InitValue = EXIT_FAILURE;
@@ -141,9 +150,12 @@ auto PerfMetricData::init() -> int32_t {
     return EXIT_FAILURE;
   }
 
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
   ioctl(InstructionsFd, PERF_EVENT_IOC_ID, &InstructionsId);
 
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
   ioctl(CpuCyclesFd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
   ioctl(CpuCyclesFd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
 
   if (0 == read(CpuCyclesFd, &Last, sizeof(Last))) {
@@ -159,11 +171,14 @@ auto PerfMetricData::init() -> int32_t {
   return EXIT_SUCCESS;
 }
 
-auto PerfMetricData::valueFromId(struct ReadFormat* Values, uint64_t Id) -> uint64_t {
-  for (decltype(Values->Nr) I = 0; I < Values->Nr; ++I) {
-    if (Id == Values->Values[I].Id) {
-      return Values->Values[I].Value;
+auto PerfMetricData::valueFromId(struct ReadFormat* Reader, uint64_t Id) -> uint64_t {
+  for (decltype(Reader->Nr) I = 0; I < Reader->Nr; ++I) {
+    assert(I < 2 && "Index is out of bounds");
+    // NOLINTBEGIN(cppcoreguidelines-pro-bounds-constant-array-index)
+    if (Id == Reader->Values[I].Id) {
+      return Reader->Values[I].Value;
     }
+    // NOLINTEND(cppcoreguidelines-pro-bounds-constant-array-index)
   }
 
   return 0;
@@ -185,17 +200,16 @@ auto PerfMetricData::getReading(double* IpcValue, double* FreqValue) -> int32_t
   }
 
   if (IpcValue != nullptr) {
-    uint64_t Diff[2];
-    Diff[0] = valueFromId(&ReadValues, InstructionsId) - valueFromId(&Last, InstructionsId);
-    Diff[1] = valueFromId(&ReadValues, CpuCyclesId) - valueFromId(&Last, CpuCyclesId);
+    std::array<uint64_t, 2> Diff = {valueFromId(&ReadValues, InstructionsId) - valueFromId(&Last, InstructionsId),
+                                    valueFromId(&ReadValues, CpuCyclesId) - valueFromId(&Last, CpuCyclesId)};
 
     std::memcpy(&Last, &ReadValues, sizeof(Last));
 
-    *IpcValue = (double)Diff[0] / (double)Diff[1];
+    *IpcValue = static_cast<double>(Diff[0]) / static_cast<double>(Diff[1]);
   }
 
   if (FreqValue != nullptr) {
-    *FreqValue = (double)valueFromId(&ReadValues, CpuCyclesId) / 1e9;
+    *FreqValue = static_cast<double>(valueFromId(&ReadValues, CpuCyclesId)) / 1e9;
   }
 
   return EXIT_SUCCESS;

From c05f47bd4d666712e9555c7fc589b85b18a5d60d Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 27 Oct 2024 12:21:18 +0100
Subject: [PATCH 117/167] rapl: fix warnings

---
 .../firestarter/Measurement/Metric/Perf.hpp   |  2 +-
 .../firestarter/Measurement/Metric/RAPL.hpp   | 22 +++++----
 src/firestarter/Measurement/Metric/RAPL.cpp   | 47 +++----------------
 3 files changed, 20 insertions(+), 51 deletions(-)

diff --git a/include/firestarter/Measurement/Metric/Perf.hpp b/include/firestarter/Measurement/Metric/Perf.hpp
index 2dd9b76d..9d65b94e 100644
--- a/include/firestarter/Measurement/Metric/Perf.hpp
+++ b/include/firestarter/Measurement/Metric/Perf.hpp
@@ -25,7 +25,7 @@
 #include <string>
 
 struct PerfMetricData {
-  inline static const char* PerfEventParanoidFile = "/proc/sys/kernel/perf_event_paranoid";
+  inline static const char* const PerfEventParanoidFile = "/proc/sys/kernel/perf_event_paranoid";
 
   struct ReadFormat {
     uint64_t Nr;
diff --git a/include/firestarter/Measurement/Metric/RAPL.hpp b/include/firestarter/Measurement/Metric/RAPL.hpp
index 4e0d4781..e16a731b 100644
--- a/include/firestarter/Measurement/Metric/RAPL.hpp
+++ b/include/firestarter/Measurement/Metric/RAPL.hpp
@@ -27,22 +27,26 @@
 #include <vector>
 
 struct RaplMetricData {
-  inline static const char* RaplPath = "/sys/class/powercap";
+  inline static const char* const RaplPath = "/sys/class/powercap";
 
   inline static std::string ErrorString;
 
   struct ReaderDef {
-    char* Path;
-    long long int LastReading;
-    long long int Overflow;
-    long long int Max;
-  };
+    ReaderDef() = delete;
+
+    ReaderDef(std::string Path, int64_t LastReading, int64_t Overflow, int64_t Max)
+        : Path(std::move(Path))
+        , LastReading(LastReading)
+        , Overflow(Overflow)
+        , Max(Max){};
 
-  struct ReaderDefFree {
-    void operator()(struct ReaderDef* Def);
+    std::string Path;
+    int64_t LastReading;
+    int64_t Overflow;
+    int64_t Max;
   };
 
-  inline static std::vector<std::shared_ptr<struct ReaderDef>> Readers;
+  inline static std::vector<std::unique_ptr<ReaderDef>> Readers;
 
   static auto fini() -> int32_t;
   static auto init() -> int32_t;
diff --git a/src/firestarter/Measurement/Metric/RAPL.cpp b/src/firestarter/Measurement/Metric/RAPL.cpp
index a33b7453..a05f8a4c 100644
--- a/src/firestarter/Measurement/Metric/RAPL.cpp
+++ b/src/firestarter/Measurement/Metric/RAPL.cpp
@@ -25,21 +25,13 @@
 #include <fstream>
 #include <memory>
 #include <sstream>
+#include <string>
 #include <vector>
 
 extern "C" {
 #include <dirent.h>
 }
 
-void RaplMetricData::ReaderDefFree::operator()(struct ReaderDef* Def) {
-  if (Def != nullptr) {
-    if (((void*)Def->Path) != nullptr) {
-      free((void*)Def->Path);
-    }
-    free((void*)Def);
-  }
-}
-
 auto RaplMetricData::fini() -> int32_t {
   Readers.clear();
 
@@ -121,43 +113,17 @@ auto RaplMetricData::init() -> int32_t {
       break;
     }
 
-    uint64_t Reading = 0;
-    uint64_t Max = 0;
     std::string Buffer;
-    int Read = 0;
 
     std::getline(EnergyReadingStream, Buffer);
-    Read = std::sscanf(Buffer.c_str(), "%lu", &Reading);
-
-    if (Read == 0) {
-      std::stringstream Ss;
-      Ss << "Contents in file " << EnergyUjPath.str() << " do not conform to mask (uint64_t)";
-      ErrorString = Ss.str();
-      break;
-    }
+    const auto Reading = std::stoul(Buffer);
 
     std::getline(MaxEnergyReadingStream, Buffer);
-    Read = std::sscanf(Buffer.c_str(), "%lu", &Max);
+    const auto Max = std::stoul(Buffer);
 
-    if (Read == 0) {
-      std::stringstream Ss;
-      Ss << "Contents in file " << MaxEnergyUjRangePath.str() << " do not conform to mask (uint64_t)";
-      ErrorString = Ss.str();
-      break;
-    }
-
-    std::shared_ptr<struct ReaderDef> Def(static_cast<struct ReaderDef*>(malloc(sizeof(struct ReaderDef))),
-                                          ReaderDefFree());
-    const auto* PathName = Path.c_str();
-    size_t Size = (strlen(PathName) + 1) * sizeof(char);
-    void* Name = malloc(Size);
-    memcpy(Name, PathName, Size);
-    Def->Path = (char*)Name;
-    Def->Max = Max;
-    Def->LastReading = Reading;
-    Def->Overflow = 0;
+    auto Def = std::make_unique<ReaderDef>(Path, /*LastReading=*/Reading, /*Overflow=*/0, /*Max=*/Max);
 
-    Readers.push_back(Def);
+    Readers.emplace_back(std::move(Def));
   }
 
   if (!ErrorString.empty()) {
@@ -172,14 +138,13 @@ auto RaplMetricData::getReading(double* Value) -> int32_t {
   double FinalReading = 0.0;
 
   for (auto& Def : Readers) {
-    long long int Reading = 0;
     std::string Buffer;
 
     std::stringstream EnergyUjPath;
     EnergyUjPath << Def->Path << "/energy_uj";
     std::ifstream EnergyReadingStream(EnergyUjPath.str());
     std::getline(EnergyReadingStream, Buffer);
-    std::sscanf(Buffer.c_str(), "%llu", &Reading);
+    const auto Reading = std::stoll(Buffer);
 
     if (Reading < Def->LastReading) {
       Def->Overflow += 1;

From aac47aa82e19dc256511a3303531d3de715e6228 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 27 Oct 2024 12:44:26 +0100
Subject: [PATCH 118/167] DumpRegisterStruct: fix warnings

---
 include/firestarter/DumpRegisterStruct.hpp | 7 +++++--
 src/firestarter/DumpRegisterWorker.cpp     | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/firestarter/DumpRegisterStruct.hpp b/include/firestarter/DumpRegisterStruct.hpp
index bddc538a..44fe2244 100644
--- a/include/firestarter/DumpRegisterStruct.hpp
+++ b/include/firestarter/DumpRegisterStruct.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include "firestarter/Constants.hpp"
+#include <array>
 
 namespace firestarter {
 
@@ -36,12 +37,14 @@ enum class DumpVariable : EightBytesType { Start = 0, Wait = 1 };
 constexpr const auto RegisterMaxNum = 32;
 /// The maximal number of doubles in SIMD registers. This is currently 8 for zmm registers.
 constexpr const auto RegisterMaxSize = 8;
+/// The maximum number of doubles in SIMD registers multiplied with the maximum number of vector registers.
+constexpr const auto MaxNumberOfDoublesInVectorRegisters = RegisterMaxNum * RegisterMaxSize;
 
 // REGISTER_MAX_NUM cachelines
 struct DumpRegisterStruct {
-  volatile double RegisterValues[RegisterMaxNum * RegisterMaxSize];
+  std::array<volatile double, MaxNumberOfDoublesInVectorRegisters> RegisterValues;
   // pad to use a whole cacheline
-  volatile EightBytesType Padding[7];
+  std::array<volatile EightBytesType, 7> Padding;
   volatile DumpVariable DumpVar;
 };
 
diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp
index 7b3d935b..2966ec50 100644
--- a/src/firestarter/DumpRegisterWorker.cpp
+++ b/src/firestarter/DumpRegisterWorker.cpp
@@ -78,7 +78,7 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Dat
   auto& DumpRegisterStructRef = Data->LoadWorkerDataPtr->Memory->ExtraVars.Drs;
   auto& DumpVar = DumpRegisterStructRef.DumpVar;
   // memory of simd variables is before the padding
-  const auto* DumpMemAddr = static_cast<volatile uint64_t*>(DumpRegisterStructRef.Padding) - Offset;
+  const auto* DumpMemAddr = static_cast<volatile uint64_t*>(DumpRegisterStructRef.Padding.data()) - Offset;
 
   // allocate continous memory that fits the register contents
   auto Last = std::vector<uint64_t>(Offset);

From ef9052524b802ac36353b62ebb073e39505bb460 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 27 Oct 2024 18:14:18 +0100
Subject: [PATCH 119/167] use shared ptr for payload. less clone(). make
 functions in payload private/protected.

---
 .../Environment/Payload/Payload.hpp           | 63 +++++++++++++----
 .../Environment/Payload/PayloadStats.hpp      | 33 +++++++++
 .../Environment/Platform/PlatformConfig.hpp   |  4 +-
 .../Environment/Platform/RuntimeConfig.hpp    | 43 +++++-------
 .../Environment/X86/Payload/AVX512Payload.hpp | 12 ++--
 .../Environment/X86/Payload/AVXPayload.hpp    | 12 ++--
 .../X86/Payload/CompiledX86Payload.hpp        | 63 +++++++++++++++++
 .../Environment/X86/Payload/FMA4Payload.hpp   | 12 ++--
 .../Environment/X86/Payload/FMAPayload.hpp    | 12 ++--
 .../Environment/X86/Payload/SSE2Payload.hpp   | 12 ++--
 .../Environment/X86/Payload/X86Payload.hpp    | 24 +++----
 .../Environment/X86/Payload/ZENFMAPayload.hpp | 12 ++--
 include/firestarter/LoadWorkerData.hpp        |  3 +
 .../Environment/Payload/Payload.cpp           |  6 ++
 .../Environment/X86/Payload/AVX512Payload.cpp | 47 +++++--------
 .../Environment/X86/Payload/AVXPayload.cpp    | 69 ++++++++----------
 .../Environment/X86/Payload/FMA4Payload.cpp   | 47 +++++--------
 .../Environment/X86/Payload/FMAPayload.cpp    | 47 +++++--------
 .../Environment/X86/Payload/SSE2Payload.cpp   | 70 ++++++++-----------
 .../Environment/X86/Payload/X86Payload.cpp    |  7 +-
 .../Environment/X86/Payload/ZENFMAPayload.cpp | 47 +++++--------
 src/firestarter/Firestarter.cpp               |  2 +-
 src/firestarter/LoadWorker.cpp                | 41 +++++------
 23 files changed, 386 insertions(+), 302 deletions(-)
 create mode 100644 include/firestarter/Environment/Payload/PayloadStats.hpp
 create mode 100644 include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp

diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index db9c5ec7..7bc4cafa 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -23,6 +23,7 @@
 
 #include "firestarter/Constants.hpp"
 #include "firestarter/Environment/CPUTopology.hpp"
+#include "firestarter/Environment/Payload/PayloadStats.hpp"
 #include <chrono>
 #include <list>
 #include <memory>
@@ -32,6 +33,46 @@
 
 namespace firestarter::environment::payload {
 
+class Payload;
+
+class CompiledPayload {
+public:
+  CompiledPayload() = delete;
+  virtual ~CompiledPayload() = default;
+
+  using UniquePtr = std::unique_ptr<CompiledPayload, void (*)(CompiledPayload*)>;
+
+  using HighLoadFunctionPtr = uint64_t (*)(double*, volatile LoadThreadWorkType*, uint64_t);
+
+  CompiledPayload(const PayloadStats& Stats, std::unique_ptr<Payload>&& PayloadPtr,
+                  HighLoadFunctionPtr HighLoadFunction)
+      : Stats(Stats)
+      , PayloadPtr(std::move(PayloadPtr))
+      , HighLoadFunction(HighLoadFunction) {}
+
+  [[nodiscard]] auto stats() const -> const PayloadStats& { return Stats; };
+
+  void init(double* MemoryAddr, uint64_t BufferSize);
+
+  void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period);
+
+  [[nodiscard]] auto highLoadFunction(double* AddrMem, volatile LoadThreadWorkType& LoadVar, uint64_t Iterations)
+      -> uint64_t {
+    return HighLoadFunction(AddrMem, &LoadVar, Iterations);
+  }
+
+protected:
+  // We need to access this pointer directly to free the associated memory from asmjit
+  [[nodiscard]] auto highLoadFunctionPtr() -> HighLoadFunctionPtr { return HighLoadFunction; }
+
+private:
+  PayloadStats Stats;
+
+  std::unique_ptr<Payload> PayloadPtr;
+
+  HighLoadFunctionPtr HighLoadFunction;
+};
+
 class Payload {
 private:
   std::string Name;
@@ -44,11 +85,6 @@ class Payload {
   unsigned RegisterCount = 0;
 
 protected:
-  unsigned Flops = 0;
-  unsigned Bytes = 0;
-  // number of instructions in load loop
-  unsigned Instructions = 0;
-
   [[nodiscard]] static auto generateSequence(const std::vector<std::pair<std::string, unsigned>>& Proportion)
       -> std::vector<std::string>;
   [[nodiscard]] static auto getL2SequenceCount(const std::vector<std::string>& Sequence) -> unsigned {
@@ -76,6 +112,10 @@ class Payload {
   [[nodiscard]] static auto getRAMLoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
                                             unsigned Size, unsigned Threads) -> unsigned;
 
+  virtual void init(double* MemoryAddr, uint64_t BufferSize) const = 0;
+
+  virtual void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) const = 0;
+
 public:
   Payload() = delete;
 
@@ -85,10 +125,10 @@ class Payload {
       , RegisterCount(RegisterCount) {}
   virtual ~Payload() = default;
 
+  friend void CompiledPayload::init(double*, uint64_t);
+  friend void CompiledPayload::lowLoadFunction(volatile LoadThreadWorkType&, std::chrono::microseconds);
+
   [[nodiscard]] auto name() const -> const std::string& { return Name; }
-  [[nodiscard]] auto flops() const -> unsigned { return Flops; }
-  [[nodiscard]] auto bytes() const -> unsigned { return Bytes; }
-  [[nodiscard]] auto instructions() const -> unsigned { return Instructions; }
   /// The size of the SIMD registers in units of doubles (8B)
   [[nodiscard]] auto registerSize() const -> unsigned { return RegisterSize; }
   /// The number of SIMD registers used by the payload
@@ -96,17 +136,12 @@ class Payload {
 
   [[nodiscard]] virtual auto isAvailable(const CPUTopology*) const -> bool = 0;
 
-  virtual void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) = 0;
-
   [[nodiscard]] virtual auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
                                             unsigned InstructionCacheSize,
                                             std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize,
                                             unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
-                                            bool ErrorDetection) -> int = 0;
+                                            bool ErrorDetection) const -> CompiledPayload::UniquePtr = 0;
   [[nodiscard]] virtual auto getAvailableInstructions() const -> std::list<std::string> = 0;
-  virtual void init(double* MemoryAddr, uint64_t BufferSize) = 0;
-  [[nodiscard]] virtual auto highLoadFunction(double* AddrMem, volatile LoadThreadWorkType& LoadVar,
-                                              uint64_t Iterations) -> uint64_t = 0;
 
   [[nodiscard]] virtual auto clone() const -> std::unique_ptr<Payload> = 0;
 };
diff --git a/include/firestarter/Environment/Payload/PayloadStats.hpp b/include/firestarter/Environment/Payload/PayloadStats.hpp
new file mode 100644
index 00000000..5bd79bcc
--- /dev/null
+++ b/include/firestarter/Environment/Payload/PayloadStats.hpp
@@ -0,0 +1,33 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2024 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+#pragma once
+
+namespace firestarter::environment::payload {
+
+struct PayloadStats {
+  unsigned Flops = 0;
+  unsigned Bytes = 0;
+  // number of instructions in load loop
+  unsigned Instructions = 0;
+};
+
+} // namespace firestarter::environment::payload
diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp
index f0213036..f0a9c0ae 100644
--- a/include/firestarter/Environment/Platform/PlatformConfig.hpp
+++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp
@@ -35,7 +35,7 @@ class PlatformConfig {
 private:
   std::string Name;
   std::list<unsigned> Threads;
-  std::unique_ptr<payload::Payload> Payload;
+  std::shared_ptr<payload::Payload> Payload;
   unsigned InstructionCacheSize;
   std::list<unsigned> DataCacheBufferSize;
   unsigned RamBufferSize;
@@ -46,7 +46,7 @@ class PlatformConfig {
 
   PlatformConfig(std::string Name, std::list<unsigned> Threads, unsigned InstructionCacheSize,
                  std::initializer_list<unsigned> DataCacheBufferSize, unsigned RamBufferSize, unsigned Lines,
-                 std::unique_ptr<payload::Payload>&& Payload)
+                 std::shared_ptr<payload::Payload>&& Payload)
       : Name(std::move(Name))
       , Threads(std::move(Threads))
       , Payload(std::move(Payload))
diff --git a/include/firestarter/Environment/Platform/RuntimeConfig.hpp b/include/firestarter/Environment/Platform/RuntimeConfig.hpp
index 46c3a142..8c3fdc88 100644
--- a/include/firestarter/Environment/Platform/RuntimeConfig.hpp
+++ b/include/firestarter/Environment/Platform/RuntimeConfig.hpp
@@ -29,8 +29,7 @@ namespace firestarter::environment::platform {
 
 class RuntimeConfig {
 private:
-  PlatformConfig const& PlatformConfigValue;
-  std::unique_ptr<payload::Payload> Payload;
+  PlatformConfig const& PlatformConfigRef;
   unsigned Thread;
   std::vector<std::pair<std::string, unsigned>> PayloadSettings;
   unsigned InstructionCacheSize;
@@ -39,37 +38,33 @@ class RuntimeConfig {
   unsigned Lines;
 
 public:
-  RuntimeConfig(PlatformConfig const& PlatformConfigValue, unsigned Thread, unsigned DetectedInstructionCacheSize)
-      : PlatformConfigValue(PlatformConfigValue)
-      , Payload(nullptr)
+  RuntimeConfig(PlatformConfig const& PlatformConfigRef, unsigned Thread, unsigned DetectedInstructionCacheSize)
+      : PlatformConfigRef(PlatformConfigRef)
       , Thread(Thread)
-      , PayloadSettings(PlatformConfigValue.getDefaultPayloadSettings())
-      , InstructionCacheSize(PlatformConfigValue.instructionCacheSize())
-      , DataCacheBufferSize(PlatformConfigValue.dataCacheBufferSize())
-      , RamBufferSize(PlatformConfigValue.ramBufferSize())
-      , Lines(PlatformConfigValue.lines()) {
+      , PayloadSettings(PlatformConfigRef.getDefaultPayloadSettings())
+      , InstructionCacheSize(PlatformConfigRef.instructionCacheSize())
+      , DataCacheBufferSize(PlatformConfigRef.dataCacheBufferSize())
+      , RamBufferSize(PlatformConfigRef.ramBufferSize())
+      , Lines(PlatformConfigRef.lines()) {
     if (DetectedInstructionCacheSize != 0) {
       this->InstructionCacheSize = DetectedInstructionCacheSize;
     }
   };
 
-  RuntimeConfig(const RuntimeConfig& Other)
-      : PlatformConfigValue(Other.platformConfig())
-      , Payload(Other.platformConfig().payload().clone())
-      , Thread(Other.thread())
-      , PayloadSettings(Other.payloadSettings())
-      , InstructionCacheSize(Other.instructionCacheSize())
-      , DataCacheBufferSize(Other.dataCacheBufferSize())
-      , RamBufferSize(Other.ramBufferSize())
-      , Lines(Other.lines()) {}
+  // RuntimeConfig(const RuntimeConfig& Other)
+  //     : PlatformConfigRef(Other.platformConfig())
+  //     , Payload(Other.platformConfig().payload().clone())
+  //     , Thread(Other.thread())
+  //     , PayloadSettings(Other.payloadSettings())
+  //     , InstructionCacheSize(Other.instructionCacheSize())
+  //     , DataCacheBufferSize(Other.dataCacheBufferSize())
+  //     , RamBufferSize(Other.ramBufferSize())
+  //     , Lines(Other.lines()) {}
 
   ~RuntimeConfig() = default;
 
-  [[nodiscard]] auto platformConfig() const -> PlatformConfig const& { return PlatformConfigValue; }
-  [[nodiscard]] auto payload() const -> payload::Payload& {
-    assert(Payload != nullptr && "Payload pointer is null. Each thread has to use it's own RuntimeConfig");
-    return *Payload;
-  }
+  [[nodiscard]] auto platformConfig() const -> PlatformConfig const& { return PlatformConfigRef; }
+  [[nodiscard]] auto payload() const -> const payload::Payload& { return PlatformConfigRef.payload(); }
   [[nodiscard]] auto thread() const -> unsigned { return Thread; }
   [[nodiscard]] auto payloadSettings() const -> const std::vector<std::pair<std::string, unsigned>>& {
     return PayloadSettings;
diff --git a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
index 5e845ed6..00b74107 100644
--- a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
@@ -29,17 +29,21 @@ class AVX512Payload final : public X86Payload {
   AVX512Payload()
       : X86Payload({asmjit::CpuFeatures::X86::kAVX512_F}, "AVX512", 8, 32) {}
 
-  auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
-                      std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
-                      unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
+  [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
+                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
+                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+                                    bool ErrorDetection) const
+      -> environment::payload::CompiledPayload::UniquePtr override;
+
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-  void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
     return std::make_unique<AVX512Payload>();
   };
 
 private:
+  void init(double* MemoryAddr, uint64_t BufferSize) const override;
+
   const std::map<std::string, unsigned> InstructionFlops = {
       {"REG", 32},   {"L1_L", 32},  {"L1_BROADCAST", 16}, {"L1_S", 16}, {"L1_LS", 16}, {"L2_L", 32},
       {"L2_S", 16},  {"L2_LS", 16}, {"L3_L", 32},         {"L3_S", 16}, {"L3_LS", 16}, {"L3_P", 16},
diff --git a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
index d9daa544..4a300f6a 100644
--- a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
@@ -29,17 +29,21 @@ class AVXPayload final : public X86Payload {
   AVXPayload()
       : X86Payload({asmjit::CpuFeatures::X86::kAVX}, "AVX", 4, 16) {}
 
-  auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
-                      std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
-                      unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
+  [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
+                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
+                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+                                    bool ErrorDetection) const
+      -> environment::payload::CompiledPayload::UniquePtr override;
+
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-  void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
     return std::make_unique<AVXPayload>();
   };
 
 private:
+  void init(double* MemoryAddr, uint64_t BufferSize) const override;
+
   const std::map<std::string, unsigned> InstructionFlops = {
       {"REG", 4},  {"L1_L", 4},  {"L1_S", 4}, {"L1_LS", 4}, {"L2_L", 4},  {"L2_S", 4},   {"L2_LS", 4}, {"L3_L", 4},
       {"L3_S", 4}, {"L3_LS", 4}, {"L3_P", 4}, {"RAM_L", 4}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}};
diff --git a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
new file mode 100644
index 00000000..69a5fe21
--- /dev/null
+++ b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
@@ -0,0 +1,63 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2024 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+#pragma once
+
+#include "asmjit/core/jitruntime.h"
+#include "firestarter/Environment/Payload/Payload.hpp"
+#include "firestarter/Logging/Log.hpp"
+
+namespace firestarter::environment::x86::payload {
+
+class CompiledX86Payload final : public environment::payload::CompiledPayload {
+private:
+  inline static asmjit::JitRuntime Runtime;
+
+  static void deleter(CompiledX86Payload* Payload) {
+    if (Payload && Payload->highLoadFunctionPtr()) {
+      Runtime.release(Payload->highLoadFunctionPtr());
+    }
+  }
+  // unique_ptr deleter: release the JIT-compiled code, then destroy the object itself (it was allocated with new).
+  static void deleter(CompiledPayload* Payload) { deleter(dynamic_cast<CompiledX86Payload*>(Payload)); delete Payload; }
+
+  CompiledX86Payload(const environment::payload::PayloadStats& Stats,
+                     std::unique_ptr<environment::payload::Payload>&& PayloadPtr, HighLoadFunctionPtr HighLoadFunction)
+      : CompiledPayload(Stats, std::move(PayloadPtr), HighLoadFunction) {}
+
+public:
+  CompiledX86Payload() = delete;
+  ~CompiledX86Payload() override = default;
+
+  [[nodiscard]] static auto create(environment::payload::PayloadStats Stats, asmjit::CodeHolder& Code,
+                                   std::unique_ptr<environment::payload::Payload>&& PayloadPtr) -> UniquePtr {
+    HighLoadFunctionPtr HighLoadFunction{};
+    const auto Err = Runtime.add(&HighLoadFunction, &Code);
+    if (Err) {
+      workerLog::error() << "Asmjit adding Assembler to JitRuntime failed";
+      std::exit(EXIT_FAILURE);
+    }
+
+    return {new CompiledX86Payload(Stats, std::move(PayloadPtr), HighLoadFunction), deleter};
+  }
+};
+
+} // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
index 7aa3f208..36a81e1a 100644
--- a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
@@ -30,11 +30,15 @@ class FMA4Payload final : public X86Payload {
   FMA4Payload()
       : X86Payload({asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4}, "FMA4", 4, 16) {}
 
-  auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
-                      std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
-                      unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
+  [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
+                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
+                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+                                    bool ErrorDetection) const
+      -> environment::payload::CompiledPayload::UniquePtr override;
+
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-  void init(double* MemoryAddr, uint64_t BufferSize) override;
+
+  void init(double* MemoryAddr, uint64_t BufferSize) const override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
     return std::make_unique<FMA4Payload>();
diff --git a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
index b5d9f884..b6e7fe6d 100644
--- a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
@@ -29,17 +29,21 @@ class FMAPayload final : public X86Payload {
   FMAPayload()
       : X86Payload({asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA}, "FMA", 4, 16) {}
 
-  auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
-                      std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
-                      unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
+  [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
+                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
+                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+                                    bool ErrorDetection) const
+      -> environment::payload::CompiledPayload::UniquePtr override;
+
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-  void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
     return std::make_unique<FMAPayload>();
   };
 
 private:
+  void init(double* MemoryAddr, uint64_t BufferSize) const override;
+
   const std::map<std::string, unsigned> InstructionFlops = {
       {"REG", 16},        {"L1_L", 16},  {"L1_2L", 16}, {"L1_S", 8},      {"L1_LS", 8},     {"L1_LS_256", 8},
       {"L1_2LS_256", 16}, {"L2_L", 16},  {"L2_S", 8},   {"L2_LS", 8},     {"L2_LS_256", 8}, {"L2_2LS_256", 16},
diff --git a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
index 23a3b1a6..2ade146a 100644
--- a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
@@ -29,17 +29,21 @@ class SSE2Payload final : public X86Payload {
   SSE2Payload()
       : X86Payload({asmjit::CpuFeatures::X86::kSSE2}, "SSE2", 2, 16) {}
 
-  auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
-                      std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
-                      unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
+  [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
+                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
+                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+                                    bool ErrorDetection) const
+      -> environment::payload::CompiledPayload::UniquePtr override;
+
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-  void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
     return std::make_unique<SSE2Payload>();
   };
 
 private:
+  void init(double* MemoryAddr, uint64_t BufferSize) const override;
+
   const std::map<std::string, unsigned> InstructionFlops = {
       {"REG", 2},  {"L1_L", 2},  {"L1_S", 2}, {"L1_LS", 2}, {"L2_L", 2},  {"L2_S", 2},   {"L2_LS", 2}, {"L3_L", 2},
       {"L3_S", 2}, {"L3_LS", 2}, {"L3_P", 2}, {"RAM_L", 2}, {"RAM_S", 2}, {"RAM_LS", 2}, {"RAM_P", 2}};
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index a0955415..2dd12444 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -44,10 +44,6 @@ class X86Payload : public environment::payload::Payload {
   std::list<asmjit::CpuFeatures::X86::Id> FeatureRequests;
 
 protected:
-  asmjit::JitRuntime Rt;
-  using LoadFunctionType = uint64_t (*)(double*, volatile LoadThreadWorkType*, uint64_t);
-  LoadFunctionType LoadFunction = nullptr;
-
   /// Emit the code to dump the xmm, ymm or zmm registers into memory for the dump registers feature.
   /// \arg Vec the type of the vector register used.
   /// \arg Cb The asmjit code builder that is used to emit the assembler code.
@@ -56,7 +52,7 @@ class X86Payload : public environment::payload::Payload {
   /// \arg VecPtr The function that is used to create a ptr to the vector register
   template <class Vec>
   void emitDumpRegisterCode(asmjit::x86::Builder& Cb, const asmjit::x86::Gpq& PointerReg,
-                            asmjit::x86::Mem (*VecPtr)(const asmjit::x86::Gp&, int32_t)) {
+                            asmjit::x86::Mem (*VecPtr)(const asmjit::x86::Gp&, int32_t)) const {
     constexpr const auto DumpRegisterStructRegisterValuesTopOffset =
         -static_cast<int32_t>(LoadWorkerMemory::getMemoryOffset()) +
         static_cast<int32_t>(offsetof(LoadWorkerMemory, ExtraVars.Drs.Padding));
@@ -87,7 +83,7 @@ class X86Payload : public environment::payload::Payload {
   template <class MaybeConstIterRegT, class MaybeConstVectorRegT>
   void emitErrorDetectionCode(asmjit::x86::Builder& Cb, MaybeConstIterRegT& IterReg,
                               const asmjit::x86::Gpq& AddrHighReg, const asmjit::x86::Gpq& PointerReg,
-                              const asmjit::x86::Gpq& TempReg, const asmjit::x86::Gpq& TempReg2) {
+                              const asmjit::x86::Gpq& TempReg, const asmjit::x86::Gpq& TempReg2) const {
     using IterRegT = std::remove_const_t<MaybeConstIterRegT>;
     using VectorRegT = std::remove_const_t<MaybeConstVectorRegT>;
 
@@ -465,12 +461,6 @@ class X86Payload : public environment::payload::Payload {
     Cb.bind(SkipErrorDetection);
   }
 
-public:
-  X86Payload(std::initializer_list<asmjit::CpuFeatures::X86::Id> FeatureRequests, std::string Name,
-             unsigned RegisterSize, unsigned RegisterCount)
-      : Payload(std::move(Name), RegisterSize, RegisterCount)
-      , FeatureRequests(FeatureRequests) {}
-
   // A generic implemenation for all x86 payloads
 #if defined(__clang__)
 #pragma clang diagnostic push
@@ -483,11 +473,15 @@ class X86Payload : public environment::payload::Payload {
 #if defined(__clang__)
 #pragma clang diagnostic pop
 #endif
+
   // use cpuid and usleep as low load
-  void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) override;
+  void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) const final;
 
-  auto highLoadFunction(double* AddrMem, volatile LoadThreadWorkType& LoadVar, uint64_t Iterations)
-      -> uint64_t override;
+public:
+  X86Payload(std::initializer_list<asmjit::CpuFeatures::X86::Id> FeatureRequests, std::string Name,
+             unsigned RegisterSize, unsigned RegisterCount)
+      : Payload(std::move(Name), RegisterSize, RegisterCount)
+      , FeatureRequests(FeatureRequests) {}
 
   [[nodiscard]] auto isAvailable(const X86CPUTopology& Topology) const -> bool { return isAvailable(&Topology); }
 
diff --git a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
index b76b767e..f6a90b4d 100644
--- a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
@@ -29,17 +29,21 @@ class ZENFMAPayload final : public X86Payload {
   ZENFMAPayload()
       : X86Payload({asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA}, "ZENFMA", 4, 16) {}
 
-  auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion, unsigned InstructionCacheSize,
-                      std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread,
-                      unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override;
+  [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
+                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
+                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+                                    bool ErrorDetection) const
+      -> environment::payload::CompiledPayload::UniquePtr override;
+
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-  void init(double* MemoryAddr, uint64_t BufferSize) override;
 
   [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
     return std::make_unique<ZENFMAPayload>();
   };
 
 private:
+  void init(double* MemoryAddr, uint64_t BufferSize) const override;
+
   const std::map<std::string, unsigned> InstructionFlops = {
       {"REG", 8}, {"L1_LS", 8}, {"L2_L", 8}, {"L3_L", 8}, {"RAM_L", 8}};
 
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index 0e5088f9..24daebce 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -159,6 +159,9 @@ class LoadWorkerData {
   /// The memory which is used by the load worker.
   LoadWorkerMemory::UniquePtr Memory = {nullptr, nullptr};
 
+  /// The compiled payload which contains the pointers to the specific functions which are executed and some stats.
+  environment::payload::CompiledPayload::UniquePtr CompiledPayloadPtr = {nullptr, nullptr};
+
   volatile LoadThreadWorkType& LoadVar;
   uint64_t BuffersizeMem{};
 
diff --git a/src/firestarter/Environment/Payload/Payload.cpp b/src/firestarter/Environment/Payload/Payload.cpp
index c7ced50b..29bac51e 100644
--- a/src/firestarter/Environment/Payload/Payload.cpp
+++ b/src/firestarter/Environment/Payload/Payload.cpp
@@ -26,6 +26,12 @@
 
 namespace firestarter::environment::payload {
 
+void CompiledPayload::init(double* MemoryAddr, uint64_t BufferSize) { PayloadPtr->init(MemoryAddr, BufferSize); }
+
+void CompiledPayload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) {
+  PayloadPtr->lowLoadFunction(LoadVar, Period);
+};
+
 auto Payload::getSequenceStartCount(const std::vector<std::string>& Sequence, const std::string& Start) -> unsigned {
   unsigned I = 0;
 
diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index 18d2f2d1..3fcfedf8 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -20,13 +20,14 @@
  *****************************************************************************/
 
 #include <firestarter/Environment/X86/Payload/AVX512Payload.hpp>
+#include <firestarter/Environment/X86/Payload/CompiledX86Payload.hpp>
 
 namespace firestarter::environment::x86::payload {
 
 auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
-                                   bool ErrorDetection) -> int {
+                                   bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr {
   using Imm = asmjit::Imm;
   using Zmm = asmjit::x86::Zmm;
   // NOLINTBEGIN(readability-identifier-naming)
@@ -42,29 +43,28 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
 
   // compute count of flops and memory access for performance report
-  Flops = 0;
-  Bytes = 0;
+  environment::payload::PayloadStats Stats;
 
   for (const auto& Item : Sequence) {
     auto It = InstructionFlops.find(Item);
 
     if (It == InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
-      return EXIT_FAILURE;
+      std::exit(EXIT_FAILURE);
     }
 
-    Flops += It->second;
+    Stats.Flops += It->second;
 
     It = InstructionMemory.find(Item);
 
     if (It != InstructionMemory.end()) {
-      Bytes += It->second;
+      Stats.Bytes += It->second;
     }
   }
 
-  Flops *= Repetitions;
-  Bytes *= Repetitions;
-  Instructions = Repetitions * Sequence.size() * 4 + 6;
+  Stats.Flops *= Repetitions;
+  Stats.Bytes *= Repetitions;
+  Stats.Instructions = Repetitions * Sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
   const auto L1iCacheSize = InstructionCacheSize / Thread;
@@ -82,11 +82,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
   asmjit::CodeHolder Code;
-  Code.init(Rt.environment());
-
-  if (nullptr != LoadFunction) {
-    Rt.release(LoadFunction);
-  }
+  Code.init(asmjit::Environment::host());
 
   asmjit::x86::Builder Cb(&Code);
   Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
@@ -116,7 +112,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   asmjit::FuncDetail Func;
   Func.init(asmjit::FuncSignature::build<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(
                 asmjit::CallConvId::kCDecl),
-            Rt.environment());
+            Code.environment());
 
   asmjit::FuncFrame Frame;
   Frame.init(Func);
@@ -281,7 +277,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
         RamIncrement();
       } else {
         workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
-        return EXIT_FAILURE;
+        std::exit(EXIT_FAILURE);
       }
 
       if (Left) {
@@ -316,7 +312,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     Cb.add(RamAddr, Imm(L3Size));
     Cb.bind(NoRamReset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   Cb.inc(TempReg); // increment iteration counter
   if (getL2SequenceCount(Sequence) > 0) {
@@ -330,7 +326,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     Cb.add(L2Addr, Imm(L1Size));
     Cb.bind(NoL2Reset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   Cb.movq(IterReg, TempReg); // store iteration counter
   if (getL3SequenceCount(Sequence) > 0) {
@@ -344,7 +340,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     Cb.add(L3Addr, Imm(L2Size));
     Cb.bind(NoL3Reset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   Cb.mov(L1Addr, PointerReg);
 
@@ -367,14 +363,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
 
   Cb.finalize();
 
-  // String sb;
-  // cb.dump(sb);
-
-  const auto Err = Rt.add(&LoadFunction, &Code);
-  if (Err) {
-    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
-    return EXIT_FAILURE;
-  }
+  auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code, clone());
 
   // skip if we could not determine cache size
   if (L1iCacheSize != 0) {
@@ -391,7 +380,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     workerLog::trace() << "Repetition count: " << Repetitions;
   }
 
-  return EXIT_SUCCESS;
+  return CompiledPayloadPtr;
 }
 
 auto AVX512Payload::getAvailableInstructions() const -> std::list<std::string> {
@@ -403,7 +392,7 @@ auto AVX512Payload::getAvailableInstructions() const -> std::list<std::string> {
   return Instructions;
 }
 
-void AVX512Payload::init(double* MemoryAddr, uint64_t BufferSize) {
+void AVX512Payload::init(double* MemoryAddr, uint64_t BufferSize) const {
   X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
 
diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
index 74dc8322..e24a40f2 100644
--- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
@@ -20,13 +20,14 @@
  *****************************************************************************/
 
 #include <firestarter/Environment/X86/Payload/AVXPayload.hpp>
+#include <firestarter/Environment/X86/Payload/CompiledX86Payload.hpp>
 
 namespace firestarter::environment::x86::payload {
 
 auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
                                 unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
                                 unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
-                                bool ErrorDetection) -> int {
+                                bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr {
   using Imm = asmjit::Imm;
   using Mm = asmjit::x86::Mm;
   using Xmm = asmjit::x86::Xmm;
@@ -40,29 +41,28 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
 
   // compute count of flops and memory access for performance report
-  Flops = 0;
-  Bytes = 0;
+  environment::payload::PayloadStats Stats;
 
   for (const auto& Item : Sequence) {
     auto It = InstructionFlops.find(Item);
 
     if (It == InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
-      return EXIT_FAILURE;
+      std::exit(EXIT_FAILURE);
     }
 
-    Flops += It->second;
+    Stats.Flops += It->second;
 
     It = InstructionMemory.find(Item);
 
     if (It != InstructionMemory.end()) {
-      Bytes += It->second;
+      Stats.Bytes += It->second;
     }
   }
 
-  Flops *= Repetitions;
-  Bytes *= Repetitions;
-  Instructions = Repetitions * Sequence.size() * 2 + 4;
+  Stats.Flops *= Repetitions;
+  Stats.Bytes *= Repetitions;
+  Stats.Instructions = Repetitions * Sequence.size() * 2 + 4;
 
   // calculate the buffer sizes
   const auto L1iCacheSize = InstructionCacheSize / Thread;
@@ -80,11 +80,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
   asmjit::CodeHolder Code;
-  Code.init(Rt.environment());
-
-  if (nullptr != LoadFunction) {
-    Rt.release(LoadFunction);
-  }
+  Code.init(asmjit::Environment::host());
 
   asmjit::x86::Builder Cb(&Code);
   Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
@@ -110,7 +106,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   asmjit::FuncDetail Func;
   Func.init(asmjit::FuncSignature::build<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(
                 asmjit::CallConvId::kCDecl),
-            Rt.environment());
+            Code.environment());
 
   asmjit::FuncFrame Frame;
   Frame.init(Func);
@@ -243,12 +239,12 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
         Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
         Cb.vmovapd(xmmword_ptr(L1Addr, 32), Xmm(AddDest));
         L1Increment();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "L1_LS") {
         Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L1Addr, 32));
         Cb.vmovapd(xmmword_ptr(L1Addr, 64), Xmm(AddDest));
         L1Increment();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "L2_L") {
         Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L2Addr, 64));
         L2Increment();
@@ -256,12 +252,12 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
         Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
         Cb.vmovapd(xmmword_ptr(L2Addr, 64), Xmm(AddDest));
         L2Increment();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "L2_LS") {
         Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L2Addr, 64));
         Cb.vmovapd(xmmword_ptr(L2Addr, 96), Xmm(AddDest));
         L2Increment();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "L3_L") {
         Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L3Addr, 64));
         L3Increment();
@@ -269,17 +265,17 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
         Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
         Cb.vmovapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest));
         L3Increment();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "L3_LS") {
         Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L3Addr, 64));
         Cb.vmovapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest));
         L3Increment();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "L3_P") {
         Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L1Addr, 32));
         Cb.prefetcht0(ptr(L3Addr));
         L3Increment();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "RAM_L") {
         Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(RamAddr, 64));
         RamIncrement();
@@ -287,24 +283,24 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
         Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
         Cb.vmovapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest));
         RamIncrement();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "RAM_LS") {
         Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L3Addr, 64));
         Cb.vmovapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest));
         RamIncrement();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "RAM_P") {
         Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L1Addr, 32));
         Cb.prefetcht2(ptr(RamAddr));
         RamIncrement();
-        Instructions++;
+        Stats.Instructions++;
       } else {
         workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
-        return EXIT_FAILURE;
+        std::exit(EXIT_FAILURE);
       }
 
       if (ShiftRegs > 1) {
-        Instructions++;
+        Stats.Instructions++;
         if (Left) {
           Cb.psrlw(Mm(ShiftStart + ((ShiftDest - ShiftStart + 3) % ShiftRegs)), Mm(ShiftDest));
         } else {
@@ -347,7 +343,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     Cb.add(RamAddr, Imm(L3Size));
     Cb.bind(NoRamReset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   if (getL2SequenceCount(Sequence) > 0) {
     // reset L2-Cache counter
@@ -360,7 +356,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     Cb.add(L2Addr, Imm(L1Size));
     Cb.bind(NoL2Reset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   if (getL3SequenceCount(Sequence) > 0) {
     // reset L3-Cache counter
@@ -373,7 +369,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     Cb.add(L3Addr, Imm(L2Size));
     Cb.bind(NoL3Reset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   Cb.inc(IterReg); // increment iteration counter
   Cb.mov(L1Addr, PointerReg);
@@ -397,14 +393,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
 
   Cb.finalize();
 
-  // String sb;
-  // cb.dump(sb);
-
-  const auto Err = Rt.add(&LoadFunction, &Code);
-  if (Err) {
-    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
-    return EXIT_FAILURE;
-  }
+  auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code, clone());
 
   // skip if we could not determine cache size
   if (L1iCacheSize != 0) {
@@ -421,7 +410,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     workerLog::trace() << "Repetition count: " << Repetitions;
   }
 
-  return EXIT_SUCCESS;
+  return CompiledPayloadPtr;
 }
 
 auto AVXPayload::getAvailableInstructions() const -> std::list<std::string> {
@@ -433,7 +422,7 @@ auto AVXPayload::getAvailableInstructions() const -> std::list<std::string> {
   return Instructions;
 }
 
-void AVXPayload::init(double* MemoryAddr, uint64_t BufferSize) {
+void AVXPayload::init(double* MemoryAddr, uint64_t BufferSize) const {
   X86Payload::init(MemoryAddr, BufferSize, 1.654738925401e-10, 1.654738925401e-15);
 }
 
diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
index 8c503696..f374dbe4 100644
--- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
@@ -19,6 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <firestarter/Environment/X86/Payload/CompiledX86Payload.hpp>
 #include <firestarter/Environment/X86/Payload/FMA4Payload.hpp>
 
 namespace firestarter::environment::x86::payload {
@@ -26,7 +27,7 @@ namespace firestarter::environment::x86::payload {
 auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
                                  unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
                                  unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
-                                 bool ErrorDetection) -> int {
+                                 bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr {
   using Imm = asmjit::Imm;
   using Xmm = asmjit::x86::Xmm;
   // NOLINTBEGIN(readability-identifier-naming)
@@ -41,29 +42,28 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
 
   // compute count of flops and memory access for performance report
-  Flops = 0;
-  Bytes = 0;
+  environment::payload::PayloadStats Stats;
 
   for (const auto& Item : Sequence) {
     auto It = InstructionFlops.find(Item);
 
     if (It == InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
-      return EXIT_FAILURE;
+      std::exit(EXIT_FAILURE);
     }
 
-    Flops += It->second;
+    Stats.Flops += It->second;
 
     It = InstructionMemory.find(Item);
 
     if (It != InstructionMemory.end()) {
-      Bytes += It->second;
+      Stats.Bytes += It->second;
     }
   }
 
-  Flops *= Repetitions;
-  Bytes *= Repetitions;
-  Instructions = Repetitions * Sequence.size() * 4 + 6;
+  Stats.Flops *= Repetitions;
+  Stats.Bytes *= Repetitions;
+  Stats.Instructions = Repetitions * Sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
   const auto L1iCacheSize = InstructionCacheSize / Thread;
@@ -81,11 +81,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
   asmjit::CodeHolder Code;
-  Code.init(Rt.environment());
-
-  if (nullptr != LoadFunction) {
-    Rt.release(LoadFunction);
-  }
+  Code.init(asmjit::Environment::host());
 
   asmjit::x86::Builder Cb(&Code);
   Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
@@ -115,7 +111,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   asmjit::FuncDetail Func;
   Func.init(asmjit::FuncSignature::build<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(
                 asmjit::CallConvId::kCDecl),
-            Rt.environment());
+            Code.environment());
 
   asmjit::FuncFrame Frame;
   Frame.init(Func);
@@ -279,7 +275,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
         RamIncrement();
       } else {
         workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
-        return EXIT_FAILURE;
+        std::exit(EXIT_FAILURE);
       }
 
       if (Left) {
@@ -318,7 +314,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
     Cb.add(RamAddr, Imm(L3Size));
     Cb.bind(NoRamReset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   Cb.inc(TempReg); // increment iteration counter
   if (getL2SequenceCount(Sequence) > 0) {
@@ -332,7 +328,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
     Cb.add(L2Addr, Imm(L1Size));
     Cb.bind(NoL2Reset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   Cb.movq(IterReg, TempReg); // store iteration counter
   if (getL3SequenceCount(Sequence) > 0) {
@@ -346,7 +342,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
     Cb.add(L3Addr, Imm(L2Size));
     Cb.bind(NoL3Reset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   Cb.mov(L1Addr, PointerReg);
 
@@ -370,14 +366,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
 
   Cb.finalize();
 
-  // String sb;
-  // cb.dump(sb);
-
-  const auto Err = Rt.add(&LoadFunction, &Code);
-  if (Err) {
-    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
-    return EXIT_FAILURE;
-  }
+  auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code, clone());
 
   // skip if we could not determine cache size
   if (L1iCacheSize != 0) {
@@ -394,7 +383,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
     workerLog::trace() << "Repetition count: " << Repetitions;
   }
 
-  return EXIT_SUCCESS;
+  return CompiledPayloadPtr;
 }
 
 auto FMA4Payload::getAvailableInstructions() const -> std::list<std::string> {
@@ -406,7 +395,7 @@ auto FMA4Payload::getAvailableInstructions() const -> std::list<std::string> {
   return Instructions;
 }
 
-void FMA4Payload::init(double* MemoryAddr, uint64_t BufferSize) {
+void FMA4Payload::init(double* MemoryAddr, uint64_t BufferSize) const {
   X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
 
diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
index 8d7951ec..807f8fd8 100644
--- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
@@ -19,6 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <firestarter/Environment/X86/Payload/CompiledX86Payload.hpp>
 #include <firestarter/Environment/X86/Payload/FMAPayload.hpp>
 
 namespace firestarter::environment::x86::payload {
@@ -26,7 +27,7 @@ namespace firestarter::environment::x86::payload {
 auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
                                 unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
                                 unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
-                                bool ErrorDetection) -> int {
+                                bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr {
   using Imm = asmjit::Imm;
   using Xmm = asmjit::x86::Xmm;
   using Ymm = asmjit::x86::Ymm;
@@ -43,29 +44,28 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
 
   // compute count of flops and memory access for performance report
-  Flops = 0;
-  Bytes = 0;
+  environment::payload::PayloadStats Stats;
 
   for (const auto& Item : Sequence) {
     auto It = InstructionFlops.find(Item);
 
     if (It == InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
-      return EXIT_FAILURE;
+      std::exit(EXIT_FAILURE);
     }
 
-    Flops += It->second;
+    Stats.Flops += It->second;
 
     It = InstructionMemory.find(Item);
 
     if (It != InstructionMemory.end()) {
-      Bytes += It->second;
+      Stats.Bytes += It->second;
     }
   }
 
-  Flops *= Repetitions;
-  Bytes *= Repetitions;
-  Instructions = Repetitions * Sequence.size() * 4 + 6;
+  Stats.Flops *= Repetitions;
+  Stats.Bytes *= Repetitions;
+  Stats.Instructions = Repetitions * Sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
   const auto L1iCacheSize = InstructionCacheSize / Thread;
@@ -83,11 +83,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
   asmjit::CodeHolder Code;
-  Code.init(Rt.environment());
-
-  if (nullptr != LoadFunction) {
-    Rt.release(LoadFunction);
-  }
+  Code.init(asmjit::Environment::host());
 
   asmjit::x86::Builder Cb(&Code);
   Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
@@ -117,7 +113,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   asmjit::FuncDetail Func;
   Func.init(asmjit::FuncSignature::build<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(
                 asmjit::CallConvId::kCDecl),
-            Rt.environment());
+            Code.environment());
 
   asmjit::FuncFrame Frame;
   Frame.init(Func);
@@ -313,7 +309,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
         RamIncrement();
       } else {
         workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
-        return EXIT_FAILURE;
+        std::exit(EXIT_FAILURE);
       }
 
       if (Item != "L1_2LS_256" && Item != "L2_2LS_256") {
@@ -354,7 +350,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     Cb.add(RamAddr, Imm(L3Size));
     Cb.bind(NoRamReset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   Cb.inc(TempReg); // increment iteration counter
   if (getL2SequenceCount(Sequence) > 0) {
@@ -368,7 +364,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     Cb.add(L2Addr, Imm(L1Size));
     Cb.bind(NoL2Reset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   Cb.movq(IterReg, TempReg); // store iteration counter
   if (getL3SequenceCount(Sequence) > 0) {
@@ -382,7 +378,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     Cb.add(L3Addr, Imm(L2Size));
     Cb.bind(NoL3Reset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   Cb.mov(L1Addr, PointerReg);
 
@@ -405,14 +401,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
 
   Cb.finalize();
 
-  // String sb;
-  // cb.dump(sb);
-
-  const auto Err = Rt.add(&LoadFunction, &Code);
-  if (Err) {
-    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
-    return EXIT_FAILURE;
-  }
+  auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code, clone());
 
   // skip if we could not determine cache size
   if (L1iCacheSize != 0) {
@@ -429,7 +418,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
     workerLog::trace() << "Repetition count: " << Repetitions;
   }
 
-  return EXIT_SUCCESS;
+  return CompiledPayloadPtr;
 }
 
 auto FMAPayload::getAvailableInstructions() const -> std::list<std::string> {
@@ -441,7 +430,7 @@ auto FMAPayload::getAvailableInstructions() const -> std::list<std::string> {
   return Instructions;
 }
 
-void FMAPayload::init(double* MemoryAddr, uint64_t BufferSize) {
+void FMAPayload::init(double* MemoryAddr, uint64_t BufferSize) const {
   X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
 
diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index 9b782597..834972b4 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -19,6 +19,8 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "asmjit/core/environment.h"
+#include <firestarter/Environment/X86/Payload/CompiledX86Payload.hpp>
 #include <firestarter/Environment/X86/Payload/SSE2Payload.hpp>
 
 namespace firestarter::environment::x86::payload {
@@ -26,7 +28,7 @@ namespace firestarter::environment::x86::payload {
 auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
                                  unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
                                  unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
-                                 bool ErrorDetection) -> int {
+                                 bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr {
   using Imm = asmjit::Imm;
   using Mm = asmjit::x86::Mm;
   using Xmm = asmjit::x86::Xmm;
@@ -39,29 +41,28 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
 
   // compute count of flops and memory access for performance report
-  Flops = 0;
-  Bytes = 0;
+  environment::payload::PayloadStats Stats;
 
   for (const auto& Item : Sequence) {
     auto It = InstructionFlops.find(Item);
 
     if (It == InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
-      return EXIT_FAILURE;
+      std::exit(EXIT_FAILURE);
     }
 
-    Flops += It->second;
+    Stats.Flops += It->second;
 
     It = InstructionMemory.find(Item);
 
     if (It != InstructionMemory.end()) {
-      Bytes += It->second;
+      Stats.Bytes += It->second;
     }
   }
 
-  Flops *= Repetitions;
-  Bytes *= Repetitions;
-  Instructions = Repetitions * Sequence.size() * 2 + 4;
+  Stats.Flops *= Repetitions;
+  Stats.Bytes *= Repetitions;
+  Stats.Instructions = Repetitions * Sequence.size() * 2 + 4;
 
   // calculate the buffer sizes
   const auto L1iCacheSize = InstructionCacheSize / Thread;
@@ -79,11 +80,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
   asmjit::CodeHolder Code;
-  Code.init(Rt.environment());
-
-  if (nullptr != LoadFunction) {
-    Rt.release(LoadFunction);
-  }
+  Code.init(asmjit::Environment::host());
 
   asmjit::x86::Builder Cb(&Code);
   Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
@@ -109,7 +106,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   asmjit::FuncDetail Func;
   Func.init(asmjit::FuncSignature::build<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(
                 asmjit::CallConvId::kCDecl),
-            Rt.environment());
+            Code.environment());
 
   asmjit::FuncFrame Frame;
   Frame.init(Func);
@@ -239,12 +236,12 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
         Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
         Cb.movapd(xmmword_ptr(L1Addr, 32), Xmm(AddDest));
         L1Increment();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "L1_LS") {
         Cb.addpd(Xmm(AddDest), xmmword_ptr(L1Addr, 32));
         Cb.movapd(xmmword_ptr(L1Addr, 64), Xmm(AddDest));
         L1Increment();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "L2_L") {
         Cb.addpd(Xmm(AddDest), xmmword_ptr(L2Addr, 64));
         L2Increment();
@@ -252,12 +249,12 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
         Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
         Cb.movapd(xmmword_ptr(L2Addr, 64), Xmm(AddDest));
         L2Increment();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "L2_LS") {
         Cb.addpd(Xmm(AddDest), xmmword_ptr(L2Addr, 64));
         Cb.movapd(xmmword_ptr(L2Addr, 96), Xmm(AddDest));
         L2Increment();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "L3_L") {
         Cb.addpd(Xmm(AddDest), xmmword_ptr(L3Addr, 64));
         L3Increment();
@@ -265,17 +262,17 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
         Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
         Cb.movapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest));
         L3Increment();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "L3_LS") {
         Cb.addpd(Xmm(AddDest), xmmword_ptr(L3Addr, 64));
         Cb.movapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest));
         L3Increment();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "L3_P") {
         Cb.addpd(Xmm(AddDest), xmmword_ptr(L1Addr, 32));
         Cb.prefetcht0(ptr(L3Addr));
         L3Increment();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "RAM_L") {
         Cb.addpd(Xmm(AddDest), xmmword_ptr(RamAddr, 64));
         RamIncrement();
@@ -283,24 +280,24 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
         Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs)));
         Cb.movapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest));
         RamIncrement();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "RAM_LS") {
         Cb.addpd(Xmm(AddDest), xmmword_ptr(L3Addr, 64));
         Cb.movapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest));
         RamIncrement();
-        Instructions++;
+        Stats.Instructions++;
       } else if (Item == "RAM_P") {
         Cb.addpd(Xmm(AddDest), xmmword_ptr(L1Addr, 32));
         Cb.prefetcht2(ptr(RamAddr));
         RamIncrement();
-        Instructions++;
+        Stats.Instructions++;
       } else {
         workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
-        return EXIT_FAILURE;
+        std::exit(EXIT_FAILURE);
       }
 
       if constexpr (MovRegs > 0) {
-        Instructions++;
+        Stats.Instructions++;
         Cb.movq(Mm(MovStart + ((MovqDest - MovStart + MovRegs - 1) % MovRegs)), Mm(MovqDest));
       }
 
@@ -338,7 +335,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
     Cb.add(RamAddr, Imm(L3Size));
     Cb.bind(NoRamReset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   if (getL2SequenceCount(Sequence) > 0) {
     // reset L2-Cache counter
@@ -351,7 +348,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
     Cb.add(L2Addr, Imm(L1Size));
     Cb.bind(NoL2Reset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   if (getL3SequenceCount(Sequence) > 0) {
     // reset L3-Cache counter
@@ -364,7 +361,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
     Cb.add(L3Addr, Imm(L2Size));
     Cb.bind(NoL3Reset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   Cb.inc(IterReg); // increment iteration counter
   Cb.mov(L1Addr, PointerReg);
@@ -388,14 +385,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
 
   Cb.finalize();
 
-  // String sb;
-  // cb.dump(sb);
-
-  const auto Err = Rt.add(&LoadFunction, &Code);
-  if (Err) {
-    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
-    return EXIT_FAILURE;
-  }
+  auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code, clone());
 
   // skip if we could not determine cache size
   if (L1iCacheSize != 0) {
@@ -412,7 +402,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
     workerLog::trace() << "Repetition count: " << Repetitions;
   }
 
-  return EXIT_SUCCESS;
+  return CompiledPayloadPtr;
 }
 
 auto SSE2Payload::getAvailableInstructions() const -> std::list<std::string> {
@@ -424,7 +414,7 @@ auto SSE2Payload::getAvailableInstructions() const -> std::list<std::string> {
   return Instructions;
 }
 
-void SSE2Payload::init(double* MemoryAddr, uint64_t BufferSize) {
+void SSE2Payload::init(double* MemoryAddr, uint64_t BufferSize) const {
   X86Payload::init(MemoryAddr, BufferSize, 1.654738925401e-10, 1.654738925401e-15);
 }
 
diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index 6d63a187..962f712b 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -28,7 +28,7 @@
 
 namespace firestarter::environment::x86::payload {
 
-void X86Payload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) {
+void X86Payload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) const {
   auto Nap = Period / 100;
 
   if constexpr (firestarter::OptionalFeatures.IsMsc) {
@@ -79,9 +79,4 @@ void X86Payload::init(double* MemoryAddr, uint64_t BufferSize, double FirstValue
   }
 }
 
-auto X86Payload::highLoadFunction(double* AddrMem, volatile LoadThreadWorkType& LoadVar, uint64_t Iterations)
-    -> uint64_t {
-  return this->LoadFunction(AddrMem, &LoadVar, Iterations);
-}
-
 }; // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
index 57361860..a5ac515b 100644
--- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
@@ -19,6 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <firestarter/Environment/X86/Payload/CompiledX86Payload.hpp>
 #include <firestarter/Environment/X86/Payload/ZENFMAPayload.hpp>
 
 namespace firestarter::environment::x86::payload {
@@ -26,7 +27,7 @@ namespace firestarter::environment::x86::payload {
 auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
-                                   bool ErrorDetection) -> int {
+                                   bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr {
   using Imm = asmjit::Imm;
   using Xmm = asmjit::x86::Xmm;
   using Ymm = asmjit::x86::Ymm;
@@ -39,29 +40,28 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
 
   // compute count of flops and memory access for performance report
-  Flops = 0;
-  Bytes = 0;
+  environment::payload::PayloadStats Stats;
 
   for (const auto& Item : Sequence) {
     auto It = InstructionFlops.find(Item);
 
     if (It == InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
-      return EXIT_FAILURE;
+      std::exit(EXIT_FAILURE);
     }
 
-    Flops += It->second;
+    Stats.Flops += It->second;
 
     It = InstructionMemory.find(Item);
 
     if (It != InstructionMemory.end()) {
-      Bytes += It->second;
+      Stats.Bytes += It->second;
     }
   }
 
-  Flops *= Repetitions;
-  Bytes *= Repetitions;
-  Instructions = Repetitions * Sequence.size() * 4 + 6;
+  Stats.Flops *= Repetitions;
+  Stats.Bytes *= Repetitions;
+  Stats.Instructions = Repetitions * Sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
   auto L1iCacheSize = InstructionCacheSize / Thread;
@@ -79,11 +79,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
 
   asmjit::CodeHolder Code;
-  Code.init(Rt.environment());
-
-  if (nullptr != LoadFunction) {
-    Rt.release(LoadFunction);
-  }
+  Code.init(asmjit::Environment::host());
 
   asmjit::x86::Builder Cb(&Code);
   Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler |
@@ -110,7 +106,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   asmjit::FuncDetail Func;
   Func.init(asmjit::FuncSignature::build<uint64_t, double*, volatile LoadThreadWorkType*, uint64_t>(
                 asmjit::CallConvId::kCDecl),
-            Rt.environment());
+            Code.environment());
 
   asmjit::FuncFrame Frame;
   Frame.init(Func);
@@ -254,7 +250,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
         RamIncrement();
       } else {
         workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
-        return EXIT_FAILURE;
+        std::exit(EXIT_FAILURE);
       }
 
       // make sure the shifts do could end up shifting out the data one end.
@@ -304,7 +300,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     Cb.add(RamAddr, Imm(L3Size));
     Cb.bind(NoRamReset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   Cb.inc(TempReg); // increment iteration counter
   if (getL2SequenceCount(Sequence) > 0) {
@@ -318,7 +314,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     Cb.add(L2Addr, Imm(L1Size));
     Cb.bind(NoL2Reset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   Cb.movq(IterReg, TempReg); // store iteration counter
   if (getL3SequenceCount(Sequence) > 0) {
@@ -332,7 +328,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     Cb.add(L3Addr, Imm(L2Size));
     Cb.bind(NoL3Reset);
     // adds always two instruction
-    Instructions += 2;
+    Stats.Instructions += 2;
   }
   Cb.mov(L1Addr, PointerReg);
 
@@ -355,14 +351,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
 
   Cb.finalize();
 
-  // String sb;
-  // cb.dump(sb);
-
-  const auto Err = Rt.add(&LoadFunction, &Code);
-  if (Err) {
-    workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__;
-    return EXIT_FAILURE;
-  }
+  auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code, clone());
 
   // skip if we could not determine cache size
   if (L1iCacheSize != 0) {
@@ -379,7 +368,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
     workerLog::trace() << "Repetition count: " << Repetitions;
   }
 
-  return EXIT_SUCCESS;
+  return CompiledPayloadPtr;
 }
 
 auto ZENFMAPayload::getAvailableInstructions() const -> std::list<std::string> {
@@ -391,7 +380,7 @@ auto ZENFMAPayload::getAvailableInstructions() const -> std::list<std::string> {
   return Instructions;
 }
 
-void ZENFMAPayload::init(double* MemoryAddr, uint64_t BufferSize) {
+void ZENFMAPayload::init(double* MemoryAddr, uint64_t BufferSize) const {
   X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
 
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 5336c3b1..30990e9a 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -139,7 +139,7 @@ Firestarter::Firestarter(Config&& ProvidedConfig)
               auto Td = Thread.second;
               ipcEstimateMetricInsert(
                   static_cast<double>(Td->LastRun.Iterations) *
-                  static_cast<double>(LoadThreads.front().second->config().payload().instructions()) /
+                  static_cast<double>(LoadThreads.front().second->CompiledPayloadPtr->stats().Instructions) /
                   static_cast<double>(StopTimestamp - StartTimestamp));
             }
 
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index 9cef4d0b..1024c39f 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -197,10 +197,10 @@ void Firestarter::printPerformanceReport() {
 
   double const Runtime =
       static_cast<double>(StopTimestamp - StartTimestamp) / static_cast<double>(Environment->topology().clockrate());
-  double const GFlops = static_cast<double>(LoadThreads.front().second->config().payload().flops()) * 0.000000001 *
-                        static_cast<double>(Iterations) / Runtime;
-  double const Bandwidth = static_cast<double>(LoadThreads.front().second->config().payload().bytes()) * 0.000000001 *
-                           static_cast<double>(Iterations) / Runtime;
+  double const GFlops = static_cast<double>(LoadThreads.front().second->CompiledPayloadPtr->stats().Flops) *
+                        0.000000001 * static_cast<double>(Iterations) / Runtime;
+  double const Bandwidth = static_cast<double>(LoadThreads.front().second->CompiledPayloadPtr->stats().Bytes) *
+                           0.000000001 * static_cast<double>(Iterations) / Runtime;
 
   // insert values for ipc-estimate metric
   // if we are on linux
@@ -208,9 +208,10 @@ void Firestarter::printPerformanceReport() {
   if (Cfg.Measurement) {
     for (auto const& Thread : LoadThreads) {
       auto Td = Thread.second;
-      ipcEstimateMetricInsert(static_cast<double>(Td->LastRun.Iterations) *
-                              static_cast<double>(LoadThreads.front().second->config().payload().instructions()) /
-                              static_cast<double>(StopTimestamp - StartTimestamp));
+      ipcEstimateMetricInsert(
+          static_cast<double>(Td->LastRun.Iterations) *
+          static_cast<double>(LoadThreads.front().second->CompiledPayloadPtr->stats().Instructions) /
+          static_cast<double>(StopTimestamp - StartTimestamp));
     }
   }
 #endif
@@ -271,10 +272,10 @@ void Firestarter::loadThreadWorker(const std::shared_ptr<LoadWorkerData>& Td) {
       Td->environment().setCpuAffinity(Td->id());
 
       // compile payload
-      Td->config().payload().compilePayload(Td->config().payloadSettings(), Td->config().instructionCacheSize(),
-                                            Td->config().dataCacheBufferSize(), Td->config().ramBufferSize(),
-                                            Td->config().thread(), Td->config().lines(), Td->DumpRegisters,
-                                            Td->ErrorDetection);
+      Td->CompiledPayloadPtr = Td->config().payload().compilePayload(
+          Td->config().payloadSettings(), Td->config().instructionCacheSize(), Td->config().dataCacheBufferSize(),
+          Td->config().ramBufferSize(), Td->config().thread(), Td->config().lines(), Td->DumpRegisters,
+          Td->ErrorDetection);
 
       // allocate memory
       // if we should dump some registers, we use the first part of the memory
@@ -305,7 +306,7 @@ void Firestarter::loadThreadWorker(const std::shared_ptr<LoadWorkerData>& Td) {
       }
 
       // call init function
-      Td->config().payload().init(Td->Memory->getMemoryAddress(), Td->BuffersizeMem);
+      Td->CompiledPayloadPtr->init(Td->Memory->getMemoryAddress(), Td->BuffersizeMem);
 
       break;
     // perform stress test
@@ -323,8 +324,8 @@ void Firestarter::loadThreadWorker(const std::shared_ptr<LoadWorkerData>& Td) {
 #ifdef ENABLE_SCOREP
         SCOREP_USER_REGION_BY_NAME_BEGIN("HIGH", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
-        Td->CurrentRun.Iterations = Td->config().payload().highLoadFunction(Td->Memory->getMemoryAddress(), Td->LoadVar,
-                                                                            Td->CurrentRun.Iterations);
+        Td->CurrentRun.Iterations = Td->CompiledPayloadPtr->highLoadFunction(Td->Memory->getMemoryAddress(),
+                                                                             Td->LoadVar, Td->CurrentRun.Iterations);
 
         // call low load function
 #ifdef ENABLE_VTRACING
@@ -335,7 +336,7 @@ void Firestarter::loadThreadWorker(const std::shared_ptr<LoadWorkerData>& Td) {
         SCOREP_USER_REGION_BY_NAME_END("HIGH");
         SCOREP_USER_REGION_BY_NAME_BEGIN("LOW", SCOREP_USER_REGION_TYPE_COMMON);
 #endif
-        Td->config().payload().lowLoadFunction(Td->LoadVar, Td->Period);
+        Td->CompiledPayloadPtr->lowLoadFunction(Td->LoadVar, Td->Period);
 #ifdef ENABLE_VTRACING
         VT_USER_END("LOW_LOAD_FUNC");
 #endif
@@ -361,13 +362,13 @@ void Firestarter::loadThreadWorker(const std::shared_ptr<LoadWorkerData>& Td) {
       break;
     case LoadThreadState::ThreadSwitch:
       // compile payload
-      Td->config().payload().compilePayload(Td->config().payloadSettings(), Td->config().instructionCacheSize(),
-                                            Td->config().dataCacheBufferSize(), Td->config().ramBufferSize(),
-                                            Td->config().thread(), Td->config().lines(), Td->DumpRegisters,
-                                            Td->ErrorDetection);
+      Td->CompiledPayloadPtr = Td->config().payload().compilePayload(
+          Td->config().payloadSettings(), Td->config().instructionCacheSize(), Td->config().dataCacheBufferSize(),
+          Td->config().ramBufferSize(), Td->config().thread(), Td->config().lines(), Td->DumpRegisters,
+          Td->ErrorDetection);
 
       // call init function
-      Td->config().payload().init(Td->Memory->getMemoryAddress(), Td->BuffersizeMem);
+      Td->CompiledPayloadPtr->init(Td->Memory->getMemoryAddress(), Td->BuffersizeMem);
       break;
     case LoadThreadState::ThreadWait:
       break;

From d9b2979a78b7f758d1f75f4491ce3453bb3709ca Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Mon, 28 Oct 2024 09:41:08 +0100
Subject: [PATCH 120/167] remove warnings

---
 include/firestarter/Cuda/CudaHipCompat.hpp          |  1 -
 include/firestarter/Environment/Payload/Payload.hpp |  4 ++--
 .../Environment/X86/Payload/CompiledX86Payload.hpp  |  1 -
 .../Environment/X86/Payload/X86Payload.hpp          | 13 +------------
 include/firestarter/Logging/Log.hpp                 |  6 ++++++
 src/firestarter/Config.cpp                          | 11 +++++------
 .../Environment/X86/Payload/AVX512Payload.cpp       |  4 +---
 .../Environment/X86/Payload/AVXPayload.cpp          |  4 +---
 .../Environment/X86/Payload/FMA4Payload.cpp         |  4 +---
 .../Environment/X86/Payload/FMAPayload.cpp          |  4 +---
 .../Environment/X86/Payload/SSE2Payload.cpp         |  4 +---
 .../Environment/X86/Payload/X86Payload.cpp          |  2 +-
 .../Environment/X86/Payload/ZENFMAPayload.cpp       |  4 +---
 src/firestarter/Firestarter.cpp                     |  7 ++++---
 src/firestarter/LoadWorker.cpp                      |  1 -
 15 files changed, 25 insertions(+), 45 deletions(-)

diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp
index c2c009f4..7166ba30 100644
--- a/include/firestarter/Cuda/CudaHipCompat.hpp
+++ b/include/firestarter/Cuda/CudaHipCompat.hpp
@@ -387,7 +387,6 @@ template <typename T> void accellSafeCall(T TVal, const char* File, const int Li
   }
 
   firestarter::log::error() << Ss.str();
-  exit(static_cast<int>(TVal));
 }
 
 /// Wrapper to cuInit or hipInit.
diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index 7bc4cafa..38f33092 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -125,8 +125,8 @@ class Payload {
       , RegisterCount(RegisterCount) {}
   virtual ~Payload() = default;
 
-  friend void CompiledPayload::init(double*, uint64_t);
-  friend void CompiledPayload::lowLoadFunction(volatile LoadThreadWorkType&, std::chrono::microseconds);
+  friend void CompiledPayload::init(double* MemoryAddr, uint64_t BufferSize);
+  friend void CompiledPayload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period);
 
   [[nodiscard]] auto name() const -> const std::string& { return Name; }
   /// The size of the SIMD registers in units of doubles (8B)
diff --git a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
index 69a5fe21..e23f91ff 100644
--- a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
@@ -53,7 +53,6 @@ class CompiledX86Payload final : public environment::payload::CompiledPayload {
     const auto Err = Runtime.add(&HighLoadFunction, &Code);
     if (Err) {
       workerLog::error() << "Asmjit adding Assembler to JitRuntime failed";
-      std::exit(EXIT_FAILURE);
     }
 
     return {new CompiledX86Payload(Stats, std::move(PayloadPtr), HighLoadFunction), deleter};
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index 2dd12444..c2cd7946 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -461,18 +461,7 @@ class X86Payload : public environment::payload::Payload {
     Cb.bind(SkipErrorDetection);
   }
 
-  // A generic implemenation for all x86 payloads
-#if defined(__clang__)
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Woverloaded-virtual"
-#endif
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Woverloaded-virtual"
-  static void init(double* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue);
-#pragma GCC diagnostic pop
-#if defined(__clang__)
-#pragma clang diagnostic pop
-#endif
+  static void initMemory(double* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue);
 
   // use cpuid and usleep as low load
   void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) const final;
diff --git a/include/firestarter/Logging/Log.hpp b/include/firestarter/Logging/Log.hpp
index d5fc9d5e..128ba3f5 100644
--- a/include/firestarter/Logging/Log.hpp
+++ b/include/firestarter/Logging/Log.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include "FirstWorkerThreadFilter.hpp"
+#include <cstdlib>
 #include <iostream>
 #include <nitro/log/attribute/message.hpp>
 #include <nitro/log/attribute/severity.hpp>
@@ -51,6 +52,11 @@ class StdOut {
       std::cout << FormattedRecord << '\n' << std::flush;
       break;
     }
+
+    // Exit on error or fatal
+    if (Severity == nitro::log::severity_level::error || Severity == nitro::log::severity_level::fatal) {
+      std::quick_exit(EXIT_FAILURE);
+    }
   }
 };
 
diff --git a/src/firestarter/Config.cpp b/src/firestarter/Config.cpp
index 1c1f0677..90b8bf10 100644
--- a/src/firestarter/Config.cpp
+++ b/src/firestarter/Config.cpp
@@ -235,17 +235,17 @@ Config::Config(int Argc, const char** Argv)
     }
 
     if (static_cast<bool>(Options.count("version"))) {
-      std::exit(EXIT_SUCCESS);
+      std::quick_exit(EXIT_SUCCESS);
     }
 
     if (static_cast<bool>(Options.count("copyright"))) {
       printCopyright();
-      std::exit(EXIT_SUCCESS);
+      std::quick_exit(EXIT_SUCCESS);
     }
 
     if (static_cast<bool>(Options.count("warranty"))) {
       printWarranty();
-      std::exit(EXIT_SUCCESS);
+      std::quick_exit(EXIT_SUCCESS);
     }
 
     firestarter::log::info() << "This program comes with ABSOLUTELY NO WARRANTY; for details run `" << ExecutableName
@@ -257,7 +257,7 @@ Config::Config(int Argc, const char** Argv)
       auto Section = Options["help"].as<std::string>();
 
       printHelp(Parser, Section);
-      std::exit(EXIT_SUCCESS);
+      std::quick_exit(EXIT_SUCCESS);
     }
 
     Timeout = std::chrono::seconds(Options["timeout"].as<unsigned>());
@@ -384,9 +384,8 @@ Config::Config(int Argc, const char** Argv)
       }
     }
   } catch (std::exception& E) {
-    firestarter::log::error() << E.what() << "\n";
     printHelp(Parser);
-    std::exit(EXIT_FAILURE);
+    firestarter::log::error() << E.what() << "\n";
   }
 }
 } // namespace firestarter
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index 3fcfedf8..035a590f 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -50,7 +50,6 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
 
     if (It == InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
-      std::exit(EXIT_FAILURE);
     }
 
     Stats.Flops += It->second;
@@ -277,7 +276,6 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
         RamIncrement();
       } else {
         workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
-        std::exit(EXIT_FAILURE);
       }
 
       if (Left) {
@@ -393,7 +391,7 @@ auto AVX512Payload::getAvailableInstructions() const -> std::list<std::string> {
 }
 
 void AVX512Payload::init(double* MemoryAddr, uint64_t BufferSize) const {
-  X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
+  X86Payload::initMemory(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
 
 } // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
index e24a40f2..7aff0ec9 100644
--- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
@@ -48,7 +48,6 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
 
     if (It == InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
-      std::exit(EXIT_FAILURE);
     }
 
     Stats.Flops += It->second;
@@ -296,7 +295,6 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
         Stats.Instructions++;
       } else {
         workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
-        std::exit(EXIT_FAILURE);
       }
 
       if (ShiftRegs > 1) {
@@ -423,7 +421,7 @@ auto AVXPayload::getAvailableInstructions() const -> std::list<std::string> {
 }
 
 void AVXPayload::init(double* MemoryAddr, uint64_t BufferSize) const {
-  X86Payload::init(MemoryAddr, BufferSize, 1.654738925401e-10, 1.654738925401e-15);
+  X86Payload::initMemory(MemoryAddr, BufferSize, 1.654738925401e-10, 1.654738925401e-15);
 }
 
 } // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
index f374dbe4..7aaf9a48 100644
--- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
@@ -49,7 +49,6 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
 
     if (It == InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
-      std::exit(EXIT_FAILURE);
     }
 
     Stats.Flops += It->second;
@@ -275,7 +274,6 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
         RamIncrement();
       } else {
         workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
-        std::exit(EXIT_FAILURE);
       }
 
       if (Left) {
@@ -396,7 +394,7 @@ auto FMA4Payload::getAvailableInstructions() const -> std::list<std::string> {
 }
 
 void FMA4Payload::init(double* MemoryAddr, uint64_t BufferSize) const {
-  X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
+  X86Payload::initMemory(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
 
 } // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
index 807f8fd8..fcd1ad1c 100644
--- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
@@ -51,7 +51,6 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
 
     if (It == InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
-      std::exit(EXIT_FAILURE);
     }
 
     Stats.Flops += It->second;
@@ -309,7 +308,6 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
         RamIncrement();
       } else {
         workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
-        std::exit(EXIT_FAILURE);
       }
 
       if (Item != "L1_2LS_256" && Item != "L2_2LS_256") {
@@ -431,7 +429,7 @@ auto FMAPayload::getAvailableInstructions() const -> std::list<std::string> {
 }
 
 void FMAPayload::init(double* MemoryAddr, uint64_t BufferSize) const {
-  X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
+  X86Payload::initMemory(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
 
 } // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index 834972b4..8504a5ed 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -48,7 +48,6 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
 
     if (It == InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
-      std::exit(EXIT_FAILURE);
     }
 
     Stats.Flops += It->second;
@@ -293,7 +292,6 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
         Stats.Instructions++;
       } else {
         workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
-        std::exit(EXIT_FAILURE);
       }
 
       if constexpr (MovRegs > 0) {
@@ -415,7 +413,7 @@ auto SSE2Payload::getAvailableInstructions() const -> std::list<std::string> {
 }
 
 void SSE2Payload::init(double* MemoryAddr, uint64_t BufferSize) const {
-  X86Payload::init(MemoryAddr, BufferSize, 1.654738925401e-10, 1.654738925401e-15);
+  X86Payload::initMemory(MemoryAddr, BufferSize, 1.654738925401e-10, 1.654738925401e-15);
 }
 
 } // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index 962f712b..529d8d21 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -65,7 +65,7 @@ void X86Payload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chro
   }
 }
 
-void X86Payload::init(double* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue) {
+void X86Payload::initMemory(double* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue) {
   uint64_t I = 0;
 
   for (; I < InitBlocksize; I++) {
diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
index a5ac515b..5fed5a42 100644
--- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
@@ -47,7 +47,6 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
 
     if (It == InstructionFlops.end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
-      std::exit(EXIT_FAILURE);
     }
 
     Stats.Flops += It->second;
@@ -250,7 +249,6 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
         RamIncrement();
       } else {
         workerLog::error() << "Instruction group " << Item << " not found in " << name() << ".";
-        std::exit(EXIT_FAILURE);
       }
 
       // make sure the shifts do could end up shifting out the data one end.
@@ -381,7 +379,7 @@ auto ZENFMAPayload::getAvailableInstructions() const -> std::list<std::string> {
 }
 
 void ZENFMAPayload::init(double* MemoryAddr, uint64_t BufferSize) const {
-  X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
+  X86Payload::initMemory(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
 
 } // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 30990e9a..ad5cd2ea 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -20,6 +20,7 @@
  *****************************************************************************/
 
 #include <csignal>
+#include <cstdlib>
 #include <firestarter/Environment/X86/X86Environment.hpp>
 #include <firestarter/Firestarter.hpp>
 #include <firestarter/Logging/Log.hpp>
@@ -60,14 +61,14 @@ Firestarter::Firestarter(Config&& ProvidedConfig)
 
   if (Cfg.PrintFunctionSummary) {
     Environment->printFunctionSummary();
-    std::exit(EXIT_SUCCESS);
+    std::quick_exit(EXIT_SUCCESS);
   }
 
   Environment->selectFunction(Cfg.FunctionId, Cfg.AllowUnavailablePayload);
 
   if (Cfg.ListInstructionGroups) {
     Environment->printAvailableInstructionGroups();
-    std::exit(EXIT_SUCCESS);
+    std::quick_exit(EXIT_SUCCESS);
   }
 
   if (!Cfg.InstructionGroups.empty()) {
@@ -85,7 +86,7 @@ Firestarter::Firestarter(Config&& ProvidedConfig)
 
       if (Cfg.ListMetrics) {
         log::info() << MeasurementWorker->availableMetrics();
-        std::exit(EXIT_SUCCESS);
+        std::quick_exit(EXIT_SUCCESS);
       }
 
       // init all metrics
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index 1024c39f..a3e520e0 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -285,7 +285,6 @@ void Firestarter::loadThreadWorker(const std::shared_ptr<LoadWorkerData>& Td) {
       // exit application on error
       if (Td->Memory == nullptr) {
         workerLog::error() << "Could not allocate memory for CPU load thread " << Td->id() << "\n";
-        exit(ENOMEM);
       }
 
       if (Td->DumpRegisters) {

From ba314d7ccc4feed021e1bf61de1e2c2a5673d08e Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Mon, 28 Oct 2024 11:54:47 +0100
Subject: [PATCH 121/167] remove clone from payload

---
 .../firestarter/Environment/Environment.hpp   |  1 -
 .../Environment/Payload/Payload.hpp           |  4 +--
 .../Environment/Platform/PlatformConfig.hpp   |  3 +-
 .../Environment/Platform/RuntimeConfig.hpp    | 34 ++++++++----------
 .../Environment/X86/Payload/AVX512Payload.hpp |  6 +---
 .../Environment/X86/Payload/AVXPayload.hpp    |  4 ---
 .../X86/Payload/CompiledX86Payload.hpp        |  7 ++--
 .../Environment/X86/Payload/FMA4Payload.hpp   |  6 +---
 .../Environment/X86/Payload/FMAPayload.hpp    |  6 +---
 .../Environment/X86/Payload/SSE2Payload.hpp   |  6 +---
 .../Environment/X86/Payload/X86Payload.hpp    |  2 +-
 .../Environment/X86/Payload/ZENFMAPayload.hpp |  6 +---
 .../X86/Platform/BulldozerConfig.hpp          |  4 +--
 .../X86/Platform/HaswellConfig.hpp            |  4 +--
 .../X86/Platform/HaswellEPConfig.hpp          |  4 +--
 .../X86/Platform/KnightsLandingConfig.hpp     |  4 +--
 .../Environment/X86/Platform/NaplesConfig.hpp |  4 +--
 .../X86/Platform/NehalemConfig.hpp            |  4 +--
 .../X86/Platform/NehalemEPConfig.hpp          |  4 +--
 .../Environment/X86/Platform/RomeConfig.hpp   |  4 +--
 .../X86/Platform/SandyBridgeConfig.hpp        |  4 +--
 .../X86/Platform/SandyBridgeEPConfig.hpp      |  4 +--
 .../X86/Platform/SkylakeConfig.hpp            |  4 +--
 .../X86/Platform/SkylakeSPConfig.hpp          |  4 +--
 .../X86/Platform/X86PlatformConfig.hpp        |  2 +-
 .../Environment/X86/X86Environment.hpp        | 35 ++++++++-----------
 .../Environment/X86/Payload/AVX512Payload.cpp |  2 +-
 .../Environment/X86/Payload/AVXPayload.cpp    |  2 +-
 .../Environment/X86/Payload/FMA4Payload.cpp   |  2 +-
 .../Environment/X86/Payload/FMAPayload.cpp    |  2 +-
 .../Environment/X86/Payload/SSE2Payload.cpp   |  2 +-
 .../Environment/X86/Payload/ZENFMAPayload.cpp |  2 +-
 .../Environment/X86/X86Environment.cpp        | 17 ++-------
 src/firestarter/Firestarter.cpp               |  2 --
 34 files changed, 75 insertions(+), 126 deletions(-)

diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index 6ebad72e..254c2f1e 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -40,7 +40,6 @@ class Environment {
   void setCpuAffinity(unsigned Thread) const;
   void printThreadSummary();
 
-  virtual void evaluateFunctions() = 0;
   virtual void selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) = 0;
   virtual void selectInstructionGroups(std::string Groups) = 0;
   virtual void printAvailableInstructionGroups() = 0;
diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index 38f33092..2306fb77 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -119,7 +119,7 @@ class Payload {
 public:
   Payload() = delete;
 
-  Payload(std::string Name, unsigned RegisterSize, unsigned RegisterCount)
+  Payload(std::string Name, unsigned RegisterSize, unsigned RegisterCount) noexcept
       : Name(std::move(Name))
       , RegisterSize(RegisterSize)
       , RegisterCount(RegisterCount) {}
@@ -142,8 +142,6 @@ class Payload {
                                             unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
                                             bool ErrorDetection) const -> CompiledPayload::UniquePtr = 0;
   [[nodiscard]] virtual auto getAvailableInstructions() const -> std::list<std::string> = 0;
-
-  [[nodiscard]] virtual auto clone() const -> std::unique_ptr<Payload> = 0;
 };
 
 } // namespace firestarter::environment::payload
diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp
index f0a9c0ae..114cbb45 100644
--- a/include/firestarter/Environment/Platform/PlatformConfig.hpp
+++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp
@@ -46,7 +46,7 @@ class PlatformConfig {
 
   PlatformConfig(std::string Name, std::list<unsigned> Threads, unsigned InstructionCacheSize,
                  std::initializer_list<unsigned> DataCacheBufferSize, unsigned RamBufferSize, unsigned Lines,
-                 std::shared_ptr<payload::Payload>&& Payload)
+                 std::shared_ptr<payload::Payload>&& Payload) noexcept
       : Name(std::move(Name))
       , Threads(std::move(Threads))
       , Payload(std::move(Payload))
@@ -54,6 +54,7 @@ class PlatformConfig {
       , DataCacheBufferSize(DataCacheBufferSize)
       , RamBufferSize(RamBufferSize)
       , Lines(Lines) {}
+
   virtual ~PlatformConfig() = default;
 
   [[nodiscard]] auto name() const -> const std::string& { return Name; }
diff --git a/include/firestarter/Environment/Platform/RuntimeConfig.hpp b/include/firestarter/Environment/Platform/RuntimeConfig.hpp
index 8c3fdc88..3ffdc29c 100644
--- a/include/firestarter/Environment/Platform/RuntimeConfig.hpp
+++ b/include/firestarter/Environment/Platform/RuntimeConfig.hpp
@@ -27,9 +27,12 @@
 
 namespace firestarter::environment::platform {
 
+// This is effectively a wrapper around a PlatformConfig that allows overriding some variables.
+// TODO: move these functions into the PlatformConfig and make them non const. The default PlatformConfig(s) shall be
+// const.
 class RuntimeConfig {
 private:
-  PlatformConfig const& PlatformConfigRef;
+  std::shared_ptr<PlatformConfig> SelectedPlatformConfig;
   unsigned Thread;
   std::vector<std::pair<std::string, unsigned>> PayloadSettings;
   unsigned InstructionCacheSize;
@@ -38,33 +41,24 @@ class RuntimeConfig {
   unsigned Lines;
 
 public:
-  RuntimeConfig(PlatformConfig const& PlatformConfigRef, unsigned Thread, unsigned DetectedInstructionCacheSize)
-      : PlatformConfigRef(PlatformConfigRef)
+  RuntimeConfig(const std::shared_ptr<PlatformConfig>& SelectedPlatformConfig, unsigned Thread,
+                unsigned DetectedInstructionCacheSize)
+      : SelectedPlatformConfig(SelectedPlatformConfig)
       , Thread(Thread)
-      , PayloadSettings(PlatformConfigRef.getDefaultPayloadSettings())
-      , InstructionCacheSize(PlatformConfigRef.instructionCacheSize())
-      , DataCacheBufferSize(PlatformConfigRef.dataCacheBufferSize())
-      , RamBufferSize(PlatformConfigRef.ramBufferSize())
-      , Lines(PlatformConfigRef.lines()) {
+      , PayloadSettings(SelectedPlatformConfig->getDefaultPayloadSettings())
+      , InstructionCacheSize(SelectedPlatformConfig->instructionCacheSize())
+      , DataCacheBufferSize(SelectedPlatformConfig->dataCacheBufferSize())
+      , RamBufferSize(SelectedPlatformConfig->ramBufferSize())
+      , Lines(SelectedPlatformConfig->lines()) {
     if (DetectedInstructionCacheSize != 0) {
       this->InstructionCacheSize = DetectedInstructionCacheSize;
     }
   };
 
-  // RuntimeConfig(const RuntimeConfig& Other)
-  //     : PlatformConfigRef(Other.platformConfig())
-  //     , Payload(Other.platformConfig().payload().clone())
-  //     , Thread(Other.thread())
-  //     , PayloadSettings(Other.payloadSettings())
-  //     , InstructionCacheSize(Other.instructionCacheSize())
-  //     , DataCacheBufferSize(Other.dataCacheBufferSize())
-  //     , RamBufferSize(Other.ramBufferSize())
-  //     , Lines(Other.lines()) {}
-
   ~RuntimeConfig() = default;
 
-  [[nodiscard]] auto platformConfig() const -> PlatformConfig const& { return PlatformConfigRef; }
-  [[nodiscard]] auto payload() const -> const payload::Payload& { return PlatformConfigRef.payload(); }
+  [[nodiscard]] auto platformConfig() const -> PlatformConfig const& { return *SelectedPlatformConfig; }
+  [[nodiscard]] auto payload() const -> const payload::Payload& { return SelectedPlatformConfig->payload(); }
   [[nodiscard]] auto thread() const -> unsigned { return Thread; }
   [[nodiscard]] auto payloadSettings() const -> const std::vector<std::pair<std::string, unsigned>>& {
     return PayloadSettings;
diff --git a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
index 00b74107..4884e8c3 100644
--- a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
@@ -26,7 +26,7 @@
 namespace firestarter::environment::x86::payload {
 class AVX512Payload final : public X86Payload {
 public:
-  AVX512Payload()
+  AVX512Payload() noexcept
       : X86Payload({asmjit::CpuFeatures::X86::kAVX512_F}, "AVX512", 8, 32) {}
 
   [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
@@ -37,10 +37,6 @@ class AVX512Payload final : public X86Payload {
 
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
 
-  [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
-    return std::make_unique<AVX512Payload>();
-  };
-
 private:
   void init(double* MemoryAddr, uint64_t BufferSize) const override;
 
diff --git a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
index 4a300f6a..75e44360 100644
--- a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
@@ -37,10 +37,6 @@ class AVXPayload final : public X86Payload {
 
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
 
-  [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
-    return std::make_unique<AVXPayload>();
-  };
-
 private:
   void init(double* MemoryAddr, uint64_t BufferSize) const override;
 
diff --git a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
index e23f91ff..9414041c 100644
--- a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
@@ -24,6 +24,7 @@
 #include "asmjit/core/jitruntime.h"
 #include "firestarter/Environment/Payload/Payload.hpp"
 #include "firestarter/Logging/Log.hpp"
+#include <memory>
 
 namespace firestarter::environment::x86::payload {
 
@@ -47,15 +48,15 @@ class CompiledX86Payload final : public environment::payload::CompiledPayload {
   CompiledX86Payload() = delete;
   ~CompiledX86Payload() override = default;
 
-  [[nodiscard]] static auto create(environment::payload::PayloadStats Stats, asmjit::CodeHolder& Code,
-                                   std::unique_ptr<environment::payload::Payload>&& PayloadPtr) -> UniquePtr {
+  template <class DerivedPayload>
+  [[nodiscard]] static auto create(environment::payload::PayloadStats Stats, asmjit::CodeHolder& Code) -> UniquePtr {
     HighLoadFunctionPtr HighLoadFunction{};
     const auto Err = Runtime.add(&HighLoadFunction, &Code);
     if (Err) {
       workerLog::error() << "Asmjit adding Assembler to JitRuntime failed";
     }
 
-    return {new CompiledX86Payload(Stats, std::move(PayloadPtr), HighLoadFunction), deleter};
+    return {new CompiledX86Payload(Stats, std::move(std::make_unique<DerivedPayload>()), HighLoadFunction), deleter};
   }
 };
 
diff --git a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
index 36a81e1a..a8a82649 100644
--- a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
@@ -27,7 +27,7 @@ namespace firestarter::environment::x86::payload {
 
 class FMA4Payload final : public X86Payload {
 public:
-  FMA4Payload()
+  FMA4Payload() noexcept
       : X86Payload({asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4}, "FMA4", 4, 16) {}
 
   [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
@@ -40,10 +40,6 @@ class FMA4Payload final : public X86Payload {
 
   void init(double* MemoryAddr, uint64_t BufferSize) const override;
 
-  [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
-    return std::make_unique<FMA4Payload>();
-  };
-
 private:
   const std::map<std::string, unsigned> InstructionFlops = {
       {"REG", 8},  {"L1_L", 12}, {"L1_S", 8}, {"L1_LS", 8}, {"L2_L", 8},  {"L2_S", 4},   {"L2_LS", 4}, {"L3_L", 8},
diff --git a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
index b6e7fe6d..ec5b2bea 100644
--- a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
@@ -26,7 +26,7 @@
 namespace firestarter::environment::x86::payload {
 class FMAPayload final : public X86Payload {
 public:
-  FMAPayload()
+  FMAPayload() noexcept
       : X86Payload({asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA}, "FMA", 4, 16) {}
 
   [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
@@ -37,10 +37,6 @@ class FMAPayload final : public X86Payload {
 
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
 
-  [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
-    return std::make_unique<FMAPayload>();
-  };
-
 private:
   void init(double* MemoryAddr, uint64_t BufferSize) const override;
 
diff --git a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
index 2ade146a..a026bbe5 100644
--- a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
@@ -26,7 +26,7 @@
 namespace firestarter::environment::x86::payload {
 class SSE2Payload final : public X86Payload {
 public:
-  SSE2Payload()
+  SSE2Payload() noexcept
       : X86Payload({asmjit::CpuFeatures::X86::kSSE2}, "SSE2", 2, 16) {}
 
   [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
@@ -37,10 +37,6 @@ class SSE2Payload final : public X86Payload {
 
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
 
-  [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
-    return std::make_unique<SSE2Payload>();
-  };
-
 private:
   void init(double* MemoryAddr, uint64_t BufferSize) const override;
 
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index c2cd7946..beaa2b0c 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -468,7 +468,7 @@ class X86Payload : public environment::payload::Payload {
 
 public:
   X86Payload(std::initializer_list<asmjit::CpuFeatures::X86::Id> FeatureRequests, std::string Name,
-             unsigned RegisterSize, unsigned RegisterCount)
+             unsigned RegisterSize, unsigned RegisterCount) noexcept
       : Payload(std::move(Name), RegisterSize, RegisterCount)
       , FeatureRequests(FeatureRequests) {}
 
diff --git a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
index f6a90b4d..1cb13e0a 100644
--- a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
@@ -26,7 +26,7 @@
 namespace firestarter::environment::x86::payload {
 class ZENFMAPayload final : public X86Payload {
 public:
-  ZENFMAPayload()
+  ZENFMAPayload() noexcept
       : X86Payload({asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA}, "ZENFMA", 4, 16) {}
 
   [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
@@ -37,10 +37,6 @@ class ZENFMAPayload final : public X86Payload {
 
   [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
 
-  [[nodiscard]] auto clone() const -> std::unique_ptr<firestarter::environment::payload::Payload> override {
-    return std::make_unique<ZENFMAPayload>();
-  };
-
 private:
   void init(double* MemoryAddr, uint64_t BufferSize) const override;
 
diff --git a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
index bb7c4145..8b0b722a 100644
--- a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class BulldozerConfig final : public X86PlatformConfig {
 public:
-  BulldozerConfig()
+  BulldozerConfig() noexcept
       : X86PlatformConfig("BLD_OPTERON", 21, {1, 2, 3}, {1}, 0, {16384, 1048576, 786432}, 104857600, 1536,
-                          std::make_unique<payload::FMA4Payload>()) {}
+                          std::make_shared<payload::FMA4Payload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
index 3f8bfaf7..94313c10 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class HaswellConfig final : public X86PlatformConfig {
 public:
-  HaswellConfig()
+  HaswellConfig() noexcept
       : X86PlatformConfig("HSW_COREI", 6, {60, 61, 69, 70, 71}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
-                          std::make_unique<payload::FMAPayload>()) {}
+                          std::make_shared<payload::FMAPayload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
index ce8a19b9..8e7b0f16 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class HaswellEPConfig final : public X86PlatformConfig {
 public:
-  HaswellEPConfig()
+  HaswellEPConfig() noexcept
       : X86PlatformConfig("HSW_XEONEP", 6, {63, 79}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536,
-                          std::make_unique<payload::FMAPayload>()) {}
+                          std::make_shared<payload::FMAPayload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
index b1d627ea..1067d786 100644
--- a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class KnightsLandingConfig final : public X86PlatformConfig {
 public:
-  KnightsLandingConfig()
+  KnightsLandingConfig() noexcept
       : X86PlatformConfig("KNL_XEONPHI", 6, {87}, {4}, 0, {32768, 524288, 236279125}, 26214400, 1536,
-                          std::make_unique<payload::AVX512Payload>()) {}
+                          std::make_shared<payload::AVX512Payload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 3}, {"L2_S", 8}, {"L1_L", 40}, {"REG", 10}});
diff --git a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
index f33ced0f..8603d233 100644
--- a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class NaplesConfig final : public X86PlatformConfig {
 public:
-  NaplesConfig()
+  NaplesConfig() noexcept
       : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24}, {1, 2}, 0, {65536, 524288, 2097152}, 104857600, 1536,
-                          std::make_unique<payload::ZENFMAPayload>()) {}
+                          std::make_shared<payload::ZENFMAPayload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
index 0c39409f..8777e262 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class NehalemConfig final : public X86PlatformConfig {
 public:
-  NehalemConfig()
+  NehalemConfig() noexcept
       : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
-                          std::make_unique<payload::SSE2Payload>()) {}
+                          std::make_shared<payload::SSE2Payload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 1}, {"L1_LS", 70}, {"REG", 2}});
diff --git a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
index cbb5b18f..a97fde41 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class NehalemEPConfig final : public X86PlatformConfig {
 public:
-  NehalemEPConfig()
+  NehalemEPConfig() noexcept
       : X86PlatformConfig("NHM_XEONEP", 6, {26, 44}, {1, 2}, 0, {32768, 262144, 2097152}, 104857600, 1536,
-                          std::make_unique<payload::SSE2Payload>()) {}
+                          std::make_shared<payload::SSE2Payload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 1}, {"L1_LS", 60}, {"REG", 2}});
diff --git a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
index bc723d03..1f9509a3 100644
--- a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class RomeConfig final : public X86PlatformConfig {
 public:
-  RomeConfig()
+  RomeConfig() noexcept
       : X86PlatformConfig("ZEN_2_EPYC", 23, {49}, {1, 2}, 0, {32768, 524288, 2097152}, 104857600, 1536,
-                          std::make_unique<payload::FMAPayload>()) {}
+                          std::make_shared<payload::FMAPayload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
index 62a620c9..14dfd03d 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class SandyBridgeConfig final : public X86PlatformConfig {
 public:
-  SandyBridgeConfig()
+  SandyBridgeConfig() noexcept
       : X86PlatformConfig("SNB_COREI", 6, {42, 58}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
-                          std::make_unique<payload::AVXPayload>()) {}
+                          std::make_shared<payload::AVXPayload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
index 7571efb5..eead4d32 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
@@ -28,9 +28,9 @@
 namespace firestarter::environment::x86::platform {
 class SandyBridgeEPConfig final : public X86PlatformConfig {
 public:
-  SandyBridgeEPConfig()
+  SandyBridgeEPConfig() noexcept
       : X86PlatformConfig("SNB_XEONEP", 6, {45, 62}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536,
-                          std::make_unique<payload::AVXPayload>()) {}
+                          std::make_shared<payload::AVXPayload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
index de7d9f62..f2799ace 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
@@ -28,9 +28,9 @@
 namespace firestarter::environment::x86::platform {
 class SkylakeConfig final : public X86PlatformConfig {
 public:
-  SkylakeConfig()
+  SkylakeConfig() noexcept
       : X86PlatformConfig("SKL_COREI", 6, {78, 94}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
-                          std::make_unique<payload::FMAPayload>()) {}
+                          std::make_shared<payload::FMAPayload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>(
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
index 9bcfd069..1efeb5b0 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
@@ -27,9 +27,9 @@
 namespace firestarter::environment::x86::platform {
 class SkylakeSPConfig final : public X86PlatformConfig {
 public:
-  SkylakeSPConfig()
+  SkylakeSPConfig() noexcept
       : X86PlatformConfig("SKL_XEONEP", 6, {85}, {1, 2}, 0, {32768, 1048576, 1441792}, 1048576000, 1536,
-                          std::make_unique<payload::AVX512Payload>()) {}
+                          std::make_shared<payload::AVX512Payload>()) {}
 
   [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
     return std::vector<std::pair<std::string, unsigned>>({{"RAM_S", 3},
diff --git a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
index b1dc8ae7..df112574 100644
--- a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
@@ -41,7 +41,7 @@ class X86PlatformConfig : public environment::platform::PlatformConfig {
   X86PlatformConfig(std::string Name, unsigned Family, std::initializer_list<unsigned> Models,
                     std::initializer_list<unsigned> Threads, unsigned InstructionCacheSize,
                     std::initializer_list<unsigned> DataCacheBufferSize, unsigned RamBuffersize, unsigned Lines,
-                    std::unique_ptr<payload::X86Payload>&& Payload)
+                    std::shared_ptr<payload::X86Payload>&& Payload) noexcept
       : PlatformConfig(std::move(Name), Threads, InstructionCacheSize, DataCacheBufferSize, RamBuffersize, Lines,
                        std::move(Payload))
       , Family(Family)
diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp
index 91ff68e2..0dc21475 100644
--- a/include/firestarter/Environment/X86/X86Environment.hpp
+++ b/include/firestarter/Environment/X86/X86Environment.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include <asmjit/asmjit.h>
+#include <memory>
 
 #include "../Environment.hpp"
 #include "Platform/BulldozerConfig.hpp"
@@ -39,9 +40,6 @@
 #include "Platform/X86PlatformConfig.hpp"
 #include "X86CPUTopology.hpp"
 
-#define REGISTER(NAME)                                                                                                 \
-  []() -> std::unique_ptr<platform::X86PlatformConfig> { return std::make_unique<platform::NAME>(); }
-
 namespace firestarter::environment::x86 {
 
 class X86Environment final : public Environment {
@@ -55,7 +53,6 @@ class X86Environment final : public Environment {
     return *X86Topology;
   }
 
-  void evaluateFunctions() override;
   void selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) override;
   void selectInstructionGroups(std::string Groups) override;
   void printAvailableInstructionGroups() override;
@@ -67,26 +64,22 @@ class X86Environment final : public Environment {
   // The available function IDs are generated by iterating through this list
   // of PlatformConfig. Add new PlatformConfig at the bottom to maintain
   // stable IDs.
-  const std::list<std::function<std::unique_ptr<platform::X86PlatformConfig>()>> PlatformConfigsCtor = {
-      REGISTER(KnightsLandingConfig), REGISTER(SkylakeConfig),   REGISTER(SkylakeSPConfig),
-      REGISTER(HaswellConfig),        REGISTER(HaswellEPConfig), REGISTER(SandyBridgeConfig),
-      REGISTER(SandyBridgeEPConfig),  REGISTER(NehalemConfig),   REGISTER(NehalemEPConfig),
-      REGISTER(BulldozerConfig),      REGISTER(NaplesConfig),    REGISTER(RomeConfig)};
-
-  std::list<std::unique_ptr<platform::X86PlatformConfig>> PlatformConfigs;
+  const std::list<std::shared_ptr<platform::X86PlatformConfig>> PlatformConfigs = {
+      std::make_shared<platform::KnightsLandingConfig>(), std::make_shared<platform::SkylakeConfig>(),
+      std::make_shared<platform::SkylakeSPConfig>(),      std::make_shared<platform::HaswellConfig>(),
+      std::make_shared<platform::HaswellEPConfig>(),      std::make_shared<platform::SandyBridgeConfig>(),
+      std::make_shared<platform::SandyBridgeEPConfig>(),  std::make_shared<platform::NehalemConfig>(),
+      std::make_shared<platform::NehalemEPConfig>(),      std::make_shared<platform::BulldozerConfig>(),
+      std::make_shared<platform::NaplesConfig>(),         std::make_shared<platform::RomeConfig>()};
 
   // List of fallback PlatformConfig. Add one for each x86 extension.
-  const std::list<std::function<std::unique_ptr<platform::X86PlatformConfig>()>> FallbackPlatformConfigsCtor = {
-      REGISTER(SkylakeSPConfig),   // AVX512
-      REGISTER(BulldozerConfig),   // FMA4
-      REGISTER(HaswellConfig),     // FMA
-      REGISTER(SandyBridgeConfig), // AVX
-      REGISTER(NehalemConfig)      // SSE2
+  const std::list<std::shared_ptr<platform::X86PlatformConfig>> FallbackPlatformConfigs = {
+      std::make_shared<platform::SkylakeSPConfig>(),   // AVX512
+      std::make_shared<platform::BulldozerConfig>(),   // FMA4
+      std::make_shared<platform::HaswellConfig>(),     // FMA
+      std::make_shared<platform::SandyBridgeConfig>(), // AVX
+      std::make_shared<platform::NehalemConfig>()      // SSE2
   };
-
-  std::list<std::unique_ptr<platform::X86PlatformConfig>> FallbackPlatformConfigs;
-
-#undef REGISTER
 };
 
 } // namespace firestarter::environment::x86
diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index 035a590f..c412e5fe 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -361,7 +361,7 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
 
   Cb.finalize();
 
-  auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code, clone());
+  auto CompiledPayloadPtr = CompiledX86Payload::create<AVX512Payload>(Stats, Code);
 
   // skip if we could not determine cache size
   if (L1iCacheSize != 0) {
diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
index 7aff0ec9..5290e584 100644
--- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
@@ -391,7 +391,7 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
 
   Cb.finalize();
 
-  auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code, clone());
+  auto CompiledPayloadPtr = CompiledX86Payload::create<AVXPayload>(Stats, Code);
 
   // skip if we could not determine cache size
   if (L1iCacheSize != 0) {
diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
index 7aaf9a48..c82123c1 100644
--- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
@@ -364,7 +364,7 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
 
   Cb.finalize();
 
-  auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code, clone());
+  auto CompiledPayloadPtr = CompiledX86Payload::create<FMA4Payload>(Stats, Code);
 
   // skip if we could not determine cache size
   if (L1iCacheSize != 0) {
diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
index fcd1ad1c..e00ec268 100644
--- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
@@ -399,7 +399,7 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
 
   Cb.finalize();
 
-  auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code, clone());
+  auto CompiledPayloadPtr = CompiledX86Payload::create<FMAPayload>(Stats, Code);
 
   // skip if we could not determine cache size
   if (L1iCacheSize != 0) {
diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index 8504a5ed..4f1dbaac 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -383,7 +383,7 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
 
   Cb.finalize();
 
-  auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code, clone());
+  auto CompiledPayloadPtr = CompiledX86Payload::create<SSE2Payload>(Stats, Code);
 
   // skip if we could not determine cache size
   if (L1iCacheSize != 0) {
diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
index 5fed5a42..4518f54c 100644
--- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
@@ -349,7 +349,7 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
 
   Cb.finalize();
 
-  auto CompiledPayloadPtr = CompiledX86Payload::create(Stats, Code, clone());
+  auto CompiledPayloadPtr = CompiledX86Payload::create<ZENFMAPayload>(Stats, Code);
 
   // skip if we could not determine cache size
   if (L1iCacheSize != 0) {
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index ad055c6e..e76cf738 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -28,17 +28,6 @@
 
 namespace firestarter::environment::x86 {
 
-void X86Environment::evaluateFunctions() {
-  for (const auto& Ctor : PlatformConfigsCtor) {
-    // add asmjit for model and family detection
-    PlatformConfigs.emplace_back(Ctor());
-  }
-
-  for (const auto& Ctor : FallbackPlatformConfigsCtor) {
-    FallbackPlatformConfigs.emplace_back(Ctor());
-  }
-}
-
 void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) {
   unsigned Id = 1;
   std::string DefaultPayloadName;
@@ -59,13 +48,13 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
         }
         // found function
         SelectedConfig =
-            new ::firestarter::environment::platform::RuntimeConfig(*Config, thread, topology().instructionCacheSize());
+            new ::firestarter::environment::platform::RuntimeConfig(Config, thread, topology().instructionCacheSize());
         return;
       }
       // default function
       if (0 == FunctionId && Config->isDefault(topology())) {
         if (thread == topology().numThreadsPerCore()) {
-          SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(*Config, thread,
+          SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(Config, thread,
                                                                                    topology().instructionCacheSize());
           return;
         }
@@ -104,7 +93,7 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
           SelectedThread = Config->getThreadMap().begin()->first;
           SelectedFunctionName = Config->getThreadMap().begin()->second;
         }
-        SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(*Config, SelectedThread,
+        SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(Config, SelectedThread,
                                                                                  topology().instructionCacheSize());
         log::warn() << "Using function " << SelectedFunctionName << " as fallback.\n"
                     << "You can use the parameter --function to try other "
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index ad5cd2ea..464f4dad 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -57,8 +57,6 @@ Firestarter::Firestarter(Config&& ProvidedConfig)
                                 std::to_string(Environment->requestedNumThreads()) + "\n");
   }
 
-  Environment->evaluateFunctions();
-
   if (Cfg.PrintFunctionSummary) {
     Environment->printFunctionSummary();
     std::quick_exit(EXIT_SUCCESS);

From 0dbe9d23cea8316139e0ab6aee3da2187e6490d8 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Mon, 28 Oct 2024 13:06:15 +0100
Subject: [PATCH 122/167] fix build with gcc

---
 .../firestarter/Environment/X86/Payload/CompiledX86Payload.hpp  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
index 9414041c..b6e2fee1 100644
--- a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
@@ -30,7 +30,7 @@ namespace firestarter::environment::x86::payload {
 
 class CompiledX86Payload final : public environment::payload::CompiledPayload {
 private:
-  inline static asmjit::JitRuntime Runtime;
+  inline static asmjit::JitRuntime Runtime = asmjit::JitRuntime();
 
   static void deleter(CompiledX86Payload* Payload) {
     if (Payload && Payload->highLoadFunctionPtr()) {

From 041acfe44d02db7cab01fadede300e98c75be813 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Mon, 28 Oct 2024 13:22:54 +0100
Subject: [PATCH 123/167] replace std::quick_exit with a custom thread safe
 exit function

---
 include/firestarter/Logging/Log.hpp |  3 ++-
 include/firestarter/SafeExit.hpp    | 30 ++++++++++++++++++++++++++
 src/CMakeLists.txt                  |  1 +
 src/firestarter/Config.cpp          |  8 +++----
 src/firestarter/Firestarter.cpp     |  6 +++---
 src/firestarter/SafeExit.cpp        | 33 +++++++++++++++++++++++++++++
 6 files changed, 73 insertions(+), 8 deletions(-)
 create mode 100644 include/firestarter/SafeExit.hpp
 create mode 100644 src/firestarter/SafeExit.cpp

diff --git a/include/firestarter/Logging/Log.hpp b/include/firestarter/Logging/Log.hpp
index 128ba3f5..ec543aa1 100644
--- a/include/firestarter/Logging/Log.hpp
+++ b/include/firestarter/Logging/Log.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include "FirstWorkerThreadFilter.hpp"
+#include "firestarter/SafeExit.hpp"
 #include <cstdlib>
 #include <iostream>
 #include <nitro/log/attribute/message.hpp>
@@ -55,7 +56,7 @@ class StdOut {
 
     // Exit on error or fatal
     if (Severity == nitro::log::severity_level::error || Severity == nitro::log::severity_level::fatal) {
-      std::quick_exit(EXIT_FAILURE);
+      safeExit(EXIT_FAILURE);
     }
   }
 };
diff --git a/include/firestarter/SafeExit.hpp b/include/firestarter/SafeExit.hpp
new file mode 100644
index 00000000..68823831
--- /dev/null
+++ b/include/firestarter/SafeExit.hpp
@@ -0,0 +1,30 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2024 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+#pragma once
+
+namespace firestarter {
+
+/// A thread-safe wrapper around std::exit. This function never returns to the caller.
+/// \arg Status The exit status that is passed on to std::exit
+[[noreturn]] void safeExit(int Status);
+
+} // namespace firestarter
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 47e01cca..0673af9b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,6 +3,7 @@ SET(FIRESTARTER_FILES
 	firestarter/Main.cpp
 	firestarter/Firestarter.cpp
 	firestarter/LoadWorker.cpp
+	firestarter/SafeExit.cpp
 	firestarter/WatchdogWorker.cpp
 	firestarter/DumpRegisterWorker.cpp
 	
diff --git a/src/firestarter/Config.cpp b/src/firestarter/Config.cpp
index 90b8bf10..7842e8e6 100644
--- a/src/firestarter/Config.cpp
+++ b/src/firestarter/Config.cpp
@@ -235,17 +235,17 @@ Config::Config(int Argc, const char** Argv)
     }
 
     if (static_cast<bool>(Options.count("version"))) {
-      std::quick_exit(EXIT_SUCCESS);
+      safeExit(EXIT_SUCCESS);
     }
 
     if (static_cast<bool>(Options.count("copyright"))) {
       printCopyright();
-      std::quick_exit(EXIT_SUCCESS);
+      safeExit(EXIT_SUCCESS);
     }
 
     if (static_cast<bool>(Options.count("warranty"))) {
       printWarranty();
-      std::quick_exit(EXIT_SUCCESS);
+      safeExit(EXIT_SUCCESS);
     }
 
     firestarter::log::info() << "This program comes with ABSOLUTELY NO WARRANTY; for details run `" << ExecutableName
@@ -257,7 +257,7 @@ Config::Config(int Argc, const char** Argv)
       auto Section = Options["help"].as<std::string>();
 
       printHelp(Parser, Section);
-      std::quick_exit(EXIT_SUCCESS);
+      safeExit(EXIT_SUCCESS);
     }
 
     Timeout = std::chrono::seconds(Options["timeout"].as<unsigned>());
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 464f4dad..b71bbba4 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -59,14 +59,14 @@ Firestarter::Firestarter(Config&& ProvidedConfig)
 
   if (Cfg.PrintFunctionSummary) {
     Environment->printFunctionSummary();
-    std::quick_exit(EXIT_SUCCESS);
+    safeExit(EXIT_SUCCESS);
   }
 
   Environment->selectFunction(Cfg.FunctionId, Cfg.AllowUnavailablePayload);
 
   if (Cfg.ListInstructionGroups) {
     Environment->printAvailableInstructionGroups();
-    std::quick_exit(EXIT_SUCCESS);
+    safeExit(EXIT_SUCCESS);
   }
 
   if (!Cfg.InstructionGroups.empty()) {
@@ -84,7 +84,7 @@ Firestarter::Firestarter(Config&& ProvidedConfig)
 
       if (Cfg.ListMetrics) {
         log::info() << MeasurementWorker->availableMetrics();
-        std::quick_exit(EXIT_SUCCESS);
+        safeExit(EXIT_SUCCESS);
       }
 
       // init all metrics
diff --git a/src/firestarter/SafeExit.cpp b/src/firestarter/SafeExit.cpp
new file mode 100644
index 00000000..c5f6e604
--- /dev/null
+++ b/src/firestarter/SafeExit.cpp
@@ -0,0 +1,33 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2020-2023 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+#include <firestarter/SafeExit.hpp>
+#include <mutex>
+
+[[noreturn]] void firestarter::safeExit(const int Status) {
+  // Shared by all calls to safeExit. It is locked and never unlocked, so calls from other threads block in lock().
+  static std::mutex ExitMutex;
+
+  ExitMutex.lock();
+
+  // NOLINTNEXTLINE(concurrency-mt-unsafe)
+  std::exit(Status);
+}
\ No newline at end of file

From 54fd66235f8bfd53e2a3c55087320b38addd097a Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Mon, 28 Oct 2024 18:57:00 +0100
Subject: [PATCH 124/167] refactor PlatformConfig and settings for the Payload.
 remove more non owning memory.

---
 .../firestarter/Environment/CPUTopology.hpp   |   4 +-
 .../firestarter/Environment/Environment.hpp   |  13 +-
 .../Environment/Payload/Payload.hpp           |  12 +-
 .../Environment/Payload/PayloadSettings.hpp   | 160 ++++++++++++++++++
 .../Environment/Platform/PlatformConfig.hpp   | 104 ++++++------
 .../Environment/Platform/RuntimeConfig.hpp    | 106 ------------
 .../Environment/X86/Payload/AVX512Payload.hpp |   4 +-
 .../Environment/X86/Payload/AVXPayload.hpp    |   4 +-
 .../Environment/X86/Payload/FMA4Payload.hpp   |   4 +-
 .../Environment/X86/Payload/FMAPayload.hpp    |   4 +-
 .../Environment/X86/Payload/SSE2Payload.hpp   |   4 +-
 .../Environment/X86/Payload/X86Payload.hpp    |   4 +-
 .../Environment/X86/Payload/ZENFMAPayload.hpp |   4 +-
 .../X86/Platform/BulldozerConfig.hpp          |  12 +-
 .../X86/Platform/HaswellConfig.hpp            |  12 +-
 .../X86/Platform/HaswellEPConfig.hpp          |  12 +-
 .../X86/Platform/KnightsLandingConfig.hpp     |  10 +-
 .../Environment/X86/Platform/NaplesConfig.hpp |  12 +-
 .../X86/Platform/NehalemConfig.hpp            |  10 +-
 .../X86/Platform/NehalemEPConfig.hpp          |  10 +-
 .../Environment/X86/Platform/RomeConfig.hpp   |  13 +-
 .../X86/Platform/SandyBridgeConfig.hpp        |  12 +-
 .../X86/Platform/SandyBridgeEPConfig.hpp      |  12 +-
 .../X86/Platform/SkylakeConfig.hpp            |  12 +-
 .../X86/Platform/SkylakeSPConfig.hpp          |  26 ++-
 .../X86/Platform/X86PlatformConfig.hpp        |  36 ++--
 .../Environment/X86/X86Environment.hpp        |   4 -
 include/firestarter/Firestarter.hpp           |   2 +-
 include/firestarter/LoadWorkerData.hpp        |  74 +-------
 include/firestarter/LoadWorkerMemory.hpp      |  90 ++++++++++
 src/firestarter/DumpRegisterWorker.cpp        |   4 +-
 .../Environment/Payload/Payload.cpp           |  15 +-
 .../Environment/X86/Payload/AVX512Payload.cpp |  35 ++--
 .../Environment/X86/Payload/AVXPayload.cpp    |  35 ++--
 .../Environment/X86/Payload/FMA4Payload.cpp   |  35 ++--
 .../Environment/X86/Payload/FMAPayload.cpp    |  35 ++--
 .../Environment/X86/Payload/SSE2Payload.cpp   |  35 ++--
 .../Environment/X86/Payload/ZENFMAPayload.cpp |  35 ++--
 .../Environment/X86/X86Environment.cpp        |  81 +++++----
 src/firestarter/Firestarter.cpp               |   4 +-
 src/firestarter/LoadWorker.cpp                |  19 +--
 41 files changed, 577 insertions(+), 542 deletions(-)
 create mode 100644 include/firestarter/Environment/Payload/PayloadSettings.hpp
 delete mode 100644 include/firestarter/Environment/Platform/RuntimeConfig.hpp
 create mode 100644 include/firestarter/LoadWorkerMemory.hpp

diff --git a/include/firestarter/Environment/CPUTopology.hpp b/include/firestarter/Environment/CPUTopology.hpp
index cb6e3675..a68eee81 100644
--- a/include/firestarter/Environment/CPUTopology.hpp
+++ b/include/firestarter/Environment/CPUTopology.hpp
@@ -53,7 +53,7 @@ class CPUTopology {
   [[nodiscard]] virtual auto model() const -> std::string const& { return Model; }
 
   // get the size of the L1i-cache in bytes
-  [[nodiscard]] auto instructionCacheSize() const -> unsigned { return InstructionCacheSize; }
+  [[nodiscard]] auto instructionCacheSize() const -> const auto& { return InstructionCacheSize; }
 
   // return the cpu clockrate in Hz
   [[nodiscard]] virtual auto clockrate() const -> uint64_t { return Clockrate; }
@@ -83,7 +83,7 @@ class CPUTopology {
   unsigned NumPackages;
   std::string Architecture;
   std::string ProcessorName;
-  unsigned InstructionCacheSize = 0;
+  std::optional<unsigned> InstructionCacheSize;
   uint64_t Clockrate = 0;
   hwloc_topology_t Topology{};
 };
diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index 254c2f1e..17abac8a 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -22,9 +22,10 @@
 #pragma once
 
 #include "CPUTopology.hpp"
-#include "Platform/RuntimeConfig.hpp"
+#include "firestarter/Environment/Platform/PlatformConfig.hpp"
 #include <cassert>
 #include <cstdint>
+#include <memory>
 #include <vector>
 
 namespace firestarter::environment {
@@ -34,7 +35,7 @@ class Environment {
   Environment() = delete;
   explicit Environment(std::unique_ptr<CPUTopology>&& Topology)
       : Topology(std::move(Topology)) {}
-  virtual ~Environment() { delete SelectedConfig; }
+  virtual ~Environment() = default;
 
   void evaluateCpuAffinity(unsigned RequestedNumThreads, const std::string& CpuBind);
   void setCpuAffinity(unsigned Thread) const;
@@ -47,9 +48,9 @@ class Environment {
   virtual void printSelectedCodePathSummary() = 0;
   virtual void printFunctionSummary() = 0;
 
-  [[nodiscard]] auto selectedConfig() const -> platform::RuntimeConfig& {
-    assert(SelectedConfig != nullptr && "No RuntimeConfig selected");
-    return *SelectedConfig;
+  [[nodiscard]] auto config() const -> platform::PlatformConfig& {
+    assert(Config && "No PlatformConfig selected");
+    return *Config;
   }
 
   [[nodiscard]] auto requestedNumThreads() const -> uint64_t { return RequestedNumThreads; }
@@ -60,7 +61,7 @@ class Environment {
   }
 
 protected:
-  platform::RuntimeConfig* SelectedConfig = nullptr;
+  std::unique_ptr<platform::PlatformConfig> Config;
   std::unique_ptr<CPUTopology> Topology;
 
 private:
diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index 2306fb77..eede4002 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -23,6 +23,7 @@
 
 #include "firestarter/Constants.hpp"
 #include "firestarter/Environment/CPUTopology.hpp"
+#include "firestarter/Environment/Payload/PayloadSettings.hpp"
 #include "firestarter/Environment/Payload/PayloadStats.hpp"
 #include <chrono>
 #include <list>
@@ -106,11 +107,11 @@ class Payload {
   };
 
   [[nodiscard]] static auto getL2LoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
-                                           unsigned Size, unsigned Threads) -> unsigned;
+                                           unsigned Size) -> unsigned;
   [[nodiscard]] static auto getL3LoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
-                                           unsigned Size, unsigned Threads) -> unsigned;
+                                           unsigned Size) -> unsigned;
   [[nodiscard]] static auto getRAMLoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
-                                            unsigned Size, unsigned Threads) -> unsigned;
+                                            unsigned Size) -> unsigned;
 
   virtual void init(double* MemoryAddr, uint64_t BufferSize) const = 0;
 
@@ -136,10 +137,7 @@ class Payload {
 
   [[nodiscard]] virtual auto isAvailable(const CPUTopology*) const -> bool = 0;
 
-  [[nodiscard]] virtual auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
-                                            unsigned InstructionCacheSize,
-                                            std::list<unsigned> const& DataCacheBufferSize, unsigned RamBufferSize,
-                                            unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+  [[nodiscard]] virtual auto compilePayload(const PayloadSettings& Settings, bool DumpRegisters,
                                             bool ErrorDetection) const -> CompiledPayload::UniquePtr = 0;
   [[nodiscard]] virtual auto getAvailableInstructions() const -> std::list<std::string> = 0;
 };
diff --git a/include/firestarter/Environment/Payload/PayloadSettings.hpp b/include/firestarter/Environment/Payload/PayloadSettings.hpp
new file mode 100644
index 00000000..5c287e10
--- /dev/null
+++ b/include/firestarter/Environment/Payload/PayloadSettings.hpp
@@ -0,0 +1,160 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2024 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+#pragma once
+
+#include <cassert>
+#include <cstddef>
+#include <initializer_list>
+#include <list>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace firestarter::environment::payload {
+
+/// Settings from which a payload is compiled: thread counts, cache/RAM buffer sizes, lines and instruction groups.
+struct PayloadSettings {
+public:
+  using InstructionWithProportion = std::pair<std::string, unsigned>;
+
+private:
+  /// The number of threads for which this payload is available. Multiple ones may exist. The PayloadSettings are
+  /// concrete once this is set to contain only one element.
+  std::list<unsigned> Threads;
+  std::optional<unsigned> InstructionCacheSize;
+  std::list<unsigned> DataCacheBufferSize;
+  unsigned RamBufferSize;
+  unsigned Lines;
+  std::vector<InstructionWithProportion> InstructionGroups;
+
+public:
+  PayloadSettings() = delete;
+
+  /// Create settings from the supported thread counts, cache/RAM buffer sizes, line count and instruction groups.
+  PayloadSettings(std::initializer_list<unsigned> Threads, std::initializer_list<unsigned> DataCacheBufferSize,
+                  unsigned RamBufferSize, unsigned Lines, std::vector<InstructionWithProportion>&& InstructionGroups)
+      : Threads(Threads)
+      , DataCacheBufferSize(DataCacheBufferSize)
+      , RamBufferSize(RamBufferSize)
+      , Lines(Lines)
+      , InstructionGroups(std::move(InstructionGroups)) {}
+
+  /// Are the payload settings concrete, i.e. can one specific payload be compiled with these settings. This is the
+  /// case if the list of thread counts is reduced to a single element.
+  [[nodiscard]] auto isConcreate() const -> bool { return Threads.size() == 1; }
+
+  /// The thread counts which are available with the associated platform/payload.
+  [[nodiscard]] auto threads() const -> const auto& { return Threads; }
+
+  /// The concrete number of threads which is selected. The settings must be concrete when calling this.
+  [[nodiscard]] auto thread() const -> unsigned {
+    assert(isConcreate() && "Number of threads is not concreate.");
+    return Threads.front();
+  }
+
+  /// The available instruction cache size. This refers to the L1i-Cache on the physical CPU core.
+  [[nodiscard]] auto instructionCacheSize() const -> const auto& { return InstructionCacheSize; }
+  /// The size of the L1d,L2,...,L3 caches per physical CPU core.
+  [[nodiscard]] auto dataCacheBufferSize() const -> const auto& { return DataCacheBufferSize; }
+  /// The selected size of the buffer that is in the RAM on the physical CPU core.
+  [[nodiscard]] auto ramBufferSize() const -> auto{ return RamBufferSize; }
+  /// Return the total buffer size for the data caches and the ram per physical CPU core.
+  [[nodiscard]] auto totalBufferSize() const -> std::size_t {
+    std::size_t Total = 0;
+    for (const auto& DataCacheSize : DataCacheBufferSize) {
+      Total += DataCacheSize;
+    }
+    return Total + RamBufferSize;
+  }
+  /// The number of instruction groups which should be used in the payload per physical CPU core.
+  [[nodiscard]] auto lines() const -> auto{ return Lines; }
+
+  /// The L1i-Cache size per thread on the physical CPU core. Empty if the size is unset or zero.
+  [[nodiscard]] auto instructionCacheSizePerThread() const -> std::optional<unsigned> {
+    if (InstructionCacheSize.value_or(0) != 0) { // never dereference an empty optional
+      return *InstructionCacheSize / thread();
+    }
+    return {};
+  }
+  /// The size of the L1d,L2,...,L3 caches per thread on the physical CPU core.
+  [[nodiscard]] auto dataCacheBufferSizePerThread() const -> std::list<unsigned> {
+    auto DataCacheBufferSizePerThread = DataCacheBufferSize;
+    for (auto& Value : DataCacheBufferSizePerThread) {
+      Value /= thread();
+    }
+    return DataCacheBufferSizePerThread;
+  }
+  /// The selected size of the buffer that is in the RAM per thread on the physical CPU core.
+  [[nodiscard]] auto ramBufferSizePerThread() const -> auto{ return RamBufferSize / thread(); }
+  /// Return the total buffer size for the data caches and the ram per thread on the physical CPU core.
+  [[nodiscard]] auto totalBufferSizePerThread() const -> std::size_t { return totalBufferSize() / thread(); }
+  /// The number of instruction groups which should be used in the payload per thread on the physical CPU core.
+  [[nodiscard]] auto linesPerThread() const -> auto{ return Lines / thread(); }
+
+  /// The vector of instruction groups with proportions.
+  [[nodiscard]] auto instructionGroups() const -> const auto& { return InstructionGroups; }
+
+  /// The vector of instructions that are saved in the instruction groups
+  [[nodiscard]] auto instructionGroupItems() const -> std::vector<std::string> {
+    std::vector<std::string> Items;
+    Items.reserve(InstructionGroups.size());
+    for (auto const& Pair : InstructionGroups) {
+      Items.push_back(Pair.first);
+    }
+    return Items;
+  }
+
+  /// Format the instruction groups as a comma separated list of name:proportion entries.
+  [[nodiscard]] auto getInstructionGroupsString() const -> std::string {
+    std::stringstream Ss;
+    for (auto const& [Name, Value] : InstructionGroups) {
+      Ss << Name << ":" << Value << ",";
+    }
+
+    auto Str = Ss.str();
+    if (!Str.empty()) {
+      Str.pop_back();
+    }
+
+    return Str;
+  }
+
+  /// Make the settings concrete.
+  /// \arg InstructionCacheSize The detected size of the instruction cache.
+  /// \arg ThreadsPerCore The number of threads per physical CPU core.
+  void concretize(std::optional<unsigned> InstructionCacheSize, unsigned ThreadsPerCore) {
+    this->InstructionCacheSize = InstructionCacheSize;
+    this->Threads = {ThreadsPerCore};
+  }
+
+  /// Save the supplied instruction groups with their proportion in the payload settings.
+  /// \arg InstructionGroups The vector with pairs of instructions and proportions
+  void selectInstructionGroups(std::vector<InstructionWithProportion> const& InstructionGroups) {
+    this->InstructionGroups = InstructionGroups;
+  }
+
+  /// Save the line count in the payload settings.
+  void setLineCount(unsigned LineCount) { this->Lines = LineCount; }
+};
+
+} // namespace firestarter::environment::payload
diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp
index 114cbb45..58afe996 100644
--- a/include/firestarter/Environment/Platform/PlatformConfig.hpp
+++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp
@@ -23,78 +23,82 @@
 
 #include "../Payload/Payload.hpp"
 #include "firestarter/Environment/CPUTopology.hpp"
-#include <initializer_list>
-#include <map>
-#include <sstream>
-#include <string>
-#include <utility>
+#include "firestarter/Environment/Payload/PayloadSettings.hpp"
+#include "firestarter/Logging/Log.hpp"
 
 namespace firestarter::environment::platform {
 
 class PlatformConfig {
 private:
   std::string Name;
-  std::list<unsigned> Threads;
-  std::shared_ptr<payload::Payload> Payload;
-  unsigned InstructionCacheSize;
-  std::list<unsigned> DataCacheBufferSize;
-  unsigned RamBufferSize;
-  unsigned Lines;
+  payload::PayloadSettings Settings;
+  std::shared_ptr<const payload::Payload> Payload;
 
 public:
   PlatformConfig() = delete;
 
-  PlatformConfig(std::string Name, std::list<unsigned> Threads, unsigned InstructionCacheSize,
-                 std::initializer_list<unsigned> DataCacheBufferSize, unsigned RamBufferSize, unsigned Lines,
-                 std::shared_ptr<payload::Payload>&& Payload) noexcept
+  PlatformConfig(std::string Name, payload::PayloadSettings&& Settings,
+                 std::shared_ptr<const payload::Payload>&& Payload) noexcept
       : Name(std::move(Name))
-      , Threads(std::move(Threads))
-      , Payload(std::move(Payload))
-      , InstructionCacheSize(InstructionCacheSize)
-      , DataCacheBufferSize(DataCacheBufferSize)
-      , RamBufferSize(RamBufferSize)
-      , Lines(Lines) {}
+      , Settings(std::move(Settings))
+      , Payload(std::move(Payload)) {}
 
   virtual ~PlatformConfig() = default;
 
-  [[nodiscard]] auto name() const -> const std::string& { return Name; }
-  [[nodiscard]] auto instructionCacheSize() const -> unsigned { return InstructionCacheSize; }
-  [[nodiscard]] auto dataCacheBufferSize() const -> const std::list<unsigned>& { return DataCacheBufferSize; }
-  [[nodiscard]] auto ramBufferSize() const -> unsigned { return RamBufferSize; }
-  [[nodiscard]] auto lines() const -> unsigned { return Lines; }
-  [[nodiscard]] auto payload() const -> payload::Payload const& { return *Payload; }
+  /// Getter for the name of the platform.
+  [[nodiscard]] auto name() const -> const auto& { return Name; }
+  /// Getter for the settings of the platform.
+  [[nodiscard]] auto settings() const -> const auto& { return Settings; }
+  /// Reference to the settings. This allows them to be overridden.
+  [[nodiscard]] auto settings() -> auto& { return Settings; }
+  /// Getter for the payload of the platform.
+  [[nodiscard]] auto payload() const -> const auto& { return Payload; }
 
-  [[nodiscard]] auto getThreadMap() const -> std::map<unsigned, std::string> {
-    std::map<unsigned, std::string> ThreadMap;
-
-    for (auto const& Thread : Threads) {
-      std::stringstream FunctionName;
-      FunctionName << "FUNC_" << name() << "_" << payload().name() << "_" << Thread << "T";
-      ThreadMap[Thread] = FunctionName.str();
-    }
-
-    return ThreadMap;
-  }
-
-  [[nodiscard]] auto isAvailable(const CPUTopology* Topology) const -> bool { return payload().isAvailable(Topology); }
+  [[nodiscard]] auto isAvailable(const CPUTopology* Topology) const -> bool { return payload()->isAvailable(Topology); }
 
   [[nodiscard]] virtual auto isDefault(const CPUTopology*) const -> bool = 0;
 
-  [[nodiscard]] virtual auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> = 0;
-
-  [[nodiscard]] auto getDefaultPayloadSettingsString() const -> std::string {
-    std::stringstream Ss;
-
-    for (auto const& [name, value] : this->getDefaultPayloadSettings()) {
-      Ss << name << ":" << value << ",";
+  /// Clone the platform config.
+  [[nodiscard]] virtual auto clone() const -> std::unique_ptr<PlatformConfig> = 0;
+
+  /// Clone a concrete platform config.
+  /// \arg InstructionCacheSize The detected size of the instruction cache.
+  /// \arg ThreadsPerCore The number of threads per physical CPU core.
+  [[nodiscard]] virtual auto cloneConcreate(std::optional<unsigned> InstructionCacheSize, unsigned ThreadsPerCore) const
+      -> std::unique_ptr<PlatformConfig> = 0;
+
+  /// The function name for this platform config given a specific thread per core count.
+  /// \arg ThreadsPerCore The number of threads per core.
+  /// \returns The name in the form FUNC_<platform name>_<payload name>_<ThreadsPerCore>T
+  [[nodiscard]] auto functionName(unsigned ThreadsPerCore) const -> std::string {
+    return "FUNC_" + Name + "_" + Payload->name() + "_" + std::to_string(ThreadsPerCore) + "T";
+  }
+
+  /// Get the concrete function name, i.e. the one for the selected thread count. Requires concrete settings.
+  [[nodiscard]] auto functionName() const -> std::string {
+    assert(Settings.isConcreate() && "Settings must be concreate for a concreate function name");
+    return functionName(Settings.thread());
+  }
+
+  void printCodePathSummary() const {
+    assert(Settings.isConcreate() && "Setting must be concreate to print the code path summary.");
+
+    log::info() << "\n"
+                << "  Taking " << Payload->name() << " path optimized for " << Name << " - " << Settings.thread()
+                << " thread(s) per core\n"
+                << "  Used buffersizes per thread:";
+
+    if (Settings.instructionCacheSizePerThread()) {
+      log::info() << "    - L1i-Cache: " << *Settings.instructionCacheSizePerThread() << " Bytes";
     }
 
-    auto Str = Ss.str();
-    if (!Str.empty()) {
-      Str.pop_back();
+    unsigned I = 1;
+    for (auto const& Bytes : Settings.dataCacheBufferSizePerThread()) {
+      log::info() << "    - L" << I << "d-Cache: " << Bytes << " Bytes";
+      I++;
     }
 
-    return Str;
+    log::info() << "    - Memory: " << Settings.ramBufferSizePerThread() << " Bytes";
   }
 };
 
diff --git a/include/firestarter/Environment/Platform/RuntimeConfig.hpp b/include/firestarter/Environment/Platform/RuntimeConfig.hpp
deleted file mode 100644
index 3ffdc29c..00000000
--- a/include/firestarter/Environment/Platform/RuntimeConfig.hpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/******************************************************************************
- * FIRESTARTER - A Processor Stress Test Utility
- * Copyright (C) 2020-2023 TU Dresden, Center for Information Services and High
- * Performance Computing
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
- *
- * Contact: daniel.hackenberg@tu-dresden.de
- *****************************************************************************/
-
-#pragma once
-
-#include "../../Logging/Log.hpp"
-#include "../Platform/PlatformConfig.hpp"
-#include <cassert>
-
-namespace firestarter::environment::platform {
-
-// This is effectivly a wrapper around a PlatformConfig that allow overriding some vairables.
-// TODO: move these functions into the PlatformConfig and make them non const. The default PlatformConfig(s) shall be
-// const.
-class RuntimeConfig {
-private:
-  std::shared_ptr<PlatformConfig> SelectedPlatformConfig;
-  unsigned Thread;
-  std::vector<std::pair<std::string, unsigned>> PayloadSettings;
-  unsigned InstructionCacheSize;
-  std::list<unsigned> DataCacheBufferSize;
-  unsigned RamBufferSize;
-  unsigned Lines;
-
-public:
-  RuntimeConfig(const std::shared_ptr<PlatformConfig>& SelectedPlatformConfig, unsigned Thread,
-                unsigned DetectedInstructionCacheSize)
-      : SelectedPlatformConfig(SelectedPlatformConfig)
-      , Thread(Thread)
-      , PayloadSettings(SelectedPlatformConfig->getDefaultPayloadSettings())
-      , InstructionCacheSize(SelectedPlatformConfig->instructionCacheSize())
-      , DataCacheBufferSize(SelectedPlatformConfig->dataCacheBufferSize())
-      , RamBufferSize(SelectedPlatformConfig->ramBufferSize())
-      , Lines(SelectedPlatformConfig->lines()) {
-    if (DetectedInstructionCacheSize != 0) {
-      this->InstructionCacheSize = DetectedInstructionCacheSize;
-    }
-  };
-
-  ~RuntimeConfig() = default;
-
-  [[nodiscard]] auto platformConfig() const -> PlatformConfig const& { return *SelectedPlatformConfig; }
-  [[nodiscard]] auto payload() const -> const payload::Payload& { return SelectedPlatformConfig->payload(); }
-  [[nodiscard]] auto thread() const -> unsigned { return Thread; }
-  [[nodiscard]] auto payloadSettings() const -> const std::vector<std::pair<std::string, unsigned>>& {
-    return PayloadSettings;
-  }
-  [[nodiscard]] auto payloadItems() const -> std::vector<std::string> {
-    std::vector<std::string> Items;
-    Items.reserve(PayloadSettings.size());
-    for (auto const& Pair : PayloadSettings) {
-      Items.push_back(Pair.first);
-    }
-    return Items;
-  }
-
-  [[nodiscard]] auto instructionCacheSize() const -> unsigned { return InstructionCacheSize; }
-  [[nodiscard]] auto dataCacheBufferSize() const -> const std::list<unsigned>& { return DataCacheBufferSize; }
-  [[nodiscard]] auto ramBufferSize() const -> unsigned { return RamBufferSize; }
-  [[nodiscard]] auto lines() const -> unsigned { return Lines; }
-
-  void setPayloadSettings(std::vector<std::pair<std::string, unsigned>> const& PayloadSettings) {
-    this->PayloadSettings = PayloadSettings;
-  }
-
-  void setLineCount(unsigned LineCount) { this->Lines = LineCount; }
-
-  void printCodePathSummary() const {
-    log::info() << "\n"
-                << "  Taking " << platformConfig().payload().name() << " path optimized for " << platformConfig().name()
-                << " - " << thread() << " thread(s) per core\n"
-                << "  Used buffersizes per thread:";
-
-    if (instructionCacheSize() != 0) {
-      log::info() << "    - L1i-Cache: " << instructionCacheSize() / thread() << " Bytes";
-    }
-
-    unsigned I = 1;
-    for (auto const& Bytes : dataCacheBufferSize()) {
-      log::info() << "    - L" << I << "d-Cache: " << Bytes / thread() << " Bytes";
-      I++;
-    }
-
-    log::info() << "    - Memory: " << ramBufferSize() / thread() << " Bytes";
-  }
-};
-
-} // namespace firestarter::environment::platform
diff --git a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
index 4884e8c3..6c80810b 100644
--- a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
@@ -29,9 +29,7 @@ class AVX512Payload final : public X86Payload {
   AVX512Payload() noexcept
       : X86Payload({asmjit::CpuFeatures::X86::kAVX512_F}, "AVX512", 8, 32) {}
 
-  [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
-                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
-                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+  [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                     bool ErrorDetection) const
       -> environment::payload::CompiledPayload::UniquePtr override;
 
diff --git a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
index 75e44360..d4af02c0 100644
--- a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
@@ -29,9 +29,7 @@ class AVXPayload final : public X86Payload {
   AVXPayload()
       : X86Payload({asmjit::CpuFeatures::X86::kAVX}, "AVX", 4, 16) {}
 
-  [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
-                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
-                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+  [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                     bool ErrorDetection) const
       -> environment::payload::CompiledPayload::UniquePtr override;
 
diff --git a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
index a8a82649..021ca8aa 100644
--- a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
@@ -30,9 +30,7 @@ class FMA4Payload final : public X86Payload {
   FMA4Payload() noexcept
       : X86Payload({asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4}, "FMA4", 4, 16) {}
 
-  [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
-                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
-                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+  [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                     bool ErrorDetection) const
       -> environment::payload::CompiledPayload::UniquePtr override;
 
diff --git a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
index ec5b2bea..e147451f 100644
--- a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
@@ -29,9 +29,7 @@ class FMAPayload final : public X86Payload {
   FMAPayload() noexcept
       : X86Payload({asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA}, "FMA", 4, 16) {}
 
-  [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
-                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
-                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+  [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                     bool ErrorDetection) const
       -> environment::payload::CompiledPayload::UniquePtr override;
 
diff --git a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
index a026bbe5..652e9cef 100644
--- a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
@@ -29,9 +29,7 @@ class SSE2Payload final : public X86Payload {
   SSE2Payload() noexcept
       : X86Payload({asmjit::CpuFeatures::X86::kSSE2}, "SSE2", 2, 16) {}
 
-  [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
-                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
-                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+  [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                     bool ErrorDetection) const
       -> environment::payload::CompiledPayload::UniquePtr override;
 
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index beaa2b0c..6b045d43 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -23,10 +23,10 @@
 
 #include "../../../Constants.hpp"          // IWYU pragma: keep
 #include "../../../DumpRegisterStruct.hpp" // IWYU pragma: keep
-#include "../../../LoadWorkerData.hpp"
-#include "../../../Logging/Log.hpp" // IWYU pragma: keep
+#include "../../../Logging/Log.hpp"        // IWYU pragma: keep
 #include "../../Payload/Payload.hpp"
 #include "../X86CPUTopology.hpp"
+#include "firestarter/LoadWorkerMemory.hpp"
 #include <asmjit/x86.h>
 #include <cassert>
 #include <cstdint>
diff --git a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
index 1cb13e0a..4bd69d7c 100644
--- a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
@@ -29,9 +29,7 @@ class ZENFMAPayload final : public X86Payload {
   ZENFMAPayload() noexcept
       : X86Payload({asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA}, "ZENFMA", 4, 16) {}
 
-  [[nodiscard]] auto compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
-                                    unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
-                                    unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+  [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                     bool ErrorDetection) const
       -> environment::payload::CompiledPayload::UniquePtr override;
 
diff --git a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
index 8b0b722a..b3d50c1a 100644
--- a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
@@ -28,12 +28,10 @@ namespace firestarter::environment::x86::platform {
 class BulldozerConfig final : public X86PlatformConfig {
 public:
   BulldozerConfig() noexcept
-      : X86PlatformConfig("BLD_OPTERON", 21, {1, 2, 3}, {1}, 0, {16384, 1048576, 786432}, 104857600, 1536,
-                          std::make_shared<payload::FMA4Payload>()) {}
-
-  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
-    return std::vector<std::pair<std::string, unsigned>>(
-        {{"RAM_L", 1}, {"L3_L", 1}, {"L2_LS", 5}, {"L1_L", 90}, {"REG", 45}});
-  }
+      : X86PlatformConfig(
+            "BLD_OPTERON", 21, {1, 2, 3},
+            environment::payload::PayloadSettings({1}, {16384, 1048576, 786432}, 104857600, 1536,
+                                                  {{"RAM_L", 1}, {"L3_L", 1}, {"L2_LS", 5}, {"L1_L", 90}, {"REG", 45}}),
+            std::make_shared<const payload::FMA4Payload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
index 94313c10..d6ce0078 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
@@ -28,12 +28,10 @@ namespace firestarter::environment::x86::platform {
 class HaswellConfig final : public X86PlatformConfig {
 public:
   HaswellConfig() noexcept
-      : X86PlatformConfig("HSW_COREI", 6, {60, 61, 69, 70, 71}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
-                          std::make_shared<payload::FMAPayload>()) {}
-
-  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
-    return std::vector<std::pair<std::string, unsigned>>(
-        {{"RAM_L", 2}, {"L3_LS", 3}, {"L2_LS", 9}, {"L1_LS", 90}, {"REG", 40}});
-  }
+      : X86PlatformConfig("HSW_COREI", 6, {60, 61, 69, 70, 71},
+                          environment::payload::PayloadSettings(
+                              {1, 2}, {32768, 262144, 1572864}, 104857600, 1536,
+                              {{"RAM_L", 2}, {"L3_LS", 3}, {"L2_LS", 9}, {"L1_LS", 90}, {"REG", 40}}),
+                          std::make_shared<const payload::FMAPayload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
index 8e7b0f16..ae4e9a72 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
@@ -28,12 +28,10 @@ namespace firestarter::environment::x86::platform {
 class HaswellEPConfig final : public X86PlatformConfig {
 public:
   HaswellEPConfig() noexcept
-      : X86PlatformConfig("HSW_XEONEP", 6, {63, 79}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536,
-                          std::make_shared<payload::FMAPayload>()) {}
-
-  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
-    return std::vector<std::pair<std::string, unsigned>>(
-        {{"RAM_L", 8}, {"L3_LS", 1}, {"L2_LS", 29}, {"L1_LS", 100}, {"REG", 100}});
-  }
+      : X86PlatformConfig("HSW_XEONEP", 6, {63, 79},
+                          environment::payload::PayloadSettings(
+                              {1, 2}, {32768, 262144, 2621440}, 104857600, 1536,
+                              {{"RAM_L", 8}, {"L3_LS", 1}, {"L2_LS", 29}, {"L1_LS", 100}, {"REG", 100}}),
+                          std::make_shared<const payload::FMAPayload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
index 1067d786..23230e14 100644
--- a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
@@ -28,11 +28,9 @@ namespace firestarter::environment::x86::platform {
 class KnightsLandingConfig final : public X86PlatformConfig {
 public:
   KnightsLandingConfig() noexcept
-      : X86PlatformConfig("KNL_XEONPHI", 6, {87}, {4}, 0, {32768, 524288, 236279125}, 26214400, 1536,
-                          std::make_shared<payload::AVX512Payload>()) {}
-
-  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
-    return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 3}, {"L2_S", 8}, {"L1_L", 40}, {"REG", 10}});
-  }
+      : X86PlatformConfig("KNL_XEONPHI", 6, {87},
+                          environment::payload::PayloadSettings({4}, {32768, 524288, 236279125}, 26214400, 1536,
+                                                                {{"RAM_P", 3}, {"L2_S", 8}, {"L1_L", 40}, {"REG", 10}}),
+                          std::make_shared<const payload::AVX512Payload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
index 8603d233..07ed7f50 100644
--- a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
@@ -28,12 +28,10 @@ namespace firestarter::environment::x86::platform {
 class NaplesConfig final : public X86PlatformConfig {
 public:
   NaplesConfig() noexcept
-      : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24}, {1, 2}, 0, {65536, 524288, 2097152}, 104857600, 1536,
-                          std::make_shared<payload::ZENFMAPayload>()) {}
-
-  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
-    return std::vector<std::pair<std::string, unsigned>>(
-        {{"RAM_L", 3}, {"L3_L", 14}, {"L2_L", 75}, {"L1_LS", 81}, {"REG", 100}});
-  }
+      : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24},
+                          environment::payload::PayloadSettings(
+                              {1, 2}, {65536, 524288, 2097152}, 104857600, 1536,
+                              {{"RAM_L", 3}, {"L3_L", 14}, {"L2_L", 75}, {"L1_LS", 81}, {"REG", 100}}),
+                          std::make_shared<const payload::ZENFMAPayload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
index 8777e262..8a0c9699 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
@@ -28,11 +28,9 @@ namespace firestarter::environment::x86::platform {
 class NehalemConfig final : public X86PlatformConfig {
 public:
   NehalemConfig() noexcept
-      : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
-                          std::make_shared<payload::SSE2Payload>()) {}
-
-  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
-    return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 1}, {"L1_LS", 70}, {"REG", 2}});
-  }
+      : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23},
+                          environment::payload::PayloadSettings({1, 2}, {32768, 262144, 1572864}, 104857600, 1536,
+                                                                {{"RAM_P", 1}, {"L1_LS", 70}, {"REG", 2}}),
+                          std::make_shared<const payload::SSE2Payload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
index a97fde41..a2815577 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
@@ -28,11 +28,9 @@ namespace firestarter::environment::x86::platform {
 class NehalemEPConfig final : public X86PlatformConfig {
 public:
   NehalemEPConfig() noexcept
-      : X86PlatformConfig("NHM_XEONEP", 6, {26, 44}, {1, 2}, 0, {32768, 262144, 2097152}, 104857600, 1536,
-                          std::make_shared<payload::SSE2Payload>()) {}
-
-  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
-    return std::vector<std::pair<std::string, unsigned>>({{"RAM_P", 1}, {"L1_LS", 60}, {"REG", 2}});
-  }
+      : X86PlatformConfig("NHM_XEONEP", 6, {26, 44},
+                          environment::payload::PayloadSettings({1, 2}, {32768, 262144, 2097152}, 104857600, 1536,
+                                                                {{"RAM_P", 1}, {"L1_LS", 60}, {"REG", 2}}),
+                          std::make_shared<const payload::SSE2Payload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
index 1f9509a3..69b0e9e2 100644
--- a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
@@ -28,12 +28,11 @@ namespace firestarter::environment::x86::platform {
 class RomeConfig final : public X86PlatformConfig {
 public:
   RomeConfig() noexcept
-      : X86PlatformConfig("ZEN_2_EPYC", 23, {49}, {1, 2}, 0, {32768, 524288, 2097152}, 104857600, 1536,
-                          std::make_shared<payload::FMAPayload>()) {}
-
-  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
-    return std::vector<std::pair<std::string, unsigned>>(
-        {{"RAM_L", 10}, {"L3_L", 25}, {"L2_L", 91}, {"L1_2LS_256", 72}, {"L1_LS_256", 82}, {"REG", 75}});
-  }
+      : X86PlatformConfig(
+            "ZEN_2_EPYC", 23, {49},
+            environment::payload::PayloadSettings(
+                {1, 2}, {32768, 524288, 2097152}, 104857600, 1536,
+                {{"RAM_L", 10}, {"L3_L", 25}, {"L2_L", 91}, {"L1_2LS_256", 72}, {"L1_LS_256", 82}, {"REG", 75}}),
+            std::make_shared<const payload::FMAPayload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
index 14dfd03d..ebad14ca 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
@@ -28,12 +28,10 @@ namespace firestarter::environment::x86::platform {
 class SandyBridgeConfig final : public X86PlatformConfig {
 public:
   SandyBridgeConfig() noexcept
-      : X86PlatformConfig("SNB_COREI", 6, {42, 58}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
-                          std::make_shared<payload::AVXPayload>()) {}
-
-  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
-    return std::vector<std::pair<std::string, unsigned>>(
-        {{"RAM_L", 2}, {"L3_LS", 4}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 45}});
-  }
+      : X86PlatformConfig("SNB_COREI", 6, {42, 58},
+                          environment::payload::PayloadSettings(
+                              {1, 2}, {32768, 262144, 1572864}, 104857600, 1536,
+                              {{"RAM_L", 2}, {"L3_LS", 4}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 45}}),
+                          std::make_shared<const payload::AVXPayload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
index eead4d32..b42ca0c5 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
@@ -29,13 +29,11 @@ namespace firestarter::environment::x86::platform {
 class SandyBridgeEPConfig final : public X86PlatformConfig {
 public:
   SandyBridgeEPConfig() noexcept
-      : X86PlatformConfig("SNB_XEONEP", 6, {45, 62}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536,
-                          std::make_shared<payload::AVXPayload>()) {}
-
-  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
-    return std::vector<std::pair<std::string, unsigned>>(
-        {{"RAM_L", 3}, {"L3_LS", 2}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 30}});
-  }
+      : X86PlatformConfig("SNB_XEONEP", 6, {45, 62},
+                          environment::payload::PayloadSettings(
+                              {1, 2}, {32768, 262144, 2621440}, 104857600, 1536,
+                              {{"RAM_L", 3}, {"L3_LS", 2}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 30}}),
+                          std::make_shared<const payload::AVXPayload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
 
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
index f2799ace..57cf9eec 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
@@ -29,13 +29,11 @@ namespace firestarter::environment::x86::platform {
 class SkylakeConfig final : public X86PlatformConfig {
 public:
   SkylakeConfig() noexcept
-      : X86PlatformConfig("SKL_COREI", 6, {78, 94}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536,
-                          std::make_shared<payload::FMAPayload>()) {}
-
-  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
-    return std::vector<std::pair<std::string, unsigned>>(
-        {{"RAM_L", 3}, {"L3_LS_256", 5}, {"L2_LS_256", 18}, {"L1_2LS_256", 78}, {"REG", 40}});
-  }
+      : X86PlatformConfig("SKL_COREI", 6, {78, 94},
+                          environment::payload::PayloadSettings(
+                              {1, 2}, {32768, 262144, 1572864}, 104857600, 1536,
+                              {{"RAM_L", 3}, {"L3_LS_256", 5}, {"L2_LS_256", 18}, {"L1_2LS_256", 78}, {"REG", 40}}),
+                          std::make_shared<const payload::FMAPayload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
 
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
index 1efeb5b0..e9b94f94 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
@@ -28,19 +28,17 @@ namespace firestarter::environment::x86::platform {
 class SkylakeSPConfig final : public X86PlatformConfig {
 public:
   SkylakeSPConfig() noexcept
-      : X86PlatformConfig("SKL_XEONEP", 6, {85}, {1, 2}, 0, {32768, 1048576, 1441792}, 1048576000, 1536,
-                          std::make_shared<payload::AVX512Payload>()) {}
-
-  [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector<std::pair<std::string, unsigned>> override {
-    return std::vector<std::pair<std::string, unsigned>>({{"RAM_S", 3},
-                                                          {"RAM_P", 1},
-                                                          {"L3_S", 1},
-                                                          {"L3_P", 1},
-                                                          {"L2_S", 4},
-                                                          {"L2_L", 70},
-                                                          {"L1_S", 0},
-                                                          {"L1_L", 40},
-                                                          {"REG", 140}});
-  }
+      : X86PlatformConfig("SKL_XEONEP", 6, {85},
+                          environment::payload::PayloadSettings({1, 2}, {32768, 1048576, 1441792}, 1048576000, 1536,
+                                                                {{"RAM_S", 3},
+                                                                 {"RAM_P", 1},
+                                                                 {"L3_S", 1},
+                                                                 {"L3_P", 1},
+                                                                 {"L2_S", 4},
+                                                                 {"L2_L", 70},
+                                                                 {"L1_S", 0},
+                                                                 {"L1_L", 40},
+                                                                 {"REG", 140}}),
+                          std::make_shared<const payload::AVX512Payload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
index df112574..bdef9c39 100644
--- a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
@@ -22,13 +22,7 @@
 #pragma once
 
 #include "../../Platform/PlatformConfig.hpp"
-#include "../Payload/X86Payload.hpp"
-#include "firestarter/Environment/CPUTopology.hpp"
 #include "firestarter/Environment/X86/X86CPUTopology.hpp"
-#include <algorithm>
-#include <memory>
-#include <string>
-#include <vector> // IWYU pragma: keep
 
 namespace firestarter::environment::x86::platform {
 
@@ -38,17 +32,33 @@ class X86PlatformConfig : public environment::platform::PlatformConfig {
   std::list<unsigned> Models;
 
 public:
-  X86PlatformConfig(std::string Name, unsigned Family, std::initializer_list<unsigned> Models,
-                    std::initializer_list<unsigned> Threads, unsigned InstructionCacheSize,
-                    std::initializer_list<unsigned> DataCacheBufferSize, unsigned RamBuffersize, unsigned Lines,
-                    std::shared_ptr<payload::X86Payload>&& Payload) noexcept
-      : PlatformConfig(std::move(Name), Threads, InstructionCacheSize, DataCacheBufferSize, RamBuffersize, Lines,
-                       std::move(Payload))
+  X86PlatformConfig(std::string Name, unsigned Family, std::list<unsigned>&& Models,
+                    environment::payload::PayloadSettings&& Settings,
+                    std::shared_ptr<const environment::payload::Payload>&& Payload) noexcept
+      : PlatformConfig(std::move(Name), std::move(Settings), std::move(Payload))
       , Family(Family)
-      , Models(Models) {}
+      , Models(std::move(Models)) {}
 
   [[nodiscard]] auto isDefault(const X86CPUTopology& Topology) const -> bool { return isDefault(&Topology); }
 
+  /// Clone the platform config. Models and settings are copied; the payload is handed over as a std::shared_ptr.
+  [[nodiscard]] auto clone() const -> std::unique_ptr<PlatformConfig> final {
+    auto Ptr = std::make_unique<X86PlatformConfig>(name(), Family, std::list<unsigned>(Models),
+                                                   environment::payload::PayloadSettings(settings()),
+                                                   std::shared_ptr(payload()));
+    return Ptr;
+  }
+
+  /// Clone the platform config and concretize the settings of the clone with the supplied values.
+  /// \arg InstructionCacheSize The detected size of the instruction cache.
+  /// \arg ThreadsPerCore The number of threads per physical CPU.
+  [[nodiscard]] auto cloneConcreate(std::optional<unsigned> InstructionCacheSize, unsigned ThreadsPerCore) const
+      -> std::unique_ptr<PlatformConfig> final {
+    auto Ptr = clone();
+    Ptr->settings().concretize(InstructionCacheSize, ThreadsPerCore);
+    return Ptr;
+  }
+
 private:
   [[nodiscard]] auto isDefault(const CPUTopology* Topology) const -> bool final {
     const auto* FinalTopology = dynamic_cast<const X86CPUTopology*>(Topology);
diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp
index 0dc21475..f2df4e3a 100644
--- a/include/firestarter/Environment/X86/X86Environment.hpp
+++ b/include/firestarter/Environment/X86/X86Environment.hpp
@@ -21,9 +21,6 @@
 
 #pragma once
 
-#include <asmjit/asmjit.h>
-#include <memory>
-
 #include "../Environment.hpp"
 #include "Platform/BulldozerConfig.hpp"
 #include "Platform/HaswellConfig.hpp"
@@ -38,7 +35,6 @@
 #include "Platform/SkylakeConfig.hpp"
 #include "Platform/SkylakeSPConfig.hpp"
 #include "Platform/X86PlatformConfig.hpp"
-#include "X86CPUTopology.hpp"
 
 namespace firestarter::environment::x86 {
 
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 238c3ec9..2f384fb9 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -104,7 +104,7 @@ class Firestarter {
     for (auto& Thread : LoadThreads) {
       auto Td = Thread.second;
 
-      Td->config().setPayloadSettings(Setting);
+      Td->config().settings().selectInstructionGroups(Setting);
     }
 
     signalLoadWorkers(LoadThreadState::ThreadSwitch, SwitchLoad::func);
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index 24daebce..99f0f475 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -21,80 +21,18 @@
 
 #pragma once
 
-#include "AlignedAlloc.hpp"
 #include "Constants.hpp"
-#include "DumpRegisterStruct.hpp"
 #include "Environment/Environment.hpp"
-#include "ErrorDetectionStruct.hpp"
-#include <array>
+#include "LoadWorkerMemory.hpp"
+#include "firestarter/Environment/Platform/PlatformConfig.hpp"
 #include <atomic>
 #include <cmath>
-#include <cstddef>
 #include <memory>
 #include <mutex>
 #include <utility>
 
 namespace firestarter {
 
-/// This struct is used to allocate the memory for the high-load routine.
-struct LoadWorkerMemory {
-private:
-  LoadWorkerMemory() = default;
-  ~LoadWorkerMemory() = default;
-
-  /// Function to deallocate the memory for this struct to be used with unique_ptr.
-  /// \arg Ptr The pointer to the memory
-  static void deallocate(void* Ptr) {
-    static_cast<LoadWorkerMemory*>(Ptr)->~LoadWorkerMemory();
-    AlignedAlloc::free(Ptr);
-  }
-
-public:
-  using UniquePtr = std::unique_ptr<LoadWorkerMemory, void (*)(void*)>;
-
-  /// The extra variables that are before the memory used for the calculation in the high-load routine. They are used
-  /// for optional FIRESTARTER features where further communication between the high-load routine is needed e.g., for
-  /// error detection or dumping registers.
-  struct ExtraLoadWorkerVariables {
-    /// The data for the dump registers functionality.
-    DumpRegisterStruct Drs;
-    /// The data for the error detections functionality.
-    ErrorDetectionStruct Eds;
-  } ExtraVars;
-
-  /// A placeholder to extract the address of the memory region with dynamic size which is used for the calculation in
-  /// the high-load routine. Do not write or read to this type directly.
-  EightBytesType DoNotUseAddrMem;
-
-  /// This padding makes shure that we are aligned to a cache line. The allocated memory will most probably reach beyond
-  /// this array.
-  std::array<EightBytesType, 7> DoNotUsePadding;
-
-  /// Get the pointer to the start of the memory use for computations.
-  /// \returns the pointer to the memory.
-  [[nodiscard]] auto getMemoryAddress() -> auto{
-    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
-    return reinterpret_cast<double*>(&DoNotUseAddrMem);
-  }
-
-  /// Get the offset to the memory which is used by the high-load functions
-  /// \returns the offset to the memory
-  [[nodiscard]] constexpr static auto getMemoryOffset() -> auto{ return offsetof(LoadWorkerMemory, DoNotUseAddrMem); }
-
-  /// Allocate the memory for the high-load thread on 64B cache line boundaries and return a unique_ptr.
-  /// \arg Bytes The number of bytes allocated for the array whoose start address is returned by the getMemoryAddress
-  /// function.
-  /// \returns A unique_ptr to the memory for the high-load thread.
-  [[nodiscard]] static auto allocate(const std::size_t Bytes) -> UniquePtr {
-    // Allocate the memory for the ExtraLoadWorkerVariables (which are 64B aligned) and the data for the high-load
-    // routine which may not be 64B aligned.
-    static_assert(sizeof(ExtraLoadWorkerVariables) % 64 == 0,
-                  "ExtraLoadWorkerVariables is not a multiple of 64B i.e., multiple cachelines.");
-    auto* Ptr = AlignedAlloc::malloc(Bytes + sizeof(ExtraLoadWorkerVariables));
-    return {static_cast<LoadWorkerMemory*>(Ptr), deallocate};
-  }
-};
-
 class LoadWorkerData {
 public:
   struct Metrics {
@@ -118,9 +56,9 @@ class LoadWorkerData {
       , ErrorDetection(ErrorDetection)
       , Id(Id)
       , Environment(Environment)
-      , Config(new environment::platform::RuntimeConfig(Environment.selectedConfig())) {}
+      , Config(Environment.config().clone()) {}
 
-  ~LoadWorkerData() { delete Config; }
+  ~LoadWorkerData() = default;
 
   void setErrorCommunication(std::shared_ptr<uint64_t> CommunicationLeft,
                              std::shared_ptr<uint64_t> CommunicationRight) {
@@ -130,7 +68,7 @@ class LoadWorkerData {
 
   [[nodiscard]] auto id() const -> uint64_t { return Id; }
   [[nodiscard]] auto environment() const -> const environment::Environment& { return Environment; }
-  [[nodiscard]] auto config() const -> environment::platform::RuntimeConfig& { return *Config; }
+  [[nodiscard]] auto config() const -> environment::platform::PlatformConfig& { return *Config; }
 
   /// Access the DumpRegisterStruct. Asserts when dumping registers is not enabled.
   /// \returns a reference to the DumpRegisterStruct
@@ -181,7 +119,7 @@ class LoadWorkerData {
 
   const uint64_t Id;
   const environment::Environment& Environment;
-  environment::platform::RuntimeConfig* Config;
+  std::unique_ptr<environment::platform::PlatformConfig> Config;
 };
 
 } // namespace firestarter
diff --git a/include/firestarter/LoadWorkerMemory.hpp b/include/firestarter/LoadWorkerMemory.hpp
new file mode 100644
index 00000000..2a5ea253
--- /dev/null
+++ b/include/firestarter/LoadWorkerMemory.hpp
@@ -0,0 +1,90 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2020-2023 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+#pragma once
+
+#include "AlignedAlloc.hpp"
+#include "DumpRegisterStruct.hpp"
+#include "ErrorDetectionStruct.hpp"
+#include <memory>
+
+namespace firestarter {
+
+/// This struct is used to allocate the memory for the high-load routine.
+struct LoadWorkerMemory {
+private:
+  LoadWorkerMemory() = default;
+  ~LoadWorkerMemory() = default;
+
+  /// Function to deallocate the memory for this struct to be used with unique_ptr.
+  /// \arg Ptr The pointer to the memory
+  static void deallocate(void* Ptr) {
+    static_cast<LoadWorkerMemory*>(Ptr)->~LoadWorkerMemory();
+    AlignedAlloc::free(Ptr);
+  }
+
+public:
+  using UniquePtr = std::unique_ptr<LoadWorkerMemory, void (*)(void*)>;
+
+  /// The extra variables that are located before the memory used for the calculation in the high-load routine. They
+  /// are used for optional FIRESTARTER features where further communication with the high-load routine is needed
+  /// e.g., for error detection or dumping registers.
+  struct ExtraLoadWorkerVariables {
+    /// The data for the dump registers functionality.
+    DumpRegisterStruct Drs;
+    /// The data for the error detection functionality.
+    ErrorDetectionStruct Eds;
+  } ExtraVars;
+
+  /// A placeholder to extract the address of the memory region with dynamic size which is used for the calculation in
+  /// the high-load routine. Do not write or read to this type directly.
+  EightBytesType DoNotUseAddrMem;
+
+  /// This padding makes sure that we are aligned to a cache line. The allocated memory will most probably reach beyond
+  /// this array.
+  std::array<EightBytesType, 7> DoNotUsePadding;
+
+  /// Get the pointer to the start of the memory used for computations.
+  /// \returns the pointer to the memory.
+  [[nodiscard]] auto getMemoryAddress() -> auto{
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+    return reinterpret_cast<double*>(&DoNotUseAddrMem);
+  }
+
+  /// Get the offset to the memory which is used by the high-load functions
+  /// \returns the offset to the memory
+  [[nodiscard]] constexpr static auto getMemoryOffset() -> auto{ return offsetof(LoadWorkerMemory, DoNotUseAddrMem); }
+
+  /// Allocate the memory for the high-load thread on 64B cache line boundaries and return a unique_ptr.
+  /// \arg Bytes The number of bytes allocated for the array whose start address is returned by the getMemoryAddress
+  /// function.
+  /// \returns A unique_ptr to the memory for the high-load thread.
+  [[nodiscard]] static auto allocate(const std::size_t Bytes) -> UniquePtr {
+    // Allocate the memory for the ExtraLoadWorkerVariables (which are 64B aligned) and the data for the high-load
+    // routine which may not be 64B aligned.
+    static_assert(sizeof(ExtraLoadWorkerVariables) % 64 == 0,
+                  "ExtraLoadWorkerVariables is not a multiple of 64B i.e., multiple cachelines.");
+    auto* Ptr = AlignedAlloc::malloc(Bytes + sizeof(ExtraLoadWorkerVariables));
+    return {static_cast<LoadWorkerMemory*>(Ptr), deallocate};
+  }
+};
+
+} // namespace firestarter
diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp
index 2966ec50..84af6e7b 100644
--- a/src/firestarter/DumpRegisterWorker.cpp
+++ b/src/firestarter/DumpRegisterWorker.cpp
@@ -70,8 +70,8 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Dat
   pthread_setname_np(pthread_self(), "DumpRegWorker");
 #endif
 
-  const auto RegisterCount = Data->LoadWorkerDataPtr->config().payload().registerCount();
-  const auto RegisterSize = Data->LoadWorkerDataPtr->config().payload().registerSize();
+  const auto RegisterCount = Data->LoadWorkerDataPtr->config().payload()->registerCount();
+  const auto RegisterSize = Data->LoadWorkerDataPtr->config().payload()->registerSize();
   const auto Offset = RegisterCount * RegisterSize;
   const std::string RegisterPrefix = registerNameBySize(RegisterSize);
 
diff --git a/src/firestarter/Environment/Payload/Payload.cpp b/src/firestarter/Environment/Payload/Payload.cpp
index 29bac51e..9d4242c5 100644
--- a/src/firestarter/Environment/Payload/Payload.cpp
+++ b/src/firestarter/Environment/Payload/Payload.cpp
@@ -73,33 +73,30 @@ auto Payload::generateSequence(std::vector<std::pair<std::string, unsigned>> con
 }
 
 auto Payload::getL2LoopCount(const std::vector<std::string>& Sequence, const unsigned NumberOfLines,
-                             const unsigned Size, const unsigned Threads) -> unsigned {
+                             const unsigned Size) -> unsigned {
   if (getL2SequenceCount(Sequence) == 0) {
     return 0;
   }
   return static_cast<unsigned>(
-      (0.8 * Size / 64 / Threads /
-       (getL2SequenceCount(Sequence) * getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Threads))));
+      (0.8 * Size / 64 / (getL2SequenceCount(Sequence) * getNumberOfSequenceRepetitions(Sequence, NumberOfLines))));
 }
 
 auto Payload::getL3LoopCount(const std::vector<std::string>& Sequence, const unsigned NumberOfLines,
-                             const unsigned Size, const unsigned Threads) -> unsigned {
+                             const unsigned Size) -> unsigned {
   if (getL3SequenceCount(Sequence) == 0) {
     return 0;
   }
   return static_cast<unsigned>(
-      (0.8 * Size / 64 / Threads /
-       (getL3SequenceCount(Sequence) * getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Threads))));
+      (0.8 * Size / 64 / (getL3SequenceCount(Sequence) * getNumberOfSequenceRepetitions(Sequence, NumberOfLines))));
 }
 
 auto Payload::getRAMLoopCount(const std::vector<std::string>& Sequence, const unsigned NumberOfLines,
-                              const unsigned Size, const unsigned Threads) -> unsigned {
+                              const unsigned Size) -> unsigned {
   if (getRAMSequenceCount(Sequence) == 0) {
     return 0;
   }
   return static_cast<unsigned>(
-      (1.0 * Size / 64 / Threads /
-       (getRAMSequenceCount(Sequence) * getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Threads))));
+      (1.0 * Size / 64 / (getRAMSequenceCount(Sequence) * getNumberOfSequenceRepetitions(Sequence, NumberOfLines))));
 }
 
 }; // namespace firestarter::environment::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index c412e5fe..f317a4db 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -24,9 +24,7 @@
 
 namespace firestarter::environment::x86::payload {
 
-auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
-                                   unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
-                                   unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+auto AVX512Payload::compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                    bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr {
   using Imm = asmjit::Imm;
   using Zmm = asmjit::x86::Zmm;
@@ -39,8 +37,8 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto Sequence = generateSequence(Proportion);
-  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
+  auto Sequence = generateSequence(Settings.instructionGroups());
+  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
 
   // compute count of flops and memory access for performance report
   environment::payload::PayloadStats Stats;
@@ -66,19 +64,20 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   Stats.Instructions = Repetitions * Sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
-  const auto L1iCacheSize = InstructionCacheSize / Thread;
-  auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin();
-  const auto L1Size = *DataCacheBufferSizeIterator / Thread;
+  const auto L1iCacheSize = Settings.instructionCacheSizePerThread();
+  const auto DataCacheBufferSizes = Settings.dataCacheBufferSizePerThread();
+  auto DataCacheBufferSizeIterator = DataCacheBufferSizes.begin();
+  const auto L1Size = *DataCacheBufferSizeIterator;
   std::advance(DataCacheBufferSizeIterator, 1);
-  const auto L2Size = *DataCacheBufferSizeIterator / Thread;
+  const auto L2Size = *DataCacheBufferSizeIterator;
   std::advance(DataCacheBufferSizeIterator, 1);
-  const auto L3Size = *DataCacheBufferSizeIterator / Thread;
-  const auto RamSize = RamBufferSize / Thread;
+  const auto L3Size = *DataCacheBufferSizeIterator;
+  const auto RamSize = Settings.ramBufferSizePerThread();
 
   // calculate the reset counters for the buffers
-  const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread);
-  const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
-  const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
+  const auto L2LoopCount = getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
+  const auto L3LoopCount = getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
+  const auto RamLoopCount = getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
 
   asmjit::CodeHolder Code;
   Code.init(asmjit::Environment::host());
@@ -364,15 +363,15 @@ auto AVX512Payload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   auto CompiledPayloadPtr = CompiledX86Payload::create<AVX512Payload>(Stats, Code);
 
   // skip if we could not determine cache size
-  if (L1iCacheSize != 0) {
+  if (L1iCacheSize) {
     auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop);
-    auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize;
+    auto InstructionCachePercentage = 100 * LoopSize / *L1iCacheSize;
 
-    if (LoopSize > L1iCacheSize) {
+    if (LoopSize > *L1iCacheSize) {
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage
+    workerLog::trace() << "Using " << LoopSize << " of " << *L1iCacheSize << " Bytes (" << InstructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
     workerLog::trace() << "Sequence size: " << Sequence.size();
     workerLog::trace() << "Repetition count: " << Repetitions;
diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
index 5290e584..2d8923f9 100644
--- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
@@ -24,9 +24,7 @@
 
 namespace firestarter::environment::x86::payload {
 
-auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
-                                unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
-                                unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+auto AVXPayload::compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                 bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr {
   using Imm = asmjit::Imm;
   using Mm = asmjit::x86::Mm;
@@ -37,8 +35,8 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto Sequence = generateSequence(Proportion);
-  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
+  auto Sequence = generateSequence(Settings.instructionGroups());
+  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
 
   // compute count of flops and memory access for performance report
   environment::payload::PayloadStats Stats;
@@ -64,19 +62,20 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   Stats.Instructions = Repetitions * Sequence.size() * 2 + 4;
 
   // calculate the buffer sizes
-  const auto L1iCacheSize = InstructionCacheSize / Thread;
-  auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin();
-  const auto L1Size = *DataCacheBufferSizeIterator / Thread;
+  const auto L1iCacheSize = Settings.instructionCacheSizePerThread();
+  const auto DataCacheBufferSizes = Settings.dataCacheBufferSizePerThread();
+  auto DataCacheBufferSizeIterator = DataCacheBufferSizes.begin();
+  const auto L1Size = *DataCacheBufferSizeIterator;
   std::advance(DataCacheBufferSizeIterator, 1);
-  const auto L2Size = *DataCacheBufferSizeIterator / Thread;
+  const auto L2Size = *DataCacheBufferSizeIterator;
   std::advance(DataCacheBufferSizeIterator, 1);
-  const auto L3Size = *DataCacheBufferSizeIterator / Thread;
-  const auto RamSize = RamBufferSize / Thread;
+  const auto L3Size = *DataCacheBufferSizeIterator;
+  const auto RamSize = Settings.ramBufferSizePerThread();
 
   // calculate the reset counters for the buffers
-  const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread);
-  const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
-  const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
+  const auto L2LoopCount = getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
+  const auto L3LoopCount = getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
+  const auto RamLoopCount = getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
 
   asmjit::CodeHolder Code;
   Code.init(asmjit::Environment::host());
@@ -394,15 +393,15 @@ auto AVXPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   auto CompiledPayloadPtr = CompiledX86Payload::create<AVXPayload>(Stats, Code);
 
   // skip if we could not determine cache size
-  if (L1iCacheSize != 0) {
+  if (L1iCacheSize) {
     auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop);
-    auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize;
+    auto InstructionCachePercentage = 100 * LoopSize / *L1iCacheSize;
 
-    if (LoopSize > L1iCacheSize) {
+    if (LoopSize > *L1iCacheSize) {
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage
+    workerLog::trace() << "Using " << LoopSize << " of " << *L1iCacheSize << " Bytes (" << InstructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
     workerLog::trace() << "Sequence size: " << Sequence.size();
     workerLog::trace() << "Repetition count: " << Repetitions;
diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
index c82123c1..f2342323 100644
--- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
@@ -24,9 +24,7 @@
 
 namespace firestarter::environment::x86::payload {
 
-auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
-                                 unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
-                                 unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+auto FMA4Payload::compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                  bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr {
   using Imm = asmjit::Imm;
   using Xmm = asmjit::x86::Xmm;
@@ -38,8 +36,8 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto Sequence = generateSequence(Proportion);
-  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
+  auto Sequence = generateSequence(Settings.instructionGroups());
+  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
 
   // compute count of flops and memory access for performance report
   environment::payload::PayloadStats Stats;
@@ -65,19 +63,20 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   Stats.Instructions = Repetitions * Sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
-  const auto L1iCacheSize = InstructionCacheSize / Thread;
-  auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin();
-  const auto L1Size = *DataCacheBufferSizeIterator / Thread;
+  const auto L1iCacheSize = Settings.instructionCacheSizePerThread();
+  const auto DataCacheBufferSizes = Settings.dataCacheBufferSizePerThread();
+  auto DataCacheBufferSizeIterator = DataCacheBufferSizes.begin();
+  const auto L1Size = *DataCacheBufferSizeIterator;
   std::advance(DataCacheBufferSizeIterator, 1);
-  const auto L2Size = *DataCacheBufferSizeIterator / Thread;
+  const auto L2Size = *DataCacheBufferSizeIterator;
   std::advance(DataCacheBufferSizeIterator, 1);
-  const auto L3Size = *DataCacheBufferSizeIterator / Thread;
-  const auto RamSize = RamBufferSize / Thread;
+  const auto L3Size = *DataCacheBufferSizeIterator;
+  const auto RamSize = Settings.ramBufferSizePerThread();
 
   // calculate the reset counters for the buffers
-  const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread);
-  const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
-  const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
+  const auto L2LoopCount = getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
+  const auto L3LoopCount = getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
+  const auto RamLoopCount = getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
 
   asmjit::CodeHolder Code;
   Code.init(asmjit::Environment::host());
@@ -367,15 +366,15 @@ auto FMA4Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   auto CompiledPayloadPtr = CompiledX86Payload::create<FMA4Payload>(Stats, Code);
 
   // skip if we could not determine cache size
-  if (L1iCacheSize != 0) {
+  if (L1iCacheSize) {
     auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop);
-    auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize;
+    auto InstructionCachePercentage = 100 * LoopSize / *L1iCacheSize;
 
-    if (LoopSize > L1iCacheSize) {
+    if (LoopSize > *L1iCacheSize) {
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage
+    workerLog::trace() << "Using " << LoopSize << " of " << *L1iCacheSize << " Bytes (" << InstructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
     workerLog::trace() << "Sequence size: " << Sequence.size();
     workerLog::trace() << "Repetition count: " << Repetitions;
diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
index e00ec268..eba17753 100644
--- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
@@ -24,9 +24,7 @@
 
 namespace firestarter::environment::x86::payload {
 
-auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
-                                unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
-                                unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+auto FMAPayload::compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                 bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr {
   using Imm = asmjit::Imm;
   using Xmm = asmjit::x86::Xmm;
@@ -40,8 +38,8 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto Sequence = generateSequence(Proportion);
-  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
+  auto Sequence = generateSequence(Settings.instructionGroups());
+  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
 
   // compute count of flops and memory access for performance report
   environment::payload::PayloadStats Stats;
@@ -67,19 +65,20 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   Stats.Instructions = Repetitions * Sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
-  const auto L1iCacheSize = InstructionCacheSize / Thread;
-  auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin();
-  const auto L1Size = *DataCacheBufferSizeIterator / Thread;
+  const auto L1iCacheSize = Settings.instructionCacheSizePerThread();
+  const auto DataCacheBufferSizes = Settings.dataCacheBufferSizePerThread();
+  auto DataCacheBufferSizeIterator = DataCacheBufferSizes.begin();
+  const auto L1Size = *DataCacheBufferSizeIterator;
   std::advance(DataCacheBufferSizeIterator, 1);
-  const auto L2Size = *DataCacheBufferSizeIterator / Thread;
+  const auto L2Size = *DataCacheBufferSizeIterator;
   std::advance(DataCacheBufferSizeIterator, 1);
-  const auto L3Size = *DataCacheBufferSizeIterator / Thread;
-  const auto RamSize = RamBufferSize / Thread;
+  const auto L3Size = *DataCacheBufferSizeIterator;
+  const auto RamSize = Settings.ramBufferSizePerThread();
 
   // calculate the reset counters for the buffers
-  const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread);
-  const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
-  const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
+  const auto L2LoopCount = getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
+  const auto L3LoopCount = getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
+  const auto RamLoopCount = getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
 
   asmjit::CodeHolder Code;
   Code.init(asmjit::Environment::host());
@@ -402,15 +401,15 @@ auto FMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> co
   auto CompiledPayloadPtr = CompiledX86Payload::create<FMAPayload>(Stats, Code);
 
   // skip if we could not determine cache size
-  if (L1iCacheSize != 0) {
+  if (L1iCacheSize) {
     auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop);
-    auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize;
+    auto InstructionCachePercentage = 100 * LoopSize / *L1iCacheSize;
 
-    if (LoopSize > L1iCacheSize) {
+    if (LoopSize > *L1iCacheSize) {
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage
+    workerLog::trace() << "Using " << LoopSize << " of " << *L1iCacheSize << " Bytes (" << InstructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
     workerLog::trace() << "Sequence size: " << Sequence.size();
     workerLog::trace() << "Repetition count: " << Repetitions;
diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index 4f1dbaac..47126f1f 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -25,9 +25,7 @@
 
 namespace firestarter::environment::x86::payload {
 
-auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
-                                 unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
-                                 unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+auto SSE2Payload::compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                  bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr {
   using Imm = asmjit::Imm;
   using Mm = asmjit::x86::Mm;
@@ -37,8 +35,8 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto Sequence = generateSequence(Proportion);
-  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
+  auto Sequence = generateSequence(Settings.instructionGroups());
+  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
 
   // compute count of flops and memory access for performance report
   environment::payload::PayloadStats Stats;
@@ -64,19 +62,20 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   Stats.Instructions = Repetitions * Sequence.size() * 2 + 4;
 
   // calculate the buffer sizes
-  const auto L1iCacheSize = InstructionCacheSize / Thread;
-  auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin();
-  const auto L1Size = *DataCacheBufferSizeIterator / Thread;
+  const auto L1iCacheSize = Settings.instructionCacheSizePerThread();
+  const auto DataCacheBufferSizes = Settings.dataCacheBufferSizePerThread();
+  auto DataCacheBufferSizeIterator = DataCacheBufferSizes.begin();
+  const auto L1Size = *DataCacheBufferSizeIterator;
   std::advance(DataCacheBufferSizeIterator, 1);
-  const auto L2Size = *DataCacheBufferSizeIterator / Thread;
+  const auto L2Size = *DataCacheBufferSizeIterator;
   std::advance(DataCacheBufferSizeIterator, 1);
-  const auto L3Size = *DataCacheBufferSizeIterator / Thread;
-  const auto RamSize = RamBufferSize / Thread;
+  const auto L3Size = *DataCacheBufferSizeIterator;
+  const auto RamSize = Settings.ramBufferSizePerThread();
 
   // calculate the reset counters for the buffers
-  const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread);
-  const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
-  const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
+  const auto L2LoopCount = getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
+  const auto L3LoopCount = getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
+  const auto RamLoopCount = getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
 
   asmjit::CodeHolder Code;
   Code.init(asmjit::Environment::host());
@@ -386,15 +385,15 @@ auto SSE2Payload::compilePayload(std::vector<std::pair<std::string, unsigned>> c
   auto CompiledPayloadPtr = CompiledX86Payload::create<SSE2Payload>(Stats, Code);
 
   // skip if we could not determine cache size
-  if (L1iCacheSize != 0) {
+  if (L1iCacheSize) {
     auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop);
-    auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize;
+    auto InstructionCachePercentage = 100 * LoopSize / *L1iCacheSize;
 
-    if (LoopSize > L1iCacheSize) {
+    if (LoopSize > *L1iCacheSize) {
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage
+    workerLog::trace() << "Using " << LoopSize << " of " << *L1iCacheSize << " Bytes (" << InstructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
     workerLog::trace() << "Sequence size: " << Sequence.size();
     workerLog::trace() << "Repetition count: " << Repetitions;
diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
index 4518f54c..64a3593c 100644
--- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
@@ -24,9 +24,7 @@
 
 namespace firestarter::environment::x86::payload {
 
-auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>> const& Proportion,
-                                   unsigned InstructionCacheSize, std::list<unsigned> const& DataCacheBufferSize,
-                                   unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters,
+auto ZENFMAPayload::compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                    bool ErrorDetection) const -> environment::payload::CompiledPayload::UniquePtr {
   using Imm = asmjit::Imm;
   using Xmm = asmjit::x86::Xmm;
@@ -36,8 +34,8 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto Sequence = generateSequence(Proportion);
-  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread);
+  auto Sequence = generateSequence(Settings.instructionGroups());
+  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
 
   // compute count of flops and memory access for performance report
   environment::payload::PayloadStats Stats;
@@ -63,19 +61,20 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   Stats.Instructions = Repetitions * Sequence.size() * 4 + 6;
 
   // calculate the buffer sizes
-  auto L1iCacheSize = InstructionCacheSize / Thread;
-  auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin();
-  auto L1Size = *DataCacheBufferSizeIterator / Thread;
+  const auto L1iCacheSize = Settings.instructionCacheSizePerThread();
+  const auto DataCacheBufferSizes = Settings.dataCacheBufferSizePerThread();
+  auto DataCacheBufferSizeIterator = DataCacheBufferSizes.begin();
+  const auto L1Size = *DataCacheBufferSizeIterator;
   std::advance(DataCacheBufferSizeIterator, 1);
-  auto L2Size = *DataCacheBufferSizeIterator / Thread;
+  const auto L2Size = *DataCacheBufferSizeIterator;
   std::advance(DataCacheBufferSizeIterator, 1);
-  auto L3Size = *DataCacheBufferSizeIterator / Thread;
-  auto RamSize = RamBufferSize / Thread;
+  const auto L3Size = *DataCacheBufferSizeIterator;
+  const auto RamSize = Settings.ramBufferSizePerThread();
 
   // calculate the reset counters for the buffers
-  auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread);
-  auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread);
-  auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread);
+  const auto L2LoopCount = getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
+  const auto L3LoopCount = getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
+  const auto RamLoopCount = getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
 
   asmjit::CodeHolder Code;
   Code.init(asmjit::Environment::host());
@@ -352,15 +351,15 @@ auto ZENFMAPayload::compilePayload(std::vector<std::pair<std::string, unsigned>>
   auto CompiledPayloadPtr = CompiledX86Payload::create<ZENFMAPayload>(Stats, Code);
 
   // skip if we could not determine cache size
-  if (L1iCacheSize != 0) {
+  if (L1iCacheSize) {
     auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop);
-    auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize;
+    auto InstructionCachePercentage = 100 * LoopSize / *L1iCacheSize;
 
-    if (LoopSize > L1iCacheSize) {
+    if (LoopSize > *L1iCacheSize) {
       workerLog::warn() << "Work-loop is bigger than the L1i-Cache.";
     }
 
-    workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage
+    workerLog::trace() << "Using " << LoopSize << " of " << *L1iCacheSize << " Bytes (" << InstructionCachePercentage
                        << "%) from the L1i-Cache for the work-loop.";
     workerLog::trace() << "Sequence size: " << Sequence.size();
     workerLog::trace() << "Repetition count: " << Repetitions;
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index e76cf738..1438cfb6 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -30,16 +30,17 @@ namespace firestarter::environment::x86 {
 
 void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) {
   unsigned Id = 1;
-  std::string DefaultPayloadName;
+  std::optional<std::string> DefaultPayloadName;
 
   // if functionId is 0 get the default or fallback
-  for (const auto& Config : PlatformConfigs) {
-    for (auto const& [thread, functionName] : Config->getThreadMap()) {
+  for (const auto& PlatformConfigPtr : PlatformConfigs) {
+    for (auto const& ThreadsPerCore : PlatformConfigPtr->settings().threads()) {
       // the selected function
       if (Id == FunctionId) {
-        if (!Config->isAvailable(Topology.get())) {
-          const auto ErrorString = "Function " + std::to_string(FunctionId) + " (\"" + functionName + "\") requires " +
-                                   Config->payload().name() + ", which is not supported by the processor.";
+        if (!PlatformConfigPtr->isAvailable(Topology.get())) {
+          const auto ErrorString = "Function " + std::to_string(FunctionId) + " (\"" +
+                                   PlatformConfigPtr->functionName(ThreadsPerCore) + "\") requires " +
+                                   PlatformConfigPtr->payload()->name() + ", which is not supported by the processor.";
           if (AllowUnavailablePayload) {
             log::error() << ErrorString;
           } else {
@@ -47,18 +48,16 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
           }
         }
         // found function
-        SelectedConfig =
-            new ::firestarter::environment::platform::RuntimeConfig(Config, thread, topology().instructionCacheSize());
+        Config = PlatformConfigPtr->cloneConcreate(topology().instructionCacheSize(), ThreadsPerCore);
         return;
       }
       // default function
-      if (0 == FunctionId && Config->isDefault(topology())) {
-        if (thread == topology().numThreadsPerCore()) {
-          SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(Config, thread,
-                                                                                   topology().instructionCacheSize());
+      if (0 == FunctionId && PlatformConfigPtr->isDefault(topology())) {
+        if (ThreadsPerCore == topology().numThreadsPerCore()) {
+          Config = PlatformConfigPtr->cloneConcreate(topology().instructionCacheSize(), ThreadsPerCore);
           return;
         }
-        DefaultPayloadName = Config->payload().name();
+        DefaultPayloadName = PlatformConfigPtr->payload()->name();
       }
       Id++;
     }
@@ -67,10 +66,10 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
   // no default found
   // use fallback
   if (0 == FunctionId) {
-    if (!DefaultPayloadName.empty()) {
+    if (DefaultPayloadName) {
       // default payload available, but number of threads per core is not
       // supported
-      log::warn() << "No " << DefaultPayloadName << " code path for " << topology().numThreadsPerCore()
+      log::warn() << "No " << *DefaultPayloadName << " code path for " << topology().numThreadsPerCore()
                   << " threads per core!";
     }
     log::warn() << topology().vendor() << " " << topology().model()
@@ -79,23 +78,22 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
 
     // loop over available implementation and check if they are marked as
     // fallback
-    for (const auto& Config : FallbackPlatformConfigs) {
-      if (Config->isAvailable(Topology.get())) {
-        auto SelectedThread = 0U;
-        auto SelectedFunctionName = std::string("");
-        for (auto const& [Thread, FunctionName] : Config->getThreadMap()) {
-          if (Thread == topology().numThreadsPerCore()) {
-            SelectedThread = Thread;
-            SelectedFunctionName = FunctionName;
+    for (const auto& FallbackPlatformConfigPtr : FallbackPlatformConfigs) {
+      if (FallbackPlatformConfigPtr->isAvailable(Topology.get())) {
+        std::optional<unsigned> SelectedThreadsPerCore;
+        // find the fallback implementation with the correct thread per core count
+        for (auto const& ThreadsPerCore : FallbackPlatformConfigPtr->settings().threads()) {
+          if (ThreadsPerCore == topology().numThreadsPerCore()) {
+            SelectedThreadsPerCore = ThreadsPerCore;
           }
         }
-        if (SelectedThread == 0) {
-          SelectedThread = Config->getThreadMap().begin()->first;
-          SelectedFunctionName = Config->getThreadMap().begin()->second;
+        // Otherwise select the first available thread per core count
+        if (!SelectedThreadsPerCore) {
+          SelectedThreadsPerCore = FallbackPlatformConfigPtr->settings().threads().front();
         }
-        SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(Config, SelectedThread,
-                                                                                 topology().instructionCacheSize());
-        log::warn() << "Using function " << SelectedFunctionName << " as fallback.\n"
+        Config = FallbackPlatformConfigPtr->cloneConcreate(topology().instructionCacheSize(), *SelectedThreadsPerCore);
+        log::warn() << "Using function " << FallbackPlatformConfigPtr->functionName(*SelectedThreadsPerCore)
+                    << " as fallback.\n"
                     << "You can use the parameter --function to try other "
                        "functions.";
         return;
@@ -113,7 +111,7 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
 void X86Environment::selectInstructionGroups(std::string Groups) {
   const auto Delimiter = ',';
   const std::regex Re("^(\\w+):(\\d+)$");
-  const auto AvailableInstructionGroups = selectedConfig().platformConfig().payload().getAvailableInstructions();
+  const auto AvailableInstructionGroups = config().payload()->getAvailableInstructions();
 
   std::stringstream Ss(Groups);
   std::vector<std::pair<std::string, unsigned>> PayloadSettings = {};
@@ -130,7 +128,7 @@ void X86Environment::selectInstructionGroups(std::string Groups) {
                                     "\n       --run-instruction-groups format: multiple INST:VAL "
                                     "pairs comma-seperated");
       }
-      int Num = std::stoul(M[2].str());
+      auto Num = std::stoul(M[2].str());
       if (Num == 0) {
         throw std::invalid_argument("instruction-group VAL may not contain number 0"
                                     "\n       --run-instruction-groups format: multiple INST:VAL "
@@ -144,7 +142,7 @@ void X86Environment::selectInstructionGroups(std::string Groups) {
     }
   }
 
-  selectedConfig().setPayloadSettings(PayloadSettings);
+  config().settings().selectInstructionGroups(PayloadSettings);
 
   log::info() << "  Running custom instruction group: " << Groups;
 }
@@ -152,7 +150,7 @@ void X86Environment::selectInstructionGroups(std::string Groups) {
 void X86Environment::printAvailableInstructionGroups() {
   std::stringstream Ss;
 
-  for (auto const& Item : selectedConfig().platformConfig().payload().getAvailableInstructions()) {
+  for (auto const& Item : config().payload()->getAvailableInstructions()) {
     Ss << Item << ",";
   }
 
@@ -161,14 +159,13 @@ void X86Environment::printAvailableInstructionGroups() {
     S.pop_back();
   }
 
-  log::info() << " available instruction-groups for payload " << selectedConfig().platformConfig().payload().name()
-              << ":\n"
+  log::info() << " available instruction-groups for payload " << config().payload()->name() << ":\n"
               << "  " << S;
 }
 
-void X86Environment::setLineCount(unsigned LineCount) { selectedConfig().setLineCount(LineCount); }
+void X86Environment::setLineCount(unsigned LineCount) { config().settings().setLineCount(LineCount); }
 
-void X86Environment::printSelectedCodePathSummary() { selectedConfig().printCodePathSummary(); }
+void X86Environment::printSelectedCodePathSummary() { config().printCodePathSummary(); }
 
 void X86Environment::printFunctionSummary() {
   log::info() << " available load-functions:\n"
@@ -182,14 +179,14 @@ void X86Environment::printFunctionSummary() {
   auto Id = 1U;
 
   for (auto const& Config : PlatformConfigs) {
-    for (auto const& [thread, functionName] : Config->getThreadMap()) {
+    for (auto const& ThreadsPerCore : Config->settings().threads()) {
       const char* Available = Config->isAvailable(Topology.get()) ? "yes" : "no";
       const char* Fmt = "  %4u | %-30s | %-24s | %s";
-      int Sz = std::snprintf(nullptr, 0, Fmt, Id, functionName.c_str(), Available,
-                             Config->getDefaultPayloadSettingsString().c_str());
+      const auto& FunctionName = Config->functionName(ThreadsPerCore);
+      const auto& InstructionGroupsString = Config->settings().getInstructionGroupsString();
+      int Sz = std::snprintf(nullptr, 0, Fmt, Id, FunctionName.c_str(), Available, InstructionGroupsString.c_str());
       std::vector<char> Buf(Sz + 1);
-      std::snprintf(Buf.data(), Buf.size(), Fmt, Id, functionName.c_str(), Available,
-                    Config->getDefaultPayloadSettingsString().c_str());
+      std::snprintf(Buf.data(), Buf.size(), Fmt, Id, FunctionName.c_str(), Available, InstructionGroupsString.c_str());
       log::info() << std::string(Buf.data());
       Id++;
     }
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index b71bbba4..b8553973 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -151,7 +151,7 @@ Firestarter::Firestarter(Config&& ProvidedConfig)
 
       auto Prob = std::make_shared<firestarter::optimizer::problem::CLIArgumentProblem>(
           std::move(ApplySettings), MeasurementWorker, Cfg.OptimizationMetrics, Cfg.EvaluationDuration, Cfg.StartDelta,
-          Cfg.StopDelta, Environment->selectedConfig().payloadItems());
+          Cfg.StopDelta, Environment->config().settings().instructionGroupItems());
 
       Population = firestarter::optimizer::Population(std::move(Prob));
 
@@ -218,7 +218,7 @@ void Firestarter::mainThread() {
       // wait here until optimizer thread terminates
       Firestarter::Optimizer->join();
 
-      auto PayloadItems = Environment->selectedConfig().payloadItems();
+      auto PayloadItems = Environment->config().settings().instructionGroupItems();
 
       firestarter::optimizer::History::save(Cfg.OptimizeOutfile, StartTime, PayloadItems, Cfg.Argc, Cfg.Argv);
 
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index a3e520e0..8ab145f9 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -79,12 +79,7 @@ void Firestarter::initLoadWorkers() {
       Td->setErrorCommunication(ErrorCommunication[I], ErrorCommunication[(I + 1) % NumThreads]);
     }
 
-    auto DataCacheSizeIt = Td->config().platformConfig().dataCacheBufferSize().begin();
-    auto RamBufferSize = Td->config().platformConfig().ramBufferSize();
-
-    Td->BuffersizeMem =
-        (*DataCacheSizeIt + *std::next(DataCacheSizeIt, 1) + *std::next(DataCacheSizeIt, 2) + RamBufferSize) /
-        Td->config().thread() / sizeof(uint64_t);
+    Td->BuffersizeMem = Td->config().settings().totalBufferSizePerThread() / sizeof(uint64_t);
 
     // create the thread
     std::thread T(Firestarter::loadThreadWorker, Td);
@@ -272,10 +267,8 @@ void Firestarter::loadThreadWorker(const std::shared_ptr<LoadWorkerData>& Td) {
       Td->environment().setCpuAffinity(Td->id());
 
       // compile payload
-      Td->CompiledPayloadPtr = Td->config().payload().compilePayload(
-          Td->config().payloadSettings(), Td->config().instructionCacheSize(), Td->config().dataCacheBufferSize(),
-          Td->config().ramBufferSize(), Td->config().thread(), Td->config().lines(), Td->DumpRegisters,
-          Td->ErrorDetection);
+      Td->CompiledPayloadPtr =
+          Td->config().payload()->compilePayload(Td->config().settings(), Td->DumpRegisters, Td->ErrorDetection);
 
       // allocate memory
       // if we should dump some registers, we use the first part of the memory
@@ -361,10 +354,8 @@ void Firestarter::loadThreadWorker(const std::shared_ptr<LoadWorkerData>& Td) {
       break;
     case LoadThreadState::ThreadSwitch:
       // compile payload
-      Td->CompiledPayloadPtr = Td->config().payload().compilePayload(
-          Td->config().payloadSettings(), Td->config().instructionCacheSize(), Td->config().dataCacheBufferSize(),
-          Td->config().ramBufferSize(), Td->config().thread(), Td->config().lines(), Td->DumpRegisters,
-          Td->ErrorDetection);
+      Td->CompiledPayloadPtr =
+          Td->config().payload()->compilePayload(Td->config().settings(), Td->DumpRegisters, Td->ErrorDetection);
 
       // call init function
       Td->CompiledPayloadPtr->init(Td->Memory->getMemoryAddress(), Td->BuffersizeMem);

From af0609718f0e3810b2d6d244d55ec3133b0695df Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 29 Oct 2024 09:52:20 +0100
Subject: [PATCH 125/167] clang-tidy fixes

---
 .../Environment/X86/Payload/CompiledX86Payload.hpp         | 1 +
 src/firestarter/Optimizer/Util/MultiObjective.cpp          | 7 +++----
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
index b6e2fee1..c3f89c9c 100644
--- a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
@@ -30,6 +30,7 @@ namespace firestarter::environment::x86::payload {
 
 class CompiledX86Payload final : public environment::payload::CompiledPayload {
 private:
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
   inline static asmjit::JitRuntime Runtime = asmjit::JitRuntime();
 
   static void deleter(CompiledX86Payload* Payload) {
diff --git a/src/firestarter/Optimizer/Util/MultiObjective.cpp b/src/firestarter/Optimizer/Util/MultiObjective.cpp
index 95554dd9..de7da71d 100644
--- a/src/firestarter/Optimizer/Util/MultiObjective.cpp
+++ b/src/firestarter/Optimizer/Util/MultiObjective.cpp
@@ -53,10 +53,9 @@ auto greaterThanF(double A, double B) -> bool {
     }
     return false; // a > nan
   }
-  if (!std::isnan(B)) {
-    return true; // nan > b
-  }
-  return false; // nan > nan
+  // nan > b -> true
+  // nan > nan -> false
+  return !std::isnan(B);
 }
 
 /// Pareto-dominance

From a6d9c3f630112f05820205bf5dd3d61c34510054 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 29 Oct 2024 10:01:15 +0100
Subject: [PATCH 126/167] cleanup clang-tidy script

---
 tooling/clang-tidy.py | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/tooling/clang-tidy.py b/tooling/clang-tidy.py
index 1e8bded1..b8ce7b5c 100755
--- a/tooling/clang-tidy.py
+++ b/tooling/clang-tidy.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-import glob
 import json
 from pathlib import Path
 import subprocess
@@ -12,7 +11,7 @@
 import random
 from functools import partial
 
-# Find all source files from the compile commands database that are in a specific directory
+# Find all source files from the compile commands database that are in a specific directory.
 def find_source_files_from_compile_commands(compile_commands_path: Path, sources_dir: Path) -> typing.List[Path]:
     with open(compile_commands_path, 'r') as fp:
         compile_commands = json.loads(fp.read())
@@ -20,21 +19,6 @@ def find_source_files_from_compile_commands(compile_commands_path: Path, sources
         sources = list(filter(lambda file: str(file).startswith(str(sources_dir)), sources))
         return sources
 
-# Find all source and header files in the project root that belong to FIRESTARTER
-def find_source_and_header_files(project_root: Path, build_root: Path) -> typing.List[Path]:
-    src_path = project_root / Path('src')
-    include_path = project_root / Path('include')
-
-    # find all cpp file from the compile commands database
-    compile_commands_path = build_root / Path('compile_commands.json')
-    files = find_source_files_from_compile_commands(compile_commands_path, src_path)
-
-    # find all headers based on glob
-    files += glob.glob(f'{include_path}/**/*.hpp', recursive=True)
-    files += glob.glob(f'{include_path}/**/*.h', recursive=True)
-
-    return files
-
 # Split a list of paths into multiple list of paths
 def split_in_chunks(chunk_size: int, input: typing.List[Path]) -> typing.List[typing.List[Path]]:
     length = len(input) // chunk_size
@@ -45,7 +29,7 @@ def split_in_chunks(chunk_size: int, input: typing.List[Path]) -> typing.List[ty
 
 # Run clang-tidy on a set of input files and return the stdout
 def run_clang_tidy(files: typing.List[Path], project_root_path: Path, build_root_path: Path, clang_tidy_file_path: Path) -> bytes:
-    command_args = ['clang-tidy', '-extra-arg=-std=c++17', f'-p={build_root_path}', f'--config-file={clang_tidy_file_path}', '--header-filter=include/firestarter/*', '--format-style=file']
+    command_args = ['clang-tidy', '-extra-arg=-std=c++17', f'-p={build_root_path}', f'--config-file={clang_tidy_file_path}', '--format-style=file']
     command_args += files
     print(f'Starting {command_args}')
     p = subprocess.Popen(command_args, stdout=subprocess.PIPE, cwd=project_root_path)
@@ -88,6 +72,7 @@ def check(build_root):
 def clang_tidy_report(project_root, build_root, cores):
     project_root_path = Path(project_root).absolute()
     build_root_path = Path(build_root).absolute()
+    src_path = project_root_path / Path('src')
 
     print(f'Looking for compile_commands.json in {build_root_path}')
     compile_commands_path = build_root_path / Path('compile_commands.json')
@@ -103,7 +88,7 @@ def clang_tidy_report(project_root, build_root, cores):
     else:
         sys.exit("Dind't find .clang-tidy. Aborting.")
 
-    files = find_source_and_header_files(project_root_path, build_root_path)
+    files = find_source_files_from_compile_commands(compile_commands_path, src_path)
     print(f'Found {len(files)} source and header files.')
     
     print(f'Lanching {cores} instances of clang-tidy in project root: {project_root_path}')
@@ -112,6 +97,7 @@ def clang_tidy_report(project_root, build_root, cores):
     files_shuffled = files.copy()
     random.Random(123).shuffle(files_shuffled)
 
+    # Spawn multiple Python worker processes that each start their own instance of clang-tidy. Launching all clang-tidy processes from the same Python thread caused problems with GitHub Actions.
     with multiprocessing.Pool(cores) as p:
         stdout = p.map(partial(run_clang_tidy, project_root_path=project_root_path, build_root_path=build_root_path, clang_tidy_file_path=clang_tidy_file_path), split_in_chunks(cores, files_shuffled))
 

From f8af51439c0b361dbabc56f112a3a73993f77ef0 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 29 Oct 2024 13:12:31 +0100
Subject: [PATCH 127/167] make Config and Topology private in Environment class

---
 .../firestarter/Environment/Environment.hpp   | 18 +++++---
 .../Environment/Payload/Payload.hpp           |  2 +-
 .../Environment/Platform/PlatformConfig.hpp   | 30 ++++++++-----
 .../Environment/X86/Payload/X86Payload.hpp    | 42 +++++++++----------
 .../X86/Platform/X86PlatformConfig.hpp        |  9 ++++
 .../Environment/X86/X86Environment.hpp        | 18 ++++++--
 .../Environment/X86/X86Environment.cpp        | 13 +++---
 7 files changed, 84 insertions(+), 48 deletions(-)

diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index 17abac8a..72f1a583 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -48,23 +48,31 @@ class Environment {
   virtual void printSelectedCodePathSummary() = 0;
   virtual void printFunctionSummary() = 0;
 
-  [[nodiscard]] auto config() const -> platform::PlatformConfig& {
+  [[nodiscard]] auto requestedNumThreads() const -> uint64_t { return RequestedNumThreads; }
+
+  [[nodiscard]] virtual auto config() -> platform::PlatformConfig& {
     assert(Config && "No PlatformConfig selected");
     return *Config;
   }
 
-  [[nodiscard]] auto requestedNumThreads() const -> uint64_t { return RequestedNumThreads; }
+  [[nodiscard]] virtual auto config() const -> const platform::PlatformConfig& {
+    assert(Config && "No PlatformConfig selected");
+    return *Config;
+  }
 
-  [[nodiscard]] auto topology() const -> CPUTopology const& {
-    assert(Topology != nullptr && "Topology is a nullptr");
+  [[nodiscard]] virtual auto topology() const -> const CPUTopology& {
+    assert(Topology && "Topology is a nullptr");
     return *Topology;
   }
 
 protected:
+  /// Set the platform config of this environment, taking ownership of the provided config.
+  void setConfig(std::unique_ptr<platform::PlatformConfig>&& Config) { this->Config = std::move(Config); }
+
+private:
   std::unique_ptr<platform::PlatformConfig> Config;
   std::unique_ptr<CPUTopology> Topology;
 
-private:
   uint64_t RequestedNumThreads = 0;
 
   // TODO(Issue #74): Use hwloc for cpu thread affinity.
diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index eede4002..01982a85 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -135,7 +135,7 @@ class Payload {
   /// The number of SIMD registers used by the payload
   [[nodiscard]] auto registerCount() const -> unsigned { return RegisterCount; }
 
-  [[nodiscard]] virtual auto isAvailable(const CPUTopology*) const -> bool = 0;
+  [[nodiscard]] virtual auto isAvailable(const CPUTopology&) const -> bool = 0;
 
   [[nodiscard]] virtual auto compilePayload(const PayloadSettings& Settings, bool DumpRegisters,
                                             bool ErrorDetection) const -> CompiledPayload::UniquePtr = 0;
diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp
index 58afe996..a9a1e9ac 100644
--- a/include/firestarter/Environment/Platform/PlatformConfig.hpp
+++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp
@@ -35,16 +35,6 @@ class PlatformConfig {
   std::shared_ptr<const payload::Payload> Payload;
 
 public:
-  PlatformConfig() = delete;
-
-  PlatformConfig(std::string Name, payload::PayloadSettings&& Settings,
-                 std::shared_ptr<const payload::Payload>&& Payload) noexcept
-      : Name(std::move(Name))
-      , Settings(std::move(Settings))
-      , Payload(std::move(Payload)) {}
-
-  virtual ~PlatformConfig() = default;
-
   /// Getter for the name of the platform.
   [[nodiscard]] auto name() const -> const auto& { return Name; }
   /// Getter for the settings of the platform.
@@ -54,10 +44,28 @@ class PlatformConfig {
   /// Getter for the payload of the platform.
   [[nodiscard]] auto payload() const -> const auto& { return Payload; }
 
-  [[nodiscard]] auto isAvailable(const CPUTopology* Topology) const -> bool { return payload()->isAvailable(Topology); }
+  [[nodiscard]] auto isAvailable(const CPUTopology& Topology) const -> bool { return isAvailable(&Topology); }
+
+  [[nodiscard]] auto isDefault(const CPUTopology& Topology) const -> bool { return isDefault(&Topology); }
+
+protected:
+  [[nodiscard]] virtual auto isAvailable(const CPUTopology* Topology) const -> bool {
+    return payload()->isAvailable(*Topology);
+  }
 
   [[nodiscard]] virtual auto isDefault(const CPUTopology*) const -> bool = 0;
 
+public:
+  PlatformConfig() = delete;
+
+  PlatformConfig(std::string Name, payload::PayloadSettings&& Settings,
+                 std::shared_ptr<const payload::Payload>&& Payload) noexcept
+      : Name(std::move(Name))
+      , Settings(std::move(Settings))
+      , Payload(std::move(Payload)) {}
+
+  virtual ~PlatformConfig() = default;
+
   /// Clone a the platform config.
   [[nodiscard]] virtual auto clone() const -> std::unique_ptr<PlatformConfig> = 0;
 
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index 6b045d43..ef8fab69 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -43,6 +43,26 @@ class X86Payload : public environment::payload::Payload {
   // we can use this to check, if our platform support this payload
   std::list<asmjit::CpuFeatures::X86::Id> FeatureRequests;
 
+public:
+  X86Payload(std::initializer_list<asmjit::CpuFeatures::X86::Id> FeatureRequests, std::string Name,
+             unsigned RegisterSize, unsigned RegisterCount) noexcept
+      : Payload(std::move(Name), RegisterSize, RegisterCount)
+      , FeatureRequests(FeatureRequests) {}
+
+private:
+  [[nodiscard]] auto isAvailable(const CPUTopology& Topology) const -> bool final {
+    const auto* FinalTopology = dynamic_cast<const X86CPUTopology*>(&Topology);
+    assert(FinalTopology && "isAvailable not called with const X86CPUTopology*");
+
+    bool Available = true;
+
+    for (auto const& Feature : FeatureRequests) {
+      Available &= FinalTopology->featuresAsmjit().has(Feature);
+    }
+
+    return Available;
+  };
+
 protected:
   /// Emit the code to dump the xmm, ymm or zmm registers into memory for the dump registers feature.
   /// \arg Vec the type of the vector register used.
@@ -465,28 +485,6 @@ class X86Payload : public environment::payload::Payload {
 
   // use cpuid and usleep as low load
   void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) const final;
-
-public:
-  X86Payload(std::initializer_list<asmjit::CpuFeatures::X86::Id> FeatureRequests, std::string Name,
-             unsigned RegisterSize, unsigned RegisterCount) noexcept
-      : Payload(std::move(Name), RegisterSize, RegisterCount)
-      , FeatureRequests(FeatureRequests) {}
-
-  [[nodiscard]] auto isAvailable(const X86CPUTopology& Topology) const -> bool { return isAvailable(&Topology); }
-
-private:
-  [[nodiscard]] auto isAvailable(const CPUTopology* Topology) const -> bool final {
-    const auto* FinalTopology = dynamic_cast<const X86CPUTopology*>(Topology);
-    assert(FinalTopology && "isAvailable not called with const X86CPUTopology*");
-
-    bool Available = true;
-
-    for (auto const& Feature : FeatureRequests) {
-      Available &= FinalTopology->featuresAsmjit().has(Feature);
-    }
-
-    return Available;
-  };
 };
 
 } // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
index bdef9c39..42a63f22 100644
--- a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include "../../Platform/PlatformConfig.hpp"
+#include "firestarter/Environment/CPUTopology.hpp"
 #include "firestarter/Environment/X86/X86CPUTopology.hpp"
 
 namespace firestarter::environment::x86::platform {
@@ -39,6 +40,8 @@ class X86PlatformConfig : public environment::platform::PlatformConfig {
       , Family(Family)
       , Models(std::move(Models)) {}
 
+  [[nodiscard]] auto isAvailable(const X86CPUTopology& Topology) const -> bool { return isAvailable(&Topology); }
+
   [[nodiscard]] auto isDefault(const X86CPUTopology& Topology) const -> bool { return isDefault(&Topology); }
 
   /// Clone a the platform config.
@@ -60,10 +63,16 @@ class X86PlatformConfig : public environment::platform::PlatformConfig {
   }
 
 private:
+  [[nodiscard]] auto isAvailable(const CPUTopology* Topology) const -> bool final {
+    return environment::platform::PlatformConfig::isAvailable(Topology);
+  }
+
   [[nodiscard]] auto isDefault(const CPUTopology* Topology) const -> bool final {
     const auto* FinalTopology = dynamic_cast<const X86CPUTopology*>(Topology);
     assert(FinalTopology && "isDefault not called with const X86CPUTopology*");
 
+    // Check if the family of the topology matches the family of the config, if the model of the topology is contained
+    // in the models list of the config and if the config is available on the current platform.
     return Family == FinalTopology->familyId() &&
            (std::find(Models.begin(), Models.end(), FinalTopology->modelId()) != Models.end()) && isAvailable(Topology);
   }
diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp
index f2df4e3a..395b1cf7 100644
--- a/include/firestarter/Environment/X86/X86Environment.hpp
+++ b/include/firestarter/Environment/X86/X86Environment.hpp
@@ -43,9 +43,21 @@ class X86Environment final : public Environment {
   X86Environment()
       : Environment(std::make_unique<X86CPUTopology>()) {}
 
-  [[nodiscard]] auto topology() const -> X86CPUTopology const& {
-    const auto* X86Topology = dynamic_cast<X86CPUTopology*>(Topology.get());
-    assert(X86Topology != nullptr && "X86Topology is a nullptr");
+  [[nodiscard]] auto config() -> platform::X86PlatformConfig& final {
+    auto* X86PlatformConfig = dynamic_cast<platform::X86PlatformConfig*>(&Environment::config());
+    assert(X86PlatformConfig && "X86PlatformConfig is a nullptr");
+    return *X86PlatformConfig;
+  }
+
+  [[nodiscard]] auto config() const -> const platform::X86PlatformConfig& final {
+    const auto* X86PlatformConfig = dynamic_cast<const platform::X86PlatformConfig*>(&Environment::config());
+    assert(X86PlatformConfig && "X86PlatformConfig is a nullptr");
+    return *X86PlatformConfig;
+  }
+
+  [[nodiscard]] auto topology() const -> const X86CPUTopology& final {
+    const auto* X86Topology = dynamic_cast<const X86CPUTopology*>(&Environment::topology());
+    assert(X86Topology && "X86Topology is a nullptr");
     return *X86Topology;
   }
 
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index 1438cfb6..13123476 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -37,7 +37,7 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
     for (auto const& ThreadsPerCore : PlatformConfigPtr->settings().threads()) {
       // the selected function
       if (Id == FunctionId) {
-        if (!PlatformConfigPtr->isAvailable(Topology.get())) {
+        if (!PlatformConfigPtr->isAvailable(topology())) {
           const auto ErrorString = "Function " + std::to_string(FunctionId) + " (\"" +
                                    PlatformConfigPtr->functionName(ThreadsPerCore) + "\") requires " +
                                    PlatformConfigPtr->payload()->name() + ", which is not supported by the processor.";
@@ -48,13 +48,13 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
           }
         }
         // found function
-        Config = PlatformConfigPtr->cloneConcreate(topology().instructionCacheSize(), ThreadsPerCore);
+        setConfig(PlatformConfigPtr->cloneConcreate(topology().instructionCacheSize(), ThreadsPerCore));
         return;
       }
       // default function
       if (0 == FunctionId && PlatformConfigPtr->isDefault(topology())) {
         if (ThreadsPerCore == topology().numThreadsPerCore()) {
-          Config = PlatformConfigPtr->cloneConcreate(topology().instructionCacheSize(), ThreadsPerCore);
+          setConfig(PlatformConfigPtr->cloneConcreate(topology().instructionCacheSize(), ThreadsPerCore));
           return;
         }
         DefaultPayloadName = PlatformConfigPtr->payload()->name();
@@ -79,7 +79,7 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
     // loop over available implementation and check if they are marked as
     // fallback
     for (const auto& FallbackPlatformConfigPtr : FallbackPlatformConfigs) {
-      if (FallbackPlatformConfigPtr->isAvailable(Topology.get())) {
+      if (FallbackPlatformConfigPtr->isAvailable(topology())) {
         std::optional<unsigned> SelectedThreadsPerCore;
         // find the fallback implementation with the correct thread per core count
         for (auto const& ThreadsPerCore : FallbackPlatformConfigPtr->settings().threads()) {
@@ -91,7 +91,8 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
         if (!SelectedThreadsPerCore) {
           SelectedThreadsPerCore = FallbackPlatformConfigPtr->settings().threads().front();
         }
-        Config = FallbackPlatformConfigPtr->cloneConcreate(topology().instructionCacheSize(), *SelectedThreadsPerCore);
+        setConfig(
+            FallbackPlatformConfigPtr->cloneConcreate(topology().instructionCacheSize(), *SelectedThreadsPerCore));
         log::warn() << "Using function " << FallbackPlatformConfigPtr->functionName(*SelectedThreadsPerCore)
                     << " as fallback.\n"
                     << "You can use the parameter --function to try other "
@@ -180,7 +181,7 @@ void X86Environment::printFunctionSummary() {
 
   for (auto const& Config : PlatformConfigs) {
     for (auto const& ThreadsPerCore : Config->settings().threads()) {
-      const char* Available = Config->isAvailable(Topology.get()) ? "yes" : "no";
+      const char* Available = Config->isAvailable(topology()) ? "yes" : "no";
       const char* Fmt = "  %4u | %-30s | %-24s | %s";
       const auto& FunctionName = Config->functionName(ThreadsPerCore);
       const auto& InstructionGroupsString = Config->settings().getInstructionGroupsString();

From 738933f92267f9332e644b8151c31d90c5d33040 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Wed, 30 Oct 2024 11:29:01 +0100
Subject: [PATCH 128/167] clang-tidy: fix warnings

---
 .../Logging/FirstWorkerThreadFilter.hpp       |  1 +
 include/firestarter/Optimizer/History.hpp     | 44 ++++++++++++-------
 2 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
index 2a1a51f1..1da12b39 100644
--- a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
+++ b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
@@ -37,6 +37,7 @@ template <typename Record> class FirstWorkerThreadFilter {
   }
 
 private:
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
   inline static std::thread::id FirstThread{};
 };
 } // namespace firestarter::logging
diff --git a/include/firestarter/Optimizer/History.hpp b/include/firestarter/Optimizer/History.hpp
index 0e694bb2..55c8d05c 100644
--- a/include/firestarter/Optimizer/History.hpp
+++ b/include/firestarter/Optimizer/History.hpp
@@ -32,6 +32,7 @@
 #include <ctime>
 #include <fstream>
 #include <iomanip>
+#include <memory>
 #include <nlohmann/json.hpp>
 #include <optional>
 #include <vector>
@@ -55,11 +56,13 @@ struct History {
     }
   }
 
-  inline static int MaxElementPrintCount = 20;
-  inline static std::size_t MinColumnWidth = 10;
+  static constexpr const int MaxElementPrintCount = 20;
+  static constexpr const std::size_t MinColumnWidth = 10;
 
+  // NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
   inline static std::vector<Individual> X = {};
   inline static std::vector<std::map<std::string, firestarter::measurement::Summary>> F = {};
+  // NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
 
 public:
   static void append(std::vector<unsigned> const& Ind,
@@ -149,7 +152,7 @@ struct History {
 
       std::stringstream FirstLine;
       std::stringstream SecondLine;
-      std::string Ind = "INDIVIDUAL";
+      std::string const Ind = "INDIVIDUAL";
 
       FirstLine << "  " << Ind;
       padding(FirstLine, Max, Ind.size(), ' ');
@@ -215,6 +218,13 @@ struct History {
                                 "`--run-instruction-groups=INDIVIDUAL`";
   }
 
+  /// Save the history to a file. This function is not threadsafe as it calls History::getTime.
+  /// \arg Path The folder in which the outfile shall be created. If it is empty the current directory name or /tmp will
+  /// be chosen.
+  /// \arg StartTime The start time as a string which is saved in the json datastructure.
+  /// \arg PayloadItems The Vector of meta instructions which map to the vector of individuals.
+  /// \arg Argc The Argc of the executed program.
+  /// \arg Argv The Argv of the executed program.
   static void save(std::string const& Path, std::string const& StartTime, std::vector<std::string> const& PayloadItems,
                    const int Argc, const char** Argv) {
     using json = nlohmann::json;
@@ -231,13 +241,11 @@ struct History {
       J["metrics"].push_back(Eval);
     }
 
+    auto Hostname = std::string(256, 0); // writable buffer of 256 null characters for gethostname
     // get the hostname
-    char CHostname[256];
-    std::string Hostname;
-    if (0 != gethostname(CHostname, sizeof(CHostname))) {
+    if (0 != gethostname(Hostname.data(), Hostname.size())) {
       Hostname = "unknown";
-    } else {
-      Hostname = CHostname;
     }
+    Hostname.resize(std::char_traits<char>::length(Hostname.c_str())); // cut off the unused null padding
 
     J["hostname"] = Hostname;
@@ -254,20 +262,21 @@ struct History {
     // save the arguments
     J["args"] = json::array();
     for (int I = 0; I < Argc; ++I) {
+      // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
       J["args"].push_back(Argv[I]);
     }
 
     // dump the output
-    std::string S = J.dump();
+    const auto S = J.dump();
 
     firestarter::log::trace() << S;
 
     std::string Outpath = Path;
     if (Outpath.empty()) {
-      char* Pwd = get_current_dir_name();
-      if (Pwd) {
-        Outpath = Pwd;
-        free(Pwd);
+      // Wrap get_current_dir_name in a unique_ptr, as its result must be released with free once it is unused.
+      const std::unique_ptr<char, void (*)(void*)> WrappedPwd = {get_current_dir_name(), free};
+      if (WrappedPwd) {
+        Outpath = WrappedPwd.get(); // copy the whole path; *WrappedPwd is only its first character
       } else {
         firestarter::log::warn() << "Could not find $PWD.";
         Outpath = "/tmp";
@@ -289,11 +298,14 @@ struct History {
     Fp.close();
   }
 
+  /// Get the current time in the local timezone as a string formatted by "%F_%T%z". This function is NOT threadsafe.
+  /// \returns The current time in local timezone as a formatted string.
   static auto getTime() -> std::string {
-    auto T = std::time(nullptr);
-    auto Tm = *std::localtime(&T);
+    const auto T = std::time(nullptr);
+    // NOLINTNEXTLINE(concurrency-mt-unsafe)
+    const auto* Tm = std::localtime(&T);
     std::stringstream Ss;
-    Ss << std::put_time(&Tm, "%F_%T%z");
+    Ss << std::put_time(Tm, "%F_%T%z");
     return Ss.str();
   }
 };

From a218aad9fc28975d53fbd78d9d4d88f01885551c Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Wed, 30 Oct 2024 14:47:36 +0100
Subject: [PATCH 129/167] add todo note

---
 include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
index 7b43e5e4..da118321 100644
--- a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
+++ b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
@@ -74,8 +74,8 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem {
     // wait for the measurement to finish
     std::this_thread::sleep_for(Timeout);
 
-    // FIXME: this is an ugly workaround for the ipc-estimate metric
-    // changeing the payload triggers a write of the iteration counter of
+    // TODO(Issue #82): This is an ugly workaround for the ipc-estimate metric.
+    // Changing the payload triggers a write of the iteration counter of
     // the last payload, which we use to estimate the ipc.
     ChangePayloadFunction(Payload);
 

From 7362553c554ac24c770e1edb2d2de91d07528954 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Wed, 30 Oct 2024 18:33:35 +0100
Subject: [PATCH 130/167] clang-tidy: fix warnings

---
 src/firestarter/Environment/Environment.cpp   |  6 +-
 .../Environment/Payload/Payload.cpp           |  3 +-
 src/firestarter/Firestarter.cpp               | 58 +++++++++----------
 3 files changed, 33 insertions(+), 34 deletions(-)

diff --git a/src/firestarter/Environment/Environment.cpp b/src/firestarter/Environment/Environment.cpp
index fec4ed48..548aca58 100644
--- a/src/firestarter/Environment/Environment.cpp
+++ b/src/firestarter/Environment/Environment.cpp
@@ -134,10 +134,10 @@ void Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::s
       std::getline(Ss, Token, Delimiter);
 
       if (std::regex_match(Token, M, Re)) {
-        unsigned long Y = 0;
-        unsigned long S = 0;
+        uint64_t Y = 0;
+        uint64_t S = 0;
 
-        unsigned long X = std::stoul(M[1].str());
+        auto X = std::stoul(M[1].str());
         if (M[2].matched) {
           Y = std::stoul(M[2].str());
         } else {
diff --git a/src/firestarter/Environment/Payload/Payload.cpp b/src/firestarter/Environment/Payload/Payload.cpp
index 9d4242c5..1bd89385 100644
--- a/src/firestarter/Environment/Payload/Payload.cpp
+++ b/src/firestarter/Environment/Payload/Payload.cpp
@@ -64,7 +64,8 @@ auto Payload::generateSequence(std::vector<std::pair<std::string, unsigned>> con
   for (++It; It != Prop.end(); ++It) {
     for (unsigned I = 0; I < It->second; I++) {
       InsertIt = Sequence.begin();
-      std::advance(InsertIt, 1 + std::floor(I * (Sequence.size() + It->second - I) / static_cast<float>(It->second)));
+      std::advance(InsertIt, 1 + std::floor(static_cast<float>(I * (Sequence.size() + It->second - I)) /
+                                            static_cast<float>(It->second)));
       Sequence.insert(InsertIt, It->first);
     }
   }
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index b8553973..f19a3892 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -113,41 +113,39 @@ Firestarter::Firestarter(Config&& ProvidedConfig)
     }
 
     if (Cfg.Optimize) {
-      auto ApplySettings = std::bind(
-          [this](std::vector<std::pair<std::string, unsigned>> const& Setting) {
-            using Clock = std::chrono::high_resolution_clock;
-            auto Start = Clock::now();
+      auto ApplySettings = [this](std::vector<std::pair<std::string, unsigned>> const& Setting) {
+        using Clock = std::chrono::high_resolution_clock;
+        auto Start = Clock::now();
 
-            signalSwitch(Setting);
+        signalSwitch(Setting);
 
-            LoadVar = LoadThreadWorkType::LoadHigh;
+        LoadVar = LoadThreadWorkType::LoadHigh;
 
-            signalWork();
+        signalWork();
 
-            uint64_t StartTimestamp = (std::numeric_limits<uint64_t>::max)();
-            uint64_t StopTimestamp = 0;
+        uint64_t StartTimestamp = (std::numeric_limits<uint64_t>::max)();
+        uint64_t StopTimestamp = 0;
 
-            for (auto const& Thread : LoadThreads) {
-              auto Td = Thread.second;
+        for (auto const& Thread : LoadThreads) {
+          auto Td = Thread.second;
 
-              StartTimestamp = std::min<uint64_t>(StartTimestamp, Td->LastRun.StartTsc);
-              StopTimestamp = std::max<uint64_t>(StopTimestamp, Td->LastRun.StopTsc);
-            }
+          StartTimestamp = std::min<uint64_t>(StartTimestamp, Td->LastRun.StartTsc);
+          StopTimestamp = std::max<uint64_t>(StopTimestamp, Td->LastRun.StopTsc);
+        }
 
-            for (auto const& Thread : LoadThreads) {
-              auto Td = Thread.second;
-              ipcEstimateMetricInsert(
-                  static_cast<double>(Td->LastRun.Iterations) *
-                  static_cast<double>(LoadThreads.front().second->CompiledPayloadPtr->stats().Instructions) /
-                  static_cast<double>(StopTimestamp - StartTimestamp));
-            }
+        for (auto const& Thread : LoadThreads) {
+          auto Td = Thread.second;
+          ipcEstimateMetricInsert(
+              static_cast<double>(Td->LastRun.Iterations) *
+              static_cast<double>(LoadThreads.front().second->CompiledPayloadPtr->stats().Instructions) /
+              static_cast<double>(StopTimestamp - StartTimestamp));
+        }
 
-            auto End = Clock::now();
+        auto End = Clock::now();
 
-            log::trace() << "Switching payload took "
-                         << std::chrono::duration_cast<std::chrono::milliseconds>(End - Start).count() << "ms";
-          },
-          std::placeholders::_1);
+        log::trace() << "Switching payload took "
+                     << std::chrono::duration_cast<std::chrono::milliseconds>(End - Start).count() << "ms";
+      };
 
       auto Prob = std::make_shared<firestarter::optimizer::problem::CLIArgumentProblem>(
           std::move(ApplySettings), MeasurementWorker, Cfg.OptimizationMetrics, Cfg.EvaluationDuration, Cfg.StartDelta,
@@ -176,11 +174,11 @@ Firestarter::Firestarter(Config&& ProvidedConfig)
 
   // add some signal handler for aborting FIRESTARTER
   if constexpr (!firestarter::OptionalFeatures.IsWin32) {
-    std::signal(SIGALRM, Firestarter::sigalrmHandler);
+    (void)std::signal(SIGALRM, Firestarter::sigalrmHandler);
   }
 
-  std::signal(SIGTERM, Firestarter::sigtermHandler);
-  std::signal(SIGINT, Firestarter::sigtermHandler);
+  (void)std::signal(SIGTERM, Firestarter::sigtermHandler);
+  (void)std::signal(SIGINT, Firestarter::sigtermHandler);
 }
 
 void Firestarter::mainThread() {
@@ -226,7 +224,7 @@ void Firestarter::mainThread() {
       firestarter::optimizer::History::printBest(Cfg.OptimizationMetrics, PayloadItems);
 
       // stop all the load threads
-      std::raise(SIGTERM);
+      (void)std::raise(SIGTERM);
     }
   }
 

From 6ad9537a9b2845372243af702dcb3ab14f4f5ea2 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 31 Oct 2024 11:43:50 +0100
Subject: [PATCH 131/167] clang-tidy fixes

---
 include/firestarter/DumpRegisterStruct.hpp    |   4 +-
 include/firestarter/ErrorDetectionStruct.hpp  |   8 +-
 include/firestarter/Firestarter.hpp           |   2 +-
 include/firestarter/LoadWorkerData.hpp        |   4 +
 .../Measurement/MeasurementWorker.hpp         |   4 +-
 .../Measurement/Metric/IPCEstimate.hpp        |  25 +++-
 .../firestarter/Measurement/Metric/Perf.hpp   |  42 ++++--
 .../firestarter/Measurement/Metric/RAPL.hpp   |  24 +++-
 .../firestarter/Optimizer/Algorithm/NSGA2.hpp |   7 +
 .../firestarter/Optimizer/OptimizerWorker.hpp |   4 +-
 include/firestarter/Optimizer/Population.hpp  |  71 +++++-----
 include/firestarter/Optimizer/Problem.hpp     |  33 +++--
 .../Optimizer/Problem/CLIArgumentProblem.hpp  |   4 +-
 src/firestarter/DumpRegisterWorker.cpp        |   8 +-
 .../Environment/X86/X86Environment.cpp        |   9 +-
 src/firestarter/Firestarter.cpp               |   9 +-
 src/firestarter/LoadWorker.cpp                |  20 +--
 .../Measurement/MeasurementWorker.cpp         |  28 ++--
 .../Measurement/Metric/IPCEstimate.cpp        |  28 ++--
 src/firestarter/Measurement/Metric/Perf.cpp   | 126 +++++++++---------
 src/firestarter/Measurement/Metric/RAPL.cpp   |  22 +--
 src/firestarter/Optimizer/Algorithm/NSGA2.cpp |  13 +-
 src/firestarter/Optimizer/OptimizerWorker.cpp |  11 +-
 src/firestarter/Optimizer/Population.cpp      |  34 ++---
 24 files changed, 298 insertions(+), 242 deletions(-)

diff --git a/include/firestarter/DumpRegisterStruct.hpp b/include/firestarter/DumpRegisterStruct.hpp
index 44fe2244..49213508 100644
--- a/include/firestarter/DumpRegisterStruct.hpp
+++ b/include/firestarter/DumpRegisterStruct.hpp
@@ -42,9 +42,9 @@ constexpr const auto MaxNumberOfDoublesInVectorRegisters = RegisterMaxNum * Regi
 
 // REGISTER_MAX_NUM cachelines
 struct DumpRegisterStruct {
-  std::array<volatile double, MaxNumberOfDoublesInVectorRegisters> RegisterValues;
+  std::array<double, MaxNumberOfDoublesInVectorRegisters> RegisterValues;
   // pad to use a whole cacheline
-  std::array<volatile EightBytesType, 7> Padding;
+  std::array<EightBytesType, 7> Padding;
   volatile DumpVariable DumpVar;
 };
 
diff --git a/include/firestarter/ErrorDetectionStruct.hpp b/include/firestarter/ErrorDetectionStruct.hpp
index 38a696c4..6e5a7626 100644
--- a/include/firestarter/ErrorDetectionStruct.hpp
+++ b/include/firestarter/ErrorDetectionStruct.hpp
@@ -27,14 +27,14 @@ namespace firestarter {
 struct ErrorDetectionStruct {
   struct OneSide {
     // the pointer to 16B of communication
-    volatile uint64_t* Communication;
+    uint64_t* Communication;
     // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
-    volatile uint64_t Locals[4];
+    uint64_t Locals[4];
     // if this variable is not 0, an error occured in the comparison with the
     // left thread.
-    volatile uint64_t Error;
+    uint64_t Error;
     // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
-    volatile uint64_t Padding[2];
+    uint64_t Padding[2];
   };
 
   // we have two cache lines (64B) containing each two 16B local variable and
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 2f384fb9..7d55cda8 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -70,7 +70,7 @@ class Firestarter {
   std::vector<std::pair<std::thread, std::shared_ptr<LoadWorkerData>>> LoadThreads;
   std::vector<std::shared_ptr<uint64_t>> ErrorCommunication;
 
-  firestarter::optimizer::Population Population;
+  std::unique_ptr<firestarter::optimizer::Population> Population;
 
   inline static std::unique_ptr<optimizer::OptimizerWorker> Optimizer;
 
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index 99f0f475..36061390 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -41,6 +41,10 @@ class LoadWorkerData {
     std::atomic<uint64_t> StopTsc{};
 
     auto operator=(const Metrics& Other) -> Metrics& {
+      if (this == &Other) {
+        return *this;
+      }
+
       Iterations.store(Other.Iterations.load());
       StartTsc.store(Other.StartTsc.load());
       StopTsc.store(Other.StopTsc.load());
diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp
index a6f6e0f1..eed145ae 100644
--- a/include/firestarter/Measurement/MeasurementWorker.hpp
+++ b/include/firestarter/Measurement/MeasurementWorker.hpp
@@ -46,9 +46,9 @@ class MeasurementWorker {
   std::mutex ValuesMutex;
   std::map<std::string, std::vector<TimeValue>> Values;
 
-  static auto dataAcquisitionWorker(void* MeasurementWorker) -> int*;
+  static auto dataAcquisitionWorker(void* MeasurementWorker) -> void*;
 
-  static auto stdinDataAcquisitionWorker(void* MeasurementWorker) -> int*;
+  static auto stdinDataAcquisitionWorker(void* MeasurementWorker) -> void*;
 
   auto findMetricByName(std::string MetricName) -> const MetricInterface*;
 
diff --git a/include/firestarter/Measurement/Metric/IPCEstimate.hpp b/include/firestarter/Measurement/Metric/IPCEstimate.hpp
index a65263d9..2b3a9c0e 100644
--- a/include/firestarter/Measurement/Metric/IPCEstimate.hpp
+++ b/include/firestarter/Measurement/Metric/IPCEstimate.hpp
@@ -25,13 +25,28 @@
 #include <string>
 
 struct IpcEstimateMetricData {
-  inline static std::string ErrorString;
-  inline static void (*Callback)(void*, const char*, int64_t, double);
-  inline static void* CallbackArg;
+private:
+  IpcEstimateMetricData() = default;
+
+  std::string ErrorString;
+  void (*Callback)(void*, const char*, int64_t, double){};
+  void* CallbackArg{};
+
+public:
+  IpcEstimateMetricData(IpcEstimateMetricData const&) = delete;
+  void operator=(IpcEstimateMetricData const&) = delete;
+
+  static auto instance() -> IpcEstimateMetricData& {
+    static IpcEstimateMetricData Instance;
+    return Instance;
+  }
+
   static auto fini() -> int32_t;
   static auto init() -> int32_t;
   static auto getError() -> const char*;
   static auto registerInsertCallback(void (*C)(void*, const char*, int64_t, double), void* Arg) -> int32_t;
+
+  static void insertValue(double Value);
 };
 
 static constexpr const MetricInterface IpcEstimateMetric{
@@ -47,6 +62,4 @@ static constexpr const MetricInterface IpcEstimateMetric{
     /*GetReading=*/nullptr,
     /*GetError=*/IpcEstimateMetricData::getError,
     /*RegisterInsertCallback=*/IpcEstimateMetricData::registerInsertCallback,
-};
-
-void ipcEstimateMetricInsert(double Value);
\ No newline at end of file
+};
\ No newline at end of file
diff --git a/include/firestarter/Measurement/Metric/Perf.hpp b/include/firestarter/Measurement/Metric/Perf.hpp
index 9d65b94e..681689c7 100644
--- a/include/firestarter/Measurement/Metric/Perf.hpp
+++ b/include/firestarter/Measurement/Metric/Perf.hpp
@@ -22,27 +22,43 @@
 #pragma once
 
 #include "../MetricInterface.h"
+#include <array>
 #include <string>
 
-struct PerfMetricData {
-  inline static const char* const PerfEventParanoidFile = "/proc/sys/kernel/perf_event_paranoid";
+class PerfMetricData {
+private:
+  PerfMetricData() = default;
+
+  static const constexpr char* PerfEventParanoidFile = "/proc/sys/kernel/perf_event_paranoid";
 
   struct ReadFormat {
-    uint64_t Nr;
-    struct {
+    struct ValueAndId {
       uint64_t Value;
       uint64_t Id;
-    } Values[2];
+    };
+
+    uint64_t Nr;
+    std::array<ValueAndId, 2> Values;
   };
 
-  inline static std::string ErrorString;
-  inline static int CpuCyclesFd = -1;
-  inline static int InstructionsFd = -1;
-  inline static uint64_t CpuCyclesId;
-  inline static uint64_t InstructionsId;
-  inline static bool InitDone = false;
-  inline static int32_t InitValue;
-  inline static struct ReadFormat Last;
+  std::string ErrorString;
+  int CpuCyclesFd = -1;
+  int InstructionsFd = -1;
+  uint64_t CpuCyclesId{};
+  uint64_t InstructionsId{};
+  bool InitDone = false;
+  int32_t InitValue{};
+  struct ReadFormat Last {};
+
+public:
+  PerfMetricData(PerfMetricData const&) = delete;
+  void operator=(PerfMetricData const&) = delete;
+
+  static auto instance() -> PerfMetricData& {
+    static PerfMetricData Instance;
+    return Instance;
+  }
+
   static auto fini() -> int32_t;
   static auto init() -> int32_t;
   static auto valueFromId(struct ReadFormat* Reader, uint64_t Id) -> uint64_t;
diff --git a/include/firestarter/Measurement/Metric/RAPL.hpp b/include/firestarter/Measurement/Metric/RAPL.hpp
index e16a731b..37e0bbb4 100644
--- a/include/firestarter/Measurement/Metric/RAPL.hpp
+++ b/include/firestarter/Measurement/Metric/RAPL.hpp
@@ -26,11 +26,7 @@
 #include <string>
 #include <vector>
 
-struct RaplMetricData {
-  inline static const char* const RaplPath = "/sys/class/powercap";
-
-  inline static std::string ErrorString;
-
+class RaplMetricData {
   struct ReaderDef {
     ReaderDef() = delete;
 
@@ -46,7 +42,23 @@ struct RaplMetricData {
     int64_t Max;
   };
 
-  inline static std::vector<std::unique_ptr<ReaderDef>> Readers;
+private:
+  static constexpr const char* RaplPath = "/sys/class/powercap";
+
+  std::string ErrorString;
+
+  std::vector<std::unique_ptr<ReaderDef>> Readers;
+
+  RaplMetricData() = default;
+
+public:
+  RaplMetricData(RaplMetricData const&) = delete;
+  void operator=(RaplMetricData const&) = delete;
+
+  static auto instance() -> RaplMetricData& {
+    static RaplMetricData Instance;
+    return Instance;
+  }
 
   static auto fini() -> int32_t;
   static auto init() -> int32_t;
diff --git a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
index acaa441f..478aa116 100644
--- a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
+++ b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
@@ -35,9 +35,16 @@ class NSGA2 : public Algorithm {
   auto evolve(firestarter::optimizer::Population& Pop) -> firestarter::optimizer::Population override;
 
 private:
+  // NOLINTBEGIN(cppcoreguidelines-avoid-const-or-ref-data-members)
+
+  /// The number of generations of the NSGA2 algorithm.
   const unsigned Gen;
+  /// The crossover probability in the range [0,1[.
   const double Cr;
+  /// The mutation probability in the range [0,1].
   const double M;
+
+  // NOLINTEND(cppcoreguidelines-avoid-const-or-ref-data-members)
 };
 
 } // namespace firestarter::optimizer::algorithm
diff --git a/include/firestarter/Optimizer/OptimizerWorker.hpp b/include/firestarter/Optimizer/OptimizerWorker.hpp
index 2ac242cb..aeae137e 100644
--- a/include/firestarter/Optimizer/OptimizerWorker.hpp
+++ b/include/firestarter/Optimizer/OptimizerWorker.hpp
@@ -30,7 +30,7 @@ namespace firestarter::optimizer {
 class OptimizerWorker {
 public:
   OptimizerWorker(std::unique_ptr<firestarter::optimizer::Algorithm>&& Algorithm,
-                  firestarter::optimizer::Population& Population, std::string OptimizationAlgorithm,
+                  std::unique_ptr<firestarter::optimizer::Population>&& Population, std::string OptimizationAlgorithm,
                   unsigned Individuals, std::chrono::seconds const& Preheat);
 
   ~OptimizerWorker() = default;
@@ -43,7 +43,7 @@ class OptimizerWorker {
   static auto optimizerThread(void* OptimizerWorker) -> void*;
 
   std::unique_ptr<firestarter::optimizer::Algorithm> Algorithm;
-  firestarter::optimizer::Population Population;
+  std::unique_ptr<firestarter::optimizer::Population> Population;
   std::string OptimizationAlgorithm;
   unsigned Individuals;
   std::chrono::seconds Preheat;
diff --git a/include/firestarter/Optimizer/Population.hpp b/include/firestarter/Optimizer/Population.hpp
index 2d904467..61e3e9bd 100644
--- a/include/firestarter/Optimizer/Population.hpp
+++ b/include/firestarter/Optimizer/Population.hpp
@@ -19,80 +19,73 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#ifndef FIRESTARTER_OPTIMIZER_POPULATION_HPP
-#define FIRESTARTER_OPTIMIZER_POPULATION_HPP
+#pragma once
 
 #include "Individual.hpp"
 #include "Problem.hpp"
 #include <cstring>
 #include <memory>
-#include <optional>
-#include <random>
 #include <vector>
 
 namespace firestarter::optimizer {
 
+/// This class models the notion of a population used by the NSGA2 algorithm that contains a number of individuals with
+/// their associated fitness.
 class Population {
 public:
-  // Construct a population from a problem.
-  Population() = default;
+  Population() = delete;
 
+  /// Construct a population from a problem.
   explicit Population(std::shared_ptr<Problem>&& ProblemPtr)
-      : ProblemPtr(std::move(ProblemPtr))
-      , Gen(Rd()) {}
-
-  Population(Population& Pop)
-      : ProblemPtr(Pop.ProblemPtr)
-      , X(Pop.X)
-      , F(Pop.F)
-      , Gen(Rd()) {}
-
-  auto operator=(Population const& Pop) -> Population& {
-    ProblemPtr = Pop.ProblemPtr;
-    X = Pop.X;
-    F = Pop.F;
-    Gen = Pop.Gen;
-
-    return *this;
-  }
+      : ProblemPtr(std::move(ProblemPtr)) {}
 
   ~Population() = default;
 
-  void generateInitialPopulation(std::size_t PopulationSize = 0);
+  /// Generate a supplied number of individuals and save them with their fitness in this data structure. If the number
+  /// is less than the number of dimensions we fill them with random individuals. If it is at least the number of
+  /// dimensions, we first create individuals with one dimension equal to one and the rest equal to zero.
+  /// \arg PopulationSize The number of individuals to generate.
+  void generateInitialPopulation(std::size_t PopulationSize);
 
+  /// The number of individuals in this population.
   [[nodiscard]] auto size() const -> std::size_t;
 
-  // add one individual to the population. fitness will be evaluated.
+  /// Append one individual to the population. If a lookup of the fitness in the history is not successful, the
+  /// individual will be evaluated and the fitness saved.
+  /// \arg Ind The individual to be added to the population.
   void append(Individual const& Ind);
 
+  /// Insert an individual and an associated fitness at a specific index in the population.
+  /// \arg Idx On which index to insert in the population.
+  /// \arg Ind The individual to insert.
+  /// \arg Fit The fitness to insert.
   void insert(std::size_t Idx, Individual const& Ind, std::vector<double> const& Fit);
 
-  // get a random individual inside bounds of problem
-  auto getRandomIndividual() -> Individual;
-
-  // returns the best individual in case of single-objective.
-  // return nothing in case of mutli-objective.
-  [[nodiscard]] auto bestIndividual() const -> std::optional<Individual>;
+  /// Generate a random individual inside the bounds of the problem based on a non-deterministic generator.
+  /// \returns The random individual inside the bounds of the problem.
+  [[nodiscard]] auto getRandomIndividual() const -> Individual;
 
+  /// Const reference to the optimization problem.
   [[nodiscard]] auto problem() const -> Problem const& { return *ProblemPtr; }
 
+  /// Const reference to the vector of individuals.
   [[nodiscard]] auto x() const -> std::vector<Individual> const& { return X; }
+  /// Const reference to the vector of fitnesses.
   [[nodiscard]] auto f() const -> std::vector<std::vector<double>> const& { return F; }
 
 private:
-  // add one individual to the population with a fitness.
+  /// Append one individual with a given fitness to the population.
+  /// \arg Ind The individual to be appended to the population.
+  /// \arg Fit The fitness of the individual.
   void append(Individual const& Ind, std::vector<double> const& Fit);
 
-  // our problem.
+  /// The optimization problem
   std::shared_ptr<Problem> ProblemPtr;
 
+  /// The vector of individuals
   std::vector<Individual> X;
+  /// The vector of fitnesses associated to each individual
   std::vector<std::vector<double>> F;
-
-  std::random_device Rd;
-  std::mt19937 Gen;
 };
 
-} // namespace firestarter::optimizer
-
-#endif
+} // namespace firestarter::optimizer
\ No newline at end of file
diff --git a/include/firestarter/Optimizer/Problem.hpp b/include/firestarter/Optimizer/Problem.hpp
index ae0d285d..d1b86301 100644
--- a/include/firestarter/Optimizer/Problem.hpp
+++ b/include/firestarter/Optimizer/Problem.hpp
@@ -30,35 +30,50 @@
 
 namespace firestarter::optimizer {
 
+/// This class models the abstract problem which should be optimized. It provides the methods to evaluate an individual
+/// and calculate its fitness.
 class Problem {
+  /// The number of metric evaluations
+  uint64_t Fevals = 0;
+
 public:
   Problem() = default;
   virtual ~Problem() = default;
 
-  // return the fitness for an individual
+  /// Perform an evaluation of the supplied individual. This returns a map from the metric name to their respective
+  /// summary. This function will increment the fevals.
+  /// \arg Individual The individual that should be evaluated.
+  /// \returns A map from metric name to the summary of this metric for the specific individual
   virtual auto metrics(Individual const& Individual) -> std::map<std::string, firestarter::measurement::Summary> = 0;
 
-  virtual auto fitness(std::map<std::string, firestarter::measurement::Summary> const& Summaries)
+  /// Convert the result of one evaluation into a fitness (vector of doubles) for the supplied summaries
+  /// \arg Summaries The summaries of one evaluation.
+  /// \returns The fitness vector derived from the summaries. The size of this vector is equal to the number of
+  /// objectives.
+  [[nodiscard]] virtual auto fitness(std::map<std::string, firestarter::measurement::Summary> const& Summaries) const
       -> std::vector<double> = 0;
 
-  // get the bounds of the problem
+  /// Get the bounds of the problem. For each dimension a min and max value is supplied.
+  /// \return The min and max bound per dimension.
   [[nodiscard]] virtual auto getBounds() const -> std::vector<std::tuple<unsigned, unsigned>> = 0;
 
-  // get the number of dimensions of the problem
+  /// Get the number of dimensions of the problem.
+  /// \returns The number of dimensions.
   [[nodiscard]] auto getDims() const -> std::size_t { return this->getBounds().size(); };
 
-  // get the number of objectives.
+  /// Get the number of optimization objectives for this problem.
+  /// \returns The number of objectives.
   [[nodiscard]] virtual auto getNobjs() const -> std::size_t = 0;
 
-  // is the problem multiobjective
+  /// Check if the problem is a multi-objective one.
   [[nodiscard]] auto isMO() const -> bool { return this->getNobjs() > 1; };
 
-  // get the number of fitness evaluations
+  /// Get the number of evaluations.
   [[nodiscard]] auto getFevals() const -> uint64_t { return Fevals; };
 
 protected:
-  // number of fitness evaluations
-  uint64_t Fevals = 0;
+  /// Increment the number of evaluations.
+  void incrementFevals() { Fevals++; };
 };
 
 } // namespace firestarter::optimizer
diff --git a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
index da118321..fa1cbd50 100644
--- a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
+++ b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
@@ -54,7 +54,7 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem {
   auto metrics(std::vector<unsigned> const& Individual)
       -> std::map<std::string, firestarter::measurement::Summary> override {
     // increment evaluation idx
-    Fevals++;
+    incrementFevals();
 
     // change the payload
     assert(InstructionGroups.size() == Individual.size());
@@ -83,7 +83,7 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem {
     return MeasurementWorker->getValues(StartDelta, StopDelta);
   }
 
-  auto fitness(std::map<std::string, firestarter::measurement::Summary> const& Summaries)
+  [[nodiscard]] auto fitness(std::map<std::string, firestarter::measurement::Summary> const& Summaries) const
       -> std::vector<double> override {
     std::vector<double> Values = {};
 
diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp
index 84af6e7b..9bef4a69 100644
--- a/src/firestarter/DumpRegisterWorker.cpp
+++ b/src/firestarter/DumpRegisterWorker.cpp
@@ -78,7 +78,7 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Dat
   auto& DumpRegisterStructRef = Data->LoadWorkerDataPtr->Memory->ExtraVars.Drs;
   auto& DumpVar = DumpRegisterStructRef.DumpVar;
   // memory of simd variables is before the padding
-  const auto* DumpMemAddr = static_cast<volatile uint64_t*>(DumpRegisterStructRef.Padding.data()) - Offset;
+  const auto* DumpMemAddr = DumpRegisterStructRef.Padding.data() - Offset;
 
   // allocate continous memory that fits the register contents
   auto Last = std::vector<uint64_t>(Offset);
@@ -125,7 +125,7 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Dat
 
     auto Current = std::vector<uint64_t>(Offset);
     // copy the register content to minimize the interruption of the load worker
-    std::memcpy(Current.data(), (void*)DumpMemAddr, Current.size() * sizeof(decltype(Current)::value_type));
+    std::memcpy(Current.data(), DumpMemAddr, Current.size() * sizeof(decltype(Current)::value_type));
 
     // skip the first output, as we first have to get some valid values for last
     if (!SkipFirst) {
@@ -138,9 +138,7 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Dat
       DumpFile << TotalHammingDistance << ",";
 
       // dump the hamming distance of each double (last, current) pair
-      for (int I = RegisterCount - 1; I >= 0; I--) {
-        // auto registerNum = registerCount - 1 - i;
-
+      for (int I = static_cast<int>(RegisterCount) - 1; I >= 0; I--) {
         for (auto J = 0U; J < RegisterSize; J++) {
           auto Index = (RegisterSize * I) + J;
           auto Hd = static_cast<uint64_t>(hammingDistance(Current[Index], Last[Index]));
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index 13123476..dcf4cdbd 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -24,6 +24,7 @@
 
 #include <algorithm>
 #include <cstdio>
+#include <iomanip>
 #include <regex>
 
 namespace firestarter::environment::x86 {
@@ -182,13 +183,11 @@ void X86Environment::printFunctionSummary() {
   for (auto const& Config : PlatformConfigs) {
     for (auto const& ThreadsPerCore : Config->settings().threads()) {
       const char* Available = Config->isAvailable(topology()) ? "yes" : "no";
-      const char* Fmt = "  %4u | %-30s | %-24s | %s";
       const auto& FunctionName = Config->functionName(ThreadsPerCore);
       const auto& InstructionGroupsString = Config->settings().getInstructionGroupsString();
-      int Sz = std::snprintf(nullptr, 0, Fmt, Id, FunctionName.c_str(), Available, InstructionGroupsString.c_str());
-      std::vector<char> Buf(Sz + 1);
-      std::snprintf(Buf.data(), Buf.size(), Fmt, Id, FunctionName.c_str(), Available, InstructionGroupsString.c_str());
-      log::info() << std::string(Buf.data());
+
+      log::info() << "  " << std::right << std::setw(4) << Id << " | " << std::left << std::setw(30) << FunctionName
+                  << " | " << std::left << std::setw(24) << Available << " | " << InstructionGroupsString;
       Id++;
     }
   }
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index f19a3892..223e1610 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -29,6 +29,7 @@
 #include <firestarter/Optimizer/History.hpp>
 #include <firestarter/Optimizer/Problem/CLIArgumentProblem.hpp>
 #include <firestarter/WindowsCompat.hpp>
+#include <memory>
 
 namespace firestarter {
 
@@ -135,7 +136,7 @@ Firestarter::Firestarter(Config&& ProvidedConfig)
 
         for (auto const& Thread : LoadThreads) {
           auto Td = Thread.second;
-          ipcEstimateMetricInsert(
+          IpcEstimateMetricData::insertValue(
               static_cast<double>(Td->LastRun.Iterations) *
               static_cast<double>(LoadThreads.front().second->CompiledPayloadPtr->stats().Instructions) /
               static_cast<double>(StopTimestamp - StartTimestamp));
@@ -151,7 +152,7 @@ Firestarter::Firestarter(Config&& ProvidedConfig)
           std::move(ApplySettings), MeasurementWorker, Cfg.OptimizationMetrics, Cfg.EvaluationDuration, Cfg.StartDelta,
           Cfg.StopDelta, Environment->config().settings().instructionGroupItems());
 
-      Population = firestarter::optimizer::Population(std::move(Prob));
+      Population = std::make_unique<firestarter::optimizer::Population>(std::move(Prob));
 
       if (Cfg.OptimizationAlgorithm == "NSGA2") {
         Algorithm =
@@ -160,7 +161,7 @@ Firestarter::Firestarter(Config&& ProvidedConfig)
         throw std::invalid_argument("Algorithm " + Cfg.OptimizationAlgorithm + " unknown.");
       }
 
-      Algorithm->checkPopulation(static_cast<firestarter::optimizer::Population const&>(Population), Cfg.Individuals);
+      Algorithm->checkPopulation(*Population, Cfg.Individuals);
     }
   }
 
@@ -211,7 +212,7 @@ void Firestarter::mainThread() {
       auto StartTime = optimizer::History::getTime();
 
       Firestarter::Optimizer = std::make_unique<optimizer::OptimizerWorker>(
-          std::move(Algorithm), Population, Cfg.OptimizationAlgorithm, Cfg.Individuals, Cfg.Preheat);
+          std::move(Algorithm), std::move(Population), Cfg.OptimizationAlgorithm, Cfg.Individuals, Cfg.Preheat);
 
       // wait here until optimizer thread terminates
       Firestarter::Optimizer->join();
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index 8ab145f9..a168a650 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -28,6 +28,7 @@
 #include <firestarter/Logging/Log.hpp>
 #include <iomanip>
 #include <limits>
+#include <sstream>
 
 #if defined(linux) || defined(__linux__)
 #include <firestarter/Measurement/Metric/IPCEstimate.hpp>
@@ -63,7 +64,9 @@ void Firestarter::initLoadWorkers() {
       assert(CommPtr);
       ErrorCommunication.emplace_back(std::shared_ptr<uint64_t>(CommPtr, AlignedAlloc::free));
       log::debug() << "Threads " << (I + NumThreads - 1) % NumThreads << " and " << I << " commPtr = 0x"
-                   << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex
+                   << std::setfill('0') << std::setw(sizeof(uint64_t) * 2)
+                   << std::hex
+                   // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
                    << reinterpret_cast<uint64_t>(CommPtr);
     }
   }
@@ -203,7 +206,7 @@ void Firestarter::printPerformanceReport() {
   if (Cfg.Measurement) {
     for (auto const& Thread : LoadThreads) {
       auto Td = Thread.second;
-      ipcEstimateMetricInsert(
+      IpcEstimateMetricData::insertValue(
           static_cast<double>(Td->LastRun.Iterations) *
           static_cast<double>(LoadThreads.front().second->CompiledPayloadPtr->stats().Instructions) /
           static_cast<double>(StopTimestamp - StartTimestamp));
@@ -211,14 +214,11 @@ void Firestarter::printPerformanceReport() {
   }
 #endif
 
-  // format runtime, gflops and bandwidth %.2f
+  // format runtime, gflops and bandwidth with two decimal places
   const auto FormatString = [](double Value) -> std::string {
-    const char* Fmt = "%.2f";
-
-    auto Size = std::snprintf(nullptr, 0, Fmt, Value);
-    std::vector<char> CharVec(Size + 1);
-    std::snprintf(CharVec.data(), CharVec.size(), Fmt, Value);
-    return {std::string(CharVec.data())};
+    std::stringstream Ss;
+    Ss << std::fixed << std::setprecision(2) << Value;
+    return Ss.str();
   };
 
   log::debug() << "\n"
@@ -294,7 +294,7 @@ void Firestarter::loadThreadWorker(const std::shared_ptr<LoadWorkerData>& Td) {
         ErrorDetectionStructRef.Right.Communication = Td->CommunicationRight.get();
 
         // do first touch memset 0 for the communication pointers
-        std::memset((void*)ErrorDetectionStructRef.Left.Communication, 0, sizeof(uint64_t) * 2);
+        std::memset(static_cast<void*>(ErrorDetectionStructRef.Left.Communication), 0, sizeof(uint64_t) * 2);
       }
 
       // call init function
diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp
index a6584daf..8c225108 100644
--- a/src/firestarter/Measurement/MeasurementWorker.cpp
+++ b/src/firestarter/Measurement/MeasurementWorker.cpp
@@ -99,14 +99,14 @@ MeasurementWorker::MeasurementWorker(std::chrono::milliseconds UpdateInterval, u
   std::map<std::string, bool> Available;
 
   for (auto const& Metric : Metrics) {
-    std::string Name(Metric->Name);
+    const std::string Name(Metric->Name);
     MaxLength = MaxLength < Name.size() ? Name.size() : MaxLength;
     auto ReturnCode = Metric->Init();
     Metric->Fini();
     Available[Name] = ReturnCode == EXIT_SUCCESS;
   }
 
-  unsigned Padding = MaxLength > 6 ? MaxLength - 6 : 0;
+  const auto Padding = MaxLength > 6 ? MaxLength - 6 : 0;
   Ss << "  METRIC" << std::string(Padding + 1, ' ') << "| available\n";
   Ss << "  " << std::string(Padding + 7, '-') << "-----------\n";
   for (auto const& [key, value] : Available) {
@@ -116,13 +116,11 @@ MeasurementWorker::MeasurementWorker(std::chrono::milliseconds UpdateInterval, u
 
   AvailableMetricsString = Ss.str();
 
-  pthread_create(&WorkerThread, nullptr, reinterpret_cast<void* (*)(void*)>(MeasurementWorker::dataAcquisitionWorker),
-                 this);
+  pthread_create(&WorkerThread, nullptr, MeasurementWorker::dataAcquisitionWorker, this);
 
   // create a worker for getting metric values from stdin
   if (!StdinMetrics.empty()) {
-    pthread_create(&StdinThread, nullptr,
-                   reinterpret_cast<void* (*)(void*)>(MeasurementWorker::stdinDataAcquisitionWorker), this);
+    pthread_create(&StdinThread, nullptr, MeasurementWorker::stdinDataAcquisitionWorker, this);
   }
 }
 
@@ -173,7 +171,7 @@ auto MeasurementWorker::findMetricByName(std::string MetricName) -> const Metric
     return nullptr;
   }
   // metric found
-  return const_cast<const MetricInterface*>(*Metric);
+  return *Metric;
 }
 
 // this must be called by the main thread.
@@ -193,7 +191,7 @@ auto MeasurementWorker::initMetrics(std::vector<std::string> const& MetricNames)
     } else {
       const auto* Metric = findMetricByName(MetricName);
       if (Metric != nullptr) {
-        int ReturnValue = Metric->Init();
+        const auto ReturnValue = Metric->Init();
         if (ReturnValue != EXIT_SUCCESS) {
           log::error() << "Metric " << Metric->Name << ": " << Metric->GetError();
           continue;
@@ -264,7 +262,7 @@ auto MeasurementWorker::getValues(std::chrono::milliseconds StartDelta, std::chr
     auto It = std::copy_if(values.begin(), values.end(), CroppedValues.begin(), FindAll);
     CroppedValues.resize(std::distance(CroppedValues.begin(), It));
 
-    Summary Sum = Summary::calculate(CroppedValues.begin(), CroppedValues.end(), Type, NumThreads);
+    const auto Sum = Summary::calculate(CroppedValues.begin(), CroppedValues.end(), Type, NumThreads);
 
     Measurment[key] = Sum;
   }
@@ -274,11 +272,11 @@ auto MeasurementWorker::getValues(std::chrono::milliseconds StartDelta, std::chr
   return Measurment;
 }
 
-auto MeasurementWorker::dataAcquisitionWorker(void* MeasurementWorker) -> int* {
-
+auto MeasurementWorker::dataAcquisitionWorker(void* MeasurementWorker) -> void* {
+  // NOLINTNEXTLINE(cert-pos47-c,concurrency-thread-canceltype-asynchronous)
   pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, nullptr);
 
-  auto* This = reinterpret_cast<class MeasurementWorker*>(MeasurementWorker);
+  auto* This = static_cast<class MeasurementWorker*>(MeasurementWorker);
 
 #ifndef __APPLE__
   pthread_setname_np(pthread_self(), "DataAcquisition");
@@ -373,11 +371,11 @@ auto MeasurementWorker::dataAcquisitionWorker(void* MeasurementWorker) -> int* {
   }
 }
 
-auto MeasurementWorker::stdinDataAcquisitionWorker(void* MeasurementWorker) -> int* {
-
+auto MeasurementWorker::stdinDataAcquisitionWorker(void* MeasurementWorker) -> void* {
+  // NOLINTNEXTLINE(cert-pos47-c,concurrency-thread-canceltype-asynchronous)
   pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, nullptr);
 
-  auto* This = reinterpret_cast<class MeasurementWorker*>(MeasurementWorker);
+  auto* This = static_cast<class MeasurementWorker*>(MeasurementWorker);
 
 #ifndef __APPLE__
   pthread_setname_np(pthread_self(), "StdinDataAcquis");
diff --git a/src/firestarter/Measurement/Metric/IPCEstimate.cpp b/src/firestarter/Measurement/Metric/IPCEstimate.cpp
index dcbc379e..d92d7ea7 100644
--- a/src/firestarter/Measurement/Metric/IPCEstimate.cpp
+++ b/src/firestarter/Measurement/Metric/IPCEstimate.cpp
@@ -22,41 +22,47 @@
 #include <chrono>
 #include <cstdlib>
 #include <firestarter/Measurement/Metric/IPCEstimate.hpp>
-#include <string>
 
 auto IpcEstimateMetricData::fini() -> int32_t {
-  Callback = nullptr;
-  CallbackArg = nullptr;
+  auto& Instance = instance();
+
+  Instance.Callback = nullptr;
+  Instance.CallbackArg = nullptr;
 
   return EXIT_SUCCESS;
 }
 
 auto IpcEstimateMetricData::init() -> int32_t {
-  ErrorString = "";
+  instance().ErrorString = "";
 
   return EXIT_SUCCESS;
 }
 
 auto IpcEstimateMetricData::getError() -> const char* {
-  const char* ErrorCString = ErrorString.c_str();
+  const char* ErrorCString = instance().ErrorString.c_str();
   return ErrorCString;
 }
 
 auto IpcEstimateMetricData::registerInsertCallback(void (*C)(void*, const char*, int64_t, double), void* Arg)
     -> int32_t {
-  Callback = C;
-  CallbackArg = Arg;
+  auto& Instance = instance();
+
+  Instance.Callback = C;
+  Instance.CallbackArg = Arg;
+
   return EXIT_SUCCESS;
 }
 
-void ipcEstimateMetricInsert(double Value) {
-  if (IpcEstimateMetricData::Callback == nullptr || IpcEstimateMetricData::CallbackArg == nullptr) {
+void IpcEstimateMetricData::insertValue(double Value) {
+  auto& Instance = instance();
+
+  if (Instance.Callback == nullptr || Instance.CallbackArg == nullptr) {
     return;
   }
 
-  int64_t T =
+  const int64_t T =
       std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch())
           .count();
 
-  IpcEstimateMetricData::Callback(IpcEstimateMetricData::CallbackArg, "ipc-estimate", T, Value);
+  Instance.Callback(Instance.CallbackArg, "ipc-estimate", T, Value);
 }
\ No newline at end of file
diff --git a/src/firestarter/Measurement/Metric/Perf.cpp b/src/firestarter/Measurement/Metric/Perf.cpp
index 3240d969..af478757 100644
--- a/src/firestarter/Measurement/Metric/Perf.cpp
+++ b/src/firestarter/Measurement/Metric/Perf.cpp
@@ -41,21 +41,25 @@ auto perfEventOpen(struct perf_event_attr* HwEvent, pid_t Pid, int Cpu, int Grou
 } // namespace
 
 auto PerfMetricData::fini() -> int32_t {
-  if (!(CpuCyclesFd < 0)) {
-    close(CpuCyclesFd);
-    CpuCyclesFd = -1;
+  auto& Instance = instance();
+
+  if (!(Instance.CpuCyclesFd < 0)) {
+    close(Instance.CpuCyclesFd);
+    Instance.CpuCyclesFd = -1;
   }
-  if (!(InstructionsFd < 0)) {
-    close(InstructionsFd);
-    InstructionsFd = -1;
+  if (!(Instance.InstructionsFd < 0)) {
+    close(Instance.InstructionsFd);
+    Instance.InstructionsFd = -1;
   }
-  InitDone = false;
+  Instance.InitDone = false;
   return EXIT_SUCCESS;
 }
 
 auto PerfMetricData::init() -> int32_t {
-  if (InitDone) {
-    return InitValue;
+  auto& Instance = instance();
+
+  if (Instance.InitDone) {
+    return Instance.InitValue;
   }
 
   if (access(PerfEventParanoidFile, F_OK) == -1) {
@@ -63,10 +67,10 @@ auto PerfMetricData::init() -> int32_t {
     // The official way of knowing if perf_event_open() support is enabled
     // is checking for the existence of the file
     // /proc/sys/kernel/perf_event_paranoid.
-    ErrorString =
+    Instance.ErrorString =
         "syscall perf_event_open not supported or file " + std::string(PerfEventParanoidFile) + " does not exist";
-    InitValue = EXIT_FAILURE;
-    InitDone = true;
+    Instance.InitValue = EXIT_FAILURE;
+    Instance.InitDone = true;
     return EXIT_FAILURE;
   }
 
@@ -99,27 +103,27 @@ auto PerfMetricData::init() -> int32_t {
   CpuCyclesAttr.exclude_kernel = 1;
   CpuCyclesAttr.exclude_hv = 1;
 
-  CpuCyclesFd = perfEventOpen(&CpuCyclesAttr,
-                              // pid == 0 and cpu == -1
-                              // This measures the calling process/thread on any CPU.
-                              0, -1,
-                              // The group_fd argument allows event groups to be created.  An event
-                              // group has one event which is the group leader.  The leader is
-                              // created first, with group_fd = -1.  The rest of the group members
-                              // are created with subsequent perf_event_open() calls with group_fd
-                              // being set to the file descriptor of the group leader.
-                              -1, 0);
-
-  if (CpuCyclesFd < 0) {
+  Instance.CpuCyclesFd = perfEventOpen(&CpuCyclesAttr,
+                                       // pid == 0 and cpu == -1
+                                       // This measures the calling process/thread on any CPU.
+                                       0, -1,
+                                       // The group_fd argument allows event groups to be created.  An event
+                                       // group has one event which is the group leader.  The leader is
+                                       // created first, with group_fd = -1.  The rest of the group members
+                                       // are created with subsequent perf_event_open() calls with group_fd
+                                       // being set to the file descriptor of the group leader.
+                                       -1, 0);
+
+  if (Instance.CpuCyclesFd < 0) {
     fini();
-    ErrorString = "perf_event_open failed for PERF_COUNT_HW_CPU_CYCLES";
-    InitValue = EXIT_FAILURE;
-    InitDone = true;
+    Instance.ErrorString = "perf_event_open failed for PERF_COUNT_HW_CPU_CYCLES";
+    Instance.InitValue = EXIT_FAILURE;
+    Instance.InitDone = true;
     return EXIT_FAILURE;
   }
 
   // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-  ioctl(CpuCyclesFd, PERF_EVENT_IOC_ID, &CpuCyclesId);
+  ioctl(Instance.CpuCyclesFd, PERF_EVENT_IOC_ID, &Instance.CpuCyclesId);
 
   struct perf_event_attr InstructionsAttr {};
   std::memset(&InstructionsAttr, 0, sizeof(struct perf_event_attr));
@@ -131,43 +135,43 @@ auto PerfMetricData::init() -> int32_t {
   InstructionsAttr.exclude_kernel = 1;
   InstructionsAttr.exclude_hv = 1;
 
-  InstructionsFd = perfEventOpen(&InstructionsAttr,
-                                 // pid == 0 and cpu == -1
-                                 // This measures the calling process/thread on any CPU.
-                                 0, -1,
-                                 // The group_fd argument allows event groups to be created.  An event
-                                 // group has one event which is the group leader.  The leader is
-                                 // created first, with group_fd = -1.  The rest of the group members
-                                 // are created with subsequent perf_event_open() calls with group_fd
-                                 // being set to the file descriptor of the group leader.
-                                 CpuCyclesFd, 0);
-
-  if (InstructionsFd < 0) {
+  Instance.InstructionsFd = perfEventOpen(&InstructionsAttr,
+                                          // pid == 0 and cpu == -1
+                                          // This measures the calling process/thread on any CPU.
+                                          0, -1,
+                                          // The group_fd argument allows event groups to be created.  An event
+                                          // group has one event which is the group leader.  The leader is
+                                          // created first, with group_fd = -1.  The rest of the group members
+                                          // are created with subsequent perf_event_open() calls with group_fd
+                                          // being set to the file descriptor of the group leader.
+                                          Instance.CpuCyclesFd, 0);
+
+  if (Instance.InstructionsFd < 0) {
     fini();
-    ErrorString = "perf_event_open failed for PERF_COUNT_HW_INSTRUCTIONS";
-    InitValue = EXIT_FAILURE;
-    InitDone = true;
+    Instance.ErrorString = "perf_event_open failed for PERF_COUNT_HW_INSTRUCTIONS";
+    Instance.InitValue = EXIT_FAILURE;
+    Instance.InitDone = true;
     return EXIT_FAILURE;
   }
 
   // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-  ioctl(InstructionsFd, PERF_EVENT_IOC_ID, &InstructionsId);
+  ioctl(Instance.InstructionsFd, PERF_EVENT_IOC_ID, &Instance.InstructionsId);
 
   // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-  ioctl(CpuCyclesFd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
+  ioctl(Instance.CpuCyclesFd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
   // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
-  ioctl(CpuCyclesFd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
+  ioctl(Instance.CpuCyclesFd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
 
-  if (0 == read(CpuCyclesFd, &Last, sizeof(Last))) {
+  if (0 == read(Instance.CpuCyclesFd, &Instance.Last, sizeof(Last))) {
     fini();
-    ErrorString = "group read failed in init";
-    InitValue = EXIT_FAILURE;
-    InitDone = true;
+    Instance.ErrorString = "group read failed in init";
+    Instance.InitValue = EXIT_FAILURE;
+    Instance.InitDone = true;
     return EXIT_FAILURE;
   }
 
-  InitValue = EXIT_SUCCESS;
-  InitDone = true;
+  Instance.InitValue = EXIT_SUCCESS;
+  Instance.InitDone = true;
   return EXIT_SUCCESS;
 }
 
@@ -185,31 +189,33 @@ auto PerfMetricData::valueFromId(struct ReadFormat* Reader, uint64_t Id) -> uint
 }
 
 auto PerfMetricData::getReading(double* IpcValue, double* FreqValue) -> int32_t {
+  auto& Instance = instance();
 
-  if (CpuCyclesFd < 0 || InstructionsFd < 0) {
+  if (Instance.CpuCyclesFd < 0 || Instance.InstructionsFd < 0) {
     fini();
     return EXIT_FAILURE;
   }
 
   struct ReadFormat ReadValues {};
 
-  if (0 == read(CpuCyclesFd, &ReadValues, sizeof(ReadValues))) {
+  if (0 == read(Instance.CpuCyclesFd, &ReadValues, sizeof(ReadValues))) {
     fini();
-    ErrorString = "group read failed";
+    Instance.ErrorString = "group read failed";
     return EXIT_FAILURE;
   }
 
   if (IpcValue != nullptr) {
-    std::array<uint64_t, 2> Diff = {valueFromId(&ReadValues, InstructionsId) - valueFromId(&Last, InstructionsId),
-                                    valueFromId(&ReadValues, CpuCyclesId) - valueFromId(&Last, CpuCyclesId)};
+    std::array<uint64_t, 2> Diff = {
+        valueFromId(&ReadValues, Instance.InstructionsId) - valueFromId(&Instance.Last, Instance.InstructionsId),
+        valueFromId(&ReadValues, Instance.CpuCyclesId) - valueFromId(&Instance.Last, Instance.CpuCyclesId)};
 
-    std::memcpy(&Last, &ReadValues, sizeof(Last));
+    std::memcpy(&Instance.Last, &ReadValues, sizeof(Last));
 
     *IpcValue = static_cast<double>(Diff[0]) / static_cast<double>(Diff[1]);
   }
 
   if (FreqValue != nullptr) {
-    *FreqValue = static_cast<double>(valueFromId(&ReadValues, CpuCyclesId)) / 1e9;
+    *FreqValue = static_cast<double>(valueFromId(&ReadValues, Instance.CpuCyclesId)) / 1e9;
   }
 
   return EXIT_SUCCESS;
@@ -220,6 +226,6 @@ auto PerfMetricData::getReadingIpc(double* Value) -> int32_t { return getReading
 auto PerfMetricData::getReadingFreq(double* Value) -> int32_t { return getReading(nullptr, Value); }
 
 auto PerfMetricData::getError() -> const char* {
-  const char* ErrorCString = ErrorString.c_str();
+  const char* ErrorCString = instance().ErrorString.c_str();
   return ErrorCString;
 }
\ No newline at end of file
diff --git a/src/firestarter/Measurement/Metric/RAPL.cpp b/src/firestarter/Measurement/Metric/RAPL.cpp
index a05f8a4c..b08dc19d 100644
--- a/src/firestarter/Measurement/Metric/RAPL.cpp
+++ b/src/firestarter/Measurement/Metric/RAPL.cpp
@@ -33,17 +33,19 @@ extern "C" {
 }
 
 auto RaplMetricData::fini() -> int32_t {
-  Readers.clear();
+  instance().Readers.clear();
 
   return EXIT_SUCCESS;
 }
 
 auto RaplMetricData::init() -> int32_t {
-  ErrorString = "";
+  auto& Instance = instance();
+
+  Instance.ErrorString = "";
 
   DIR* RaplDir = opendir(RaplPath);
   if (RaplDir == nullptr) {
-    ErrorString = "Could not open " + std::string(RaplPath);
+    Instance.ErrorString = "Could not open " + std::string(RaplPath);
     return EXIT_FAILURE;
   }
 
@@ -92,7 +94,7 @@ auto RaplMetricData::init() -> int32_t {
   // paths now contains all interesting nodes
 
   if (Paths.empty()) {
-    ErrorString = "No valid entries in " + std::string(RaplPath);
+    Instance.ErrorString = "No valid entries in " + std::string(RaplPath);
     return EXIT_FAILURE;
   }
 
@@ -101,7 +103,7 @@ auto RaplMetricData::init() -> int32_t {
     EnergyUjPath << Path << "/energy_uj";
     std::ifstream EnergyReadingStream(EnergyUjPath.str());
     if (!EnergyReadingStream.good()) {
-      ErrorString = "Could not read energy_uj";
+      Instance.ErrorString = "Could not read energy_uj";
       break;
     }
 
@@ -109,7 +111,7 @@ auto RaplMetricData::init() -> int32_t {
     MaxEnergyUjRangePath << Path << "/max_energy_range_uj";
     std::ifstream MaxEnergyReadingStream(MaxEnergyUjRangePath.str());
     if (!MaxEnergyReadingStream.good()) {
-      ErrorString = "Could not read max_energy_range_uj";
+      Instance.ErrorString = "Could not read max_energy_range_uj";
       break;
     }
 
@@ -123,10 +125,10 @@ auto RaplMetricData::init() -> int32_t {
 
     auto Def = std::make_unique<ReaderDef>(Path, Max, Reading, 0);
 
-    Readers.emplace_back(std::move(Def));
+    Instance.Readers.emplace_back(std::move(Def));
   }
 
-  if (!ErrorString.empty()) {
+  if (!Instance.ErrorString.empty()) {
     fini();
     return EXIT_FAILURE;
   }
@@ -137,7 +139,7 @@ auto RaplMetricData::init() -> int32_t {
 auto RaplMetricData::getReading(double* Value) -> int32_t {
   double FinalReading = 0.0;
 
-  for (auto& Def : Readers) {
+  for (auto& Def : instance().Readers) {
     std::string Buffer;
 
     std::stringstream EnergyUjPath;
@@ -163,7 +165,7 @@ auto RaplMetricData::getReading(double* Value) -> int32_t {
 }
 
 auto RaplMetricData::getError() -> const char* {
-  const char* ErrorCString = ErrorString.c_str();
+  const char* ErrorCString = instance().ErrorString.c_str();
   return ErrorCString;
 }
 
diff --git a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
index 6824ea0e..3d43a073 100644
--- a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
+++ b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
@@ -72,7 +72,7 @@ auto NSGA2::evolve(firestarter::optimizer::Population& Pop) -> firestarter::opti
   auto NP = Pop.size();
   auto Fevals0 = Prob.getFevals();
 
-  this->checkPopulation(const_cast<firestarter::optimizer::Population const&>(Pop), NP);
+  this->checkPopulation(Pop, NP);
 
   std::random_device Rd;
   std::mt19937 Rng(Rd());
@@ -115,8 +115,8 @@ auto NSGA2::evolve(firestarter::optimizer::Population& Pop) -> firestarter::opti
     firestarter::optimizer::Population Popnew(Pop);
 
     // We create some pseudo-random permutation of the poulation indexes
-    std::shuffle(Shuffle1.begin(), Shuffle1.end(), std::mt19937(std::random_device()()));
-    std::shuffle(Shuffle2.begin(), Shuffle2.end(), std::mt19937(std::random_device()()));
+    std::shuffle(Shuffle1.begin(), Shuffle1.end(), Rng);
+    std::shuffle(Shuffle2.begin(), Shuffle2.end(), Rng);
 
     // We compute crowding distance and non dominated rank for the current
     // population
@@ -166,11 +166,12 @@ auto NSGA2::evolve(firestarter::optimizer::Population& Pop) -> firestarter::opti
 
       Popnew.append(Children.first);
       Popnew.append(Children.second);
-    } // popnew now contains 2NP individuals
-    // This method returns the sorted N best individuals in the population
+    }
+    // Popnew now contains 2NP individuals
+
+    // Save the best NP individuals in the population
     // according to the crowded comparison operator
     BestIdx = util::selectBestNMo(Popnew.f(), NP);
-    // We insert into the population
     for (decltype(NP) I = 0; I < NP; ++I) {
       Pop.insert(I, Popnew.x()[BestIdx[I]], Popnew.f()[BestIdx[I]]);
     }
diff --git a/src/firestarter/Optimizer/OptimizerWorker.cpp b/src/firestarter/Optimizer/OptimizerWorker.cpp
index bbbee14b..a84182b3 100644
--- a/src/firestarter/Optimizer/OptimizerWorker.cpp
+++ b/src/firestarter/Optimizer/OptimizerWorker.cpp
@@ -27,10 +27,11 @@
 namespace firestarter::optimizer {
 
 OptimizerWorker::OptimizerWorker(std::unique_ptr<firestarter::optimizer::Algorithm>&& Algorithm,
-                                 firestarter::optimizer::Population& Population, std::string OptimizationAlgorithm,
-                                 unsigned Individuals, std::chrono::seconds const& Preheat)
+                                 std::unique_ptr<firestarter::optimizer::Population>&& Population,
+                                 std::string OptimizationAlgorithm, unsigned Individuals,
+                                 std::chrono::seconds const& Preheat)
     : Algorithm(std::move(Algorithm))
-    , Population(Population)
+    , Population(std::move(Population))
     , OptimizationAlgorithm(std::move(OptimizationAlgorithm))
     , Individuals(Individuals)
     , Preheat(Preheat) {
@@ -63,10 +64,10 @@ auto OptimizerWorker::optimizerThread(void* OptimizerWorker) -> void* {
 
   // For NSGA2 we start with a initial population
   if (This->OptimizationAlgorithm == "NSGA2") {
-    This->Population.generateInitialPopulation(This->Individuals);
+    This->Population->generateInitialPopulation(This->Individuals);
   }
 
-  This->Algorithm->evolve(This->Population);
+  This->Algorithm->evolve(*This->Population);
 
   return nullptr;
 }
diff --git a/src/firestarter/Optimizer/Population.cpp b/src/firestarter/Optimizer/Population.cpp
index 2394f0d2..b709d74d 100644
--- a/src/firestarter/Optimizer/Population.cpp
+++ b/src/firestarter/Optimizer/Population.cpp
@@ -19,12 +19,11 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <cassert>
 #include <firestarter/Logging/Log.hpp>
 #include <firestarter/Optimizer/History.hpp>
 #include <firestarter/Optimizer/Population.hpp>
-
-#include <algorithm>
-#include <cassert>
+#include <random>
 
 namespace firestarter::optimizer {
 
@@ -39,9 +38,8 @@ void Population::generateInitialPopulation(std::size_t PopulationSize) {
       Individual Vec(Dims, 0);
       Vec[I] = 1;
       this->append(Vec);
+      Remaining--;
     }
-
-    Remaining -= Dims;
   } else {
     firestarter::log::trace() << "Population size (" << std::to_string(PopulationSize)
                               << ") is less than size of problem dimension (" << std::to_string(Dims) << ")";
@@ -60,7 +58,7 @@ void Population::append(Individual const& Ind) {
   std::map<std::string, firestarter::measurement::Summary> Metrics;
 
   // check if we already evaluated this individual
-  auto OptionalMetric = History::find(Ind);
+  const auto OptionalMetric = History::find(Ind);
   if (OptionalMetric.has_value()) {
     Metrics = OptionalMetric.value();
   } else {
@@ -99,10 +97,13 @@ void Population::insert(std::size_t Idx, Individual const& Ind, std::vector<doub
   F[Idx] = Fit;
 }
 
-auto Population::getRandomIndividual() -> Individual {
+auto Population::getRandomIndividual() const -> Individual {
   auto Dims = this->problem().getDims();
   auto const Bounds = this->problem().getBounds();
 
+  std::random_device Rd;
+  std::mt19937 Rng(Rd());
+
   firestarter::log::trace() << "Generating random individual of size: " << Dims;
 
   Individual Out(Dims);
@@ -111,7 +112,7 @@ auto Population::getRandomIndividual() -> Individual {
     auto const Lb = std::get<0>(Bounds[I]);
     auto const Ub = std::get<1>(Bounds[I]);
 
-    Out[I] = std::uniform_int_distribution<unsigned>(Lb, Ub)(this->Gen);
+    Out[I] = std::uniform_int_distribution<unsigned>(Lb, Ub)(Rng);
 
     firestarter::log::trace() << "  - " << I << ": [" << Lb << "," << Ub << "]: " << Out[I];
   }
@@ -119,21 +120,4 @@ auto Population::getRandomIndividual() -> Individual {
   return Out;
 }
 
-auto Population::bestIndividual() const -> std::optional<Individual> {
-  // return an empty vector if the problem is multi objective, as there is no
-  // single best individual
-  if (this->problem().isMO()) {
-    return {};
-  }
-
-  // assert that we have individuals
-  assert(!this->X.empty());
-
-  auto Best = std::max_element(this->X.begin(), this->X.end(), [](const auto& A, const auto& B) { return A < B; });
-
-  assert(Best != this->X.end());
-
-  return *Best;
-}
-
 } // namespace firestarter::optimizer
\ No newline at end of file

From de5bc81460a6d0da001170a6cfab08f380363575 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 2 Nov 2024 14:25:47 +0100
Subject: [PATCH 132/167] clang-tidy: disable warnings from asmjit

---
 .clang-tidy | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.clang-tidy b/.clang-tidy
index 7c7a6449..07e30621 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -59,4 +59,6 @@ CheckOptions:
   - { key: readability-implicit-bool-conversion.AllowIntegerConditions,  value: 1 }
   - { key: readability-implicit-bool-conversion.AllowPointerConditions,  value: 1 }
   - { key: readability-function-cognitive-complexity.IgnoreMacros,  value: 1 }
-  - { key: misc-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic, value: "true" }
\ No newline at end of file
+  - { key: misc-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic, value: "true" }
+  # disable warnings in asmjit
+  - { key: 'clang-analyzer-optin.cplusplus.UninitializedObject:IgnoreRecordsWithField', value: 'asmjit::Operand_::Signature' }
\ No newline at end of file

From 7e0ee181e44209b0d6170b2d42fb820d72b1502a Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 2 Nov 2024 18:23:11 +0100
Subject: [PATCH 133/167] clang-tidy: fix issues in MeasurementWorker

---
 .../Measurement/MeasurementWorker.cpp         | 39 +++++++++++++------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp
index 8c225108..616bb309 100644
--- a/src/firestarter/Measurement/MeasurementWorker.cpp
+++ b/src/firestarter/Measurement/MeasurementWorker.cpp
@@ -19,6 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include <cstdarg>
 #include <firestarter/Logging/Log.hpp>
 #include <firestarter/Measurement/MeasurementWorker.hpp>
 #include <queue>
@@ -33,6 +34,20 @@ void insertCallback(void* Cls, const char* MetricName, int64_t TimeSinceEpoch, d
   static_cast<firestarter::measurement::MeasurementWorker*>(Cls)->insertCallback(MetricName, TimeSinceEpoch, Value);
 }
 
+namespace {
+
+// NOLINTBEGIN(cert-dcl50-cpp,cppcoreguidelines-pro-type-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay)
+auto scanStdin(const char* Fmt, int Count, ...) -> bool {
+  va_list Args;
+  va_start(Args, Count);
+  auto ReturnCode = std::vscanf(Fmt, Args);
+  va_end(Args);
+  return ReturnCode == Count;
+}
+// NOLINTEND(cert-dcl50-cpp,cppcoreguidelines-pro-type-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay)
+
+} // namespace
+
 namespace firestarter::measurement {
 
 MeasurementWorker::MeasurementWorker(std::chrono::milliseconds UpdateInterval, uint64_t NumThreads,
@@ -381,21 +396,23 @@ auto MeasurementWorker::stdinDataAcquisitionWorker(void* MeasurementWorker) -> v
   pthread_setname_np(pthread_self(), "StdinDataAcquis");
 #endif
 
-  for (std::string Line; std::getline(std::cin, Line);) {
+  for (;;) {
     int64_t Time = 0;
     double Value = NAN;
-    char Name[128];
-    if (std::sscanf(Line.c_str(), "%127s %ld %lf", Name, &Time, &Value) == 3) {
-      auto NameEqual = [&Name](auto const& AllowedName) { return AllowedName == std::string(Name); };
-      auto Item = std::find_if(This->stdinMetrics().begin(), This->stdinMetrics().end(), NameEqual);
-      // metric name is allowed
-      if (Item != This->stdinMetrics().end()) {
-        This->insertCallback(Name, Time, Value);
-      }
+    std::array<char, 128> Name = {0};
+
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
+    if (!scanStdin("%127s %ld %lf", 3, Name.data(), &Time, &Value)) {
+      continue;
     }
-  }
 
-  return nullptr;
+    auto NameEqual = [&Name](auto const& AllowedName) { return AllowedName == std::string(Name.data()); };
+    auto Item = std::find_if(This->stdinMetrics().begin(), This->stdinMetrics().end(), NameEqual);
+    // metric name is allowed
+    if (Item != This->stdinMetrics().end()) {
+      This->insertCallback(Name.data(), Time, Value);
+    }
+  }
 }
 
 } // namespace firestarter::measurement
\ No newline at end of file

From 42bfb04be28d89dc900786c3c62f7f5747cfd5ae Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sun, 3 Nov 2024 17:48:25 +0100
Subject: [PATCH 134/167] clang-tidy: fixes

---
 include/firestarter/Firestarter.hpp           |  6 +++
 .../Environment/X86/X86CPUTopology.cpp        | 40 +++++++++----------
 .../Measurement/MeasurementWorker.cpp         |  4 +-
 3 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 7d55cda8..4e7b6981 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -72,6 +72,10 @@ class Firestarter {
 
   std::unique_ptr<firestarter::optimizer::Population> Population;
 
+  // NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
+  // TODO(Issue #85): Currently we support one instance of the Firestarter class. Variables that need to be accessed
+  // from outside the class, e.g. in the sigterm handler are inline static.
+
   inline static std::unique_ptr<optimizer::OptimizerWorker> Optimizer;
 
   // variables to control the termination of the watchdog
@@ -82,6 +86,8 @@ class Firestarter {
   // variable to control the load of the threads
   inline static volatile LoadThreadWorkType LoadVar = LoadThreadWorkType::LoadLow;
 
+  // NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
+
   // LoadThreadWorker.cpp
   void initLoadWorkers();
   void joinLoadWorkers();
diff --git a/src/firestarter/Environment/X86/X86CPUTopology.cpp b/src/firestarter/Environment/X86/X86CPUTopology.cpp
index 02a84218..a467be50 100644
--- a/src/firestarter/Environment/X86/X86CPUTopology.cpp
+++ b/src/firestarter/Environment/X86/X86CPUTopology.cpp
@@ -138,10 +138,8 @@ auto X86CPUTopology::clockrate() const -> uint64_t {
   using ClockT = std::chrono::high_resolution_clock;
   using TicksT = std::chrono::microseconds;
 
-  uint64_t TimeDiff = 0;
   uint64_t Clockrate = 0;
-  int NumMeasurements = 0;
-  int MinMeasurements = 0;
+  uint64_t MinMeasurements = 0;
 
   ClockT::time_point StartTime;
   ClockT::time_point EndTime;
@@ -162,9 +160,8 @@ auto X86CPUTopology::clockrate() const -> uint64_t {
   MinMeasurements = 20;
 #endif
 
-  int I = 3;
-
-  do {
+  for (uint64_t NumMeasurements = 0, TimeDiff = 0, Duration = 3; TimeDiff < 10000 || NumMeasurements < MinMeasurements;
+       Duration += 2) {
     uint64_t End1Tsc = 0;
     uint64_t End2Tsc = 0;
 
@@ -173,27 +170,29 @@ auto X86CPUTopology::clockrate() const -> uint64_t {
     StartTime = ClockT::now();
     const uint64_t Start2Tsc = timestamp();
 
-    // waiting
-    do {
+    // busy wait until the duration has passed
+    for (; End1Tsc < Start2Tsc + 1000000 * Duration;) {
       End1Tsc = timestamp();
-    } while (End1Tsc < Start2Tsc + 1000000 * I); /* busy waiting */
+    }
 
     // end timestamp
-    do {
-      End1Tsc = timestamp();
-      EndTime = ClockT::now();
-      End2Tsc = timestamp();
+    End1Tsc = timestamp();
+    EndTime = ClockT::now();
+    End2Tsc = timestamp();
 
-      TimeDiff = std::chrono::duration_cast<TicksT>(EndTime - StartTime).count();
-    } while (0 == TimeDiff);
+    TimeDiff = std::chrono::duration_cast<TicksT>(EndTime - StartTime).count();
 
-    const uint64_t ClockLowerBound = (((End1Tsc - Start2Tsc) * 1000000) / (TimeDiff));
-    const uint64_t ClockUpperBound = (((End2Tsc - Start1Tsc) * 1000000) / (TimeDiff));
+    // measurement not long enough
+    if (TimeDiff <= 2000) {
+      continue;
+    }
 
     // if both values differ significantly, the measurement could have been
     // interrupted between 2 rdtsc's
-    if ((static_cast<double>(ClockLowerBound) > ((static_cast<double>(ClockUpperBound)) * 0.999)) &&
-        ((TimeDiff) > 2000)) {
+    const uint64_t ClockLowerBound = (((End1Tsc - Start2Tsc) * 1000000) / (TimeDiff));
+    const uint64_t ClockUpperBound = (((End2Tsc - Start1Tsc) * 1000000) / (TimeDiff));
+
+    if (static_cast<double>(ClockLowerBound) > ((static_cast<double>(ClockUpperBound)) * 0.999)) {
       NumMeasurements++;
       const uint64_t Clock = (ClockLowerBound + ClockUpperBound) / 2;
       const bool ClockrateUpdateCondition = Clockrate == 0 ||
@@ -206,8 +205,7 @@ auto X86CPUTopology::clockrate() const -> uint64_t {
         Clockrate = Clock;
       }
     }
-    I += 2;
-  } while (((TimeDiff) < 10000) || (NumMeasurements < MinMeasurements));
+  }
 
   return Clockrate;
 }
diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp
index 616bb309..d1a18c6f 100644
--- a/src/firestarter/Measurement/MeasurementWorker.cpp
+++ b/src/firestarter/Measurement/MeasurementWorker.cpp
@@ -36,7 +36,7 @@ void insertCallback(void* Cls, const char* MetricName, int64_t TimeSinceEpoch, d
 
 namespace {
 
-// NOLINTBEGIN(cert-dcl50-cpp,cppcoreguidelines-pro-type-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay)
+// NOLINTBEGIN(cert-dcl50-cpp,cppcoreguidelines-pro-type-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay,clang-analyzer-valist.Uninitialized)
 auto scanStdin(const char* Fmt, int Count, ...) -> bool {
   va_list Args;
   va_start(Args, Count);
@@ -44,7 +44,7 @@ auto scanStdin(const char* Fmt, int Count, ...) -> bool {
   va_end(Args);
   return ReturnCode == Count;
 }
-// NOLINTEND(cert-dcl50-cpp,cppcoreguidelines-pro-type-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay)
+// NOLINTEND(cert-dcl50-cpp,cppcoreguidelines-pro-type-vararg,cppcoreguidelines-pro-bounds-array-to-pointer-decay,clang-analyzer-valist.Uninitialized)
 
 } // namespace
 

From ef3b634033de84656c38061ed71b38997a552306 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Mon, 4 Nov 2024 17:23:46 +0100
Subject: [PATCH 135/167] clang-tidy: nolint unnecessary warnings

---
 src/firestarter/Environment/X86/Payload/X86Payload.cpp | 2 ++
 src/firestarter/Measurement/Metric/RAPL.cpp            | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index 529d8d21..16485e99 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -68,6 +68,7 @@ void X86Payload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chro
 void X86Payload::initMemory(double* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue) {
   uint64_t I = 0;
 
+  // NOLINTBEGIN(cppcoreguidelines-pro-bounds-pointer-arithmetic)
   for (; I < InitBlocksize; I++) {
     MemoryAddr[I] = 0.25 + static_cast<double>(I) * 8.0 * FirstValue;
   }
@@ -77,6 +78,7 @@ void X86Payload::initMemory(double* MemoryAddr, uint64_t BufferSize, double Firs
   for (; I < BufferSize; I++) {
     MemoryAddr[I] = 0.25 + static_cast<double>(I) * 8.0 * LastValue;
   }
+  // NOLINTEND(cppcoreguidelines-pro-bounds-pointer-arithmetic)
 }
 
 }; // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Measurement/Metric/RAPL.cpp b/src/firestarter/Measurement/Metric/RAPL.cpp
index b08dc19d..80500d06 100644
--- a/src/firestarter/Measurement/Metric/RAPL.cpp
+++ b/src/firestarter/Measurement/Metric/RAPL.cpp
@@ -60,9 +60,14 @@ auto RaplMetricData::init() -> int32_t {
   std::vector<std::string> Paths = {};
 
   struct dirent* Dir = nullptr;
+
+  // As long as the DIR object (named RaplDir here) is not shared between threads this call is thread-safe:
+  // https://www.gnu.org/software/libc/manual/html_node/Reading_002fClosing-Directory.html
+  // NOLINTNEXTLINE(concurrency-mt-unsafe)
   while ((Dir = readdir(RaplDir)) != nullptr) {
     std::stringstream Path;
     std::stringstream NamePath;
+    // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay)
     Path << RaplPath << "/" << Dir->d_name;
     NamePath << Path.str() << "/name";
 

From d954fe94dbddc45a42e9f4a166922bb9d63d7377 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 5 Nov 2024 15:26:09 +0100
Subject: [PATCH 136/167] add docstrings for cuda

---
 include/firestarter/Cuda/Cuda.hpp | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/include/firestarter/Cuda/Cuda.hpp b/include/firestarter/Cuda/Cuda.hpp
index ebf63762..e4314eb7 100644
--- a/include/firestarter/Cuda/Cuda.hpp
+++ b/include/firestarter/Cuda/Cuda.hpp
@@ -27,14 +27,37 @@
 
 namespace firestarter::cuda {
 
+/// This class handles the workload on CUDA and HIP compatible GPUs. A gemm routine is used to stress them with a
+/// constant high load. This header does not include any CUDA or HIP specific headers to allow us to not guard the
+/// include of this header in other parts of the program.
 class Cuda {
 private:
+  /// The thread that is used to initialize the gpus. This thread will wait until each thread that runs the gemm routine
+  /// joins.
   std::thread InitThread;
 
+  /// Spawns a thread for each of the selected gpus, initializes them and starts the execution of the gemm in parallel.
+  /// \arg WaitForInitCv The condition variable used to signal that all gpus are initialized.
+  /// \arg LoadVar A reference to the variable that controls the current load of Firestarter.
+  /// \arg UseFloat Set to true if we want to stress using single precision floating points.
+  /// \arg UseDouble Set to true if we want to stress using double precision floating points. If neither UseFloat nor
+  /// UseDouble is set the precision will be chosen automatically.
+  /// \arg MatrixSize Set to a specific matrix size which will be chosen for the gemm operation or set to 0 for
+  /// automatic selection.
+  /// \arg Gpus Select the number of gpus to stress or 0 for all.
   static void initGpus(std::condition_variable& WaitForInitCv, const volatile firestarter::LoadThreadWorkType& LoadVar,
                        bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus);
 
 public:
+  /// Initialize the cuda class. This will start a thread running the Cuda::initGpus function and wait until all gpus
+  /// are initialized.
+  /// \arg LoadVar A reference to the variable that controls the current load of Firestarter.
+  /// \arg UseFloat Set to true if we want to stress using single precision floating points.
+  /// \arg UseDouble Set to true if we want to stress using double precision floating points. If neither UseFloat nor
+  /// UseDouble is set the precision will be chosen automatically.
+  /// \arg MatrixSize Set to a specific matrix size which will be chosen for the gemm operation or set to 0 for
+  /// automatic selection.
+  /// \arg Gpus Select the number of gpus to stress or 0 for all.
   Cuda(const volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize,
        int Gpus)
 #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)

From 523a1f48d205a242876ff0b43923d05d39733755 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 5 Nov 2024 21:46:02 +0100
Subject: [PATCH 137/167] move compiled payload into a new header. add
 docstrings for payload classes

---
 .../Environment/Payload/CompiledPayload.hpp   | 100 +++++++++++++
 .../Environment/Payload/Payload.hpp           | 131 ++++++++++++------
 .../X86/Payload/CompiledX86Payload.hpp        |  20 ++-
 3 files changed, 203 insertions(+), 48 deletions(-)
 create mode 100644 include/firestarter/Environment/Payload/CompiledPayload.hpp

diff --git a/include/firestarter/Environment/Payload/CompiledPayload.hpp b/include/firestarter/Environment/Payload/CompiledPayload.hpp
new file mode 100644
index 00000000..e275b180
--- /dev/null
+++ b/include/firestarter/Environment/Payload/CompiledPayload.hpp
@@ -0,0 +1,100 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2024 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+#pragma once
+
+#include "firestarter/Constants.hpp"
+#include "firestarter/Environment/Payload/PayloadStats.hpp"
+#include <chrono>
+#include <memory>
+#include <utility>
+
+namespace firestarter::environment::payload {
+
+class Payload;
+
+/// This class represents a payload that can be executed. It is created by calling compilePayload of the payload class
+/// with specific settings. It contains a reference to the init and low load functions (which do not change with payload
+/// settings) and the high load function which changes based on the settings. The stats of the high load function (nb.
+/// of flops, bytes of memory accessed and instructions) can also be retrieved.
+class CompiledPayload {
+public:
+  CompiledPayload() = delete;
+  virtual ~CompiledPayload() = default;
+
+  /// A unique ptr for the CompiledPayload with a custom deleter.
+  using UniquePtr = std::unique_ptr<CompiledPayload, void (*)(CompiledPayload*)>;
+
+  using HighLoadFunctionPtr = uint64_t (*)(double*, volatile LoadThreadWorkType*, uint64_t);
+
+  /// Getter for the stats of the high load function of the compiled payload
+  [[nodiscard]] auto stats() const -> const PayloadStats& { return Stats; };
+
+  /// Function to initialize the memory used by the high load function.
+  /// \arg MemoryAddr The pointer to the memory.
+  /// \arg BufferSize The number of doubles that is allocated in MemoryAddr.
+  void init(double* MemoryAddr, uint64_t BufferSize);
+
+  /// Function to produce a low load on the cpu.
+  /// \arg LoadVar The variable that controls the load. If this variable changes from LoadThreadWorkType::LoadLow to
+  /// something else this function will return.
+  /// \arg Period The period of the low/high load switching. This function may sleep a fraction of this period.
+  void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period);
+
+  /// Function to produce high load on the cpu.
+  /// \arg MemoryAddr The pointer to the memory.
+  /// \arg LoadVar The variable that controls the load. If this variable changes from LoadThreadWorkType::LoadHigh to
+  /// something else this function will return.
+  /// \arg Iterations The current iteration counter. This number will be incremented for every iteration of the high
+  /// load loop.
+  /// \returns The iteration counter passed into this function plus the number of iterations of the high load loop.
+  [[nodiscard]] auto highLoadFunction(double* MemoryAddr, volatile LoadThreadWorkType& LoadVar, uint64_t Iterations)
+      -> uint64_t {
+    return HighLoadFunction(MemoryAddr, &LoadVar, Iterations);
+  }
+
+protected:
+  /// Constructor for the CompiledPayload.
+  /// \arg Stats The stats of the high load function from the payload.
+  /// \arg PayloadPtr A unique pointer to the payload class to allow calling the init and low load functions which do
+  /// not change based on different payload settings.
+  /// \arg HighLoadFunction The pointer to the compiled high load function.
+  CompiledPayload(const PayloadStats& Stats, std::unique_ptr<Payload>&& PayloadPtr,
+                  HighLoadFunctionPtr HighLoadFunction)
+      : Stats(Stats)
+      , PayloadPtr(std::move(PayloadPtr))
+      , HighLoadFunction(HighLoadFunction) {}
+
+  /// Getter for the pointer to the high load function. We need to access this pointer directly to free the associated
+  /// memory from asmjit.
+  [[nodiscard]] auto highLoadFunctionPtr() -> HighLoadFunctionPtr { return HighLoadFunction; }
+
+private:
+  /// The stats of the compiled payload.
+  PayloadStats Stats;
+  /// The pointer to the payload class to allow calling the init and low load functions which do not change based on
+  /// different payload settings.
+  std::unique_ptr<Payload> PayloadPtr;
+  /// The pointer to the compiled high load function.
+  HighLoadFunctionPtr HighLoadFunction;
+};
+
+} // namespace firestarter::environment::payload
\ No newline at end of file
diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index 01982a85..af673b76 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -21,83 +21,71 @@
 
 #pragma once
 
+#include "CompiledPayload.hpp"
 #include "firestarter/Constants.hpp"
 #include "firestarter/Environment/CPUTopology.hpp"
 #include "firestarter/Environment/Payload/PayloadSettings.hpp"
-#include "firestarter/Environment/Payload/PayloadStats.hpp"
 #include <chrono>
 #include <list>
-#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
 
 namespace firestarter::environment::payload {
 
-class Payload;
-
-class CompiledPayload {
-public:
-  CompiledPayload() = delete;
-  virtual ~CompiledPayload() = default;
-
-  using UniquePtr = std::unique_ptr<CompiledPayload, void (*)(CompiledPayload*)>;
-
-  using HighLoadFunctionPtr = uint64_t (*)(double*, volatile LoadThreadWorkType*, uint64_t);
-
-  CompiledPayload(const PayloadStats& Stats, std::unique_ptr<Payload>&& PayloadPtr,
-                  HighLoadFunctionPtr HighLoadFunction)
-      : Stats(Stats)
-      , PayloadPtr(std::move(PayloadPtr))
-      , HighLoadFunction(HighLoadFunction) {}
-
-  [[nodiscard]] auto stats() const -> const PayloadStats& { return Stats; };
-
-  void init(double* MemoryAddr, uint64_t BufferSize);
-
-  void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period);
-
-  [[nodiscard]] auto highLoadFunction(double* AddrMem, volatile LoadThreadWorkType& LoadVar, uint64_t Iterations)
-      -> uint64_t {
-    return HighLoadFunction(AddrMem, &LoadVar, Iterations);
-  }
-
-protected:
-  // We need to access this pointer directly to free the associated memory from asmjit
-  [[nodiscard]] auto highLoadFunctionPtr() -> HighLoadFunctionPtr { return HighLoadFunction; }
-
-private:
-  PayloadStats Stats;
-
-  std::unique_ptr<Payload> PayloadPtr;
-
-  HighLoadFunctionPtr HighLoadFunction;
-};
-
 class Payload {
 private:
+  /// The name of this payload. It is usually named by the CPU extension this payload uses e.g., SSE2 or FMA.
   std::string Name;
-  [[nodiscard]] static auto getSequenceStartCount(const std::vector<std::string>& Sequence, const std::string& Start)
-      -> unsigned;
 
   /// The size of the SIMD registers in units of doubles (8B)
   unsigned RegisterSize = 0;
+
   /// The number of SIMD registers used by the payload
   unsigned RegisterCount = 0;
 
+  /// Get the number of items in the sequence that start with a given string.
+  /// \arg Sequence The sequence that is analyzed.
+  /// \arg Start The string that contains the start of the item names that should be counted in the sequence.
+  /// \returns The number of items in the sequence that start with the supplied strings.
+  [[nodiscard]] static auto getSequenceStartCount(const std::vector<std::string>& Sequence, const std::string& Start)
+      -> unsigned;
+
 protected:
+  /// Generate a sequence of items interleaved with one another based on a supplied number how many times each items
+  /// should appear in the resulting sequence.
+  /// \arg Proportion The mapping of items defined by a string and the number of times this item should apear in the
+  /// resuling sequence.
+  /// \returns The sequence that is generated from the supplied propotions
   [[nodiscard]] static auto generateSequence(const std::vector<std::pair<std::string, unsigned>>& Proportion)
       -> std::vector<std::string>;
+
+  /// Get the number of items in the sequence that start with "L2".
+  /// \arg Sequence The sequence that is analyzed.
+  /// \returns The number of items items in the sequence that start with "L2".
   [[nodiscard]] static auto getL2SequenceCount(const std::vector<std::string>& Sequence) -> unsigned {
     return getSequenceStartCount(Sequence, "L2");
   };
+
+  /// Get the number of items in the sequence that start with "L3".
+  /// \arg Sequence The sequence that is analyzed.
+  /// \returns The number of items items in the sequence that start with "L3".
   [[nodiscard]] static auto getL3SequenceCount(const std::vector<std::string>& Sequence) -> unsigned {
     return getSequenceStartCount(Sequence, "L3");
   };
+
+  /// Get the number of items in the sequence that start with "RAM".
+  /// \arg Sequence The sequence that is analyzed.
+  /// \returns The number of items items in the sequence that start with "RAM".
   [[nodiscard]] static auto getRAMSequenceCount(const std::vector<std::string>& Sequence) -> unsigned {
     return getSequenceStartCount(Sequence, "RAM");
   };
 
+  /// Get the maximum number of repetitions of the the supplied sequence so that the size of the sequence times the
+  /// number of repetitions is smaller equal to the number of lines. The number of repetitions is a unsigned number.
+  /// \arg Sequence The reference to the sequence that should be repeated multiple times
+  /// \arg NumberOfLines The maximum number of entries in the repeated sequence
+  /// \returns The number of repetitions of the sequence.
   [[nodiscard]] static auto getNumberOfSequenceRepetitions(const std::vector<std::string>& Sequence,
                                                            const unsigned NumberOfLines) -> unsigned {
     if (Sequence.empty()) {
@@ -106,15 +94,48 @@ class Payload {
     return NumberOfLines / Sequence.size();
   };
 
+  /// Get the number of accesses that can be made to 80% of the L2 cache size (each incrementing the pointer to the
+  /// cache) before the pointer need to be reseted to the original value. This assumes that each L2 item in the sequence
+  /// increments the pointer by one cache line (64B). It is also assumed that the number of accesses fit at least once
+  /// into this cache. This should always be the case on modern CPUs.
+  /// \arg Sequence The reference to the sequence.
+  /// \arg NumberOfLines The maximum number of entries in the repeated sequence.
+  /// \arg Size The size of the L2 Cache.
+  /// \returns The maximum number of iterations of the repeated sequence to fill up to 80% of the L2 cache.
   [[nodiscard]] static auto getL2LoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
                                            unsigned Size) -> unsigned;
+
+  /// Get the number of accesses that can be made to 80% of the L3 cache size (each incrementing the pointer to the
+  /// cache) before the pointer need to be reseted to the original value. This assumes that each L3 item in the sequence
+  /// increments the pointer by one cache line (64B). See the note about assumptions on the size of the cache in the
+  /// documentation of getL2LoopCount.
+  /// \arg Sequence The reference to the sequence.
+  /// \arg NumberOfLines The maximum number of entries in the repeated sequence.
+  /// \arg Size The size of the L3 Cache.
+  /// \returns The maximum number of iterations of the repeated sequence to fill up to 80% of the L3 cache.
   [[nodiscard]] static auto getL3LoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
                                            unsigned Size) -> unsigned;
+
+  /// Get the number of accesses that can be made to 100% of the RAM size (each incrementing the pointer to the ram)
+  /// before the pointer need to be reseted to the original value. This assumes that each RAM item in the sequence
+  /// increments the pointer by one cache line (64B). See the note about assumptions on the size of the cache in the
+  /// documentation of getL2LoopCount.
+  /// \arg Sequence The reference to the sequence.
+  /// \arg NumberOfLines The maximum number of entries in the repeated sequence.
+  /// \arg Size The size of the RAM.
+  /// \returns The maximum number of iterations of the repeated sequence to fill up to 100% of the RAM.
   [[nodiscard]] static auto getRAMLoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
                                             unsigned Size) -> unsigned;
 
+  /// Function to initialize the memory used by the high load function.
+  /// \arg MemoryAddr The pointer to the memory.
+  /// \arg BufferSize The number of doubles that is allocated in MemoryAddr.
   virtual void init(double* MemoryAddr, uint64_t BufferSize) const = 0;
 
+  /// Function to produce a low load on the cpu.
+  /// \arg LoadVar The variable that controls the load. If this variable changes from LoadThreadWorkType::LoadLow to
+  /// something else this function will return.
+  /// \arg Period The period of the low/high load switching. This function may sleep a fraction of this period.
   virtual void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) const = 0;
 
 public:
@@ -126,19 +147,37 @@ class Payload {
       , RegisterCount(RegisterCount) {}
   virtual ~Payload() = default;
 
+  // Allow init and lowLoadFunction functions to be accessed by the CompiledPayload class.
   friend void CompiledPayload::init(double* MemoryAddr, uint64_t BufferSize);
   friend void CompiledPayload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period);
 
+  /// Get the name of this payload. It is usually named by the CPU extension this payload uses e.g., SSE2 or FMA.
   [[nodiscard]] auto name() const -> const std::string& { return Name; }
+
   /// The size of the SIMD registers in units of doubles (8B)
   [[nodiscard]] auto registerSize() const -> unsigned { return RegisterSize; }
+
   /// The number of SIMD registers used by the payload
   [[nodiscard]] auto registerCount() const -> unsigned { return RegisterCount; }
 
-  [[nodiscard]] virtual auto isAvailable(const CPUTopology&) const -> bool = 0;
-
+  /// Check if this payload is available on the current system. This usually translates to whether the cpu extensions
+  /// are available.
+  /// \arg Topology The CPUTopology that is used to check against if this payload is supported.
+  /// \returns true if the payload is supported on the given CPUTopology.
+  [[nodiscard]] virtual auto isAvailable(const CPUTopology& Topology) const -> bool = 0;
+
+  /// Compile this payload with supplied settings and optional features.
+  /// \arg Settings The settings for this payload e.g., the number of lines or the size of the caches.
+  /// \arg DumpRegisters Should the code to support dumping registers be baked into the high load routine of the
+  /// compiled payload.
+  /// \arg ErrorDetection Should the code to support error detection between threads be baked into the high load routine
+  /// of the compiled payload.
+  /// \returns The compiled payload that provides access to the init and load functions.
   [[nodiscard]] virtual auto compilePayload(const PayloadSettings& Settings, bool DumpRegisters,
                                             bool ErrorDetection) const -> CompiledPayload::UniquePtr = 0;
+
+  /// Get the available instruction items that are supported by this payload.
+  /// \returns The available instruction items that are supported by this payload.
   [[nodiscard]] virtual auto getAvailableInstructions() const -> std::list<std::string> = 0;
 };
 
diff --git a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
index c3f89c9c..52ba9f37 100644
--- a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
@@ -22,25 +22,35 @@
 #pragma once
 
 #include "asmjit/core/jitruntime.h"
-#include "firestarter/Environment/Payload/Payload.hpp"
+#include "firestarter/Environment/Payload/CompiledPayload.hpp"
 #include "firestarter/Logging/Log.hpp"
 #include <memory>
 
 namespace firestarter::environment::x86::payload {
 
+/// This class provides the functionality to compile a payload created with asmjit and create a unique pointer to the
+/// CompiledPayload class which can be used to execute the functions of this payload.
 class CompiledX86Payload final : public environment::payload::CompiledPayload {
 private:
   // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
   inline static asmjit::JitRuntime Runtime = asmjit::JitRuntime();
 
+  /// Custom deleter to release the memory of the high load function from the asmjit runtime.
+  /// \arg Payload The pointer to this class
   static void deleter(CompiledX86Payload* Payload) {
     if (Payload && Payload->highLoadFunctionPtr()) {
       Runtime.release(Payload->highLoadFunctionPtr());
     }
   }
-
+  /// Custom deleter to release the memory of the high load function from the asmjit runtime.
+  /// \arg Payload The pointer to this class
   static void deleter(CompiledPayload* Payload) { deleter(dynamic_cast<CompiledX86Payload*>(Payload)); }
 
+  /// Wrap the CompiledPayload class and forward all arguments.
+  /// \arg Stats The stats of the high load function from the payload.
+  /// \arg PayloadPtr A unique pointer to the payload class to allow calling the init and low load functions which do
+  /// not change based on different payload settings.
+  /// \arg HighLoadFunction The pointer to the compiled high load function.
   CompiledX86Payload(const environment::payload::PayloadStats& Stats,
                      std::unique_ptr<environment::payload::Payload>&& PayloadPtr, HighLoadFunctionPtr HighLoadFunction)
       : CompiledPayload(Stats, std::move(PayloadPtr), HighLoadFunction) {}
@@ -49,6 +59,12 @@ class CompiledX86Payload final : public environment::payload::CompiledPayload {
   CompiledX86Payload() = delete;
   ~CompiledX86Payload() override = default;
 
+  /// Create a unique pointer to a compiled payload from payload stats and assembly in a code holder.
+  /// \targ DerivedPayload The payload class from which the CodeHolder with the assembly was created.
+  /// \arg Stats The stats of the payload that is contained in the CodeHolder.
+  /// \arg Code The CodeHolder that contains the assembly instructions making up the payload. This will be added to the
+  /// JitRuntime and a pointer to the function will be provided to the CompiledPayload class.
+  /// \returns The unique pointer to the compiled payload.
   template <class DerivedPayload>
   [[nodiscard]] static auto create(environment::payload::PayloadStats Stats, asmjit::CodeHolder& Code) -> UniquePtr {
     HighLoadFunctionPtr HighLoadFunction{};

From ac82234fddc800aa5673adf81ca9437a9cc72924 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 5 Nov 2024 22:14:50 +0100
Subject: [PATCH 138/167] move functions for sequences of items into
 payloadsettings

---
 .../Environment/Payload/Payload.hpp           | 83 ------------------
 .../Environment/Payload/PayloadSettings.hpp   | 86 +++++++++++++++++++
 src/CMakeLists.txt                            |  3 +-
 .../Environment/Payload/CompiledPayload.cpp   | 33 +++++++
 .../{Payload.cpp => PayloadSettings.cpp}      | 26 +++---
 .../Environment/X86/Payload/AVX512Payload.cpp | 20 +++--
 .../Environment/X86/Payload/AVXPayload.cpp    | 20 +++--
 .../Environment/X86/Payload/FMA4Payload.cpp   | 20 +++--
 .../Environment/X86/Payload/FMAPayload.cpp    | 20 +++--
 .../Environment/X86/Payload/SSE2Payload.cpp   | 20 +++--
 .../Environment/X86/Payload/ZENFMAPayload.cpp | 20 +++--
 11 files changed, 203 insertions(+), 148 deletions(-)
 create mode 100644 src/firestarter/Environment/Payload/CompiledPayload.cpp
 rename src/firestarter/Environment/Payload/{Payload.cpp => PayloadSettings.cpp} (72%)

diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index af673b76..0d844f59 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -29,7 +29,6 @@
 #include <list>
 #include <string>
 #include <utility>
-#include <vector>
 
 namespace firestarter::environment::payload {
 
@@ -44,89 +43,7 @@ class Payload {
   /// The number of SIMD registers used by the payload
   unsigned RegisterCount = 0;
 
-  /// Get the number of items in the sequence that start with a given string.
-  /// \arg Sequence The sequence that is analyzed.
-  /// \arg Start The string that contains the start of the item names that should be counted in the sequence.
-  /// \returns The number of items in the sequence that start with the supplied strings.
-  [[nodiscard]] static auto getSequenceStartCount(const std::vector<std::string>& Sequence, const std::string& Start)
-      -> unsigned;
-
 protected:
-  /// Generate a sequence of items interleaved with one another based on a supplied number how many times each items
-  /// should appear in the resulting sequence.
-  /// \arg Proportion The mapping of items defined by a string and the number of times this item should apear in the
-  /// resuling sequence.
-  /// \returns The sequence that is generated from the supplied propotions
-  [[nodiscard]] static auto generateSequence(const std::vector<std::pair<std::string, unsigned>>& Proportion)
-      -> std::vector<std::string>;
-
-  /// Get the number of items in the sequence that start with "L2".
-  /// \arg Sequence The sequence that is analyzed.
-  /// \returns The number of items items in the sequence that start with "L2".
-  [[nodiscard]] static auto getL2SequenceCount(const std::vector<std::string>& Sequence) -> unsigned {
-    return getSequenceStartCount(Sequence, "L2");
-  };
-
-  /// Get the number of items in the sequence that start with "L3".
-  /// \arg Sequence The sequence that is analyzed.
-  /// \returns The number of items items in the sequence that start with "L3".
-  [[nodiscard]] static auto getL3SequenceCount(const std::vector<std::string>& Sequence) -> unsigned {
-    return getSequenceStartCount(Sequence, "L3");
-  };
-
-  /// Get the number of items in the sequence that start with "RAM".
-  /// \arg Sequence The sequence that is analyzed.
-  /// \returns The number of items items in the sequence that start with "RAM".
-  [[nodiscard]] static auto getRAMSequenceCount(const std::vector<std::string>& Sequence) -> unsigned {
-    return getSequenceStartCount(Sequence, "RAM");
-  };
-
-  /// Get the maximum number of repetitions of the the supplied sequence so that the size of the sequence times the
-  /// number of repetitions is smaller equal to the number of lines. The number of repetitions is a unsigned number.
-  /// \arg Sequence The reference to the sequence that should be repeated multiple times
-  /// \arg NumberOfLines The maximum number of entries in the repeated sequence
-  /// \returns The number of repetitions of the sequence.
-  [[nodiscard]] static auto getNumberOfSequenceRepetitions(const std::vector<std::string>& Sequence,
-                                                           const unsigned NumberOfLines) -> unsigned {
-    if (Sequence.empty()) {
-      return 0;
-    }
-    return NumberOfLines / Sequence.size();
-  };
-
-  /// Get the number of accesses that can be made to 80% of the L2 cache size (each incrementing the pointer to the
-  /// cache) before the pointer need to be reseted to the original value. This assumes that each L2 item in the sequence
-  /// increments the pointer by one cache line (64B). It is also assumed that the number of accesses fit at least once
-  /// into this cache. This should always be the case on modern CPUs.
-  /// \arg Sequence The reference to the sequence.
-  /// \arg NumberOfLines The maximum number of entries in the repeated sequence.
-  /// \arg Size The size of the L2 Cache.
-  /// \returns The maximum number of iterations of the repeated sequence to fill up to 80% of the L2 cache.
-  [[nodiscard]] static auto getL2LoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
-                                           unsigned Size) -> unsigned;
-
-  /// Get the number of accesses that can be made to 80% of the L3 cache size (each incrementing the pointer to the
-  /// cache) before the pointer need to be reseted to the original value. This assumes that each L3 item in the sequence
-  /// increments the pointer by one cache line (64B). See the note about assumptions on the size of the cache in the
-  /// documentation of getL2LoopCount.
-  /// \arg Sequence The reference to the sequence.
-  /// \arg NumberOfLines The maximum number of entries in the repeated sequence.
-  /// \arg Size The size of the L3 Cache.
-  /// \returns The maximum number of iterations of the repeated sequence to fill up to 80% of the L3 cache.
-  [[nodiscard]] static auto getL3LoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
-                                           unsigned Size) -> unsigned;
-
-  /// Get the number of accesses that can be made to 100% of the RAM size (each incrementing the pointer to the ram)
-  /// before the pointer need to be reseted to the original value. This assumes that each RAM item in the sequence
-  /// increments the pointer by one cache line (64B). See the note about assumptions on the size of the cache in the
-  /// documentation of getL2LoopCount.
-  /// \arg Sequence The reference to the sequence.
-  /// \arg NumberOfLines The maximum number of entries in the repeated sequence.
-  /// \arg Size The size of the RAM.
-  /// \returns The maximum number of iterations of the repeated sequence to fill up to 100% of the RAM.
-  [[nodiscard]] static auto getRAMLoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
-                                            unsigned Size) -> unsigned;
-
   /// Function to initialize the memory used by the high load function.
   /// \arg MemoryAddr The pointer to the memory.
   /// \arg BufferSize The number of doubles that is allocated in MemoryAddr.
diff --git a/include/firestarter/Environment/Payload/PayloadSettings.hpp b/include/firestarter/Environment/Payload/PayloadSettings.hpp
index 5c287e10..81c05a08 100644
--- a/include/firestarter/Environment/Payload/PayloadSettings.hpp
+++ b/include/firestarter/Environment/Payload/PayloadSettings.hpp
@@ -46,6 +46,13 @@ struct PayloadSettings {
   unsigned Lines;
   std::vector<InstructionWithProportion> InstructionGroups;
 
+  /// Get the number of items in the sequence that start with a given string.
+  /// \arg Sequence The sequence that is analyzed.
+  /// \arg Start The string that contains the start of the item names that should be counted in the sequence.
+  /// \returns The number of items in the sequence that start with the supplied string.
+  [[nodiscard]] static auto getSequenceStartCount(const std::vector<std::string>& Sequence, const std::string& Start)
+      -> unsigned;
+
 public:
   PayloadSettings() = delete;
 
@@ -57,6 +64,81 @@ struct PayloadSettings {
       , Lines(Lines)
       , InstructionGroups(std::move(InstructionGroups)) {}
 
+  /// Generate a sequence of items interleaved with one another based on the supplied number of times each item
+  /// should appear in the resulting sequence.
+  /// \arg Proportion The mapping of items defined by a string and the number of times this item should appear in the
+  /// resulting sequence.
+  /// \returns The sequence that is generated from the supplied proportions
+  [[nodiscard]] static auto generateSequence(const std::vector<std::pair<std::string, unsigned>>& Proportion)
+      -> std::vector<std::string>;
+
+  /// Get the number of items in the sequence that start with "L2".
+  /// \arg Sequence The sequence that is analyzed.
+  /// \returns The number of items in the sequence that start with "L2".
+  [[nodiscard]] static auto getL2SequenceCount(const std::vector<std::string>& Sequence) -> unsigned {
+    return getSequenceStartCount(Sequence, "L2");
+  };
+
+  /// Get the number of items in the sequence that start with "L3".
+  /// \arg Sequence The sequence that is analyzed.
+  /// \returns The number of items in the sequence that start with "L3".
+  [[nodiscard]] static auto getL3SequenceCount(const std::vector<std::string>& Sequence) -> unsigned {
+    return getSequenceStartCount(Sequence, "L3");
+  };
+
+  /// Get the number of items in the sequence that start with "RAM".
+  /// \arg Sequence The sequence that is analyzed.
+  /// \returns The number of items in the sequence that start with "RAM".
+  [[nodiscard]] static auto getRAMSequenceCount(const std::vector<std::string>& Sequence) -> unsigned {
+    return getSequenceStartCount(Sequence, "RAM");
+  };
+
+  /// Get the maximum number of repetitions of the supplied sequence so that the size of the sequence times the number
+  /// of repetitions is less than or equal to the number of lines. The number of repetitions is an unsigned number.
+  /// \arg Sequence The reference to the sequence that should be repeated multiple times
+  /// \arg NumberOfLines The maximum number of entries in the repeated sequence
+  /// \returns The number of repetitions of the sequence.
+  [[nodiscard]] static auto getNumberOfSequenceRepetitions(const std::vector<std::string>& Sequence,
+                                                           const unsigned NumberOfLines) -> unsigned {
+    if (Sequence.empty()) {
+      return 0;
+    }
+    return NumberOfLines / Sequence.size();
+  };
+
+  /// Get the number of accesses that can be made to 80% of the L2 cache size (each incrementing the pointer to the
+  /// cache) before the pointer needs to be reset to the original value. This assumes that each L2 item in the sequence
+  /// increments the pointer by one cache line (64B). It is also assumed that the number of accesses fit at least once
+  /// into this cache. This should always be the case on modern CPUs.
+  /// \arg Sequence The reference to the sequence.
+  /// \arg NumberOfLines The maximum number of entries in the repeated sequence.
+  /// \arg Size The size of the L2 Cache.
+  /// \returns The maximum number of iterations of the repeated sequence to fill up to 80% of the L2 cache.
+  [[nodiscard]] static auto getL2LoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
+                                           unsigned Size) -> unsigned;
+
+  /// Get the number of accesses that can be made to 80% of the L3 cache size (each incrementing the pointer to the
+  /// cache) before the pointer needs to be reset to the original value. This assumes that each L3 item in the sequence
+  /// increments the pointer by one cache line (64B). See the note about assumptions on the size of the cache in the
+  /// documentation of getL2LoopCount.
+  /// \arg Sequence The reference to the sequence.
+  /// \arg NumberOfLines The maximum number of entries in the repeated sequence.
+  /// \arg Size The size of the L3 Cache.
+  /// \returns The maximum number of iterations of the repeated sequence to fill up to 80% of the L3 cache.
+  [[nodiscard]] static auto getL3LoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
+                                           unsigned Size) -> unsigned;
+
+  /// Get the number of accesses that can be made to 100% of the RAM size (each incrementing the pointer to the ram)
+  /// before the pointer needs to be reset to the original value. This assumes that each RAM item in the sequence
+  /// increments the pointer by one cache line (64B). See the note about assumptions on the size of the cache in the
+  /// documentation of getL2LoopCount.
+  /// \arg Sequence The reference to the sequence.
+  /// \arg NumberOfLines The maximum number of entries in the repeated sequence.
+  /// \arg Size The size of the RAM.
+  /// \returns The maximum number of iterations of the repeated sequence to fill up to 100% of the RAM.
+  [[nodiscard]] static auto getRAMLoopCount(const std::vector<std::string>& Sequence, unsigned NumberOfLines,
+                                            unsigned Size) -> unsigned;
+
   /// Are the payload settings concreate, i.e. can one specific payload be compiled with these settings. This is the
   /// case if the option of threads is reduces to a single element.
   [[nodiscard]] auto isConcreate() const -> bool { return Threads.size() == 1; }
@@ -114,6 +196,10 @@ struct PayloadSettings {
   /// The vector of instruction groups with proportions.
   [[nodiscard]] auto instructionGroups() const -> const auto& { return InstructionGroups; }
 
+  /// Generate a sequence of items interleaved with one another based on the instruction groups.
+  /// \returns The sequence that is generated from the supplied proportions in the instruction groups.
+  [[nodiscard]] auto sequence() const -> std::vector<std::string> { return generateSequence(instructionGroups()); }
+
   /// The vector of instructions that are saved in the instruction groups
   [[nodiscard]] auto instructionGroupItems() const -> std::vector<std::string> {
     std::vector<std::string> Items;
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 0673af9b..c0355fa0 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -11,7 +11,8 @@ SET(FIRESTARTER_FILES
 
 	firestarter/Environment/Environment.cpp
 	firestarter/Environment/CPUTopology.cpp
-	firestarter/Environment/Payload/Payload.cpp
+	firestarter/Environment/Payload/CompiledPayload.cpp
+	firestarter/Environment/Payload/PayloadSettings.cpp
 
 	# here starts the x86 specific code
 	firestarter/Environment/X86/X86Environment.cpp
diff --git a/src/firestarter/Environment/Payload/CompiledPayload.cpp b/src/firestarter/Environment/Payload/CompiledPayload.cpp
new file mode 100644
index 00000000..9c7b5410
--- /dev/null
+++ b/src/firestarter/Environment/Payload/CompiledPayload.cpp
@@ -0,0 +1,33 @@
+/******************************************************************************
+ * FIRESTARTER - A Processor Stress Test Utility
+ * Copyright (C) 2020 TU Dresden, Center for Information Services and High
+ * Performance Computing
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/\>.
+ *
+ * Contact: daniel.hackenberg@tu-dresden.de
+ *****************************************************************************/
+
+#include <firestarter/Environment/Payload/CompiledPayload.hpp>
+#include <firestarter/Environment/Payload/Payload.hpp>
+
+namespace firestarter::environment::payload {
+
+void CompiledPayload::init(double* MemoryAddr, uint64_t BufferSize) { PayloadPtr->init(MemoryAddr, BufferSize); }
+
+void CompiledPayload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) {
+  PayloadPtr->lowLoadFunction(LoadVar, Period);
+};
+
+}; // namespace firestarter::environment::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/Payload/Payload.cpp b/src/firestarter/Environment/Payload/PayloadSettings.cpp
similarity index 72%
rename from src/firestarter/Environment/Payload/Payload.cpp
rename to src/firestarter/Environment/Payload/PayloadSettings.cpp
index 1bd89385..abc86b91 100644
--- a/src/firestarter/Environment/Payload/Payload.cpp
+++ b/src/firestarter/Environment/Payload/PayloadSettings.cpp
@@ -21,18 +21,12 @@
 
 #include <algorithm>
 #include <cmath>
-
-#include <firestarter/Environment/Payload/Payload.hpp>
+#include <firestarter/Environment/Payload/PayloadSettings.hpp>
 
 namespace firestarter::environment::payload {
 
-void CompiledPayload::init(double* MemoryAddr, uint64_t BufferSize) { PayloadPtr->init(MemoryAddr, BufferSize); }
-
-void CompiledPayload::lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) {
-  PayloadPtr->lowLoadFunction(LoadVar, Period);
-};
-
-auto Payload::getSequenceStartCount(const std::vector<std::string>& Sequence, const std::string& Start) -> unsigned {
+auto PayloadSettings::getSequenceStartCount(const std::vector<std::string>& Sequence, const std::string& Start)
+    -> unsigned {
   unsigned I = 0;
 
   for (const auto& Item : Sequence) {
@@ -44,7 +38,7 @@ auto Payload::getSequenceStartCount(const std::vector<std::string>& Sequence, co
   return I;
 }
 
-auto Payload::generateSequence(std::vector<std::pair<std::string, unsigned>> const& Proportions)
+auto PayloadSettings::generateSequence(std::vector<std::pair<std::string, unsigned>> const& Proportions)
     -> std::vector<std::string> {
   std::vector<std::pair<std::string, unsigned>> Prop = Proportions;
 
@@ -73,8 +67,8 @@ auto Payload::generateSequence(std::vector<std::pair<std::string, unsigned>> con
   return Sequence;
 }
 
-auto Payload::getL2LoopCount(const std::vector<std::string>& Sequence, const unsigned NumberOfLines,
-                             const unsigned Size) -> unsigned {
+auto PayloadSettings::getL2LoopCount(const std::vector<std::string>& Sequence, const unsigned NumberOfLines,
+                                     const unsigned Size) -> unsigned {
   if (getL2SequenceCount(Sequence) == 0) {
     return 0;
   }
@@ -82,8 +76,8 @@ auto Payload::getL2LoopCount(const std::vector<std::string>& Sequence, const uns
       (0.8 * Size / 64 / (getL2SequenceCount(Sequence) * getNumberOfSequenceRepetitions(Sequence, NumberOfLines))));
 }
 
-auto Payload::getL3LoopCount(const std::vector<std::string>& Sequence, const unsigned NumberOfLines,
-                             const unsigned Size) -> unsigned {
+auto PayloadSettings::getL3LoopCount(const std::vector<std::string>& Sequence, const unsigned NumberOfLines,
+                                     const unsigned Size) -> unsigned {
   if (getL3SequenceCount(Sequence) == 0) {
     return 0;
   }
@@ -91,8 +85,8 @@ auto Payload::getL3LoopCount(const std::vector<std::string>& Sequence, const uns
       (0.8 * Size / 64 / (getL3SequenceCount(Sequence) * getNumberOfSequenceRepetitions(Sequence, NumberOfLines))));
 }
 
-auto Payload::getRAMLoopCount(const std::vector<std::string>& Sequence, const unsigned NumberOfLines,
-                              const unsigned Size) -> unsigned {
+auto PayloadSettings::getRAMLoopCount(const std::vector<std::string>& Sequence, const unsigned NumberOfLines,
+                                      const unsigned Size) -> unsigned {
   if (getRAMSequenceCount(Sequence) == 0) {
     return 0;
   }
diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index f317a4db..0f128641 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -37,8 +37,9 @@ auto AVX512Payload::compilePayload(const environment::payload::PayloadSettings&
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto Sequence = generateSequence(Settings.instructionGroups());
-  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
+  auto Sequence = Settings.sequence();
+  auto Repetitions =
+      environment::payload::PayloadSettings::getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
 
   // compute count of flops and memory access for performance report
   environment::payload::PayloadStats Stats;
@@ -75,9 +76,12 @@ auto AVX512Payload::compilePayload(const environment::payload::PayloadSettings&
   const auto RamSize = Settings.ramBufferSizePerThread();
 
   // calculate the reset counters for the buffers
-  const auto L2LoopCount = getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
-  const auto L3LoopCount = getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
-  const auto RamLoopCount = getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
+  const auto L2LoopCount =
+      environment::payload::PayloadSettings::getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
+  const auto L3LoopCount =
+      environment::payload::PayloadSettings::getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
+  const auto RamLoopCount =
+      environment::payload::PayloadSettings::getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
 
   asmjit::CodeHolder Code;
   Code.init(asmjit::Environment::host());
@@ -298,7 +302,7 @@ auto AVX512Payload::compilePayload(const environment::payload::PayloadSettings&
   }
 
   Cb.movq(TempReg, IterReg); // restore iteration counter
-  if (getRAMSequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getRAMSequenceCount(Sequence) > 0) {
     // reset RAM counter
     auto NoRamReset = Cb.newLabel();
 
@@ -312,7 +316,7 @@ auto AVX512Payload::compilePayload(const environment::payload::PayloadSettings&
     Stats.Instructions += 2;
   }
   Cb.inc(TempReg); // increment iteration counter
-  if (getL2SequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getL2SequenceCount(Sequence) > 0) {
     // reset L2-Cache counter
     auto NoL2Reset = Cb.newLabel();
 
@@ -326,7 +330,7 @@ auto AVX512Payload::compilePayload(const environment::payload::PayloadSettings&
     Stats.Instructions += 2;
   }
   Cb.movq(IterReg, TempReg); // store iteration counter
-  if (getL3SequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getL3SequenceCount(Sequence) > 0) {
     // reset L3-Cache counter
     auto NoL3Reset = Cb.newLabel();
 
diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
index 2d8923f9..322ea3e7 100644
--- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
@@ -35,8 +35,9 @@ auto AVXPayload::compilePayload(const environment::payload::PayloadSettings& Set
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto Sequence = generateSequence(Settings.instructionGroups());
-  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
+  auto Sequence = Settings.sequence();
+  auto Repetitions =
+      environment::payload::PayloadSettings::getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
 
   // compute count of flops and memory access for performance report
   environment::payload::PayloadStats Stats;
@@ -73,9 +74,12 @@ auto AVXPayload::compilePayload(const environment::payload::PayloadSettings& Set
   const auto RamSize = Settings.ramBufferSizePerThread();
 
   // calculate the reset counters for the buffers
-  const auto L2LoopCount = getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
-  const auto L3LoopCount = getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
-  const auto RamLoopCount = getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
+  const auto L2LoopCount =
+      environment::payload::PayloadSettings::getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
+  const auto L3LoopCount =
+      environment::payload::PayloadSettings::getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
+  const auto RamLoopCount =
+      environment::payload::PayloadSettings::getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
 
   asmjit::CodeHolder Code;
   Code.init(asmjit::Environment::host());
@@ -329,7 +333,7 @@ auto AVXPayload::compilePayload(const environment::payload::PayloadSettings& Set
     }
   }
 
-  if (getRAMSequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getRAMSequenceCount(Sequence) > 0) {
     // reset RAM counter
     auto NoRamReset = Cb.newLabel();
 
@@ -342,7 +346,7 @@ auto AVXPayload::compilePayload(const environment::payload::PayloadSettings& Set
     // adds always two instruction
     Stats.Instructions += 2;
   }
-  if (getL2SequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getL2SequenceCount(Sequence) > 0) {
     // reset L2-Cache counter
     auto NoL2Reset = Cb.newLabel();
 
@@ -355,7 +359,7 @@ auto AVXPayload::compilePayload(const environment::payload::PayloadSettings& Set
     // adds always two instruction
     Stats.Instructions += 2;
   }
-  if (getL3SequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getL3SequenceCount(Sequence) > 0) {
     // reset L3-Cache counter
     auto NoL3Reset = Cb.newLabel();
 
diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
index f2342323..ec770929 100644
--- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
@@ -36,8 +36,9 @@ auto FMA4Payload::compilePayload(const environment::payload::PayloadSettings& Se
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto Sequence = generateSequence(Settings.instructionGroups());
-  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
+  auto Sequence = Settings.sequence();
+  auto Repetitions =
+      environment::payload::PayloadSettings::getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
 
   // compute count of flops and memory access for performance report
   environment::payload::PayloadStats Stats;
@@ -74,9 +75,12 @@ auto FMA4Payload::compilePayload(const environment::payload::PayloadSettings& Se
   const auto RamSize = Settings.ramBufferSizePerThread();
 
   // calculate the reset counters for the buffers
-  const auto L2LoopCount = getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
-  const auto L3LoopCount = getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
-  const auto RamLoopCount = getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
+  const auto L2LoopCount =
+      environment::payload::PayloadSettings::getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
+  const auto L3LoopCount =
+      environment::payload::PayloadSettings::getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
+  const auto RamLoopCount =
+      environment::payload::PayloadSettings::getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
 
   asmjit::CodeHolder Code;
   Code.init(asmjit::Environment::host());
@@ -300,7 +304,7 @@ auto FMA4Payload::compilePayload(const environment::payload::PayloadSettings& Se
   }
 
   Cb.movq(TempReg, IterReg); // restore iteration counter
-  if (getRAMSequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getRAMSequenceCount(Sequence) > 0) {
     // reset RAM counter
     auto NoRamReset = Cb.newLabel();
 
@@ -314,7 +318,7 @@ auto FMA4Payload::compilePayload(const environment::payload::PayloadSettings& Se
     Stats.Instructions += 2;
   }
   Cb.inc(TempReg); // increment iteration counter
-  if (getL2SequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getL2SequenceCount(Sequence) > 0) {
     // reset L2-Cache counter
     auto NoL2Reset = Cb.newLabel();
 
@@ -328,7 +332,7 @@ auto FMA4Payload::compilePayload(const environment::payload::PayloadSettings& Se
     Stats.Instructions += 2;
   }
   Cb.movq(IterReg, TempReg); // store iteration counter
-  if (getL3SequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getL3SequenceCount(Sequence) > 0) {
     // reset L3-Cache counter
     auto NoL3Reset = Cb.newLabel();
 
diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
index eba17753..accde4a0 100644
--- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
@@ -38,8 +38,9 @@ auto FMAPayload::compilePayload(const environment::payload::PayloadSettings& Set
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto Sequence = generateSequence(Settings.instructionGroups());
-  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
+  auto Sequence = Settings.sequence();
+  auto Repetitions =
+      environment::payload::PayloadSettings::getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
 
   // compute count of flops and memory access for performance report
   environment::payload::PayloadStats Stats;
@@ -76,9 +77,12 @@ auto FMAPayload::compilePayload(const environment::payload::PayloadSettings& Set
   const auto RamSize = Settings.ramBufferSizePerThread();
 
   // calculate the reset counters for the buffers
-  const auto L2LoopCount = getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
-  const auto L3LoopCount = getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
-  const auto RamLoopCount = getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
+  const auto L2LoopCount =
+      environment::payload::PayloadSettings::getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
+  const auto L3LoopCount =
+      environment::payload::PayloadSettings::getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
+  const auto RamLoopCount =
+      environment::payload::PayloadSettings::getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
 
   asmjit::CodeHolder Code;
   Code.init(asmjit::Environment::host());
@@ -336,7 +340,7 @@ auto FMAPayload::compilePayload(const environment::payload::PayloadSettings& Set
   }
 
   Cb.movq(TempReg, IterReg); // restore iteration counter
-  if (getRAMSequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getRAMSequenceCount(Sequence) > 0) {
     // reset RAM counter
     auto NoRamReset = Cb.newLabel();
 
@@ -350,7 +354,7 @@ auto FMAPayload::compilePayload(const environment::payload::PayloadSettings& Set
     Stats.Instructions += 2;
   }
   Cb.inc(TempReg); // increment iteration counter
-  if (getL2SequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getL2SequenceCount(Sequence) > 0) {
     // reset L2-Cache counter
     auto NoL2Reset = Cb.newLabel();
 
@@ -364,7 +368,7 @@ auto FMAPayload::compilePayload(const environment::payload::PayloadSettings& Set
     Stats.Instructions += 2;
   }
   Cb.movq(IterReg, TempReg); // store iteration counter
-  if (getL3SequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getL3SequenceCount(Sequence) > 0) {
     // reset L3-Cache counter
     auto NoL3Reset = Cb.newLabel();
 
diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index 47126f1f..0d05ca99 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -35,8 +35,9 @@ auto SSE2Payload::compilePayload(const environment::payload::PayloadSettings& Se
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto Sequence = generateSequence(Settings.instructionGroups());
-  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
+  auto Sequence = Settings.sequence();
+  auto Repetitions =
+      environment::payload::PayloadSettings::getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
 
   // compute count of flops and memory access for performance report
   environment::payload::PayloadStats Stats;
@@ -73,9 +74,12 @@ auto SSE2Payload::compilePayload(const environment::payload::PayloadSettings& Se
   const auto RamSize = Settings.ramBufferSizePerThread();
 
   // calculate the reset counters for the buffers
-  const auto L2LoopCount = getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
-  const auto L3LoopCount = getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
-  const auto RamLoopCount = getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
+  const auto L2LoopCount =
+      environment::payload::PayloadSettings::getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
+  const auto L3LoopCount =
+      environment::payload::PayloadSettings::getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
+  const auto RamLoopCount =
+      environment::payload::PayloadSettings::getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
 
   asmjit::CodeHolder Code;
   Code.init(asmjit::Environment::host());
@@ -321,7 +325,7 @@ auto SSE2Payload::compilePayload(const environment::payload::PayloadSettings& Se
     }
   }
 
-  if (getRAMSequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getRAMSequenceCount(Sequence) > 0) {
     // reset RAM counter
     auto NoRamReset = Cb.newLabel();
 
@@ -334,7 +338,7 @@ auto SSE2Payload::compilePayload(const environment::payload::PayloadSettings& Se
     // adds always two instruction
     Stats.Instructions += 2;
   }
-  if (getL2SequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getL2SequenceCount(Sequence) > 0) {
     // reset L2-Cache counter
     auto NoL2Reset = Cb.newLabel();
 
@@ -347,7 +351,7 @@ auto SSE2Payload::compilePayload(const environment::payload::PayloadSettings& Se
     // adds always two instruction
     Stats.Instructions += 2;
   }
-  if (getL3SequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getL3SequenceCount(Sequence) > 0) {
     // reset L3-Cache counter
     auto NoL3Reset = Cb.newLabel();
 
diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
index 64a3593c..e45dd9bf 100644
--- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
@@ -34,8 +34,9 @@ auto ZENFMAPayload::compilePayload(const environment::payload::PayloadSettings&
 
   // Compute the sequence of instruction groups and the number of its repetions
   // to reach the desired size
-  auto Sequence = generateSequence(Settings.instructionGroups());
-  auto Repetitions = getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
+  auto Sequence = Settings.sequence();
+  auto Repetitions =
+      environment::payload::PayloadSettings::getNumberOfSequenceRepetitions(Sequence, Settings.linesPerThread());
 
   // compute count of flops and memory access for performance report
   environment::payload::PayloadStats Stats;
@@ -72,9 +73,12 @@ auto ZENFMAPayload::compilePayload(const environment::payload::PayloadSettings&
   const auto RamSize = Settings.ramBufferSizePerThread();
 
   // calculate the reset counters for the buffers
-  const auto L2LoopCount = getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
-  const auto L3LoopCount = getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
-  const auto RamLoopCount = getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
+  const auto L2LoopCount =
+      environment::payload::PayloadSettings::getL2LoopCount(Sequence, Settings.linesPerThread(), L2Size);
+  const auto L3LoopCount =
+      environment::payload::PayloadSettings::getL3LoopCount(Sequence, Settings.linesPerThread(), L3Size);
+  const auto RamLoopCount =
+      environment::payload::PayloadSettings::getRAMLoopCount(Sequence, Settings.linesPerThread(), RamSize);
 
   asmjit::CodeHolder Code;
   Code.init(asmjit::Environment::host());
@@ -286,7 +290,7 @@ auto ZENFMAPayload::compilePayload(const environment::payload::PayloadSettings&
   }
 
   Cb.movq(TempReg, IterReg); // restore iteration counter
-  if (getRAMSequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getRAMSequenceCount(Sequence) > 0) {
     // reset RAM counter
     auto NoRamReset = Cb.newLabel();
 
@@ -300,7 +304,7 @@ auto ZENFMAPayload::compilePayload(const environment::payload::PayloadSettings&
     Stats.Instructions += 2;
   }
   Cb.inc(TempReg); // increment iteration counter
-  if (getL2SequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getL2SequenceCount(Sequence) > 0) {
     // reset L2-Cache counter
     auto NoL2Reset = Cb.newLabel();
 
@@ -314,7 +318,7 @@ auto ZENFMAPayload::compilePayload(const environment::payload::PayloadSettings&
     Stats.Instructions += 2;
   }
   Cb.movq(IterReg, TempReg); // store iteration counter
-  if (getL3SequenceCount(Sequence) > 0) {
+  if (environment::payload::PayloadSettings::getL3SequenceCount(Sequence) > 0) {
     // reset L3-Cache counter
     auto NoL3Reset = Cb.newLabel();
 

From d5c031146ba31808779a6b34140974a8576914b1 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 5 Nov 2024 23:22:25 +0100
Subject: [PATCH 139/167] add more documentation to the payload and platform

---
 .../Environment/Payload/PayloadSettings.hpp   | 26 +++++++++++++++++--
 .../Environment/Payload/PayloadStats.hpp      |  7 ++++-
 .../Environment/Platform/PlatformConfig.hpp   | 26 +++++++++++++++++++
 .../X86/Platform/X86PlatformConfig.hpp        | 19 ++++++++++++++
 .../Environment/Payload/PayloadSettings.cpp   |  2 +-
 5 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/include/firestarter/Environment/Payload/PayloadSettings.hpp b/include/firestarter/Environment/Payload/PayloadSettings.hpp
index 81c05a08..8438e9a6 100644
--- a/include/firestarter/Environment/Payload/PayloadSettings.hpp
+++ b/include/firestarter/Environment/Payload/PayloadSettings.hpp
@@ -32,6 +32,7 @@
 
 namespace firestarter::environment::payload {
 
+/// This class represents the settings that can be changed in the high load routine of a payload.
 struct PayloadSettings {
 public:
   using InstructionWithProportion = std::pair<std::string, unsigned>;
@@ -40,10 +41,21 @@ struct PayloadSettings {
   /// The number of threads for which this payload is available. Multiple ones may exsists. The PayloadSettings are
   /// concreate once this is set to contain only one element.
   std::list<unsigned> Threads;
+
+  /// The size of the L1i cache per physical CPU core. This value may be empty.
   std::optional<unsigned> InstructionCacheSize;
+
+  /// The size of the L1d,L2,...,L3 caches per physical CPU core.
   std::list<unsigned> DataCacheBufferSize;
+
+  /// The selected size of the buffer that is in the RAM on the physical CPU core.
   unsigned RamBufferSize;
+
+  /// The maximum number of instructions that should appear inside the high load routine.
   unsigned Lines;
+
+  /// This represents the instructions in combination with the number of times they should appear in the generated
+  /// sequence.
   std::vector<InstructionWithProportion> InstructionGroups;
 
   /// Get the number of items in the sequence that start with a given string.
@@ -69,7 +81,7 @@ struct PayloadSettings {
   /// \arg Proportion The mapping of items defined by a string and the number of times this item should apear in the
   /// resuling sequence.
   /// \returns The sequence that is generated from the supplied propotions
-  [[nodiscard]] static auto generateSequence(const std::vector<std::pair<std::string, unsigned>>& Proportion)
+  [[nodiscard]] static auto generateSequence(const std::vector<InstructionWithProportion>& Proportion)
       -> std::vector<std::string>;
 
   /// Get the number of items in the sequence that start with "L2".
@@ -154,10 +166,13 @@ struct PayloadSettings {
 
   /// The available instruction cache size. This refers to the L1i-Cache on the physical CPU core.
   [[nodiscard]] auto instructionCacheSize() const -> const auto& { return InstructionCacheSize; }
+
   /// The size of the L1d,L2,...,L3 caches per physical CPU core.
   [[nodiscard]] auto dataCacheBufferSize() const -> const auto& { return DataCacheBufferSize; }
+
   /// The selected size of the buffer that is in the RAM on the physical CPU core.
   [[nodiscard]] auto ramBufferSize() const -> auto{ return RamBufferSize; }
+
   /// Return the total buffer size for the data caches and the ram per physical CPU core.
   [[nodiscard]] auto totalBufferSize() const -> std::size_t {
     std::size_t Total = 0;
@@ -167,6 +182,7 @@ struct PayloadSettings {
     Total += RamBufferSize;
     return Total;
   }
+
   /// The number of instruction groups which should be used in the payload per physical CPU core.
   [[nodiscard]] auto lines() const -> auto{ return Lines; }
 
@@ -178,6 +194,7 @@ struct PayloadSettings {
     }
     return {};
   }
+
   /// The size of the L1d,L2,...,L3 caches per thread on the physical CPU core.
   [[nodiscard]] auto dataCacheBufferSizePerThread() const -> std::list<unsigned> {
     auto DataCacheBufferSizePerThread = DataCacheBufferSize;
@@ -186,10 +203,13 @@ struct PayloadSettings {
     }
     return DataCacheBufferSizePerThread;
   }
+
   /// The selected size of the buffer that is in the RAM per thread on the physical CPU core.
   [[nodiscard]] auto ramBufferSizePerThread() const -> auto{ return RamBufferSize / thread(); }
+
   /// Return the total buffer size for the data caches and the ram per thread on the physical CPU core.
   [[nodiscard]] auto totalBufferSizePerThread() const -> std::size_t { return totalBufferSize() / thread(); }
+
   /// The number of instruction groups which should be used in the payload per thread on the physical CPU core.
   [[nodiscard]] auto linesPerThread() const -> auto{ return Lines / thread(); }
 
@@ -200,7 +220,7 @@ struct PayloadSettings {
   /// \returns The sequence that is generated from the supplied propotions in the instruction groups.
   [[nodiscard]] auto sequence() const -> std::vector<std::string> { return generateSequence(instructionGroups()); }
 
-  /// The vector of instructions that are saved in the instruction groups
+  /// The vector of used instructions that are saved in the instruction groups
   [[nodiscard]] auto instructionGroupItems() const -> std::vector<std::string> {
     std::vector<std::string> Items;
     Items.reserve(InstructionGroups.size());
@@ -210,6 +230,8 @@ struct PayloadSettings {
     return Items;
   }
 
+  /// Get the string that represents the instructions in combination with the number of times they should appear in the
+  /// sequence.
   [[nodiscard]] auto getInstructionGroupsString() const -> std::string {
     std::stringstream Ss;
 
diff --git a/include/firestarter/Environment/Payload/PayloadStats.hpp b/include/firestarter/Environment/Payload/PayloadStats.hpp
index 5bd79bcc..79b2b1e3 100644
--- a/include/firestarter/Environment/Payload/PayloadStats.hpp
+++ b/include/firestarter/Environment/Payload/PayloadStats.hpp
@@ -23,10 +23,15 @@
 
 namespace firestarter::environment::payload {
 
+/// This struct represents the stats a compiled payload has.
 struct PayloadStats {
+  /// The number of flops computed per iteration of the high load routine.
   unsigned Flops = 0;
+
+  /// The number of bytes accessed to the main memory per iteration of the high load routine.
   unsigned Bytes = 0;
-  // number of instructions in load loop
+
+  /// The number of instructions in load loop
   unsigned Instructions = 0;
 };
 
diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp
index a9a1e9ac..81c1454a 100644
--- a/include/firestarter/Environment/Platform/PlatformConfig.hpp
+++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp
@@ -28,31 +28,56 @@
 
 namespace firestarter::environment::platform {
 
+/// The payload in combination with settings and a short hand name for the specific microarchitecture this payload is
+/// designed for.
 class PlatformConfig {
 private:
+  /// The name of this platform. This is usually a short hand for the CPU microarchitecture e.g., HSW_COREI or
+  /// HSW_XEONEP.
   std::string Name;
+
+  /// The settings for the associated payload.
   payload::PayloadSettings Settings;
+
+  /// The payload this platform should execute.
   std::shared_ptr<const payload::Payload> Payload;
 
 public:
   /// Getter for the name of the platform.
   [[nodiscard]] auto name() const -> const auto& { return Name; }
+
   /// Getter for the settings of the platform.
   [[nodiscard]] auto settings() const -> const auto& { return Settings; }
+
   /// Reference to the settings. This allows them to be overriden.
   [[nodiscard]] auto settings() -> auto& { return Settings; }
+
   /// Getter for the payload of the platform.
   [[nodiscard]] auto payload() const -> const auto& { return Payload; }
 
+  /// Check if this platform is available on the current system. This translates to whether the CPU extensions are
+  /// available for the payload that is used.
+  /// \arg Topology The reference to the CPUTopology that is used to check against if this platform is supported.
+  /// \returns true if the platform is supported on the given CPUTopology.
   [[nodiscard]] auto isAvailable(const CPUTopology& Topology) const -> bool { return isAvailable(&Topology); }
 
+  /// Check if this platform is available and the default on the current system.
+  /// \arg Topology The reference to the CPUTopology that is used to check against if this payload is supported.
+  /// \returns true if the platform is the default one for a given CPUTopology.
   [[nodiscard]] auto isDefault(const CPUTopology& Topology) const -> bool { return isDefault(&Topology); }
 
 protected:
+  /// Check if this platform is available on the current system. This translates to whether the CPU extensions are
+  /// available for the payload that is used.
+  /// \arg Topology The pointer to the CPUTopology that is used to check against if this platform is supported.
+  /// \returns true if the platform is supported on the given CPUTopology.
   [[nodiscard]] virtual auto isAvailable(const CPUTopology* Topology) const -> bool {
     return payload()->isAvailable(*Topology);
   }
 
+  /// Check if this platform is available and the default on the current system.
+  /// \arg Topology The pointer to the CPUTopology that is used to check against if this payload is supported.
+  /// \returns true if the platform is the default one for a given CPUTopology.
   [[nodiscard]] virtual auto isDefault(const CPUTopology*) const -> bool = 0;
 
 public:
@@ -88,6 +113,7 @@ class PlatformConfig {
     return functionName(Settings.thread());
   };
 
+  /// Print a summary for the selected platform/payload with given settings.
   void printCodePathSummary() const {
     assert(Settings.isConcreate() && "Setting must be concreate to print the code path summary.");
 
diff --git a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
index 42a63f22..788f9c29 100644
--- a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
@@ -27,9 +27,12 @@
 
 namespace firestarter::environment::x86::platform {
 
+/// Models a platform config that is the default based on x86 CPU family and model ids.
 class X86PlatformConfig : public environment::platform::PlatformConfig {
 private:
+  /// The family id of the processor for which this is the default platform config.
   unsigned Family;
+  /// The list of model ids in combination with the family for which this is the default platform config.
   std::list<unsigned> Models;
 
 public:
@@ -40,8 +43,15 @@ class X86PlatformConfig : public environment::platform::PlatformConfig {
       , Family(Family)
       , Models(std::move(Models)) {}
 
+  /// Check if this platform is available on the current system. This translates to whether the CPU extensions are
+  /// available for the payload that is used.
+  /// \arg Topology The reference to the X86CPUTopology that is used to check against if this platform is supported.
+  /// \returns true if the platform is supported on the given X86CPUTopology.
   [[nodiscard]] auto isAvailable(const X86CPUTopology& Topology) const -> bool { return isAvailable(&Topology); }
 
+  /// Check if this platform is available and the default on the current system.
+  /// \arg Topology The reference to the X86CPUTopology that is used to check against if this payload is supported.
+  /// \returns true if the platform is the default one for a given X86CPUTopology.
   [[nodiscard]] auto isDefault(const X86CPUTopology& Topology) const -> bool { return isDefault(&Topology); }
 
   /// Clone a the platform config.
@@ -63,10 +73,19 @@ class X86PlatformConfig : public environment::platform::PlatformConfig {
   }
 
 private:
+  /// Check if this platform is available on the current system. This translates to whether the CPU extensions are
+  /// available for the payload that is used.
+  /// \arg Topology The pointer to the CPUTopology that is used to check against if this platform is supported.
+  /// \returns true if the platform is supported on the given CPUTopology.
   [[nodiscard]] auto isAvailable(const CPUTopology* Topology) const -> bool final {
     return environment::platform::PlatformConfig::isAvailable(Topology);
   }
 
+  /// Check if this platform is available and the default on the current system. This is done by checking if the family
+  /// id in the CPUTopology matches the one saved in Family and if the model id in the CPUTopology is contained in
+  /// Models.
+  /// \arg Topology The pointer to the CPUTopology that is used to check against if this payload is supported.
+  /// \returns true if the platform is the default one for a given CPUTopology.
   [[nodiscard]] auto isDefault(const CPUTopology* Topology) const -> bool final {
     const auto* FinalTopology = dynamic_cast<const X86CPUTopology*>(Topology);
     assert(FinalTopology && "isDefault not called with const X86CPUTopology*");
diff --git a/src/firestarter/Environment/Payload/PayloadSettings.cpp b/src/firestarter/Environment/Payload/PayloadSettings.cpp
index abc86b91..bd8997f0 100644
--- a/src/firestarter/Environment/Payload/PayloadSettings.cpp
+++ b/src/firestarter/Environment/Payload/PayloadSettings.cpp
@@ -38,7 +38,7 @@ auto PayloadSettings::getSequenceStartCount(const std::vector<std::string>& Sequ
   return I;
 }
 
-auto PayloadSettings::generateSequence(std::vector<std::pair<std::string, unsigned>> const& Proportions)
+auto PayloadSettings::generateSequence(std::vector<PayloadSettings::InstructionWithProportion> const& Proportions)
     -> std::vector<std::string> {
   std::vector<std::pair<std::string, unsigned>> Prop = Proportions;
 

From 235e92b67deecbbdf7793667c110b7f7db13e066 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Wed, 6 Nov 2024 15:38:19 +0100
Subject: [PATCH 140/167] refactor payload. add docstrings

---
 .../Environment/Payload/Payload.hpp           |  4 ++
 .../Environment/X86/Payload/AVX512Payload.hpp | 43 +++++++++++++-----
 .../Environment/X86/Payload/AVXPayload.hpp    | 41 ++++++++++++-----
 .../Environment/X86/Payload/FMA4Payload.hpp   | 42 +++++++++++++-----
 .../Environment/X86/Payload/FMAPayload.hpp    | 35 +++++++++------
 .../Environment/X86/Payload/SSE2Payload.hpp   | 41 ++++++++++++-----
 .../Environment/X86/Payload/X86Payload.hpp    | 44 +++++++++++++++++--
 .../Environment/X86/Payload/ZENFMAPayload.hpp | 26 +++++++----
 .../Environment/X86/Payload/AVX512Payload.cpp | 17 ++-----
 .../Environment/X86/Payload/AVXPayload.cpp    | 17 ++-----
 .../Environment/X86/Payload/FMA4Payload.cpp   | 17 ++-----
 .../Environment/X86/Payload/FMAPayload.cpp    | 17 ++-----
 .../Environment/X86/Payload/SSE2Payload.cpp   | 17 ++-----
 .../Environment/X86/Payload/X86Payload.cpp    |  9 ++++
 .../Environment/X86/Payload/ZENFMAPayload.cpp | 17 ++-----
 15 files changed, 241 insertions(+), 146 deletions(-)

diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index 0d844f59..927940e3 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -58,6 +58,10 @@ class Payload {
 public:
   Payload() = delete;
 
+  /// Abstract construction for the payload.
+  /// \arg Name The name of this payload. It is usually named by the CPU extension this payload uses e.g., SSE2 or FMA.
+  /// \arg RegisterSize The size of the SIMD registers in units of doubles (8B).
+  /// \arg RegisterCount The number of SIMD registers used by the payload.
   Payload(std::string Name, unsigned RegisterSize, unsigned RegisterCount) noexcept
       : Name(std::move(Name))
       , RegisterSize(RegisterSize)
diff --git a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
index 6c80810b..1372bf0f 100644
--- a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
@@ -24,26 +24,47 @@
 #include "X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
+
+/// This payload is designed for the AVX512 foundation CPU extension.
 class AVX512Payload final : public X86Payload {
 public:
   AVX512Payload() noexcept
-      : X86Payload({asmjit::CpuFeatures::X86::kAVX512_F}, "AVX512", 8, 32) {}
+      : X86Payload(/*FeatureRequests=*/{asmjit::CpuFeatures::X86::kAVX512_F}, /*Name=*/"AVX512", /*RegisterSize=*/8,
+                   /*RegisterCount=*/32,
+                   /*InstructionFlops=*/
+                   {{"REG", 32},
+                    {"L1_L", 32},
+                    {"L1_BROADCAST", 16},
+                    {"L1_S", 16},
+                    {"L1_LS", 16},
+                    {"L2_L", 32},
+                    {"L2_S", 16},
+                    {"L2_LS", 16},
+                    {"L3_L", 32},
+                    {"L3_S", 16},
+                    {"L3_LS", 16},
+                    {"L3_P", 16},
+                    {"RAM_L", 32},
+                    {"RAM_S", 16},
+                    {"RAM_LS", 16},
+                    {"RAM_P", 16}},
+                   /*InstructionMemory=*/{{"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}}) {}
 
+  /// Compile this payload with supplied settings and optional features.
+  /// \arg Settings The settings for this payload e.g., the number of lines or the size of the caches.
+  /// \arg DumpRegisters Should the code to support dumping registers be baked into the high load routine of the
+  /// compiled payload.
+  /// \arg ErrorDetection Should the code to support error detection between threads be baked into the high load routine
+  /// of the compiled payload.
+  /// \returns The compiled payload that provides access to the init and load functions.
   [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                     bool ErrorDetection) const
       -> environment::payload::CompiledPayload::UniquePtr override;
 
-  [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-
 private:
+  /// Function to initialize the memory used by the high load function.
+  /// \arg MemoryAddr The pointer to the memory.
+  /// \arg BufferSize The number of doubles that is allocated in MemoryAddr.
   void init(double* MemoryAddr, uint64_t BufferSize) const override;
-
-  const std::map<std::string, unsigned> InstructionFlops = {
-      {"REG", 32},   {"L1_L", 32},  {"L1_BROADCAST", 16}, {"L1_S", 16}, {"L1_LS", 16}, {"L2_L", 32},
-      {"L2_S", 16},  {"L2_LS", 16}, {"L3_L", 32},         {"L3_S", 16}, {"L3_LS", 16}, {"L3_P", 16},
-      {"RAM_L", 32}, {"RAM_S", 16}, {"RAM_LS", 16},       {"RAM_P", 16}};
-
-  const std::map<std::string, unsigned> InstructionMemory = {
-      {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}};
 };
 } // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
index d4af02c0..aeb9c7f9 100644
--- a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
@@ -24,25 +24,46 @@
 #include "X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
+
+/// This payload is designed for the AVX CPU extension.
 class AVXPayload final : public X86Payload {
 public:
   AVXPayload()
-      : X86Payload({asmjit::CpuFeatures::X86::kAVX}, "AVX", 4, 16) {}
+      : X86Payload(/*FeatureRequests=*/{asmjit::CpuFeatures::X86::kAVX}, /*Name=*/"AVX", /*RegisterSize=*/4,
+                   /*RegisterCount=*/16,
+                   /*InstructionFlops=*/
+                   {{"REG", 4},
+                    {"L1_L", 4},
+                    {"L1_S", 4},
+                    {"L1_LS", 4},
+                    {"L2_L", 4},
+                    {"L2_S", 4},
+                    {"L2_LS", 4},
+                    {"L3_L", 4},
+                    {"L3_S", 4},
+                    {"L3_LS", 4},
+                    {"L3_P", 4},
+                    {"RAM_L", 4},
+                    {"RAM_S", 4},
+                    {"RAM_LS", 4},
+                    {"RAM_P", 4}},
+                   /*InstructionMemory=*/{{"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}}) {}
 
+  /// Compile this payload with supplied settings and optional features.
+  /// \arg Settings The settings for this payload e.g., the number of lines or the size of the caches.
+  /// \arg DumpRegisters Should the code to support dumping registers be baked into the high load routine of the
+  /// compiled payload.
+  /// \arg ErrorDetection Should the code to support error detection between threads be baked into the high load routine
+  /// of the compiled payload.
+  /// \returns The compiled payload that provides access to the init and load functions.
   [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                     bool ErrorDetection) const
       -> environment::payload::CompiledPayload::UniquePtr override;
 
-  [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-
 private:
+  /// Function to initialize the memory used by the high load function.
+  /// \arg MemoryAddr The pointer to the memory.
+  /// \arg BufferSize The number of doubles that is allocated in MemoryAddr.
   void init(double* MemoryAddr, uint64_t BufferSize) const override;
-
-  const std::map<std::string, unsigned> InstructionFlops = {
-      {"REG", 4},  {"L1_L", 4},  {"L1_S", 4}, {"L1_LS", 4}, {"L2_L", 4},  {"L2_S", 4},   {"L2_LS", 4}, {"L3_L", 4},
-      {"L3_S", 4}, {"L3_LS", 4}, {"L3_P", 4}, {"RAM_L", 4}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}};
-
-  const std::map<std::string, unsigned> InstructionMemory = {
-      {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}};
 };
 } // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
index 021ca8aa..2fe0d9de 100644
--- a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
@@ -25,25 +25,45 @@
 
 namespace firestarter::environment::x86::payload {
 
+/// This payload is designed for the FMA4 CPU extension.
 class FMA4Payload final : public X86Payload {
 public:
   FMA4Payload() noexcept
-      : X86Payload({asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4}, "FMA4", 4, 16) {}
+      : X86Payload(/*FeatureRequests=*/{asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4},
+                   /*Name=*/"FMA4", /*RegisterSize=*/4, /*RegisterCount=*/16,
+                   /*InstructionFlops=*/
+                   {{"REG", 8},
+                    {"L1_L", 12},
+                    {"L1_S", 8},
+                    {"L1_LS", 8},
+                    {"L2_L", 8},
+                    {"L2_S", 4},
+                    {"L2_LS", 4},
+                    {"L3_L", 8},
+                    {"L3_S", 4},
+                    {"L3_LS", 4},
+                    {"L3_P", 4},
+                    {"RAM_L", 8},
+                    {"RAM_S", 4},
+                    {"RAM_LS", 4},
+                    {"RAM_P", 4}},
+                   /*InstructionMemory=*/{{"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}}) {}
 
+  /// Compile this payload with supplied settings and optional features.
+  /// \arg Settings The settings for this payload e.g., the number of lines or the size of the caches.
+  /// \arg DumpRegisters Should the code to support dumping registers be baked into the high load routine of the
+  /// compiled payload.
+  /// \arg ErrorDetection Should the code to support error detection between threads be baked into the high load routine
+  /// of the compiled payload.
+  /// \returns The compiled payload that provides access to the init and load functions.
   [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                     bool ErrorDetection) const
       -> environment::payload::CompiledPayload::UniquePtr override;
 
-  [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-
-  void init(double* MemoryAddr, uint64_t BufferSize) const override;
-
 private:
-  const std::map<std::string, unsigned> InstructionFlops = {
-      {"REG", 8},  {"L1_L", 12}, {"L1_S", 8}, {"L1_LS", 8}, {"L2_L", 8},  {"L2_S", 4},   {"L2_LS", 4}, {"L3_L", 8},
-      {"L3_S", 4}, {"L3_LS", 4}, {"L3_P", 4}, {"RAM_L", 8}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}};
-
-  const std::map<std::string, unsigned> InstructionMemory = {
-      {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}};
+  /// Function to initialize the memory used by the high load function.
+  /// \arg MemoryAddr The pointer to the memory.
+  /// \arg BufferSize The number of doubles that is allocated in MemoryAddr.
+  void init(double* MemoryAddr, uint64_t BufferSize) const override;
 };
 } // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
index e147451f..d6505b1e 100644
--- a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
@@ -24,27 +24,36 @@
 #include "X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
+
+/// This payload is designed for the FMA CPU extension.
 class FMAPayload final : public X86Payload {
 public:
   FMAPayload() noexcept
-      : X86Payload({asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA}, "FMA", 4, 16) {}
-
+      : X86Payload(/*FeatureRequests=*/{asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA}, /*Name=*/"FMA",
+                   /*RegisterSize=*/4, /*RegisterCount=*/16,
+                   /*InstructionFlops=*/{{"REG", 16},  {"L1_L", 16},     {"L1_2L", 16},      {"L1_S", 8},
+                                         {"L1_LS", 8}, {"L1_LS_256", 8}, {"L1_2LS_256", 16}, {"L2_L", 16},
+                                         {"L2_S", 8},  {"L2_LS", 8},     {"L2_LS_256", 8},   {"L2_2LS_256", 16},
+                                         {"L3_L", 16}, {"L3_S", 8},      {"L3_LS", 8},       {"L3_LS_256", 8},
+                                         {"L3_P", 8},  {"RAM_L", 16},    {"RAM_S", 8},       {"RAM_LS", 8},
+                                         {"RAM_P", 8}},
+                   /*InstructionMemory=*/{{"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}}) {}
+
+  /// Compile this payload with supplied settings and optional features.
+  /// \arg Settings The settings for this payload e.g., the number of lines or the size of the caches.
+  /// \arg DumpRegisters Should the code to support dumping registers be baked into the high load routine of the
+  /// compiled payload.
+  /// \arg ErrorDetection Should the code to support error detection between thread be baked into the high load routine
+  /// of the compiled payload.
+  /// \returns The compiled payload that provides access to the init and load functions.
   [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                     bool ErrorDetection) const
       -> environment::payload::CompiledPayload::UniquePtr override;
 
-  [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-
 private:
+  /// Function to initialize the memory used by the high load function.
+  /// \arg MemoryAddr The pointer to the memory.
+  /// \arg BufferSize The number of doubles that is allocated in MemoryAddr.
   void init(double* MemoryAddr, uint64_t BufferSize) const override;
-
-  const std::map<std::string, unsigned> InstructionFlops = {
-      {"REG", 16},        {"L1_L", 16},  {"L1_2L", 16}, {"L1_S", 8},      {"L1_LS", 8},     {"L1_LS_256", 8},
-      {"L1_2LS_256", 16}, {"L2_L", 16},  {"L2_S", 8},   {"L2_LS", 8},     {"L2_LS_256", 8}, {"L2_2LS_256", 16},
-      {"L3_L", 16},       {"L3_S", 8},   {"L3_LS", 8},  {"L3_LS_256", 8}, {"L3_P", 8},      {"RAM_L", 16},
-      {"RAM_S", 8},       {"RAM_LS", 8}, {"RAM_P", 8}};
-
-  const std::map<std::string, unsigned> InstructionMemory = {
-      {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}};
 };
 } // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
index 652e9cef..98dc9055 100644
--- a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
@@ -24,25 +24,46 @@
 #include "X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
+
+/// This payload is designed for the SSE2 CPU extension.
 class SSE2Payload final : public X86Payload {
 public:
   SSE2Payload() noexcept
-      : X86Payload({asmjit::CpuFeatures::X86::kSSE2}, "SSE2", 2, 16) {}
+      : X86Payload(/*FeatureRequests=*/{asmjit::CpuFeatures::X86::kSSE2}, /*Name=*/"SSE2", /*RegisterSize=*/2,
+                   /*RegisterCount=*/16,
+                   /*InstructionFlops=*/
+                   {{"REG", 2},
+                    {"L1_L", 2},
+                    {"L1_S", 2},
+                    {"L1_LS", 2},
+                    {"L2_L", 2},
+                    {"L2_S", 2},
+                    {"L2_LS", 2},
+                    {"L3_L", 2},
+                    {"L3_S", 2},
+                    {"L3_LS", 2},
+                    {"L3_P", 2},
+                    {"RAM_L", 2},
+                    {"RAM_S", 2},
+                    {"RAM_LS", 2},
+                    {"RAM_P", 2}},
+                   /*InstructionMemory=*/{{"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}}) {}
 
+  /// Compile this payload with supplied settings and optional features.
+  /// \arg Settings The settings for this payload e.g., the number of lines or the size of the caches.
+  /// \arg DumpRegisters Should the code to support dumping registers be baked into the high load routine of the
+  /// compiled payload.
+  /// \arg ErrorDetection Should the code to support error detection between thread be baked into the high load routine
+  /// of the compiled payload.
+  /// \returns The compiled payload that provides access to the init and load functions.
   [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                     bool ErrorDetection) const
       -> environment::payload::CompiledPayload::UniquePtr override;
 
-  [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-
 private:
+  /// Function to initialize the memory used by the high load function.
+  /// \arg MemoryAddr The pointer to the memory.
+  /// \arg BufferSize The number of doubles that is allocated in MemoryAddr.
   void init(double* MemoryAddr, uint64_t BufferSize) const override;
-
-  const std::map<std::string, unsigned> InstructionFlops = {
-      {"REG", 2},  {"L1_L", 2},  {"L1_S", 2}, {"L1_LS", 2}, {"L2_L", 2},  {"L2_S", 2},   {"L2_LS", 2}, {"L3_L", 2},
-      {"L3_S", 2}, {"L3_LS", 2}, {"L3_P", 2}, {"RAM_L", 2}, {"RAM_S", 2}, {"RAM_LS", 2}, {"RAM_P", 2}};
-
-  const std::map<std::string, unsigned> InstructionMemory = {
-      {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}};
 };
 } // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index ef8fab69..17cfca25 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -40,14 +40,34 @@ namespace firestarter::environment::x86::payload {
 
 class X86Payload : public environment::payload::Payload {
 private:
-  // we can use this to check, if our platform support this payload
+  /// This list contains the features (cpu extensions) that are required to execute the payload.
   std::list<asmjit::CpuFeatures::X86::Id> FeatureRequests;
 
+  /// The mapping from instructions to the number of flops per instruction. This map is required to have an entry for
+  /// every instruction.
+  std::map<std::string, unsigned> InstructionFlops;
+
+  /// The mapping from instructions to the size of main memory accesses for this instruction. This map is not required
+  /// to contain all instructions.
+  std::map<std::string, unsigned> InstructionMemory;
+
 public:
+  /// Abstract constructor for a payload on X86 CPUs.
+  /// \arg FeatureRequests The list of features (cpu extensions) that are required to execute the payload.
+  /// \arg Name The name of this payload. It is usually named by the CPU extension this payload uses e.g., SSE2 or FMA.
+  /// \arg RegisterSize The size of the SIMD registers in units of doubles (8B).
+  /// \arg RegisterCount The number of SIMD registers used by the payload.
+  /// \arg InstructionFlops The mapping from instructions to the number of flops per instruction. This map is required
+  /// to have an entry for every instruction.
+  /// \arg InstructionMemory The mapping from instructions to the size of main memory accesses for this instruction.
+  /// This map is not required to contain all instructions.
   X86Payload(std::initializer_list<asmjit::CpuFeatures::X86::Id> FeatureRequests, std::string Name,
-             unsigned RegisterSize, unsigned RegisterCount) noexcept
+             unsigned RegisterSize, unsigned RegisterCount, std::map<std::string, unsigned>&& InstructionFlops,
+             std::map<std::string, unsigned>&& InstructionMemory) noexcept
       : Payload(std::move(Name), RegisterSize, RegisterCount)
-      , FeatureRequests(FeatureRequests) {}
+      , FeatureRequests(FeatureRequests)
+      , InstructionFlops(std::move(InstructionFlops))
+      , InstructionMemory(std::move(InstructionMemory)) {}
 
 private:
   [[nodiscard]] auto isAvailable(const CPUTopology& Topology) const -> bool final {
@@ -483,8 +503,24 @@ class X86Payload : public environment::payload::Payload {
 
   static void initMemory(double* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue);
 
-  // use cpuid and usleep as low load
+  /// Function to produce a low load on the cpu.
+  /// \arg LoadVar The variable that controls the load. If this variable changes from LoadThreadWorkType::LowLoad to
+  /// something else this function will return.
+  /// \arg Period The period of the low/high load switching. This function will sleep 1% of the Period and check if the
+  /// LoadVar changed.
   void lowLoadFunction(volatile LoadThreadWorkType& LoadVar, std::chrono::microseconds Period) const final;
+
+  /// Get the available instruction items that are supported by this payload.
+  /// \returns The available instruction items that are supported by this payload.
+  [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> final;
+
+  /// Get the mapping from instructions to the number of flops per instruction. This map is required to have an entry
+  /// for every instruction.
+  [[nodiscard]] auto instructionFlops() const -> const auto& { return InstructionFlops; }
+
+  /// Get the mapping from instructions to the size of main memory accesses for this instruction. This map is not
+  /// required to contain all instructions.
+  [[nodiscard]] auto instructionMemory() const -> const auto& { return InstructionMemory; }
 };
 
 } // namespace firestarter::environment::x86::payload
diff --git a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
index 4bd69d7c..8286de6c 100644
--- a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
@@ -24,23 +24,31 @@
 #include "X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
+
+/// This payload is designed for the FMA CPU extension in combination with the first generation Zen microarchitecture.
 class ZENFMAPayload final : public X86Payload {
 public:
   ZENFMAPayload() noexcept
-      : X86Payload({asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA}, "ZENFMA", 4, 16) {}
-
+      : X86Payload(/*FeatureRequests=*/{asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA},
+                   /*Name=*/"ZENFMA", /*RegisterSize=*/4, /*RegisterCount=*/16,
+                   /*InstructionFlops=*/{{"REG", 8}, {"L1_LS", 8}, {"L2_L", 8}, {"L3_L", 8}, {"RAM_L", 8}},
+                   /*InstructionMemory=*/{{"RAM_L", 64}}) {}
+
+  /// Compile this payload with supplied settings and optional features.
+  /// \arg Settings The settings for this payload e.g., the number of lines or the size of the caches.
+  /// \arg DumpRegisters Should the code to support dumping registers be baked into the high load routine of the
+  /// compiled payload.
+  /// \arg ErrorDetection Should the code to support error detection between thread be baked into the high load routine
+  /// of the compiled payload.
+  /// \returns The compiled payload that provides access to the init and load functions.
   [[nodiscard]] auto compilePayload(const environment::payload::PayloadSettings& Settings, bool DumpRegisters,
                                     bool ErrorDetection) const
       -> environment::payload::CompiledPayload::UniquePtr override;
 
-  [[nodiscard]] auto getAvailableInstructions() const -> std::list<std::string> override;
-
 private:
+  /// Function to initialize the memory used by the high load function.
+  /// \arg MemoryAddr The pointer to the memory.
+  /// \arg BufferSize The number of doubles that is allocated in MemoryAddr.
   void init(double* MemoryAddr, uint64_t BufferSize) const override;
-
-  const std::map<std::string, unsigned> InstructionFlops = {
-      {"REG", 8}, {"L1_LS", 8}, {"L2_L", 8}, {"L3_L", 8}, {"RAM_L", 8}};
-
-  const std::map<std::string, unsigned> InstructionMemory = {{"RAM_L", 64}};
 };
 } // namespace firestarter::environment::x86::payload
diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index 0f128641..7eecadaf 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -45,17 +45,17 @@ auto AVX512Payload::compilePayload(const environment::payload::PayloadSettings&
   environment::payload::PayloadStats Stats;
 
   for (const auto& Item : Sequence) {
-    auto It = InstructionFlops.find(Item);
+    auto It = instructionFlops().find(Item);
 
-    if (It == InstructionFlops.end()) {
+    if (It == instructionFlops().end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
     }
 
     Stats.Flops += It->second;
 
-    It = InstructionMemory.find(Item);
+    It = instructionMemory().find(Item);
 
-    if (It != InstructionMemory.end()) {
+    if (It != instructionMemory().end()) {
       Stats.Bytes += It->second;
     }
   }
@@ -384,15 +384,6 @@ auto AVX512Payload::compilePayload(const environment::payload::PayloadSettings&
   return CompiledPayloadPtr;
 }
 
-auto AVX512Payload::getAvailableInstructions() const -> std::list<std::string> {
-  std::list<std::string> Instructions;
-
-  transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions),
-            [](const auto& Item) { return Item.first; });
-
-  return Instructions;
-}
-
 void AVX512Payload::init(double* MemoryAddr, uint64_t BufferSize) const {
   X86Payload::initMemory(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
index 322ea3e7..22e54843 100644
--- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
@@ -43,17 +43,17 @@ auto AVXPayload::compilePayload(const environment::payload::PayloadSettings& Set
   environment::payload::PayloadStats Stats;
 
   for (const auto& Item : Sequence) {
-    auto It = InstructionFlops.find(Item);
+    auto It = instructionFlops().find(Item);
 
-    if (It == InstructionFlops.end()) {
+    if (It == instructionFlops().end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
     }
 
     Stats.Flops += It->second;
 
-    It = InstructionMemory.find(Item);
+    It = instructionMemory().find(Item);
 
-    if (It != InstructionMemory.end()) {
+    if (It != instructionMemory().end()) {
       Stats.Bytes += It->second;
     }
   }
@@ -414,15 +414,6 @@ auto AVXPayload::compilePayload(const environment::payload::PayloadSettings& Set
   return CompiledPayloadPtr;
 }
 
-auto AVXPayload::getAvailableInstructions() const -> std::list<std::string> {
-  std::list<std::string> Instructions;
-
-  transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions),
-            [](const auto& Item) { return Item.first; });
-
-  return Instructions;
-}
-
 void AVXPayload::init(double* MemoryAddr, uint64_t BufferSize) const {
   X86Payload::initMemory(MemoryAddr, BufferSize, 1.654738925401e-10, 1.654738925401e-15);
 }
diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
index ec770929..33a79f40 100644
--- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
@@ -44,17 +44,17 @@ auto FMA4Payload::compilePayload(const environment::payload::PayloadSettings& Se
   environment::payload::PayloadStats Stats;
 
   for (const auto& Item : Sequence) {
-    auto It = InstructionFlops.find(Item);
+    auto It = instructionFlops().find(Item);
 
-    if (It == InstructionFlops.end()) {
+    if (It == instructionFlops().end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
     }
 
     Stats.Flops += It->second;
 
-    It = InstructionMemory.find(Item);
+    It = instructionMemory().find(Item);
 
-    if (It != InstructionMemory.end()) {
+    if (It != instructionMemory().end()) {
       Stats.Bytes += It->second;
     }
   }
@@ -387,15 +387,6 @@ auto FMA4Payload::compilePayload(const environment::payload::PayloadSettings& Se
   return CompiledPayloadPtr;
 }
 
-auto FMA4Payload::getAvailableInstructions() const -> std::list<std::string> {
-  std::list<std::string> Instructions;
-
-  transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions),
-            [](const auto& Item) { return Item.first; });
-
-  return Instructions;
-}
-
 void FMA4Payload::init(double* MemoryAddr, uint64_t BufferSize) const {
   X86Payload::initMemory(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
index accde4a0..ee807df8 100644
--- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
@@ -46,17 +46,17 @@ auto FMAPayload::compilePayload(const environment::payload::PayloadSettings& Set
   environment::payload::PayloadStats Stats;
 
   for (const auto& Item : Sequence) {
-    auto It = InstructionFlops.find(Item);
+    auto It = instructionFlops().find(Item);
 
-    if (It == InstructionFlops.end()) {
+    if (It == instructionFlops().end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
     }
 
     Stats.Flops += It->second;
 
-    It = InstructionMemory.find(Item);
+    It = instructionMemory().find(Item);
 
-    if (It != InstructionMemory.end()) {
+    if (It != instructionMemory().end()) {
       Stats.Bytes += It->second;
     }
   }
@@ -422,15 +422,6 @@ auto FMAPayload::compilePayload(const environment::payload::PayloadSettings& Set
   return CompiledPayloadPtr;
 }
 
-auto FMAPayload::getAvailableInstructions() const -> std::list<std::string> {
-  std::list<std::string> Instructions;
-
-  transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions),
-            [](const auto& Item) { return Item.first; });
-
-  return Instructions;
-}
-
 void FMAPayload::init(double* MemoryAddr, uint64_t BufferSize) const {
   X86Payload::initMemory(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }
diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index 0d05ca99..10512f51 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -43,17 +43,17 @@ auto SSE2Payload::compilePayload(const environment::payload::PayloadSettings& Se
   environment::payload::PayloadStats Stats;
 
   for (const auto& Item : Sequence) {
-    auto It = InstructionFlops.find(Item);
+    auto It = instructionFlops().find(Item);
 
-    if (It == InstructionFlops.end()) {
+    if (It == instructionFlops().end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
     }
 
     Stats.Flops += It->second;
 
-    It = InstructionMemory.find(Item);
+    It = instructionMemory().find(Item);
 
-    if (It != InstructionMemory.end()) {
+    if (It != instructionMemory().end()) {
       Stats.Bytes += It->second;
     }
   }
@@ -406,15 +406,6 @@ auto SSE2Payload::compilePayload(const environment::payload::PayloadSettings& Se
   return CompiledPayloadPtr;
 }
 
-auto SSE2Payload::getAvailableInstructions() const -> std::list<std::string> {
-  std::list<std::string> Instructions;
-
-  transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions),
-            [](const auto& Item) { return Item.first; });
-
-  return Instructions;
-}
-
 void SSE2Payload::init(double* MemoryAddr, uint64_t BufferSize) const {
   X86Payload::initMemory(MemoryAddr, BufferSize, 1.654738925401e-10, 1.654738925401e-15);
 }
diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index 16485e99..fa18b837 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -81,4 +81,13 @@ void X86Payload::initMemory(double* MemoryAddr, uint64_t BufferSize, double Firs
   // NOLINTEND(cppcoreguidelines-pro-bounds-pointer-arithmetic)
 }
 
+auto X86Payload::getAvailableInstructions() const -> std::list<std::string> {
+  std::list<std::string> Instructions;
+
+  transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions),
+            [](const auto& Item) { return Item.first; });
+
+  return Instructions;
+}
+
 }; // namespace firestarter::environment::x86::payload
\ No newline at end of file
diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
index e45dd9bf..4f38d514 100644
--- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
@@ -42,17 +42,17 @@ auto ZENFMAPayload::compilePayload(const environment::payload::PayloadSettings&
   environment::payload::PayloadStats Stats;
 
   for (const auto& Item : Sequence) {
-    auto It = InstructionFlops.find(Item);
+    auto It = instructionFlops().find(Item);
 
-    if (It == InstructionFlops.end()) {
+    if (It == instructionFlops().end()) {
       workerLog::error() << "Instruction group " << Item << " undefined in " << name() << ".";
     }
 
     Stats.Flops += It->second;
 
-    It = InstructionMemory.find(Item);
+    It = instructionMemory().find(Item);
 
-    if (It != InstructionMemory.end()) {
+    if (It != instructionMemory().end()) {
       Stats.Bytes += It->second;
     }
   }
@@ -372,15 +372,6 @@ auto ZENFMAPayload::compilePayload(const environment::payload::PayloadSettings&
   return CompiledPayloadPtr;
 }
 
-auto ZENFMAPayload::getAvailableInstructions() const -> std::list<std::string> {
-  std::list<std::string> Instructions;
-
-  transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions),
-            [](const auto& Item) { return Item.first; });
-
-  return Instructions;
-}
-
 void ZENFMAPayload::init(double* MemoryAddr, uint64_t BufferSize) const {
   X86Payload::initMemory(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4);
 }

From 982e2ca2334dd41c10ab465ef1b464bf826a76bb Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 7 Nov 2024 15:22:01 +0100
Subject: [PATCH 141/167] add more docstrings

---
 .../firestarter/Environment/Environment.hpp   | 64 ++++++++++++++++++-
 .../X86/Payload/CompiledX86Payload.hpp        |  2 +-
 .../Environment/X86/Payload/X86Payload.hpp    | 26 +++++++-
 .../X86/Platform/BulldozerConfig.hpp          | 11 ++--
 .../X86/Platform/HaswellConfig.hpp            | 13 ++--
 .../X86/Platform/HaswellEPConfig.hpp          | 13 ++--
 .../X86/Platform/KnightsLandingConfig.hpp     | 11 ++--
 .../Environment/X86/Platform/NaplesConfig.hpp | 13 ++--
 .../X86/Platform/NehalemConfig.hpp            | 11 ++--
 .../X86/Platform/NehalemEPConfig.hpp          | 11 ++--
 .../Environment/X86/Platform/RomeConfig.hpp   |  9 ++-
 .../X86/Platform/SandyBridgeConfig.hpp        | 13 ++--
 .../X86/Platform/SandyBridgeEPConfig.hpp      | 13 ++--
 .../X86/Platform/SkylakeConfig.hpp            |  9 ++-
 .../X86/Platform/SkylakeSPConfig.hpp          | 10 ++-
 .../Environment/X86/X86Environment.hpp        | 37 +++++++++--
 src/firestarter/Environment/Environment.cpp   | 24 +++----
 .../Environment/X86/X86Environment.cpp        |  2 +-
 src/firestarter/OneAPI/OneAPI.cpp             |  2 +-
 19 files changed, 220 insertions(+), 74 deletions(-)

diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index 72f1a583..0bf2f181 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -30,6 +30,10 @@
 
 namespace firestarter::environment {
 
+/// This class handles parsing of user input to FIRESTARTER, namely the number of threads used, the thread affinity, the
+/// selection of the correct high-load function, selection of the instruction groups and number of lines. It also
+/// handles printing useful information, provides interfaces to the PlatformConfig and the number of threads. It
+/// facilitates setting the cpu affinity in further parts of FIRESTARTER.
 class Environment {
 public:
   Environment() = delete;
@@ -37,29 +41,68 @@ class Environment {
       : Topology(std::move(Topology)) {}
   virtual ~Environment() = default;
 
+  /// Parse the user input for the cpu affinity and the number of requested threads. If a CpuBind is provided we
+  /// evaluate it and set the number of threads and their affinity accordingly. This is only supported on linux and with
+  /// the FIRESTARTER_THREAD_AFFINITY build flag. This function will save the correct number of threads based on the
+  /// user input in RequestedNumThreads. It must be called for FIRESTARTER to function properly.
+  /// \arg RequestedNumThreads The number of threads that are requested by a user. If this is zero the number will be
+  /// automatically determined.
+  /// \arg CpuBind A string following the CPULIST format: "x,y,z", "x-y", "x-y/step", and any combination of the
+  /// above. We select the number of requested CPUs and their cpubind from this string.
   void evaluateCpuAffinity(unsigned RequestedNumThreads, const std::string& CpuBind);
+
+  /// The worker threads are numerated from zero to RequestedNumThreads. Set the cpuaffinity of a calling thread based
+  /// on this index to the one that should be used according to the determined CpuBind list from the call to
+  /// evaluateCpuAffinity. This function will throw if it is called with an invalid index.
+  /// \arg Thread The index of the worker thread.
   void setCpuAffinity(unsigned Thread) const;
+
+  /// Print the summary of the used thread for the workers. If thread affinity is supported (linux and compiled with the
+  /// FIRESTARTER_THREAD_AFFINITY flag), print which thread is pinned to which CPU.
   void printThreadSummary();
 
+  /// Select a PlatformConfig based on its generated id. This function will throw if a payload is not available or the
+  /// id is incorrect. If id is zero we automatically select a matching PlatformConfig.
+  /// \arg FunctionId The id of the PlatformConfig that should be selected.
+  /// \arg AllowUnavailablePayload If true we will not throw if the PlatformConfig is not available.
   virtual void selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) = 0;
+
+  /// Parse the selected payload instruction groups and save them in the selected function. Throws if the input is
+  /// invalid.
+  /// \arg Groups The list of instruction groups that is in the format: multiple INSTRUCTION:VALUE pairs
+  /// comma-separated.
   virtual void selectInstructionGroups(std::string Groups) = 0;
+
+  /// Print the available instruction groups of the selected function.
   virtual void printAvailableInstructionGroups() = 0;
+
+  /// Set the line count in the selected function.
+  /// \arg LineCount The maximum number of instructions that should be in the high-load loop.
   virtual void setLineCount(unsigned LineCount) = 0;
+
+  /// Print a summary of the settings of the selected config.
   virtual void printSelectedCodePathSummary() = 0;
+
+  /// Print a list of available high-load functions and if they are available on the current system.
   virtual void printFunctionSummary() = 0;
 
+  /// Get the number of threads FIRESTARTER will run with.
   [[nodiscard]] auto requestedNumThreads() const -> uint64_t { return RequestedNumThreads; }
 
+  /// Getter (which allows modifying) for the current platform config containing the payload, settings and the
+  /// associated name.
   [[nodiscard]] virtual auto config() -> platform::PlatformConfig& {
     assert(Config && "No PlatformConfig selected");
     return *Config;
   }
 
+  /// Const getter for the current platform config containing the payload, settings and the associated name.
   [[nodiscard]] virtual auto config() const -> const platform::PlatformConfig& {
     assert(Config && "No PlatformConfig selected");
     return *Config;
   }
 
+  /// Const getter for the current CPU topology.
   [[nodiscard]] virtual auto topology() const -> const CPUTopology& {
     assert(Topology && "Topology is a nullptr");
     return *Topology;
@@ -70,19 +113,36 @@ class Environment {
   void setConfig(std::unique_ptr<platform::PlatformConfig>&& Config) { this->Config = std::move(Config); }
 
 private:
+  /// The selected config that contains the payload, settings and the associated name.
   std::unique_ptr<platform::PlatformConfig> Config;
+  /// The description of the current CPU.
   std::unique_ptr<CPUTopology> Topology;
 
+  /// The number of threads FIRESTARTER is requested to run with. This will initially be set to zero, which will be
+  /// replaced by the maximum number of threads after calling evaluateCpuAffinity.
   uint64_t RequestedNumThreads = 0;
 
   // TODO(Issue #74): Use hwloc for cpu thread affinity.
 #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
-  static auto cpuAllowed(unsigned Id) -> int;
+  /// Check if the Cpu is allowed to be used with the current program.
+  /// \arg Id The id of the CPU which is checked.
+  /// \returns true if the CPU with Id is allowed to be used by the program.
+  static auto cpuAllowed(unsigned Id) -> bool;
+
+  /// Set the cpu affinity of the current thread to a specific CPU.
+  /// \arg Id The id of the CPU to which to pin the calling thread.
+  /// \returns 0 on success. See the man page for sched_setaffinity.
   static auto cpuSet(unsigned Id) -> int;
+
+  /// Add a CPU to mask if this CPU is available on the current system or throw with an error.
+  /// \arg Cpu The id of the CPU to add to the mask.
+  /// \arg Mask The reference to the mask to add the cpu to.
   void addCpuSet(unsigned Cpu, cpu_set_t& Mask) const;
-#endif
 
+  /// The list of physical CPU ids that are requested to be used. The length of this list should match the number of
+  /// requested threads if it is not zero.
   std::vector<unsigned> CpuBind;
+#endif
 };
 
 } // namespace firestarter::environment
diff --git a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
index 52ba9f37..9a7ee0e2 100644
--- a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
@@ -60,7 +60,7 @@ class CompiledX86Payload final : public environment::payload::CompiledPayload {
   ~CompiledX86Payload() override = default;
 
   /// Create a unique pointer to a compiled payload from payload stats and assembly in a code holder.
-  /// \targ DerivedPayload The payload class from which the CodeHolder with the assembly was created from.
+  /// \tparam DerivedPayload The payload class from which the CodeHolder with the assembly was created from.
   /// \arg Stats The stats of the payload that is contained in the CodeHolder.
   /// \arg Code The CodeHolder that contains the assembly instruction making up the payload. This will be added to the
   /// JitRuntime and a pointer to the function will be provided to the CompiledPayload class.
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index 17cfca25..ec61b5a2 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -36,6 +36,7 @@
 
 constexpr const auto InitBlocksize = 1024;
 
+/// This abstract class models a payload that can be compiled with settings and executed for X86 CPUs.
 namespace firestarter::environment::x86::payload {
 
 class X86Payload : public environment::payload::Payload {
@@ -70,6 +71,10 @@ class X86Payload : public environment::payload::Payload {
       , InstructionMemory(std::move(InstructionMemory)) {}
 
 private:
+  /// Check if this payload is available on the current system. This is equivalent to checking if the supplied Topology
+  /// contains all features that are in FeatureRequests.
+  /// \arg Topology The CPUTopology that is used to check against if this payload is supported.
+  /// \returns true if the payload is supported on the given CPUTopology.
   [[nodiscard]] auto isAvailable(const CPUTopology& Topology) const -> bool final {
     const auto* FinalTopology = dynamic_cast<const X86CPUTopology*>(&Topology);
     assert(FinalTopology && "isAvailable not called with const X86CPUTopology*");
@@ -85,7 +90,7 @@ class X86Payload : public environment::payload::Payload {
 
 protected:
   /// Emit the code to dump the xmm, ymm or zmm registers into memory for the dump registers feature.
-  /// \arg Vec the type of the vector register used.
+  /// \tparam Vec the type of the vector register used.
   /// \arg Cb The asmjit code builder that is used to emit the assembler code.
   /// \arg PointerReg the register containing the pointer into memory in LoadWorkerMemory that is used in the high-load
   /// routine.
@@ -118,8 +123,23 @@ class X86Payload : public environment::payload::Payload {
     Cb.bind(SkipRegistersDump);
   }
 
-  // add MM regs to dirty regs
-  // zmm31 is used for backup if VectorReg is of type asmjit::x86::Zmm
+  /// Emit the code to detect errors between this and two other threads that execute the same payload concurrently. We
+  /// backup the registers in Mm2...Mm7. We will check every 0x3fff iterations. If the check did not succeed we write
+  /// the LoadThreadWorkType::LoadStop flag in the AddrHighReg and therefore abort as soon as we pass the check in the
+  /// high-load routine.
+  /// \tparam MaybeConstIterRegT The type of the iteration register. If this is Mm, we assume that Mm0 is used by the
+  /// payload and the other Mm1...Mm7 are free to use. If they are free we will use them to backup rax, rbx, rcx, rdx,
+  /// r8 and r9. Otherwise we push them on the stack.
+  /// \tparam MaybeConstVectorRegT This is the type of the vector register. It can be either Xmm, Ymm or Zmm. In case of
+  /// Xmm we backup xmm0 on the stack, in case of Ymm we backup ymm0 in Mm4...Mm7 and in case of Zmm we use zmm31 for
+  /// the backup. This register may not be used in the payload.
+  /// \arg Cb The asmjit code builder that is used to emit the assembler code.
+  /// \arg IterReg The register that holds the iteration counter of the high-load loop.
+  /// \arg AddrHighReg The register contains the pointer to the memory address where the LoadThreadWorkType is saved.
+  /// \arg PointerReg The register contains the pointer into memory in LoadWorkerMemory that is used in the high-load
+  /// routine.
+  /// \arg TempReg The first register that can be used to store temporary values.
+  /// \arg TempReg2 The second register that can be used to store temporary values.
   template <class MaybeConstIterRegT, class MaybeConstVectorRegT>
   void emitErrorDetectionCode(asmjit::x86::Builder& Cb, MaybeConstIterRegT& IterReg,
                               const asmjit::x86::Gpq& AddrHighReg, const asmjit::x86::Gpq& PointerReg,
diff --git a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
index b3d50c1a..ee733b5d 100644
--- a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
@@ -29,9 +29,12 @@ class BulldozerConfig final : public X86PlatformConfig {
 public:
   BulldozerConfig() noexcept
       : X86PlatformConfig(
-            "BLD_OPTERON", 21, {1, 2, 3},
-            environment::payload::PayloadSettings({1}, {16384, 1048576, 786432}, 104857600, 1536,
-                                                  {{"RAM_L", 1}, {"L3_L", 1}, {"L2_LS", 5}, {"L1_L", 90}, {"REG", 45}}),
-            std::make_shared<const payload::FMA4Payload>()) {}
+            /*Name=*/"BLD_OPTERON", /*Family=*/21, /*Models=*/{1, 2, 3},
+            /*Settings=*/
+            environment::payload::PayloadSettings(
+                /*Threads=*/{1}, /*DataCacheBufferSize=*/{16384, 1048576, 786432}, /*RamBufferSize=*/104857600,
+                /*Lines=*/1536,
+                /*InstructionGroups=*/{{"RAM_L", 1}, {"L3_L", 1}, {"L2_LS", 5}, {"L1_L", 90}, {"REG", 45}}),
+            /*Payload=*/std::make_shared<const payload::FMA4Payload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
index d6ce0078..5800e82f 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
@@ -28,10 +28,13 @@ namespace firestarter::environment::x86::platform {
 class HaswellConfig final : public X86PlatformConfig {
 public:
   HaswellConfig() noexcept
-      : X86PlatformConfig("HSW_COREI", 6, {60, 61, 69, 70, 71},
-                          environment::payload::PayloadSettings(
-                              {1, 2}, {32768, 262144, 1572864}, 104857600, 1536,
-                              {{"RAM_L", 2}, {"L3_LS", 3}, {"L2_LS", 9}, {"L1_LS", 90}, {"REG", 40}}),
-                          std::make_shared<const payload::FMAPayload>()) {}
+      : X86PlatformConfig(
+            /*Name=*/"HSW_COREI", /*Family=*/6, /*Models=*/{60, 61, 69, 70, 71},
+            /*Settings=*/
+            environment::payload::PayloadSettings(
+                /*Threads=*/{1, 2}, /*DataCacheBufferSize=*/{32768, 262144, 1572864}, /*RamBufferSize=*/104857600,
+                /*Lines=*/1536,
+                /*InstructionGroups=*/{{"RAM_L", 2}, {"L3_LS", 3}, {"L2_LS", 9}, {"L1_LS", 90}, {"REG", 40}}),
+            /*Payload=*/std::make_shared<const payload::FMAPayload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
index ae4e9a72..ef54e5d2 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
@@ -28,10 +28,13 @@ namespace firestarter::environment::x86::platform {
 class HaswellEPConfig final : public X86PlatformConfig {
 public:
   HaswellEPConfig() noexcept
-      : X86PlatformConfig("HSW_XEONEP", 6, {63, 79},
-                          environment::payload::PayloadSettings(
-                              {1, 2}, {32768, 262144, 2621440}, 104857600, 1536,
-                              {{"RAM_L", 8}, {"L3_LS", 1}, {"L2_LS", 29}, {"L1_LS", 100}, {"REG", 100}}),
-                          std::make_shared<const payload::FMAPayload>()) {}
+      : X86PlatformConfig(
+            /*Name=*/"HSW_XEONEP", /*Family=*/6, /*Models=*/{63, 79},
+            /*Settings=*/
+            environment::payload::PayloadSettings(
+                /*Threads=*/{1, 2}, /*DataCacheBufferSize=*/{32768, 262144, 2621440},
+                /*RamBufferSize=*/104857600, /*Lines=*/1536,
+                /*InstructionGroups=*/{{"RAM_L", 8}, {"L3_LS", 1}, {"L2_LS", 29}, {"L1_LS", 100}, {"REG", 100}}),
+            /*Payload=*/std::make_shared<const payload::FMAPayload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
index 23230e14..bd2d1a26 100644
--- a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
@@ -28,9 +28,12 @@ namespace firestarter::environment::x86::platform {
 class KnightsLandingConfig final : public X86PlatformConfig {
 public:
   KnightsLandingConfig() noexcept
-      : X86PlatformConfig("KNL_XEONPHI", 6, {87},
-                          environment::payload::PayloadSettings({4}, {32768, 524288, 236279125}, 26214400, 1536,
-                                                                {{"RAM_P", 3}, {"L2_S", 8}, {"L1_L", 40}, {"REG", 10}}),
-                          std::make_shared<const payload::AVX512Payload>()) {}
+      : X86PlatformConfig(/*Name=*/"KNL_XEONPHI", /*Family=*/6, /*Models=*/{87},
+                          /*Settings=*/
+                          environment::payload::PayloadSettings(
+                              /*Threads=*/{4}, /*DataCacheBufferSize=*/{32768, 524288, 236279125},
+                              /*RamBufferSize=*/26214400, /*Lines=*/1536,
+                              /*InstructionGroups=*/{{"RAM_P", 3}, {"L2_S", 8}, {"L1_L", 40}, {"REG", 10}}),
+                          /*Payload=*/std::make_shared<const payload::AVX512Payload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
index 07ed7f50..7f303f94 100644
--- a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
@@ -28,10 +28,13 @@ namespace firestarter::environment::x86::platform {
 class NaplesConfig final : public X86PlatformConfig {
 public:
   NaplesConfig() noexcept
-      : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24},
-                          environment::payload::PayloadSettings(
-                              {1, 2}, {65536, 524288, 2097152}, 104857600, 1536,
-                              {{"RAM_L", 3}, {"L3_L", 14}, {"L2_L", 75}, {"L1_LS", 81}, {"REG", 100}}),
-                          std::make_shared<const payload::ZENFMAPayload>()) {}
+      : X86PlatformConfig(
+            /*Name=*/"ZEN_EPYC", /*Family=*/23, /*Models=*/{1, 8, 17, 24},
+            /*Settings=*/
+            environment::payload::PayloadSettings(
+                /*Threads=*/{1, 2}, /*DataCacheBufferSize=*/{65536, 524288, 2097152}, /*RamBufferSize=*/104857600,
+                /*Lines=*/1536,
+                /*InstructionGroups=*/{{"RAM_L", 3}, {"L3_L", 14}, {"L2_L", 75}, {"L1_LS", 81}, {"REG", 100}}),
+            /*Payload=*/std::make_shared<const payload::ZENFMAPayload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
index 8a0c9699..0403edf2 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
@@ -28,9 +28,12 @@ namespace firestarter::environment::x86::platform {
 class NehalemConfig final : public X86PlatformConfig {
 public:
   NehalemConfig() noexcept
-      : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23},
-                          environment::payload::PayloadSettings({1, 2}, {32768, 262144, 1572864}, 104857600, 1536,
-                                                                {{"RAM_P", 1}, {"L1_LS", 70}, {"REG", 2}}),
-                          std::make_shared<const payload::SSE2Payload>()) {}
+      : X86PlatformConfig(
+            /*Name=*/"NHM_COREI", /*Family=*/6, /*Models=*/{30, 37, 23},
+            /*Settings=*/
+            environment::payload::PayloadSettings(/*Threads=*/{1, 2}, /*DataCacheBufferSize=*/{32768, 262144, 1572864},
+                                                  /*RamBufferSize=*/104857600, /*Lines=*/1536,
+                                                  /*InstructionGroups=*/{{"RAM_P", 1}, {"L1_LS", 70}, {"REG", 2}}),
+            /*Payload=*/std::make_shared<const payload::SSE2Payload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
index a2815577..847e5b89 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
@@ -28,9 +28,12 @@ namespace firestarter::environment::x86::platform {
 class NehalemEPConfig final : public X86PlatformConfig {
 public:
   NehalemEPConfig() noexcept
-      : X86PlatformConfig("NHM_XEONEP", 6, {26, 44},
-                          environment::payload::PayloadSettings({1, 2}, {32768, 262144, 2097152}, 104857600, 1536,
-                                                                {{"RAM_P", 1}, {"L1_LS", 60}, {"REG", 2}}),
-                          std::make_shared<const payload::SSE2Payload>()) {}
+      : X86PlatformConfig(/*Name=*/"NHM_XEONEP", /*Family=*/6, /*Models=*/{26, 44},
+                          /*Settings=*/
+                          environment::payload::PayloadSettings(
+                              /*Threads=*/{1, 2}, /*DataCacheBufferSize=*/{32768, 262144, 2097152},
+                              /*RamBufferSize=*/104857600, /*Lines=*/1536,
+                              /*InstructionGroups=*/{{"RAM_P", 1}, {"L1_LS", 60}, {"REG", 2}}),
+                          /*Payload=*/std::make_shared<const payload::SSE2Payload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
index 69b0e9e2..fe4c8fce 100644
--- a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
@@ -29,10 +29,13 @@ class RomeConfig final : public X86PlatformConfig {
 public:
   RomeConfig() noexcept
       : X86PlatformConfig(
-            "ZEN_2_EPYC", 23, {49},
+            /*Name=*/"ZEN_2_EPYC", /*Family=*/23, /*Models=*/{49},
+            /*Settings=*/
             environment::payload::PayloadSettings(
-                {1, 2}, {32768, 524288, 2097152}, 104857600, 1536,
+                /*Threads=*/{1, 2}, /*DataCacheBufferSize=*/{32768, 524288, 2097152}, /*RamBufferSize=*/104857600,
+                /*Lines=*/1536,
+                /*InstructionGroups=*/
                 {{"RAM_L", 10}, {"L3_L", 25}, {"L2_L", 91}, {"L1_2LS_256", 72}, {"L1_LS_256", 82}, {"REG", 75}}),
-            std::make_shared<const payload::FMAPayload>()) {}
+            /*Payload=*/std::make_shared<const payload::FMAPayload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
index ebad14ca..bf786979 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
@@ -28,10 +28,13 @@ namespace firestarter::environment::x86::platform {
 class SandyBridgeConfig final : public X86PlatformConfig {
 public:
   SandyBridgeConfig() noexcept
-      : X86PlatformConfig("SNB_COREI", 6, {42, 58},
-                          environment::payload::PayloadSettings(
-                              {1, 2}, {32768, 262144, 1572864}, 104857600, 1536,
-                              {{"RAM_L", 2}, {"L3_LS", 4}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 45}}),
-                          std::make_shared<const payload::AVXPayload>()) {}
+      : X86PlatformConfig(
+            /*Name=*/"SNB_COREI", /*Family=*/6, /*Models=*/{42, 58},
+            /*Settings=*/
+            environment::payload::PayloadSettings(
+                /*Threads=*/{1, 2}, /*DataCacheBufferSize=*/{32768, 262144, 1572864}, /*RamBufferSize=*/104857600,
+                /*Lines=*/1536,
+                /*InstructionGroups=*/{{"RAM_L", 2}, {"L3_LS", 4}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 45}}),
+            /*Payload=*/std::make_shared<const payload::AVXPayload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
index b42ca0c5..ed61f304 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
@@ -29,11 +29,14 @@ namespace firestarter::environment::x86::platform {
 class SandyBridgeEPConfig final : public X86PlatformConfig {
 public:
   SandyBridgeEPConfig() noexcept
-      : X86PlatformConfig("SNB_XEONEP", 6, {45, 62},
-                          environment::payload::PayloadSettings(
-                              {1, 2}, {32768, 262144, 2621440}, 104857600, 1536,
-                              {{"RAM_L", 3}, {"L3_LS", 2}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 30}}),
-                          std::make_shared<const payload::AVXPayload>()) {}
+      : X86PlatformConfig(
+            /*Name=*/"SNB_XEONEP", /*Family=*/6, /*Models=*/{45, 62},
+            /*Settings=*/
+            environment::payload::PayloadSettings(
+                /*Threads=*/{1, 2}, /*DataCacheBufferSize=*/{32768, 262144, 2621440}, /*RamBufferSize=*/104857600,
+                /*Lines=*/1536,
+                /*InstructionGroups=*/{{"RAM_L", 3}, {"L3_LS", 2}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 30}}),
+            /*Payload=*/std::make_shared<const payload::AVXPayload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
 
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
index 57cf9eec..8e3f74fa 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
@@ -29,11 +29,14 @@ namespace firestarter::environment::x86::platform {
 class SkylakeConfig final : public X86PlatformConfig {
 public:
   SkylakeConfig() noexcept
-      : X86PlatformConfig("SKL_COREI", 6, {78, 94},
+      : X86PlatformConfig(/*Name=*/"SKL_COREI", /*Family=*/6, /*Models=*/{78, 94},
+                          /*Settings=*/
                           environment::payload::PayloadSettings(
-                              {1, 2}, {32768, 262144, 1572864}, 104857600, 1536,
+                              /*Threads=*/{1, 2}, /*DataCacheBufferSize=*/{32768, 262144, 1572864},
+                              /*RamBufferSize=*/104857600, /*Lines=*/1536,
+                              /*InstructionGroups=*/
                               {{"RAM_L", 3}, {"L3_LS_256", 5}, {"L2_LS_256", 18}, {"L1_2LS_256", 78}, {"REG", 40}}),
-                          std::make_shared<const payload::FMAPayload>()) {}
+                          /*Payload=*/std::make_shared<const payload::FMAPayload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
 
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
index e9b94f94..c5a69761 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
@@ -28,8 +28,12 @@ namespace firestarter::environment::x86::platform {
 class SkylakeSPConfig final : public X86PlatformConfig {
 public:
   SkylakeSPConfig() noexcept
-      : X86PlatformConfig("SKL_XEONEP", 6, {85},
-                          environment::payload::PayloadSettings({1, 2}, {32768, 1048576, 1441792}, 1048576000, 1536,
+      : X86PlatformConfig(/*Name=*/"SKL_XEONEP", /*Family=*/6, /*Models=*/{85},
+                          /*Settings=*/
+                          environment::payload::PayloadSettings(/*Threads=*/{1, 2},
+                                                                /*DataCacheBufferSize=*/{32768, 1048576, 1441792},
+                                                                /*RamBufferSize=*/1048576000, /*Lines=*/1536,
+                                                                /*InstructionGroups=*/
                                                                 {{"RAM_S", 3},
                                                                  {"RAM_P", 1},
                                                                  {"L3_S", 1},
@@ -39,6 +43,6 @@ class SkylakeSPConfig final : public X86PlatformConfig {
                                                                  {"L1_S", 0},
                                                                  {"L1_L", 40},
                                                                  {"REG", 140}}),
-                          std::make_shared<const payload::AVX512Payload>()) {}
+                          /*Payload=*/std::make_shared<const payload::AVX512Payload>()) {}
 };
 } // namespace firestarter::environment::x86::platform
diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp
index 395b1cf7..b9f9e039 100644
--- a/include/firestarter/Environment/X86/X86Environment.hpp
+++ b/include/firestarter/Environment/X86/X86Environment.hpp
@@ -43,35 +43,59 @@ class X86Environment final : public Environment {
   X86Environment()
       : Environment(std::make_unique<X86CPUTopology>()) {}
 
+  /// Getter (which allows modifying) for the current platform config containing the payload, settings, the
+  /// associated name and the default X86 family and models.
   [[nodiscard]] auto config() -> platform::X86PlatformConfig& final {
     auto* X86PlatformConfig = dynamic_cast<platform::X86PlatformConfig*>(&Environment::config());
     assert(X86PlatformConfig && "X86PlatformConfig is a nullptr");
     return *X86PlatformConfig;
   }
 
+  /// Const getter for the current platform config containing the payload, settings, the associated name and the default
+  /// X86 family and models.
   [[nodiscard]] auto config() const -> const platform::X86PlatformConfig& final {
     const auto* X86PlatformConfig = dynamic_cast<const platform::X86PlatformConfig*>(&Environment::config());
     assert(X86PlatformConfig && "X86PlatformConfig is a nullptr");
     return *X86PlatformConfig;
   }
 
+  /// Const getter for the current CPU topology with X86 specific modifications.
   [[nodiscard]] auto topology() const -> const X86CPUTopology& final {
     const auto* X86Topology = dynamic_cast<const X86CPUTopology*>(&Environment::topology());
     assert(X86Topology && "X86Topology is a nullptr");
     return *X86Topology;
   }
 
+  /// Select a PlatformConfig based on its generated id. This function will throw if a payload is not available or the
+  /// id is incorrect. If id is zero we automatically select a matching PlatformConfig.
+  /// \arg FunctionId The id of the PlatformConfig that should be selected.
+  /// \arg AllowUnavailablePayload If true we will not throw if the PlatformConfig is not available.
   void selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) override;
+
+  /// Parse the selected payload instruction groups and save them in the selected function. Throws if the input is
+  /// invalid.
+  /// \arg Groups The list of instruction groups that is in the format: multiple INSTRUCTION:VALUE pairs
+  /// comma-separated.
   void selectInstructionGroups(std::string Groups) override;
+
+  /// Print the available instruction groups of the selected function.
   void printAvailableInstructionGroups() override;
+
+  /// Set the line count in the selected function.
+  /// \arg LineCount The maximum number of instructions that should be in the high-load loop.
   void setLineCount(unsigned LineCount) override;
+
+  /// Print a summary of the settings of the selected config.
   void printSelectedCodePathSummary() override;
+
+  /// Print a list of available high-load functions and if they are available on the current system. This includes all
+  /// PlatformConfigs in combination with all thread per core counts.
   void printFunctionSummary() override;
 
 private:
-  // The available function IDs are generated by iterating through this list
-  // of PlatformConfig. Add new PlatformConfig at the bottom to maintain
-  // stable IDs.
+  /// The list of available platform configs that is printed when supplying the --avail command line argument. The IDs
+  /// for these configs are generated by iterating through this list starting with 1. To maintain stable IDs in
+  /// FIRESTARTER new configs should be added to the bottom of the list.
   const std::list<std::shared_ptr<platform::X86PlatformConfig>> PlatformConfigs = {
       std::make_shared<platform::KnightsLandingConfig>(), std::make_shared<platform::SkylakeConfig>(),
       std::make_shared<platform::SkylakeSPConfig>(),      std::make_shared<platform::HaswellConfig>(),
@@ -80,7 +104,12 @@ class X86Environment final : public Environment {
       std::make_shared<platform::NehalemEPConfig>(),      std::make_shared<platform::BulldozerConfig>(),
       std::make_shared<platform::NaplesConfig>(),         std::make_shared<platform::RomeConfig>()};
 
-  // List of fallback PlatformConfig. Add one for each x86 extension.
+  /// The list of configs that are fallbacks. If none of the PlatformConfigs is the default one on the current CPU, we
+  /// select the first one from this list that is available on the current system. If multiple configs can be available
+  /// on one system the one with higher priority should be at the top of this list. Modern X86 CPUs will support SSE2
+  /// therefore it is the last on the list. CPUs that support AVX512 will most certainly also support FMA and AVX,
+  /// AVX512 takes precedence. This list should contain one entry for each of the supported CPU extensions by the
+  /// FIRESTARTER payloads.
   const std::list<std::shared_ptr<platform::X86PlatformConfig>> FallbackPlatformConfigs = {
       std::make_shared<platform::SkylakeSPConfig>(),   // AVX512
       std::make_shared<platform::BulldozerConfig>(),   // FMA4
diff --git a/src/firestarter/Environment/Environment.cpp b/src/firestarter/Environment/Environment.cpp
index 548aca58..590483c4 100644
--- a/src/firestarter/Environment/Environment.cpp
+++ b/src/firestarter/Environment/Environment.cpp
@@ -42,7 +42,7 @@ auto Environment::cpuSet(unsigned Id) -> int {
   return sched_setaffinity(0, sizeof(cpu_set_t), &Mask);
 }
 
-auto Environment::cpuAllowed(unsigned Id) -> int {
+auto Environment::cpuAllowed(unsigned Id) -> bool {
   cpu_set_t Mask;
 
   CPU_ZERO(&Mask);
@@ -51,7 +51,7 @@ auto Environment::cpuAllowed(unsigned Id) -> int {
     return CPU_ISSET(Id, &Mask);
   }
 
-  return 0;
+  return false;
 }
 
 void Environment::addCpuSet(unsigned Cpu, cpu_set_t& Mask) const {
@@ -72,10 +72,6 @@ void Environment::addCpuSet(unsigned Cpu, cpu_set_t& Mask) const {
 #endif
 
 void Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::string& CpuBind) {
-#if not((defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY))
-  (void)CpuBind;
-#endif
-
   if (RequestedNumThreads > 0 && RequestedNumThreads > topology().numThreads()) {
     log::warn() << "Not enough CPUs for requested number of threads";
   }
@@ -122,6 +118,8 @@ void Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::s
       }
     }
   } else {
+    RequestedNumThreads = 0;
+
     // parse CPULIST for binding
     const auto Delimiter = ',';
     const std::regex Re(R"(^(?:(\d+)(?:-([1-9]\d*)(?:\/([1-9]\d*))?)?)$)");
@@ -160,24 +158,26 @@ void Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, const std::s
       }
     }
   }
-#else
-  if (RequestedNumThreads == 0) {
-    RequestedNumThreads = topology().maxNumThreads();
-  }
-#endif
 
   if (RequestedNumThreads == 0) {
     throw std::invalid_argument("Found no usable CPUs!");
   }
 
-#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)
+  // Save the ids of the threads.
   for (unsigned I = 0; I < topology().maxNumThreads(); I++) {
     if (CPU_ISSET(I, &Cpuset)) {
       this->CpuBind.push_back(I);
     }
   }
+#else
+  (void)CpuBind;
+
+  if (RequestedNumThreads == 0) {
+    RequestedNumThreads = topology().maxNumThreads();
+  }
 #endif
 
+  // Limit the number of threads to the maximum on the CPU.
   this->RequestedNumThreads = (std::min)(RequestedNumThreads, topology().maxNumThreads());
 }
 
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index dcf4cdbd..ea651aa9 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -43,7 +43,7 @@ void X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa
                                    PlatformConfigPtr->functionName(ThreadsPerCore) + "\") requires " +
                                    PlatformConfigPtr->payload()->name() + ", which is not supported by the processor.";
           if (AllowUnavailablePayload) {
-            log::error() << ErrorString;
+            log::warn() << ErrorString;
           } else {
             throw std::invalid_argument(ErrorString);
           }
diff --git a/src/firestarter/OneAPI/OneAPI.cpp b/src/firestarter/OneAPI/OneAPI.cpp
index fcff8cf4..ddbd0dec 100644
--- a/src/firestarter/OneAPI/OneAPI.cpp
+++ b/src/firestarter/OneAPI/OneAPI.cpp
@@ -38,7 +38,7 @@ namespace firestarter::oneapi {
 namespace {
 
 /// Helper function to generate random floating point values between 0 and 1 in an array.
-/// \targ FloatingPointType The type of floating point value of the array. Either float or double.
+/// \tparam FloatingPointType The type of floating point value of the array. Either float or double.
 /// \arg NumberOfElems The number of elements of the array.
 /// \arg Array The array of floating point values which should be initilized with random data between 0 and 1.
 template <typename FloatingPointType> void fillArrayWithRandomFloats(size_t NumberOfElems, FloatingPointType* Array) {

From 23ccb55c007c8371e5cf199983cdb6ace429e54b Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 7 Nov 2024 16:28:01 +0100
Subject: [PATCH 142/167] docstrings

---
 .../firestarter/Environment/CPUTopology.hpp   | 51 +++++++++++++++----
 .../Environment/X86/X86CPUTopology.hpp        | 23 +++++++--
 2 files changed, 61 insertions(+), 13 deletions(-)

diff --git a/include/firestarter/Environment/CPUTopology.hpp b/include/firestarter/Environment/CPUTopology.hpp
index a68eee81..bf9a8d19 100644
--- a/include/firestarter/Environment/CPUTopology.hpp
+++ b/include/firestarter/Environment/CPUTopology.hpp
@@ -34,6 +34,7 @@ extern "C" {
 
 namespace firestarter::environment {
 
+/// This class models the properties of a processor.
 class CPUTopology {
 public:
   explicit CPUTopology(std::string Architecture);
@@ -41,50 +42,80 @@ class CPUTopology {
 
   friend auto operator<<(std::ostream& Stream, CPUTopology const& CpuTopologyRef) -> std::ostream&;
 
+  /// The total number of hardware threads.
   [[nodiscard]] auto numThreads() const -> unsigned { return NumThreadsPerCore * NumCoresTotal; }
+  /// The maximum os_index of all PUs plus 1 if we cannot determine the number of cpu kinds. Otherwise the maximum
+  /// number of PUs.
   [[nodiscard]] auto maxNumThreads() const -> unsigned;
+  /// The number of threads per core, assuming a consistent number of threads per core.
   [[nodiscard]] auto numThreadsPerCore() const -> unsigned { return NumThreadsPerCore; }
+  /// The total number of cores.
   [[nodiscard]] auto numCoresTotal() const -> unsigned { return NumCoresTotal; }
+  /// The total number of packages.
   [[nodiscard]] auto numPackages() const -> unsigned { return NumPackages; }
-
+  /// The CPU architecture e.g., x86_64
   [[nodiscard]] auto architecture() const -> std::string const& { return Architecture; }
+  /// The CPU vendor i.e., Intel or AMD.
   [[nodiscard]] virtual auto vendor() const -> std::string const& { return Vendor; }
+  /// The processor name, this includes the vendor specific name
   [[nodiscard]] virtual auto processorName() const -> std::string const& { return ProcessorName; }
-  [[nodiscard]] virtual auto model() const -> std::string const& { return Model; }
+  /// The model of the processor. With X86 this is the string of Family, Model and Stepping.
+  [[nodiscard]] virtual auto model() const -> std::string const& = 0;
 
-  // get the size of the L1i-cache in bytes
+  /// Getter for the L1i-cache size in bytes
   [[nodiscard]] auto instructionCacheSize() const -> const auto& { return InstructionCacheSize; }
 
-  // return the cpu clockrate in Hz
+  /// Getter for the clockrate in Hz
   [[nodiscard]] virtual auto clockrate() const -> uint64_t { return Clockrate; }
-  // return the cpu features
+
+  /// Getter for the list of CPU features
   [[nodiscard]] virtual auto features() const -> std::list<std::string> const& = 0;
 
-  // get a timestamp
+  /// Get the current hardware timestamp
   [[nodiscard]] virtual auto timestamp() const -> uint64_t = 0;
 
-  [[nodiscard]] auto getPkgIdFromPU(unsigned Pu) const -> std::optional<unsigned>;
+  /// Get the logical index of the core that houses the PU which is described by the os index.
+  /// \arg Pu The os index of the thread.
+  /// \returns Optionally the logical index of the core that houses this hardware thread.
   [[nodiscard]] auto getCoreIdFromPU(unsigned Pu) const -> std::optional<unsigned>;
 
+  /// Get the logical index of the package that houses the PU which is described by the os index.
+  /// \arg Pu The os index of the thread.
+  /// \returns Optionally the logical index of the package that houses this hardware thread.
+  [[nodiscard]] auto getPkgIdFromPU(unsigned Pu) const -> std::optional<unsigned>;
+
 protected:
+  /// Read the scaling_governor file of cpu0 on linux and return the contents as a string.
   [[nodiscard]] static auto scalingGovernor() -> std::string;
+
+  /// Print the information about this processor to a stream.
   [[nodiscard]] auto print(std::ostream& Stream) const -> std::ostream&;
 
 private:
-  /// The Vendor name of the CPU.
+  /// The CPU vendor i.e., Intel or AMD.
   std::string Vendor;
-  /// The Model name of the CPU.
-  std::string Model;
 
+  /// Helper function to open a filepath and return a stringstream with its contents.
+  /// \arg FilePath The file to open
+  /// \returns A stringstream with the contents of the file.
   [[nodiscard]] static auto getFileAsStream(std::string const& FilePath) -> std::stringstream;
 
+  /// The number of threads per core, assuming a consistent number of threads per core.
   unsigned NumThreadsPerCore;
+  /// The total number of cores.
   unsigned NumCoresTotal;
+  /// The total number of packages.
   unsigned NumPackages;
+
+  /// The CPU architecture e.g., x86_64
   std::string Architecture;
+  /// The processor name, this includes the vendor specific name
   std::string ProcessorName;
+  /// The optional size of the instruction cache per core.
   std::optional<unsigned> InstructionCacheSize;
+  /// Clockrate of the CPU in Hz
   uint64_t Clockrate = 0;
+  /// The hwloc topology that is used to query information about the processor.
   hwloc_topology_t Topology{};
 };
 
diff --git a/include/firestarter/Environment/X86/X86CPUTopology.hpp b/include/firestarter/Environment/X86/X86CPUTopology.hpp
index c7706a77..39d59278 100644
--- a/include/firestarter/Environment/X86/X86CPUTopology.hpp
+++ b/include/firestarter/Environment/X86/X86CPUTopology.hpp
@@ -27,40 +27,57 @@
 
 namespace firestarter::environment::x86 {
 
+/// This class models the properties of a x86_64 processor.
 class X86CPUTopology final : public CPUTopology {
 public:
   X86CPUTopology();
 
   friend auto operator<<(std::ostream& Stream, X86CPUTopology const& CpuTopology) -> std::ostream&;
 
+  /// Getter for the list of CPU features
   [[nodiscard]] auto features() const -> std::list<std::string> const& override { return this->FeatureList; }
+  /// Getter for the CPU features class from asmjit
   [[nodiscard]] auto featuresAsmjit() const -> const asmjit::CpuFeatures& { return this->CpuInfo.features(); }
 
+  /// Getter for the clockrate in Hz
   [[nodiscard]] auto clockrate() const -> uint64_t override;
 
+  /// Get the current hardware timestamp
   [[nodiscard]] auto timestamp() const -> uint64_t override;
 
+  /// The family id of the x86 processor
   [[nodiscard]] auto familyId() const -> unsigned { return this->CpuInfo.familyId(); }
+  /// The model id of the x86 processor
   [[nodiscard]] auto modelId() const -> unsigned { return this->CpuInfo.modelId(); }
+  /// The stepping id of the x86 processor
   [[nodiscard]] auto stepping() const -> unsigned { return this->CpuInfo.stepping(); }
-
+  /// The CPU vendor i.e., Intel or AMD.
   [[nodiscard]] auto vendor() const -> std::string const& final { return Vendor; }
+  /// Get the string containing family, model and stepping ids.
   [[nodiscard]] auto model() const -> std::string const& final { return Model; }
 
 private:
+  /// Does this processor support timestamp counters
   [[nodiscard]] auto hasRdtsc() const -> bool { return this->HasRdtsc; }
+  /// Does this processor have invariant timestamp counters
   [[nodiscard]] auto hasInvariantRdtsc() const -> bool { return this->HasInvariantRdtsc; }
+
+  /// A wrapper to the cpuid call to keep a consistent interface between Windows and other platforms.
   static void cpuid(uint64_t* Rax, uint64_t* Rbx, uint64_t* Rcx, uint64_t* Rdx);
 
+  /// The asmjit CpuInfo for the current processor
   asmjit::CpuInfo CpuInfo;
+  /// The list of cpufeatures that are supported by the current processor
   std::list<std::string> FeatureList;
 
+  /// Does this processor support timestamp counters
   bool HasRdtsc;
+  /// Does this processor have invariant timestamp counters
   bool HasInvariantRdtsc;
 
-  /// The Vendor name of the CPU.
+  /// The CPU vendor i.e., Intel or AMD.
   std::string Vendor;
-  /// The Model name of the CPU.
+  /// Model string containing family, model and stepping ids.
   std::string Model;
 };
 

From 5a301a364db1101441e19dd0606453de7476132b Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 7 Nov 2024 16:58:47 +0100
Subject: [PATCH 143/167] add docstrings

---
 include/firestarter/Json/Summary.hpp          |  1 +
 .../Logging/FirstWorkerThreadFilter.hpp       |  6 ++
 include/firestarter/Logging/Log.hpp           |  3 +
 .../firestarter/Measurement/MetricInterface.h | 62 +++++++++++--------
 include/firestarter/Measurement/Summary.hpp   |  8 +++
 include/firestarter/Measurement/TimeValue.hpp |  1 +
 6 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/include/firestarter/Json/Summary.hpp b/include/firestarter/Json/Summary.hpp
index 8fe781e9..87495fb5 100644
--- a/include/firestarter/Json/Summary.hpp
+++ b/include/firestarter/Json/Summary.hpp
@@ -23,6 +23,7 @@
 
 #include "../Measurement/Summary.hpp"
 
+/// Json serializer and deserializer for the firestarter::measurement::Summary struct
 namespace nlohmann {
 template <> struct adl_serializer<firestarter::measurement::Summary> {
   // functions for nlohmann json do not follow LLVM code style
diff --git a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
index 1da12b39..4e501b2e 100644
--- a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
+++ b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp
@@ -26,12 +26,18 @@
 
 namespace firestarter::logging {
 
+/// Logging filter for nitro to discard values that do not match a specific thread id.
 template <typename Record> class FirstWorkerThreadFilter {
 public:
   using record_type = Record;
 
+  /// Set the thread id from which records should not be discarded.
+  /// \arg NewFirstThread The specified thread.
   static void setFirstThread(std::thread::id NewFirstThread) { FirstThread = NewFirstThread; }
 
+  /// Filter records. We keep records if they are from the specified thread or if the severity is at least error.
+  /// \arg R The record to filter.
+  /// \returns true if the record should be kept.
   auto filter(Record& R) const -> bool {
     return R.std_thread_id() == FirstThread || R.severity() >= nitro::log::severity_level::error;
   }
diff --git a/include/firestarter/Logging/Log.hpp b/include/firestarter/Logging/Log.hpp
index ec543aa1..912f76be 100644
--- a/include/firestarter/Logging/Log.hpp
+++ b/include/firestarter/Logging/Log.hpp
@@ -40,6 +40,8 @@ namespace firestarter {
 
 namespace logging {
 
+/// Formatter to log Records with severity warn, error and fatal to stderr and all other Records to stdout. If a record
+/// has severity error or fatal we abort the program.
 class StdOut {
 public:
   static void sink(nitro::log::severity_level Severity, const std::string& FormattedRecord) {
@@ -73,6 +75,7 @@ template <typename Record>
 // The class may not be named Formater since this is used as a template argument name in nitro which will cause errors
 // when compiling with MSC. We will also write it with lower case and the correct spelling in case it gets renamed
 // correctly there.
+/// Format Record and add a string representing the severity in front.
 class formatter {
   // NOLINTEND(readability-identifier-naming)
 public:
diff --git a/include/firestarter/Measurement/MetricInterface.h b/include/firestarter/Measurement/MetricInterface.h
index 6d738661..f3382cf2 100644
--- a/include/firestarter/Measurement/MetricInterface.h
+++ b/include/firestarter/Measurement/MetricInterface.h
@@ -21,67 +21,77 @@
 
 #pragma once
 
+/// This file provides a C style interface to write metrics for FIRESTARTER and provide them as shared libraries.
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 #include <cstdint>
 // NOLINTBEGIN(modernize-use-using)
+
+/// Describe the type of the metric and how values need to be accumulated. Per default metrics are of pulling type where
+/// FIRESTARTER will pull the values through the GetReading function.
 typedef struct {
   uint32_t
-      // metric value is absolute
+      /// Set this to 1 if the metric values provided are absolute.
       Absolute : 1,
-      // metric value accumulates
+      /// Set this to 1 if the metric values provided are accumulative.
       Accumalative : 1,
-      // Set to divide metric values by thread count.
+      /// Set this to 1 if the metric value needs to be divided by the number of threads.
       DivideByThreadCount : 1,
-      // Set to insert time-value pairs via callback function passed by
-      // register_insert_callback.
+      /// Set this to 1 if the metric will provide time-value data in a pushing way through the RegisterInsertCallback
+      /// function.
       InsertCallback : 1,
-      // ignore the start and stop delta set by the user
+      /// Set this to 1 if the accumulation of the metric should ignore the start/stop delta which are specified by the
+      /// user of FIRESTARTER.
       IgnoreStartStopDelta : 1,
-      // Reserved space to round up to 32 bits
+      /// Reserved space to fill 32 bits
       Reserved : 27;
 } MetricType;
 
-// Define `metric_interface_t metric` inside your shared library to be able to
-// load it during runtime.
+/// Define `MetricInterface Metric` inside your shared library to be able to load it during runtime.
 typedef struct {
-  // the name of the metric
+  /// The name of the metric
   const char* Name;
 
-  // metric type with bitfield from metric_type_t
+  /// Describes what the value of the metrics represents and how it needs to be accumulated.
   MetricType Type;
 
-  // the unit of the metric
+  /// The unit of the metric
   const char* Unit;
 
+  /// The time in usecs after which the callback should be called again. Set to 0 to disable.
   uint64_t CallbackTime;
 
-  // This function will be called every `callback_time` usecs. Disable by
-  // setting `callback_time` to 0.
+  /// This function will be called every `CallbackTime` usecs. Disable by setting `CallbackTime` to 0.
   void (*Callback)();
 
-  // init the metric.
-  // returns EXIT_SUCCESS on success.
+  /// init the metric.
+  /// \returns EXIT_SUCCESS on success.
   int32_t (*Init)();
 
-  // deinit the metric.
-  // returns EXIT_SUCCESS on success.
+  /// deinit the metric.
+  /// \returns EXIT_SUCCESS on success.
   int32_t (*Fini)();
 
-  // Get a reading of the metric
-  // Return EXIT_SUCCESS if we got a new value.
-  // Set this function pointer to NULL if METRIC_INSERT_CALLBACK is specified.
+  /// Get a reading of the metric. Set this function pointer to null if MetricType::InsertCallback is specified in the
+  /// Type.
+  /// \arg Value The pointer to which the value will be saved.
+  /// \returns EXIT_SUCCESS if we got a new value.
   int32_t (*GetReading)(double* Value);
 
-  // Get error in case return code not being EXIT_SUCCESS
+  /// Get error in case return code not being EXIT_SUCCESS.
+  /// \returns The error string.
   const char* (*GetError)();
 
-  // If METRIC_INSERT_CALLBACK is set in the type, this function will be passed
-  // a callback and the first argument for the callback.
-  // Further arguments of callback are the metric name, an unix timestamp (time
-  // since epoch) and a metric value.
+  /// If MetricType::InsertCallback is specified in the Type this function will be used to pass the metric a callback
+  /// and the first argument to this callback.
+  /// The first argument is the function pointer to the callback. The first argument to this function pointer needs to
+  /// be filled with the second argument to this function.
+  /// The supplied function pointer needs to be called with the metric name for the second, a unix timestamp (time
+  /// since epoch) for the third and a metric value for the fourth argument. This allows the metric to provide values in
+  /// a pushing way in contrast to the pulling way of the GetReading function.
   int32_t (*RegisterInsertCallback)(void (*)(void*, const char*, int64_t, double), void*);
 
 } MetricInterface;
diff --git a/include/firestarter/Measurement/Summary.hpp b/include/firestarter/Measurement/Summary.hpp
index 019a73eb..085a482b 100644
--- a/include/firestarter/Measurement/Summary.hpp
+++ b/include/firestarter/Measurement/Summary.hpp
@@ -29,6 +29,7 @@
 
 namespace firestarter::measurement {
 
+/// This struct summarizes multiple timevalues: duration, number of time points, average and stddev are saved.
 struct Summary {
   size_t NumTimepoints;
   std::chrono::milliseconds Duration;
@@ -36,6 +37,13 @@ struct Summary {
   double Average;
   double Stddev;
 
+  /// Calculate the summary over a range of timevalues for a given metric and number of threads.
+  /// \arg Begin The start of the iterator
+  /// \arg End The end of the iterator
+  /// \arg MetricType This describes what each timevalue represents and how the metric needs to be calculated into a
+  /// summary.
+  /// \arg NumThreads The number of threads this metric was accumulated across.
+  /// \returns The summary over the range of timevalues from a specific metric.
   static auto calculate(std::vector<TimeValue>::iterator Begin, std::vector<TimeValue>::iterator End,
                         MetricType MetricType, uint64_t NumThreads) -> Summary;
 };
diff --git a/include/firestarter/Measurement/TimeValue.hpp b/include/firestarter/Measurement/TimeValue.hpp
index 10b31e8b..8088385e 100644
--- a/include/firestarter/Measurement/TimeValue.hpp
+++ b/include/firestarter/Measurement/TimeValue.hpp
@@ -25,6 +25,7 @@
 
 namespace firestarter::measurement {
 
+/// This struct models a value that was captured at a specific timepoint.
 struct TimeValue {
   TimeValue() = default;
 

From e72b0101c52e20097dbb7565b49774ae6bebdc4c Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 00:55:40 +0100
Subject: [PATCH 144/167] add docstrings

---
 .../Measurement/MeasurementWorker.hpp         | 26 ++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp
index eed145ae..7f3ed989 100644
--- a/include/firestarter/Measurement/MeasurementWorker.hpp
+++ b/include/firestarter/Measurement/MeasurementWorker.hpp
@@ -38,22 +38,36 @@ namespace firestarter::measurement {
 
 class MeasurementWorker {
 private:
+  /// The thread that handles the values that are read from metrics
   pthread_t WorkerThread{};
+  /// The thread that handles the metric values that are read from stdin
   pthread_t StdinThread{};
 
+  /// The vector of metrics that are available. Currently the following metrics are builtin: sysfs-powercap-rapl,
+  /// perf-ipc, perf-freq and ipc-estimate. Metric provided through shared libraries are added to this list.
   std::vector<const MetricInterface*> Metrics = {&RaplMetric, &PerfIpcMetric, &PerfFreqMetric, &IpcEstimateMetric};
 
+  /// Mutex to access the Values map.
   std::mutex ValuesMutex;
+  /// Map from metric name to the vector of timevalues of this metric.
   std::map<std::string, std::vector<TimeValue>> Values;
 
+  /// The thread function handles the timed polling of the metric values and saves them to the Value datastructure.
   static auto dataAcquisitionWorker(void* MeasurementWorker) -> void*;
 
+  /// The thread function that handles the acquisition of the metric values from stdin and saves them to the Value
+  /// datastructure.
   static auto stdinDataAcquisitionWorker(void* MeasurementWorker) -> void*;
 
+  /// Return the pointer to a metric from the Metrics vector that matches the supplied name.
+  /// \arg MetricName The name of the metric
+  /// \returns the pointer to the metric with the specified name or a nullptr
   auto findMetricByName(std::string MetricName) -> const MetricInterface*;
 
+  /// We poll the values of all the metrics after this number of milliseconds.
   std::chrono::milliseconds UpdateInterval;
 
+  /// The start time of the measurement that should be summarized with the getValues function.
   std::chrono::high_resolution_clock::time_point StartTime;
 
   // some metric values have to be devided by this
@@ -62,9 +76,12 @@ class MeasurementWorker {
   std::string AvailableMetricsString;
 
 #ifndef FIRESTARTER_LINK_STATIC
+  /// The pointer to the metrics that are used for dynamic libraries. We need to save them separately here to call
+  /// dlclose later.
   std::vector<void*> MetricDylibs;
 #endif
 
+  /// The name of the metrics that are supplied from stdin.
   std::vector<std::string> StdinMetrics;
 
 public:
@@ -80,7 +97,7 @@ class MeasurementWorker {
 
   auto stdinMetrics() -> std::vector<std::string> const& { return StdinMetrics; }
 
-  // returns a list of metrics
+  /// Get the name of the metrics. This includes all metrics, builins, from dynamic libraries and metrics from stdin.
   auto metricNames() -> std::vector<std::string>;
 
   // setup the selected metrics
@@ -90,10 +107,13 @@ class MeasurementWorker {
   // callback function for metrics
   void insertCallback(const char* MetricName, int64_t TimeSinceEpoch, double Value);
 
-  // start the measurement
+  /// Set the StartTime to the current timestep
   void startMeasurement();
 
-  // get the measurement values begining from measurement start until now.
+  /// Get the measurement values beginning from measurement start (set with startMeasurement) until the measurement stop
+  /// (now).
+  /// \arg StartDelta The time to skip from the measurement start
+  /// \arg StopDelta The time to skip from the measurement stop
   auto getValues(std::chrono::milliseconds StartDelta = std::chrono::milliseconds::zero(),
                  std::chrono::milliseconds StopDelta = std::chrono::milliseconds::zero())
       -> std::map<std::string, Summary>;

From ad22452188140113927d85814f49f38de6d5f50e Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 00:56:08 +0100
Subject: [PATCH 145/167] change log error to log warn to not abort on failed
 metric init

---
 src/firestarter/Measurement/MeasurementWorker.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp
index d1a18c6f..46e277b2 100644
--- a/src/firestarter/Measurement/MeasurementWorker.cpp
+++ b/src/firestarter/Measurement/MeasurementWorker.cpp
@@ -208,7 +208,7 @@ auto MeasurementWorker::initMetrics(std::vector<std::string> const& MetricNames)
       if (Metric != nullptr) {
         const auto ReturnValue = Metric->Init();
         if (ReturnValue != EXIT_SUCCESS) {
-          log::error() << "Metric " << Metric->Name << ": " << Metric->GetError();
+          log::warn() << "Metric " << Metric->Name << ": " << Metric->GetError();
           continue;
         }
       }

From 74988531fcbe2fc1bd3bd9b4a9bc51bc98b6cc88 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 13:27:49 +0100
Subject: [PATCH 146/167] add docstrings for measurement and metrics

---
 .../Measurement/MeasurementWorker.hpp         | 27 +++++++++---
 .../Measurement/Metric/IPCEstimate.hpp        | 25 +++++++++++
 .../firestarter/Measurement/Metric/Perf.hpp   | 44 ++++++++++++++++++-
 .../firestarter/Measurement/Metric/RAPL.hpp   | 14 ++++++
 .../Measurement/MeasurementWorker.cpp         |  4 +-
 5 files changed, 105 insertions(+), 9 deletions(-)

diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp
index 7f3ed989..14088e33 100644
--- a/include/firestarter/Measurement/MeasurementWorker.hpp
+++ b/include/firestarter/Measurement/MeasurementWorker.hpp
@@ -36,6 +36,8 @@ void insertCallback(void* Cls, const char* MetricName, int64_t TimeSinceEpoch, d
 
 namespace firestarter::measurement {
 
+/// This class handles the management of metrics, acquisition of metric data and provides summaries of a time range of
+/// metric values.
 class MeasurementWorker {
 private:
   /// The thread that handles the values that are read from metrics
@@ -70,7 +72,7 @@ class MeasurementWorker {
   /// The start time of the measurement that should be summarized with the getValues function.
   std::chrono::high_resolution_clock::time_point StartTime;
 
-  // some metric values have to be devided by this
+  /// The number of threads FIRESTARTER runs with. This is required by some metrics
   const uint64_t NumThreads;
 
   std::string AvailableMetricsString;
@@ -85,26 +87,38 @@ class MeasurementWorker {
   std::vector<std::string> StdinMetrics;
 
 public:
-  // creates the worker thread
+  /// Initialize the measurement worker. It will spawn the threads for the polling of metric values.
+  /// \arg UpdateInterval The polling time for metric updates.
+  /// \arg NumThreads The number of threads FIRESTARTER is running with.
+  /// \arg MetricDylibsNames The vector of files which are passed to dlopen for using additional metrics from shared
+  /// libraries.
+  /// \arg StdinMetricsNames The vector of metric names that should be read in from stdin
   MeasurementWorker(std::chrono::milliseconds UpdateInterval, uint64_t NumThreads,
                     std::vector<std::string> const& MetricDylibsNames,
                     std::vector<std::string> const& StdinMetricsNames);
 
-  // stops the worker threads
+  /// Stops the worker threads
   ~MeasurementWorker();
 
+  /// Get the formatting table of all metrics and if they are available
   [[nodiscard]] auto availableMetrics() const -> std::string const& { return this->AvailableMetricsString; }
 
+  /// The vector of all metrics that are read from stdin
   auto stdinMetrics() -> std::vector<std::string> const& { return StdinMetrics; }
 
   /// Get the name of the metrics. This includes all metrics, builins, from dynamic libraries and metrics from stdin.
   auto metricNames() -> std::vector<std::string>;
 
-  // setup the selected metrics
-  // returns a vector with the names of inialized metrics
+  /// Initialize the metrics with the provided names.
+  /// \arg MetricNames The metrics to initialize
+  /// \returns The vector of metrics that were successfully initialized.
   auto initMetrics(std::vector<std::string> const& MetricNames) -> std::vector<std::string>;
 
-  // callback function for metrics
+  /// This function insert a time value pair for a specific metric. This function will be provided to metrics to allow
+  /// them to push time value pairs.
+  /// \arg MetricName The name of the metric for which values are inserted
+  /// \arg TimeSinceEpoch The time since epoch of the time value pair
+  /// \arg Value The value of the time value pair
   void insertCallback(const char* MetricName, int64_t TimeSinceEpoch, double Value);
 
   /// Set the StartTime to the current timestep
@@ -114,6 +128,7 @@ class MeasurementWorker {
   /// (now).
   /// \arg StartDelta The time to skip from the measurement start
   /// \arg StopDelta The time to skip from the measurement stop
+  /// \returns The map from all metrics to their respective summaries.
   auto getValues(std::chrono::milliseconds StartDelta = std::chrono::milliseconds::zero(),
                  std::chrono::milliseconds StopDelta = std::chrono::milliseconds::zero())
       -> std::map<std::string, Summary>;
diff --git a/include/firestarter/Measurement/Metric/IPCEstimate.hpp b/include/firestarter/Measurement/Metric/IPCEstimate.hpp
index 2b3a9c0e..3a299244 100644
--- a/include/firestarter/Measurement/Metric/IPCEstimate.hpp
+++ b/include/firestarter/Measurement/Metric/IPCEstimate.hpp
@@ -24,31 +24,56 @@
 #include "../MetricInterface.h"
 #include <string>
 
+/// The wrapper for the C interface to the IpcEstimateMetric metric.
 struct IpcEstimateMetricData {
 private:
   IpcEstimateMetricData() = default;
 
+  /// The error string of this metric
   std::string ErrorString;
+
+  /// The saved callback to push the metric value
   void (*Callback)(void*, const char*, int64_t, double){};
+  /// The saved first argument for the callback
   void* CallbackArg{};
 
 public:
   IpcEstimateMetricData(IpcEstimateMetricData const&) = delete;
   void operator=(IpcEstimateMetricData const&) = delete;
 
+  /// Get the instance of this metric
   static auto instance() -> IpcEstimateMetricData& {
     static IpcEstimateMetricData Instance;
     return Instance;
   }
 
+  /// Deinit the metric.
+  /// \returns EXIT_SUCCESS on success.
   static auto fini() -> int32_t;
+
+  /// Init the metric.
+  /// \returns EXIT_SUCCESS on success.
   static auto init() -> int32_t;
+
+  /// Get error in case return code not being EXIT_SUCCESS.
+  /// \returns The error string.
   static auto getError() -> const char*;
+
+  /// The first argument is the function pointer to the callback. The first argument to this function pointer needs to
+  /// be filled with the second argument to this function.
+  /// The supplied function pointer needs to be called with the metric name for the second, a unix timestamp (time
+  /// since epoch) for the third and a metric value for the fourth argument. This allows the metric to provide values in
+  /// a pushing way in contrast to the pulling way of the GetReading function.
   static auto registerInsertCallback(void (*C)(void*, const char*, int64_t, double), void* Arg) -> int32_t;
 
+  /// Push a value with the current timestamp.
+  /// \arg Value The metric value to push.
   static void insertValue(double Value);
 };
 
+/// This metric provides the IPC estimate based on the estimated number of instructions and the runtime of the high
+/// load loop. The metric value is dependent on the frequency of the processor. It serves as an estimation of the IPC
+/// times the processor frequency.
 static constexpr const MetricInterface IpcEstimateMetric{
     /*Name=*/"ipc-estimate",
     /*Type=*/
diff --git a/include/firestarter/Measurement/Metric/Perf.hpp b/include/firestarter/Measurement/Metric/Perf.hpp
index 681689c7..32ef3fd6 100644
--- a/include/firestarter/Measurement/Metric/Perf.hpp
+++ b/include/firestarter/Measurement/Metric/Perf.hpp
@@ -25,12 +25,14 @@
 #include <array>
 #include <string>
 
+/// The wrapper for the C interface to the PerfIpcMetric and PerfFreqMetric metric.
 class PerfMetricData {
 private:
   PerfMetricData() = default;
 
   static const constexpr char* PerfEventParanoidFile = "/proc/sys/kernel/perf_event_paranoid";
 
+  /// The datastructure that is read from the file descriptor provided by the perf_event_open syscall.
   struct ReadFormat {
     struct ValueAndId {
       uint64_t Value;
@@ -41,33 +43,72 @@ class PerfMetricData {
     std::array<ValueAndId, 2> Values;
   };
 
+  /// The error string of this metric
   std::string ErrorString;
+
+  /// The file descriptor of the perf_event_open syscall for the PERF_COUNT_HW_CPU_CYCLES event. This file descriptor
+  /// handles as a group for the other file descriptor.
   int CpuCyclesFd = -1;
+  /// The file descriptor of the perf_event_open syscall for the PERF_COUNT_HW_INSTRUCTIONS event.
   int InstructionsFd = -1;
+  /// The PERF_EVENT_IOC_ID for the cpu cycles file descriptor.
   uint64_t CpuCyclesId{};
+  /// The PERF_EVENT_IOC_ID for the cpu instruction file descriptor.
   uint64_t InstructionsId{};
+
+  /// The flag that stops init from being executed multiple times.
   bool InitDone = false;
+  /// The value that is returned if the init function is called multiple times.
   int32_t InitValue{};
+
+  /// Save the last read metric for the perf-ipc metric. This value will be updated when the perf-ipc metric is read.
   struct ReadFormat Last {};
 
+  /// Get a reading of the perf-freq and perf-ipc metric. Pointers can be nullptr.
+  /// \arg IpcValue The pointer to which the value for ipc metric value will be saved.
+  /// \arg FreqValue The pointer to which the value for freq metric value will be saved.
+  /// \returns EXIT_SUCCESS if we got a new value.
+  static auto getReading(double* IpcValue, double* FreqValue) -> int32_t;
+
 public:
   PerfMetricData(PerfMetricData const&) = delete;
   void operator=(PerfMetricData const&) = delete;
 
+  /// Get the instance of this metric
   static auto instance() -> PerfMetricData& {
     static PerfMetricData Instance;
     return Instance;
   }
 
+  /// Deinit the metric.
+  /// \returns EXIT_SUCCESS on success.
   static auto fini() -> int32_t;
+
+  /// Init the metric.
+  /// \returns EXIT_SUCCESS on success.
   static auto init() -> int32_t;
+
+  /// Read the value for a specific PERF_EVENT_IOC_ID out of the ReadFormat datastructure.
+  /// \arg Reader The ReadFormat datastructure from which the value will be extracted
+  /// \arg Id The PERF_EVENT_IOC_ID of the metric that should be read.
   static auto valueFromId(struct ReadFormat* Reader, uint64_t Id) -> uint64_t;
-  static auto getReading(double* IpcValue, double* FreqValue) -> int32_t;
+
+  /// Get a reading of the perf-ipc metric.
+  /// \arg Value The pointer to which the value will be saved.
+  /// \returns EXIT_SUCCESS if we got a new value.
   static auto getReadingIpc(double* Value) -> int32_t;
+
+  /// Get a reading of the perf-freq metric.
+  /// \arg Value The pointer to which the value will be saved.
+  /// \returns EXIT_SUCCESS if we got a new value.
   static auto getReadingFreq(double* Value) -> int32_t;
+
+  /// Get error in case return code not being EXIT_SUCCESS.
+  /// \returns The error string.
   static auto getError() -> const char*;
 };
 
+/// This metric provides IPC measurement of the program and all associated threads.
 static constexpr const MetricInterface PerfIpcMetric{
     /*Name=*/"perf-ipc",
     /*Type=*/
@@ -83,6 +124,7 @@ static constexpr const MetricInterface PerfIpcMetric{
     /*RegisterInsertCallback=*/nullptr,
 };
 
+/// This metric provides frequency measurement on the CPUs used to execute the program on.
 static constexpr const MetricInterface PerfFreqMetric{
     /*Name=*/"perf-freq",
     /*Type=*/
diff --git a/include/firestarter/Measurement/Metric/RAPL.hpp b/include/firestarter/Measurement/Metric/RAPL.hpp
index 37e0bbb4..d98f4a8d 100644
--- a/include/firestarter/Measurement/Metric/RAPL.hpp
+++ b/include/firestarter/Measurement/Metric/RAPL.hpp
@@ -60,16 +60,30 @@ class RaplMetricData {
     return Instance;
   }
 
+  /// Deinit the metric.
+  /// \returns EXIT_SUCCESS on success.
   static auto fini() -> int32_t;
+
+  /// Init the metric.
+  /// \returns EXIT_SUCCESS on success.
   static auto init() -> int32_t;
 
+  /// Get a reading of the sysfs-powercap-rapl metric.
+  /// \arg Value The pointer to which the value will be saved.
+  /// \returns EXIT_SUCCESS if we got a new value.
   static auto getReading(double* Value) -> int32_t;
 
+  /// Get error in case return code not being EXIT_SUCCESS.
+  /// \returns The error string.
   static auto getError() -> const char*;
 
+  /// This function should be called every 30s. It will make sure that we do not miss an overflow of a counter and
+  /// therefore get a wrong reading.
   static void callback();
 };
 
+/// This metric provides power measurements through the RAPL interface. Either psys measurement is chosen or if this is
+/// not available the sum of packages and drams.
 static constexpr const MetricInterface RaplMetric{
     /*Name=*/"sysfs-powercap-rapl",
     /*Type=*/
diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp
index 46e277b2..dbefe334 100644
--- a/src/firestarter/Measurement/MeasurementWorker.cpp
+++ b/src/firestarter/Measurement/MeasurementWorker.cpp
@@ -58,8 +58,8 @@ MeasurementWorker::MeasurementWorker(std::chrono::milliseconds UpdateInterval, u
 
 #ifndef FIRESTARTER_LINK_STATIC
   // open dylibs and find metric symbol.
-  // create an entry in _metricDylibs with handle from dlopen and
-  // metric_interface_t structure. add this structe as a pointer to metrics.
+  // create an entry in MetricDylibs with handle from dlopen and
+  // MetricInterface structure. add this structure as a pointer to metrics.
   for (auto const& Dylib : MetricDylibsNames) {
     void* Handle = nullptr;
     const char* Filename = Dylib.c_str();

From eef129fcfe0c80c0232f351b254480a2b1db93e2 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 13:29:08 +0100
Subject: [PATCH 147/167] fix refactor in rapl metric

---
 src/firestarter/Measurement/Metric/RAPL.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/firestarter/Measurement/Metric/RAPL.cpp b/src/firestarter/Measurement/Metric/RAPL.cpp
index 80500d06..15869650 100644
--- a/src/firestarter/Measurement/Metric/RAPL.cpp
+++ b/src/firestarter/Measurement/Metric/RAPL.cpp
@@ -128,7 +128,7 @@ auto RaplMetricData::init() -> int32_t {
     std::getline(MaxEnergyReadingStream, Buffer);
     const auto Max = std::stoul(Buffer);
 
-    auto Def = std::make_unique<ReaderDef>(Path, Max, Reading, 0);
+    auto Def = std::make_unique<ReaderDef>(/*Path=*/Path, /*LastReading=*/Reading, /*Overflow=*/0, /*Max=*/Max);
 
     Instance.Readers.emplace_back(std::move(Def));
   }

From bbe6f57615acda83d5fafd45d9ffa8adc9902568 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 13:33:06 +0100
Subject: [PATCH 148/167] rapl metric docstrings

---
 include/firestarter/Measurement/Metric/RAPL.hpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/firestarter/Measurement/Metric/RAPL.hpp b/include/firestarter/Measurement/Metric/RAPL.hpp
index d98f4a8d..00f12019 100644
--- a/include/firestarter/Measurement/Metric/RAPL.hpp
+++ b/include/firestarter/Measurement/Metric/RAPL.hpp
@@ -26,7 +26,11 @@
 #include <string>
 #include <vector>
 
+/// The wrapper for the C interface to the RaplMetric metric.
 class RaplMetricData {
+private:
+  /// Datastructure to hold the path of the sysfs rapl entry, the last reading (important to detect overflows), the
+  /// counter of the number of overflows and the maximum value that the reading will have.
   struct ReaderDef {
     ReaderDef() = delete;
 
@@ -42,11 +46,13 @@ class RaplMetricData {
     int64_t Max;
   };
 
-private:
+  /// The path to the sysfs rapl entries
   static constexpr const char* RaplPath = "/sys/class/powercap";
 
+  /// The error string of this metric
   std::string ErrorString;
 
+  /// The vector of readers that hold the path and read values from the sysfs rapl
   std::vector<std::unique_ptr<ReaderDef>> Readers;
 
   RaplMetricData() = default;
@@ -55,6 +61,7 @@ class RaplMetricData {
   RaplMetricData(RaplMetricData const&) = delete;
   void operator=(RaplMetricData const&) = delete;
 
+  /// Get the instance of this metric
   static auto instance() -> RaplMetricData& {
     static RaplMetricData Instance;
     return Instance;

From 283ec47409ef9d66f467c50dc32d374c0629f189 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 13:43:44 +0100
Subject: [PATCH 149/167] remove unused return argument in watchdogworker

---
 include/firestarter/Firestarter.hpp |  4 ++--
 src/firestarter/WatchdogWorker.cpp  | 14 +++++---------
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 4e7b6981..ac6a1a03 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -127,8 +127,8 @@ class Firestarter {
   static void loadThreadWorker(const std::shared_ptr<LoadWorkerData>& Td);
 
   // WatchdogWorker.cpp
-  static auto watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load,
-                             std::chrono::seconds Timeout) -> int;
+  static void watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load,
+                             std::chrono::seconds Timeout);
 
   // DumpRegisterWorker.cpp
   void initDumpRegisterWorker();
diff --git a/src/firestarter/WatchdogWorker.cpp b/src/firestarter/WatchdogWorker.cpp
index 2dafbb94..570cbd91 100644
--- a/src/firestarter/WatchdogWorker.cpp
+++ b/src/firestarter/WatchdogWorker.cpp
@@ -30,8 +30,8 @@
 
 namespace firestarter {
 
-auto Firestarter::watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load,
-                                 std::chrono::seconds Timeout) -> int {
+void Firestarter::watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load,
+                                 std::chrono::seconds Timeout) {
 
   using clock = std::chrono::high_resolution_clock;
   using nsec = std::chrono::nanoseconds;
@@ -85,7 +85,7 @@ auto Firestarter::watchdogWorker(std::chrono::microseconds Period, std::chrono::
         WatchdogTerminateAlert.wait_for(Lk, LoadNsec, []() { return WatchdogTerminate; });
         // terminate on interrupt
         if (WatchdogTerminate) {
-          return EXIT_SUCCESS;
+          return;
         }
       }
 #ifdef ENABLE_VTRACING
@@ -114,7 +114,7 @@ auto Firestarter::watchdogWorker(std::chrono::microseconds Period, std::chrono::
         WatchdogTerminateAlert.wait_for(Lk, IdleNsec, []() { return WatchdogTerminate; });
         // terminate on interrupt
         if (WatchdogTerminate) {
-          return EXIT_SUCCESS;
+          return;
         }
       }
 #ifdef ENABLE_VTRACING
@@ -133,7 +133,7 @@ auto Firestarter::watchdogWorker(std::chrono::microseconds Period, std::chrono::
         if (WatchdogTerminate || (Timeout > sec::zero() && (Time > Timeout))) {
           setLoad(LoadThreadWorkType::LoadStop);
 
-          return EXIT_SUCCESS;
+          return;
         }
       }
     }
@@ -149,11 +149,7 @@ auto Firestarter::watchdogWorker(std::chrono::microseconds Period, std::chrono::
     }
 
     setLoad(LoadThreadWorkType::LoadStop);
-
-    return EXIT_SUCCESS;
   }
-
-  return EXIT_SUCCESS;
 }
 
 } // namespace firestarter
\ No newline at end of file

From dc6c80ac17d60447c024906eb217e15f096069d2 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 13:43:54 +0100
Subject: [PATCH 150/167] add docstrings to OneAPI

---
 include/firestarter/OneAPI/OneAPI.hpp | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/include/firestarter/OneAPI/OneAPI.hpp b/include/firestarter/OneAPI/OneAPI.hpp
index 0c8a6e5f..e7e973fc 100644
--- a/include/firestarter/OneAPI/OneAPI.hpp
+++ b/include/firestarter/OneAPI/OneAPI.hpp
@@ -27,14 +27,37 @@
 
 namespace firestarter::oneapi {
 
+/// This class handles the workload on OneAPI compatible GPUs. A gemm routine is used to stress them with a
+/// constant high load. This header does not include any OneAPI specific headers to allow us to not guard the
+/// include of this header in other parts of the program.
 class OneAPI {
 private:
+  /// The thread that is used to initialize the gpus. This thread will wait until each thread that runs the gemm routine
+  /// joins.
   std::thread InitThread;
 
+  /// Spawns a thread for each of the selected gpus, initializes them and starts the execution of the gemm in parallel.
+  /// \arg WaitForInitCv The condition variables used to signal that all gpus are initialized.
+  /// \arg LoadVar A reference to the variable that controls the current load of Firestarter.
+  /// \arg UseFloat Set to true if we want to stress using single precision floating points.
+  /// \arg UseDouble Set to true if we want to stress using double precision floating points. If neither UseFloat nor
+  /// UseDouble is set the precision will be chosen automatically.
+  /// \arg MatrixSize Set to a specific matrix size which will be chosen for the gemm operation or set to 0 for
+  /// automatic selection.
+  /// \arg Gpus Select the number of gpus to stress or 0 for all.
   static void initGpus(std::condition_variable& WaitForInitCv, const volatile firestarter::LoadThreadWorkType& LoadVar,
                        bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus);
 
 public:
+  /// Initialize the OneAPI class. This will start a thread running the OneAPI::initGpus function and wait until all
+  /// gpus are initialized.
+  /// \arg LoadVar A reference to the variable that controls the current load of Firestarter.
+  /// \arg UseFloat Set to true if we want to stress using single precision floating points.
+  /// \arg UseDouble Set to true if we want to stress using double precision floating points. If neither UseFloat nor
+  /// UseDouble is set the precision will be chosen automatically.
+  /// \arg MatrixSize Set to a specific matrix size which will be chosen for the gemm operation or set to 0 for
+  /// automatic selection.
+  /// \arg Gpus Select the number of gpus to stress or 0 for all.
   OneAPI(const volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize,
          int Gpus)
 #if defined(FIRESTARTER_BUILD_ONEAPI)

From cfc2cf173199e7da4f05fb21b263e89942ccfc8d Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 13:57:16 +0100
Subject: [PATCH 151/167] doc strings and rename funtions in optimization

---
 include/firestarter/Optimizer/Algorithm.hpp       | 10 +++++++++-
 include/firestarter/Optimizer/Algorithm/NSGA2.hpp |  2 +-
 src/firestarter/Firestarter.cpp                   |  2 +-
 src/firestarter/Optimizer/Algorithm/NSGA2.cpp     |  6 ++----
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/include/firestarter/Optimizer/Algorithm.hpp b/include/firestarter/Optimizer/Algorithm.hpp
index 8bae8bd3..dfcb6882 100644
--- a/include/firestarter/Optimizer/Algorithm.hpp
+++ b/include/firestarter/Optimizer/Algorithm.hpp
@@ -25,13 +25,21 @@
 
 namespace firestarter::optimizer {
 
+/// Abstract class to provide an interface for evolutionary optimization algorithms.
 class Algorithm {
 public:
   Algorithm() = default;
   virtual ~Algorithm() = default;
 
-  virtual void checkPopulation(Population const& Pop, std::size_t PopulationSize) = 0;
+  /// Check if the population size and the problem matches the requirements of the algorithm. Asserts if this checks
+  /// fail.
+  /// \arg Prob The poblem that should be optimized with this algorithm
+  /// \arg The initial PopulationSize that is used
+  virtual void check(Problem const& Prob, std::size_t PopulationSize) = 0;
 
+  /// Evolve the population across multiple iterations.
+  /// \arg Pop The initial population
+  /// \returns The final population after the optimization has run
   virtual auto evolve(Population& Pop) -> Population = 0;
 };
 
diff --git a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
index 478aa116..43532b76 100644
--- a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
+++ b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
@@ -30,7 +30,7 @@ class NSGA2 : public Algorithm {
   NSGA2(unsigned Gen, double Cr, double M);
   ~NSGA2() override = default;
 
-  void checkPopulation(firestarter::optimizer::Population const& Pop, std::size_t PopulationSize) override;
+  void check(firestarter::optimizer::Problem const& Prob, std::size_t PopulationSize) override;
 
   auto evolve(firestarter::optimizer::Population& Pop) -> firestarter::optimizer::Population override;
 
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 223e1610..02e5ab48 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -161,7 +161,7 @@ Firestarter::Firestarter(Config&& ProvidedConfig)
         throw std::invalid_argument("Algorithm " + Cfg.OptimizationAlgorithm + " unknown.");
       }
 
-      Algorithm->checkPopulation(*Population, Cfg.Individuals);
+      Algorithm->check(Population->problem(), Cfg.Individuals);
     }
   }
 
diff --git a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
index 3d43a073..dc04baa5 100644
--- a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
+++ b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
@@ -49,9 +49,7 @@ NSGA2::NSGA2(unsigned Gen, double Cr, double M)
   }
 }
 
-void NSGA2::checkPopulation(firestarter::optimizer::Population const& Pop, std::size_t PopulationSize) {
-  const auto& Prob = Pop.problem();
-
+void NSGA2::check(firestarter::optimizer::Problem const& Prob, std::size_t PopulationSize) {
   if (!Prob.isMO()) {
     throw std::invalid_argument("NSGA2 is a multiobjective algorithms, while number of objectives is " +
                                 std::to_string(Prob.getNobjs()));
@@ -72,7 +70,7 @@ auto NSGA2::evolve(firestarter::optimizer::Population& Pop) -> firestarter::opti
   auto NP = Pop.size();
   auto Fevals0 = Prob.getFevals();
 
-  this->checkPopulation(Pop, NP);
+  this->check(Prob, NP);
 
   std::random_device Rd;
   std::mt19937 Rng(Rd());

From 29168c22679676b700aaf31be15a720f0998bd3b Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 14:59:50 +0100
Subject: [PATCH 152/167] cleanup optimization worker. add docstrings

---
 include/firestarter/Optimizer/Algorithm.hpp   |  2 +-
 .../firestarter/Optimizer/Algorithm/NSGA2.hpp | 16 ++++++
 include/firestarter/Optimizer/History.hpp     | 28 +++++++++-
 .../firestarter/Optimizer/OptimizerWorker.hpp | 21 ++++++--
 .../Optimizer/Problem/CLIArgumentProblem.hpp  | 54 ++++++++++++++-----
 src/firestarter/Firestarter.cpp               |  4 +-
 src/firestarter/Optimizer/OptimizerWorker.cpp |  7 ++-
 7 files changed, 109 insertions(+), 23 deletions(-)

diff --git a/include/firestarter/Optimizer/Algorithm.hpp b/include/firestarter/Optimizer/Algorithm.hpp
index dfcb6882..58850929 100644
--- a/include/firestarter/Optimizer/Algorithm.hpp
+++ b/include/firestarter/Optimizer/Algorithm.hpp
@@ -34,7 +34,7 @@ class Algorithm {
   /// Check if the population size and the problem matches the requirements of the algorithm. Asserts if this checks
   /// fail.
   /// \arg Prob The poblem that should be optimized with this algorithm
-  /// \arg The initial PopulationSize that is used
+  /// \arg PopulationSize The initial size of the population that is used
   virtual void check(Problem const& Prob, std::size_t PopulationSize) = 0;
 
   /// Evolve the population across multiple iterations.
diff --git a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
index 43532b76..8ba6ec41 100644
--- a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
+++ b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
@@ -25,13 +25,29 @@
 
 namespace firestarter::optimizer::algorithm {
 
+/// This class implements the NSGA2 evolutionary optimization algorithm.
+/// The NSGA2 algorithm, as described in "A fast and elitist multiobjective genetic algorithm: NSGA-II"
+/// (https://dl.acm.org/doi/10.1109/4235.996017), is a multiobjective algorithm allowing FIRESTARTER to optimize with
+/// two (or more) metrics. This is relevant because adding the IPC (instruction per cycle) metric supports the
+/// optimization algorithm to converge towards higher power consumption.
 class NSGA2 : public Algorithm {
 public:
+  /// Initialize the NSGA2 algorithm.
+  /// \arg Gen The number of generations that the algorithm uses to evolve its population.
+  /// \arg Cr The Crossover probability. Must be in range [0,1[
+  /// \arg M Mutation probability. Must be in range [0,1]
   NSGA2(unsigned Gen, double Cr, double M);
   ~NSGA2() override = default;
 
+  /// Check if the problem and population size matches the requirements of NSGA2. We must have a multi-objective problem
+  /// and at least 5 and a multiple of 4 individuals in our population.
+  /// \arg Prob The problem that should be optimized with this algorithm
+  /// \arg PopulationSize The initial size of the population that is used
   void check(firestarter::optimizer::Problem const& Prob, std::size_t PopulationSize) override;
 
+  /// Evolve the population across multiple iterations.
+  /// \arg Pop The initial population
+  /// \returns The final population after the optimization has run
   auto evolve(firestarter::optimizer::Population& Pop) -> firestarter::optimizer::Population override;
 
 private:
diff --git a/include/firestarter/Optimizer/History.hpp b/include/firestarter/Optimizer/History.hpp
index 55c8d05c..aae788d5 100644
--- a/include/firestarter/Optimizer/History.hpp
+++ b/include/firestarter/Optimizer/History.hpp
@@ -39,38 +39,60 @@
 
 namespace firestarter::optimizer {
 
+/// Singleton that keeps track of the history of evaluated individuals and their associated metric summaries.
 struct History {
 private:
-  // https://stackoverflow.com/questions/17074324/how-can-i-sort-two-vectors-in-the-same-way-with-criteria-that-uses-only-one-of/17074810#17074810
+  /// Find the permutation of a vector when sorting it with a supplied comparison function.
+  /// \tparam T The type of the vector elements
+  /// \tparam CompareT The type of the comparison function.
+  /// \arg Vec The const reference to vector that will be sorted.
+  /// \arg Compare The comparison function which will be used to sort the vector.
+  /// \returns The indices of how the vector would be sorted according to the comparison function.
   template <typename T, typename CompareT>
   static auto sortPermutation(const std::vector<T>& Vec, CompareT& Compare) -> std::vector<std::size_t> {
+    // https://stackoverflow.com/questions/17074324/how-can-i-sort-two-vectors-in-the-same-way-with-criteria-that-uses-only-one-of/17074810#17074810
     std::vector<std::size_t> P(Vec.size());
     std::iota(P.begin(), P.end(), 0);
     std::sort(P.begin(), P.end(), [&](std::size_t I, std::size_t J) { return Compare(Vec[I], Vec[J]); });
     return P;
   }
 
+  /// Add padding to a stringstream to fill it up to a maximum width.
+  /// \arg Ss The stringstream to add padding to.
+  /// \arg Width The maximum width until which should be padded.
+  /// \arg Taken The number of characters that are already filled up.
+  /// \arg C The character that should be used for padding.
   static void padding(std::stringstream& Ss, std::size_t Width, std::size_t Taken, char C) {
     for (std::size_t I = 0; I < (std::max)(Width, Taken) - Taken; ++I) {
       Ss << C;
     }
   }
 
+  /// The maximum number of elements that will be printed.
   static constexpr const int MaxElementPrintCount = 20;
+  /// The minimum width of columns that are printed.
   static constexpr const std::size_t MinColumnWidth = 10;
 
   // NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
+  /// The vector of individuals that have been evaluated. This vector has the same size as F.
   inline static std::vector<Individual> X = {};
+  /// The vector of metric summaries associated to the evaluated individuals. This vector has the same size as X.
   inline static std::vector<std::map<std::string, firestarter::measurement::Summary>> F = {};
   // NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
 
 public:
+  /// Append an evaluated individual to the history.
+  /// \arg Ind The individual to add.
+  /// \arg Metric The metric summaries for this individual.
   static void append(std::vector<unsigned> const& Ind,
                      std::map<std::string, firestarter::measurement::Summary> const& Metric) {
     X.push_back(Ind);
     F.push_back(Metric);
   }
 
+  /// Look up an individual in the history and return the metric summaries if it is in the history.
+  /// \arg Individual The individual which may already be evaluated.
+  /// \returns The metric summaries if the individual is in the history or std::nullopt otherwise.
   static auto find(std::vector<unsigned> const& Individual)
       -> std::optional<std::map<std::string, firestarter::measurement::Summary>> {
     auto FindEqual = [&Individual](auto const& Ind) { return Ind == Individual; };
@@ -82,6 +104,10 @@ struct History {
     return F[Dist];
   }
 
+  /// Print the best individuals per metric. This will print a table with the average metric value and individuals per
+  /// metric.
+  /// \arg OptimizationMetrics The metrics for which the best individual should be printed.
+  /// \arg PayloadItems The instruction of the associated instruction groups used in the optimization.
   static void printBest(std::vector<std::string> const& OptimizationMetrics,
                         std::vector<std::string> const& PayloadItems) {
     // TODO(Issue #76): print paretto front
diff --git a/include/firestarter/Optimizer/OptimizerWorker.hpp b/include/firestarter/Optimizer/OptimizerWorker.hpp
index aeae137e..f534485c 100644
--- a/include/firestarter/Optimizer/OptimizerWorker.hpp
+++ b/include/firestarter/Optimizer/OptimizerWorker.hpp
@@ -27,27 +27,42 @@
 
 namespace firestarter::optimizer {
 
+/// Class to run the optimization in another thread.
 class OptimizerWorker {
 public:
+  /// Start the optimization in another thread.
+  /// \arg Algorithm The algorithm that is used to optimize FIRESTARTER.
+  /// \arg Population The population containing the problem that will be used to optimize FIRESTARTER.
+  /// \arg Individuals The number of individuals for the initial population.
+  /// \arg Preheat The time we preheat before starting the optimization.
   OptimizerWorker(std::unique_ptr<firestarter::optimizer::Algorithm>&& Algorithm,
-                  std::unique_ptr<firestarter::optimizer::Population>&& Population, std::string OptimizationAlgorithm,
-                  unsigned Individuals, std::chrono::seconds const& Preheat);
+                  std::unique_ptr<firestarter::optimizer::Population>&& Population, unsigned Individuals,
+                  std::chrono::seconds const& Preheat);
 
   ~OptimizerWorker() = default;
 
+  /// Join the optimization thread.
   void join() const;
 
+  /// Kill the optimization thread.
   void kill() const;
 
 private:
+  /// The thread worker that does the optimization.
+  /// \arg OptimizerWorker The pointer to the OptimizerWorker (this) datastructure.
+  /// \returns a nullptr
   static auto optimizerThread(void* OptimizerWorker) -> void*;
 
+  /// The algorithm that is used to optimize FIRESTARTER.
   std::unique_ptr<firestarter::optimizer::Algorithm> Algorithm;
+  /// The population containing the problem that will be used to optimize FIRESTARTER.
   std::unique_ptr<firestarter::optimizer::Population> Population;
-  std::string OptimizationAlgorithm;
+  /// The number of individuals for the initial population.
   unsigned Individuals;
+  /// The time we preheat before starting the optimization.
   std::chrono::seconds Preheat;
 
+  /// The pthread that is used for the optimization.
   pthread_t WorkerThread{};
 };
 
diff --git a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
index fa1cbd50..36646cab 100644
--- a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
+++ b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
@@ -31,8 +31,39 @@
 
 namespace firestarter::optimizer::problem {
 
+/// This class models the problem of optimizing firestarter on the fly. The evaluation of metrics is done by switching
+/// the settings of the high load routine and measuring the metric in the specified runtime.
 class CLIArgumentProblem final : public firestarter::optimizer::Problem {
+private:
+  /// The function which takes instruction groups and switches the payload in the high load function to the supplied
+  /// ones.
+  std::function<void(std::vector<std::pair<std::string, unsigned>> const&)> ChangePayloadFunction;
+  /// The shared pointer to the measurement infrastructure which will be used to get metric values.
+  std::shared_ptr<firestarter::measurement::MeasurementWorker> MeasurementWorker;
+  /// The metrics that are used in the optimization. They may have a dash at the start to allow them to be changed from
+  /// maximization to minimization.
+  std::vector<std::string> Metrics;
+  /// The duration of the measurement.
+  std::chrono::seconds Timeout;
+  /// The time to skip from the measurement start
+  std::chrono::milliseconds StartDelta;
+  /// The time to skip from the measurement stop
+  std::chrono::milliseconds StopDelta;
+  /// The vector of instruction groups that are used in the optimization for the payload.
+  std::vector<std::string> InstructionGroups;
+
 public:
+  /// Constructor for the problem of optimizing firestarter on the fly.
+  /// \arg ChangePayloadFunction The function which takes instruction groups and switches the payload in the high load
+  /// function to the supplied ones.
+  /// \arg MeasurementWorker The shared pointer to the measurement infrastructure which will be used to get metric
+  /// values
+  /// \arg Metrics The metrics that are used in the optimization. They may have a dash at the start to allow them to be
+  /// changed from maximization to minimization.
+  /// \arg Timeout The duration of the measurement.
+  /// \arg StartDelta The time to skip from the measurement start
+  /// \arg StopDelta The time to skip from the measurement stop
+  /// \arg InstructionGroups The vector of instruction groups that are used in the optimization for the payload.
   CLIArgumentProblem(std::function<void(std::vector<std::pair<std::string, unsigned>> const&)>&& ChangePayloadFunction,
                      std::shared_ptr<firestarter::measurement::MeasurementWorker> MeasurementWorker,
                      std::vector<std::string> const& Metrics, std::chrono::seconds Timeout,
@@ -50,7 +81,9 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem {
 
   ~CLIArgumentProblem() override = default;
 
-  // return all available metrics for the individual
+  /// Evaluate the given individual by switching the current payload, doing the measurement and returning the results.
+  /// \arg Individual The individual that should be measured.
+  /// \returns The map from all metrics to their respective summaries for the measured individual.
   auto metrics(std::vector<unsigned> const& Individual)
       -> std::map<std::string, firestarter::measurement::Summary> override {
     // increment evaluation idx
@@ -83,6 +116,11 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem {
     return MeasurementWorker->getValues(StartDelta, StopDelta);
   }
 
+  /// Calculate the fitness based on the metric summaries of an individual. This will select the metrics that are
+  /// required for the optimization, round them and potentially invert the results if the optimization metric name
+  /// starts with a dash ('-').
+  /// \arg Summaries The metric values for all metrics for an individual
+  /// \return The vector containing the fitness for the metrics that are used in the optimization.
   [[nodiscard]] auto fitness(std::map<std::string, firestarter::measurement::Summary> const& Summaries) const
       -> std::vector<double> override {
     std::vector<double> Values = {};
@@ -113,7 +151,8 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem {
     return Values;
   }
 
-  // get the bounds of the problem
+  /// Get the bounds of the problem. We currently fix these bounds to a range from 0 to 100 for every instruction.
+  /// \returns A vector the size of the number of instruction groups containing a tuple(0, 100).
   [[nodiscard]] auto getBounds() const -> std::vector<std::tuple<unsigned, unsigned>> override {
     std::vector<std::tuple<unsigned, unsigned>> Vec(InstructionGroups.size(),
                                                     std::make_tuple<unsigned, unsigned>(0, 100));
@@ -121,17 +160,8 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem {
     return Vec;
   }
 
-  // get the number of objectives.
+  /// Get the number of optimization objectives.
   [[nodiscard]] auto getNobjs() const -> std::size_t override { return Metrics.size(); }
-
-private:
-  std::function<void(std::vector<std::pair<std::string, unsigned>> const&)> ChangePayloadFunction;
-  std::shared_ptr<firestarter::measurement::MeasurementWorker> MeasurementWorker;
-  std::vector<std::string> Metrics;
-  std::chrono::seconds Timeout;
-  std::chrono::milliseconds StartDelta;
-  std::chrono::milliseconds StopDelta;
-  std::vector<std::string> InstructionGroups;
 };
 
 } // namespace firestarter::optimizer::problem
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 02e5ab48..b5239539 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -211,8 +211,8 @@ void Firestarter::mainThread() {
     if (Cfg.Optimize) {
       auto StartTime = optimizer::History::getTime();
 
-      Firestarter::Optimizer = std::make_unique<optimizer::OptimizerWorker>(
-          std::move(Algorithm), std::move(Population), Cfg.OptimizationAlgorithm, Cfg.Individuals, Cfg.Preheat);
+      Firestarter::Optimizer = std::make_unique<optimizer::OptimizerWorker>(std::move(Algorithm), std::move(Population),
+                                                                            Cfg.Individuals, Cfg.Preheat);
 
       // wait here until optimizer thread terminates
       Firestarter::Optimizer->join();
diff --git a/src/firestarter/Optimizer/OptimizerWorker.cpp b/src/firestarter/Optimizer/OptimizerWorker.cpp
index a84182b3..b3085ab0 100644
--- a/src/firestarter/Optimizer/OptimizerWorker.cpp
+++ b/src/firestarter/Optimizer/OptimizerWorker.cpp
@@ -19,6 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Optimizer/Algorithm/NSGA2.hpp"
 #include <firestarter/Optimizer/OptimizerWorker.hpp>
 
 #include <thread>
@@ -27,12 +28,10 @@
 namespace firestarter::optimizer {
 
 OptimizerWorker::OptimizerWorker(std::unique_ptr<firestarter::optimizer::Algorithm>&& Algorithm,
-                                 std::unique_ptr<firestarter::optimizer::Population>&& Population,
-                                 std::string OptimizationAlgorithm, unsigned Individuals,
+                                 std::unique_ptr<firestarter::optimizer::Population>&& Population, unsigned Individuals,
                                  std::chrono::seconds const& Preheat)
     : Algorithm(std::move(Algorithm))
     , Population(std::move(Population))
-    , OptimizationAlgorithm(std::move(OptimizationAlgorithm))
     , Individuals(Individuals)
     , Preheat(Preheat) {
   pthread_create(&this->WorkerThread, nullptr, OptimizerWorker::optimizerThread, this);
@@ -63,7 +62,7 @@ auto OptimizerWorker::optimizerThread(void* OptimizerWorker) -> void* {
   std::this_thread::sleep_for(This->Preheat);
 
   // For NSGA2 we start with a initial population
-  if (This->OptimizationAlgorithm == "NSGA2") {
+  if (dynamic_cast<algorithm::NSGA2*>(This->Algorithm.get())) {
     This->Population->generateInitialPopulation(This->Individuals);
   }
 

From 1690ec0ab99f25aba070bffdf32cb345d450b3a5 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 15:04:40 +0100
Subject: [PATCH 153/167] ci: remove GHC artifacts to make more space for the
 rocm build

---
 .github/workflows/cmake.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index 6b4c9178..b58b7e03 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -21,6 +21,8 @@ jobs:
       run: |
         sudo rm -rf /usr/local/lib/android
         sudo rm -rf /usr/share/dotnet
+        sudo rm -rf /opt/ghc
+        sudo rm -rf /usr/local/.ghcup
 
     - name: Install g++-9 (if needed)
       if: matrix.compiler == 'g++-9'

From dee9fc12079f6927744bba02bcd76f9fe6034a30 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 15:34:02 +0100
Subject: [PATCH 154/167] explicitly check if number of GPUs is not equal to 0.

---
 src/firestarter/Cuda/Cuda.cpp     | 2 +-
 src/firestarter/OneAPI/OneAPI.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/firestarter/Cuda/Cuda.cpp b/src/firestarter/Cuda/Cuda.cpp
index 6ce974f1..492011e7 100644
--- a/src/firestarter/Cuda/Cuda.cpp
+++ b/src/firestarter/Cuda/Cuda.cpp
@@ -280,7 +280,7 @@ void Cuda::initGpus(std::condition_variable& WaitForInitCv, const volatile fires
   std::mutex GpuThreadsWaitForInitCvMutex;
   std::vector<std::thread> GpuThreads;
 
-  if (Gpus) {
+  if (Gpus != 0) {
     compat::accellSafeCall(compat::init(0), __FILE__, __LINE__);
 
     int DevCount{};
diff --git a/src/firestarter/OneAPI/OneAPI.cpp b/src/firestarter/OneAPI/OneAPI.cpp
index ddbd0dec..98ef6b4a 100644
--- a/src/firestarter/OneAPI/OneAPI.cpp
+++ b/src/firestarter/OneAPI/OneAPI.cpp
@@ -260,7 +260,7 @@ void OneAPI::initGpus(std::condition_variable& WaitForInitCv, const volatile fir
   std::mutex GpuThreadsWaitForInitCvMutex;
   std::vector<std::thread> GpuThreads;
 
-  if (Gpus) {
+  if (Gpus != 0) {
     auto Platforms = sycl::platform::get_platforms();
 
     if (Platforms.empty()) {

From 9c3291629af3f3b3ccee4de3ee30fef7f43737ff Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 15:35:03 +0100
Subject: [PATCH 155/167] update docstring in cuda and oneapi files

---
 include/firestarter/Cuda/Cuda.hpp     | 4 ++--
 include/firestarter/OneAPI/OneAPI.hpp | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/firestarter/Cuda/Cuda.hpp b/include/firestarter/Cuda/Cuda.hpp
index e4314eb7..cdd14017 100644
--- a/include/firestarter/Cuda/Cuda.hpp
+++ b/include/firestarter/Cuda/Cuda.hpp
@@ -44,7 +44,7 @@ class Cuda {
   /// UseDouble is set the precision will be choosen automatically.
   /// \arg MatrixSize Set to a specific matrix size which will be choosen for the gemm operation or set to 0 for
   /// automatic selection.
-  /// \arg Gpus Select the number of gpus to stress or 0 for all.
+  /// \arg Gpus Select the number of gpus to stress or -1 for all.
   static void initGpus(std::condition_variable& WaitForInitCv, const volatile firestarter::LoadThreadWorkType& LoadVar,
                        bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus);
 
@@ -57,7 +57,7 @@ class Cuda {
   /// UseDouble is set the precision will be choosen automatically.
   /// \arg MatrixSize Set to a specific matrix size which will be choosen for the gemm operation or set to 0 for
   /// automatic selection.
-  /// \arg Gpus Select the number of gpus to stress or 0 for all.
+  /// \arg Gpus Select the number of gpus to stress or -1 for all.
   Cuda(const volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize,
        int Gpus)
 #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP)
diff --git a/include/firestarter/OneAPI/OneAPI.hpp b/include/firestarter/OneAPI/OneAPI.hpp
index e7e973fc..a84e2c53 100644
--- a/include/firestarter/OneAPI/OneAPI.hpp
+++ b/include/firestarter/OneAPI/OneAPI.hpp
@@ -44,7 +44,7 @@ class OneAPI {
   /// UseDouble is set the precision will be choosen automatically.
   /// \arg MatrixSize Set to a specific matrix size which will be choosen for the gemm operation or set to 0 for
   /// automatic selection.
-  /// \arg Gpus Select the number of gpus to stress or 0 for all.
+  /// \arg Gpus Select the number of gpus to stress or -1 for all.
   static void initGpus(std::condition_variable& WaitForInitCv, const volatile firestarter::LoadThreadWorkType& LoadVar,
                        bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus);
 
@@ -57,7 +57,7 @@ class OneAPI {
   /// UseDouble is set the precision will be choosen automatically.
   /// \arg MatrixSize Set to a specific matrix size which will be choosen for the gemm operation or set to 0 for
   /// automatic selection.
-  /// \arg Gpus Select the number of gpus to stress or 0 for all.
+  /// \arg Gpus Select the number of gpus to stress or -1 for all.
   OneAPI(const volatile firestarter::LoadThreadWorkType& LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize,
          int Gpus)
 #if defined(FIRESTARTER_BUILD_ONEAPI)

From 08a3772c992e96521d0c86d3407b2def1c58c3e8 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 15:35:13 +0100
Subject: [PATCH 156/167] add docstring for config

---
 include/firestarter/Config.hpp | 42 ++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/include/firestarter/Config.hpp b/include/firestarter/Config.hpp
index 31f89062..5c272401 100644
--- a/include/firestarter/Config.hpp
+++ b/include/firestarter/Config.hpp
@@ -27,55 +27,97 @@
 
 namespace firestarter {
 
+/// This struct contains the parsed config from the command line for Firestarter.
 struct Config {
+  /// The argument vector from the command line.
   const char** Argv;
 
+  /// The timeout after which firestarter terminates. This is available in combination with optimization.
   std::chrono::seconds Timeout{};
+  /// The period with which the low/high load routine is switched.
   std::chrono::microseconds Period{};
+  /// The load in the range of 0 < Load <= Period, which controls how long of the period the high-load loop runs.
   std::chrono::microseconds Load{};
 
+  /// The interval at which the registers will be dumped to the file.
   std::chrono::seconds DumpRegistersTimeDelta = std::chrono::seconds(0);
+  /// The time to skip from the measurement start
   std::chrono::milliseconds StartDelta = std::chrono::milliseconds(0);
+  /// The time to skip from the measurement stop
   std::chrono::milliseconds StopDelta = std::chrono::milliseconds(0);
+  /// Metric values will be polled by the MeasurementInterval.
   std::chrono::milliseconds MeasurementInterval = std::chrono::milliseconds(0);
+  /// The time how long the processor will be preheated before starting a measurement or optimization.
   std::chrono::seconds Preheat{};
+  /// The time how long a measurement should take.
   std::chrono::seconds EvaluationDuration{};
 
+  /// The crossover probability used in the NSGA2 optimization algorithm.
   double Nsga2Cr;
+  /// The mutation probability used in the NSGA2 optimization algorithm.
   double Nsga2M;
 
+  /// The name of the metrics that are read from stdin.
   std::vector<std::string> StdinMetrics;
+  /// The paths to the metrics that are loaded using shared libraries.
   std::vector<std::string> MetricPaths;
+  /// The list of metrics that are used for maximization. If a metric is prefixed with '-' it will be minimized.
   std::vector<std::string> OptimizationMetrics;
 
+  /// The optional cpu bind that allows pinning to specific cpus.
   std::string CpuBind;
+  /// The optional selected instruction groups. If this is empty the default will be chosen.
   std::string InstructionGroups;
+  /// The file where the dump register feature will save its output to.
   std::string DumpRegistersOutpath;
+  /// The name of the optimization algorithm.
   std::string OptimizationAlgorithm;
+  /// The file where the data saved during optimization is saved.
   std::string OptimizeOutfile;
 
+  /// The argument count from the command line.
   int Argc;
+  /// The requested number of threads firestarter should run with. 0 means all threads.
   unsigned RequestedNumThreads;
+  /// The selected function id. 0 means automatic selection.
   unsigned FunctionId;
+  /// The line count of the payload. 0 means default.
   unsigned LineCount = 0;
+  /// The number of gpus firestarter should stress. Default is -1, which means all gpus.
   int Gpus = 0;
+  /// The matrix size which should be used. 0 means automatic detection.
   unsigned GpuMatrixSize = 0;
+  /// The number of individuals that should be used for the optimization.
   unsigned Individuals;
+  /// The number of generations that should be used for the optimization.
   unsigned Generations;
 
+  /// If the function summary should be printed.
   bool PrintFunctionSummary;
+  /// If the available instruction groups for a function should be printed.
   bool ListInstructionGroups;
+  /// Allow payloads that are not supported on the current processor.
   bool AllowUnavailablePayload = false;
+  /// Is the dump registers debug feature enabled?
   bool DumpRegisters = false;
+  /// Is the error detection feature enabled?
   bool ErrorDetection = false;
+  /// Should the GPUs use floating point precision? If neither GpuUseFloat nor GpuUseDouble is set, precision will be
+  /// chosen automatically.
   bool GpuUseFloat = false;
+  /// Should the GPUs use double precision? If neither GpuUseFloat nor GpuUseDouble is set, precision will be
+  /// chosen automatically.
   bool GpuUseDouble = false;
+  /// Should we print all available metrics.
   bool ListMetrics = false;
+  /// Do we perform a measurement.
   bool Measurement = false;
+  /// Do we perform optimization.
   bool Optimize = false;
 
   Config() = delete;
 
+  /// Parse the config from the command line arguments.
   Config(int Argc, const char** Argv);
 };
 

From 0de3fa6a37f8f724ec79bcaf163a010f480ff468 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 16:59:02 +0100
Subject: [PATCH 157/167] fix offset in error detection algorithm. add
 docstrings

---
 include/firestarter/Constants.hpp             | 17 ++++++++-
 include/firestarter/DumpRegisterStruct.hpp    | 19 ++++++----
 .../Environment/X86/Payload/X86Payload.hpp    | 21 +++++++++--
 include/firestarter/ErrorDetectionStruct.hpp  | 14 +++++---
 include/firestarter/LoadWorkerData.hpp        | 36 +++++++++++++++++++
 5 files changed, 93 insertions(+), 14 deletions(-)

diff --git a/include/firestarter/Constants.hpp b/include/firestarter/Constants.hpp
index 9549f581..71a0d992 100644
--- a/include/firestarter/Constants.hpp
+++ b/include/firestarter/Constants.hpp
@@ -30,14 +30,29 @@ using EightBytesType = uint64_t;
 // We want enum to have the size of 8B. Disable the warnings for bigger enum size than needed.
 // NOLINTBEGIN(performance-enum-size)
 
-enum class LoadThreadState : EightBytesType { ThreadWait = 1, ThreadWork = 2, ThreadInit = 3, ThreadSwitch = 4 };
+/// This enum describes the state of the load workers.
+enum class LoadThreadState : EightBytesType {
+  /// Idle
+  ThreadWait = 1,
+  /// Work loop (both low and high load)
+  ThreadWork = 2,
+  /// Init the thread
+  ThreadInit = 3,
+  /// Tell the thread to recompile the payload and reinitialize the data.
+  ThreadSwitch = 4
+};
 
+/// This enum describes the Load that should be applied by firestarter.
 enum class LoadThreadWorkType : EightBytesType {
   /* DO NOT CHANGE! the asm load-loop tests if load-variable is == 0 */
+  /// Apply low load
   LoadLow = 0,
   /* DO NOT CHANGE! the asm load-loop continues until the load-variable is != 1 */
+  /// Apply high load
   LoadHigh = 1,
+  /// Exit the load loop and stop the execution of firestarter.
   LoadStop = 2,
+  /// Exit the load loop.
   LoadSwitch = 4
 };
 // NOLINTEND(performance-enum-size)
diff --git a/include/firestarter/DumpRegisterStruct.hpp b/include/firestarter/DumpRegisterStruct.hpp
index 49213508..c60a2666 100644
--- a/include/firestarter/DumpRegisterStruct.hpp
+++ b/include/firestarter/DumpRegisterStruct.hpp
@@ -29,8 +29,13 @@ namespace firestarter {
 /* DO NOT CHANGE! the asm load-loop tests if it should dump the current register
  * content */
 // NOLINTBEGIN(performance-enum-size)
-// Define the variable with the size of a cache line
-enum class DumpVariable : EightBytesType { Start = 0, Wait = 1 };
+/// This enum defines the variable that is used to control when the registers should be dumped.
+enum class DumpVariable : EightBytesType {
+  /// Start saving register to memory
+  Start = 0,
+  /// When done, we change it to the Wait state. There we do nothing.
+  Wait = 1
+};
 // NOLINTEND(performance-enum-size)
 
 // The maximal number of SIMD registers. This is currently 32 for zmm registers.
@@ -40,14 +45,16 @@ constexpr const auto RegisterMaxSize = 8;
 /// The maximum number of doubles in SIMD registers multiplied with the maximum number of vector registers.
 constexpr const auto MaxNumberOfDoublesInVectorRegisters = RegisterMaxNum * RegisterMaxSize;
 
-// REGISTER_MAX_NUM cachelines
+/// This struct is used to do the communication between the high-load loop and the part of the program that saves the
+/// dumped registers to a file.
 struct DumpRegisterStruct {
+  /// This array will contain the dumped registers. It has the size of 32 cachelines. (8B doubles * 8 doubles in a
+  /// register * 32 registers)
   std::array<double, MaxNumberOfDoublesInVectorRegisters> RegisterValues;
-  // pad to use a whole cacheline
+  /// Pad the DumpVar to use a whole cacheline
   std::array<EightBytesType, 7> Padding;
+  /// The variable that controls the execution of the dump register code in the high-load routine.
   volatile DumpVariable DumpVar;
 };
 
-#undef REGISTER_MAX_NUM
-
 } // namespace firestarter
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index ec61b5a2..82ea7e0c 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -386,12 +386,15 @@ class X86Payload : public environment::payload::Payload {
       auto L0 = Cb.newLabel();
       Cb.bind(L0);
 
+      // Atomically compare the data in the communication with the local data.
       Cb.lock();
       Cb.cmpxchg16b(asmjit::x86::ptr(asmjit::x86::r8));
 
       auto L1 = Cb.newLabel();
       Cb.jnz(L1);
 
+      // Communication had the same data as saved in locals 0 and 1. rcx, rbx saved in communication.
+      // Save written data rcx, rbx in locals 0 and 1.
       Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local0Offset), asmjit::x86::rcx);
       Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local1Offset), asmjit::x86::rbx);
       Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local2Offset), asmjit::Imm(0));
@@ -404,38 +407,51 @@ class X86Payload : public environment::payload::Payload {
 
       Cb.bind(L1);
 
+      // Communication had different data than saved in locals 0 and 1. rdx, rax contains the data in communication.
+      // Compare the iteration counter of this and the other thread
       Cb.cmp(asmjit::x86::rcx, asmjit::x86::rdx);
 
       auto L2 = Cb.newLabel();
       Cb.jle(L2);
 
+      // The current iteration counter is bigger than the counter of the other thread.
+      // Save the current counter and hash into our local storage.
       Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local0Offset), asmjit::x86::rcx);
       Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local1Offset), asmjit::x86::rbx);
 
+      // Repeat the lock cmpxchg16b routine until the other thread catches up.
       Cb.jmp(L0);
 
       Cb.bind(L2);
 
+      // The current iteration counter is smaller than or equal to the iteration counter of the other thread.
+
       auto L3 = Cb.newLabel();
 
+      // Check if the read value from the other thread is saved locally.
       Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, Local2Offset), asmjit::Imm(0));
       Cb.jne(L3);
       Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, Local3Offset), asmjit::Imm(0));
       Cb.jne(L3);
 
+      // Save the last read value from the other thread into the local storage.
       Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local2Offset), asmjit::x86::rdx);
       Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, Local3Offset), asmjit::x86::rax);
 
       Cb.bind(L3);
 
-      Cb.cmp(asmjit::x86::rcx, asmjit::x86::ptr_64(asmjit::x86::r9, 16));
+      // Check if the iteration counters of the two threads are equal
+      Cb.cmp(asmjit::x86::rcx, asmjit::x86::ptr_64(asmjit::x86::r9, Local2Offset));
       Cb.mov(asmjit::x86::rax, asmjit::Imm(4));
+      // If the iteration counter of this thread is smaller, skip this check. The other thread will wait for this one.
       Cb.jne(L6);
 
-      Cb.cmp(asmjit::x86::rbx, asmjit::x86::ptr_64(asmjit::x86::r9, 24));
+      // Compare the hashes and write the result
+      Cb.cmp(asmjit::x86::rbx, asmjit::x86::ptr_64(asmjit::x86::r9, Local3Offset));
       auto L4 = Cb.newLabel();
       Cb.jne(L4);
 
+      // Hash check succeeded.
       Cb.mov(asmjit::x86::rax, asmjit::Imm(0));
 
       auto L5 = Cb.newLabel();
@@ -443,6 +459,7 @@ class X86Payload : public environment::payload::Payload {
 
       Cb.bind(L4);
 
+      // Hash check failed
       Cb.mov(asmjit::x86::rax, asmjit::Imm(1));
 
       Cb.bind(L5);
diff --git a/include/firestarter/ErrorDetectionStruct.hpp b/include/firestarter/ErrorDetectionStruct.hpp
index 6e5a7626..4d26d9f0 100644
--- a/include/firestarter/ErrorDetectionStruct.hpp
+++ b/include/firestarter/ErrorDetectionStruct.hpp
@@ -24,22 +24,26 @@
 #include <cstdint>
 namespace firestarter {
 
+/// This struct is used for the error detection feature. The error detection works between two threads. The current one
+/// and one on the left. Analogous for the thread on the right. We hash the contents of the vector registers and compare
+/// them with the current iteration counter against the other threads.
 struct ErrorDetectionStruct {
   struct OneSide {
-    // the pointer to 16B of communication
+    /// The pointer to 16B of communication between the two threads which is used with lock cmpxchg16b
     uint64_t* Communication;
+    /// The local variables that are used for the error detection algorithm
     // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
     uint64_t Locals[4];
-    // if this variable is not 0, an error occured in the comparison with the
-    // left thread.
+    /// If this variable is not 0, an error occurred in the comparison with the other thread.
     uint64_t Error;
+    /// Padding to fill up a cache line.
     // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
     uint64_t Padding[2];
   };
 
-  // we have two cache lines (64B) containing each two 16B local variable and
-  // one ptr (8B)
+  /// The data that is used for the error detection algorithm between the current and the thread left to it.
   OneSide Left;
+  /// The data that is used for the error detection algorithm between the current and the thread right to it.
   OneSide Right;
 };
 
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index 36061390..413f4b7f 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -33,11 +33,17 @@
 
 namespace firestarter {
 
+/// This class contains the information that is required to execute the load routines and change the payload during
+/// executions.
 class LoadWorkerData {
 public:
+  /// This struct models parameters acquired during the execution of the high-load routine.
   struct Metrics {
+    /// The number of iterations the high-load loop was executed.
     std::atomic<uint64_t> Iterations{};
+    /// The start of the execution of the high-load loop.
     std::atomic<uint64_t> StartTsc{};
+    /// The stop of the execution of the high-load loop.
     std::atomic<uint64_t> StopTsc{};
 
     auto operator=(const Metrics& Other) -> Metrics& {
@@ -52,6 +58,16 @@ class LoadWorkerData {
     }
   };
 
+  /// Create the datastructure that is shared between a load worker thread and firestarter.
+  /// \arg Id The id of the load worker thread. They are counted from 0 to the maximum number of threads - 1.
+  /// \arg Environment The reference to the environment which allows setting the thread affinity and getting the current
+  /// timestamp.
+  /// \arg LoadVar The variable that controls the execution of the load worker.
+  /// \arg Period Is used in combination with the LoadVar for the low load routine.
+  /// \arg DumpRegisters Should the code to support dumping registers be baked into the high load routine of the
+  /// compiled payload.
+  /// \arg ErrorDetection Should the code to support error detection between threads be baked into the high load routine
+  /// of the compiled payload.
   LoadWorkerData(uint64_t Id, const environment::Environment& Environment, volatile LoadThreadWorkType& LoadVar,
                  std::chrono::microseconds Period, bool DumpRegisters, bool ErrorDetection)
       : LoadVar(LoadVar)
@@ -64,14 +80,21 @@ class LoadWorkerData {
 
   ~LoadWorkerData() = default;
 
+  /// Set the shared pointer to the memory shared between two threads for the communication required for the error
+  /// detection feature.
+  /// \arg CommunicationLeft The memory shared with the left thread.
+  /// \arg CommunicationRight The memory shared with the right thread.
   void setErrorCommunication(std::shared_ptr<uint64_t> CommunicationLeft,
                              std::shared_ptr<uint64_t> CommunicationRight) {
     this->CommunicationLeft = std::move(CommunicationLeft);
     this->CommunicationRight = std::move(CommunicationRight);
   }
 
+  /// Getter for the id of the thread.
   [[nodiscard]] auto id() const -> uint64_t { return Id; }
+  /// Const getter for the environment.
   [[nodiscard]] auto environment() const -> const environment::Environment& { return Environment; }
+  /// Getter for the current platform config.
   [[nodiscard]] auto config() const -> environment::platform::PlatformConfig& { return *Config; }
 
   /// Access the DumpRegisterStruct. Asserts when dumping registers is not enabled.
@@ -104,7 +127,10 @@ class LoadWorkerData {
   /// The compiled payload which contains the pointers to the specific functions which are executed and some stats.
   environment::payload::CompiledPayload::UniquePtr CompiledPayloadPtr = {nullptr, nullptr};
 
+  /// The variable that controls the execution of the load worker.
   volatile LoadThreadWorkType& LoadVar;
+
+  /// The size of the buffer that is allocated in the load worker.
   uint64_t BuffersizeMem{};
 
   /// The collected metrics from the current execution of the LoadThreadState::ThreadWork state. Do not read from it.
@@ -116,13 +142,23 @@ class LoadWorkerData {
   // period in usecs
   // used in low load routine to sleep 1/100th of this time
   std::chrono::microseconds Period;
+
+  /// Should the code to support dumping registers be baked into the high load routine of the compiled payload.
   bool DumpRegisters;
+
+  /// Should the code to support error detection between threads be baked into the high load routine of the compiled
+  /// payload.
   bool ErrorDetection;
+  /// The pointer to the variable that is used for communication to the left thread for the error detection feature.
   std::shared_ptr<uint64_t> CommunicationLeft;
+  /// The pointer to the variable that is used for communication to the right thread for the error detection feature.
   std::shared_ptr<uint64_t> CommunicationRight;
 
+  /// The id of this load thread.
   const uint64_t Id;
+  /// The reference to the environment which allows setting the thread affinity and getting the current timestamp.
   const environment::Environment& Environment;
+  /// The config that is cloned from the environment for this specific load worker.
   std::unique_ptr<environment::platform::PlatformConfig> Config;
 };
 

From 427675a7acefcf11273d27504c7d3602a379e2a0 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 17:09:59 +0100
Subject: [PATCH 158/167] docstring for DumpRegisterWorkerData

---
 include/firestarter/DumpRegisterWorkerData.hpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/firestarter/DumpRegisterWorkerData.hpp b/include/firestarter/DumpRegisterWorkerData.hpp
index 3de34fa1..4236ae53 100644
--- a/include/firestarter/DumpRegisterWorkerData.hpp
+++ b/include/firestarter/DumpRegisterWorkerData.hpp
@@ -29,10 +29,18 @@
 
 namespace firestarter {
 
+/// This class holds the data that is required for the worker thread that dumps the register contents to a file.
 class DumpRegisterWorkerData {
 public:
   DumpRegisterWorkerData() = delete;
 
+  /// Initialize the DumpRegisterWorkerData.
+  /// \arg LoadWorkerDataPtr The shared pointer to the data of the thread where registers should be dumped. We need it
+  /// to access the memory to which the registers are dumped as well as getting the size and count of registers.
+  /// \arg DumpTimeDelta The interval in seconds at which the register content will be dumped.
+  /// \arg DumpFilePath The folder that is used to dump registers to. If the string is empty the current directory will
+  /// be chosen. If it cannot be determined /tmp is used. In this directory a file called hamming_distance.csv will be
+  /// created.
   DumpRegisterWorkerData(std::shared_ptr<LoadWorkerData> LoadWorkerDataPtr, std::chrono::seconds DumpTimeDelta,
                          const std::string& DumpFilePath)
       : LoadWorkerDataPtr(std::move(LoadWorkerDataPtr))
@@ -52,8 +60,12 @@ class DumpRegisterWorkerData {
 
   ~DumpRegisterWorkerData() = default;
 
+  /// The shared pointer to the data of the thread where registers should be dumped. We need it to access the memory to
+  /// which the registers are dumped as well as getting the size and count of registers.
   std::shared_ptr<LoadWorkerData> LoadWorkerDataPtr;
+  /// The interval in seconds at which the register content will be dumped.
   const std::chrono::seconds DumpTimeDelta;
+  /// The folder in which the hamming_distance.csv file will be created.
   std::string DumpFilePath;
 };
 

From e04716fd2d968a10bb10a577be307766441e9d24 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 8 Nov 2024 17:36:44 +0100
Subject: [PATCH 159/167] add docstrings to Firestarter.hpp

---
 include/firestarter/Firestarter.hpp | 52 ++++++++++++++++++++++++++---
 1 file changed, 47 insertions(+), 5 deletions(-)

diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index ac6a1a03..51cb601a 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -47,51 +47,76 @@ extern "C" {
 
 namespace firestarter {
 
+/// This is the main class of firestarter and handles the execution of the program.
 class Firestarter {
 public:
   Firestarter() = delete;
 
+  /// Read the config, validate and throw on problems with config. Setup everything that is required for the execution
+  /// of firestarter.
+  /// \arg ProvidedConfig The config for the execution of Firestarter
   explicit Firestarter(Config&& ProvidedConfig);
 
   ~Firestarter() = default;
 
+  /// This function takes care of the execution of firestarter. It will start the load on CPUs and GPUs.
   void mainThread();
 
 private:
   const Config Cfg;
 
+  /// The class that handles setting up the payload for firestarter
   std::unique_ptr<environment::Environment> Environment;
+  /// The class for execution of the gemm routine on Cuda or HIP GPUs.
   std::unique_ptr<cuda::Cuda> Cuda;
+  /// The class for execution of the gemm routine on OneAPI GPUs.
   std::unique_ptr<oneapi::OneAPI> Oneapi;
+  /// The pointer to the optimization algorithm that is used by the optimization functionality.
   std::unique_ptr<firestarter::optimizer::Algorithm> Algorithm;
+  /// The thread that is used to dump register contents to a file.
   std::thread DumpRegisterWorkerThread;
+  /// The shared pointer to the datastructure that handles the management of metrics, acquisition of metric data and
+  /// provides summaries of a time range of metric values.
   std::shared_ptr<measurement::MeasurementWorker> MeasurementWorker;
 
+  /// The vector of thread handles for the load workers and shared pointers to their respective data.
   std::vector<std::pair<std::thread, std::shared_ptr<LoadWorkerData>>> LoadThreads;
+  /// The vector of communication data, where each element is shared between two neighbouring threads for the error
+  /// detection feature.
   std::vector<std::shared_ptr<uint64_t>> ErrorCommunication;
 
+  /// The population holding the problem that is used for the optimization feature.
   std::unique_ptr<firestarter::optimizer::Population> Population;
 
   // NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
   // TODO(Issue #85): Currently we support one instance of the Firestarter class. Variables that need to be accessed
   // from outside the class, e.g. in the sigterm handler are inline static.
 
+  /// The instance of the optimization worker that handles the execution of the optimization.
   inline static std::unique_ptr<optimizer::OptimizerWorker> Optimizer;
 
-  // variables to control the termination of the watchdog
+  /// Variable to control the termination of the watchdog
   inline static bool WatchdogTerminate = false;
+  /// Condition variable for the WatchdogTerminate to allow notifying when sleeping for a specific time.
   inline static std::condition_variable WatchdogTerminateAlert;
+  /// Mutex to guard access to WatchdogTerminate.
   inline static std::mutex WatchdogTerminateMutex;
 
-  // variable to control the load of the threads
+  /// Variable to control the load of the threads
   inline static volatile LoadThreadWorkType LoadVar = LoadThreadWorkType::LoadLow;
 
   // NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
 
-  // LoadThreadWorker.cpp
+  /// Spawn the load workers and initialize them.
   void initLoadWorkers();
+
+  /// Wait for the load worker to join
   void joinLoadWorkers();
+
+  /// Print the error report for the error detection feature.
   void printThreadErrorReport();
+
+  /// Print the performance report. It contains the estimation of the FLOPS and main memory bandwidth.
   void printPerformanceReport();
 
   /// Set the load workers to the ThreadInit state.
@@ -124,20 +149,37 @@ class Firestarter {
   /// before we wait for the acknowledgement of the thread.
   void signalLoadWorkers(LoadThreadState State, void (*Function)() = nullptr);
 
+  /// The function that is executed for each load thread.
+  /// \arg Td The shared pointer to the data that is required in this thread.
   static void loadThreadWorker(const std::shared_ptr<LoadWorkerData>& Td);
 
-  // WatchdogWorker.cpp
+  /// This function handles switching the load from high to low in a loop and stopping the execution if a timeout was
+  /// set.
+  /// \arg Period The period of the high/low switch. Set to zero to disable switching between a high and low load.
+  /// \arg Load The time of the period where high load is applied.
+  /// \arg Timeout The timeout after which firestarter stops. Set to zero to disable.
   static void watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load,
                              std::chrono::seconds Timeout);
 
-  // DumpRegisterWorker.cpp
+  /// Start the thread to dump the registers of the first load thread to a file.
   void initDumpRegisterWorker();
+
+  /// Wait for the dump register thread to terminate.
   void joinDumpRegisterWorker();
+
+  /// The thread that dumps the registers of the first thread to a file.
+  /// \arg Data The data that is required for the worker thread to dump the register contents to a file.
   static void dumpRegisterWorker(std::unique_ptr<DumpRegisterWorkerData> Data);
 
+  /// Set the load var to a specific value and update it with a memory fence across threads.
+  /// \arg Value The new load value.
   static void setLoad(LoadThreadWorkType Value);
 
+  /// Sigalarm handler does nothing.
   static void sigalrmHandler(int Signum);
+
+  /// Sigterm handler stops the execution of firestarter
+  /// \arg Signum The signal number is ignored.
   static void sigtermHandler(int Signum);
 };
 

From 32f13995edcdaef2f237e14c4da41c40399b4730 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 16 Nov 2024 22:11:00 +0100
Subject: [PATCH 160/167] fix bug saving json in history

---
 include/firestarter/Optimizer/History.hpp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/firestarter/Optimizer/History.hpp b/include/firestarter/Optimizer/History.hpp
index aae788d5..4a586514 100644
--- a/include/firestarter/Optimizer/History.hpp
+++ b/include/firestarter/Optimizer/History.hpp
@@ -274,6 +274,11 @@ struct History {
       Hostname = "unknown";
     }
 
+    // Strip away any remaining null terminators
+    if (const auto Pos = Hostname.find('\0'); Pos != std::string::npos) {
+      Hostname.erase(Pos);
+    }
+
     J["hostname"] = Hostname;
 
     J["startTime"] = StartTime;
@@ -299,10 +304,11 @@ struct History {
 
     std::string Outpath = Path;
     if (Outpath.empty()) {
-      // Wrapp get_current_dir_name in a unique ptr, as it needs to get deleted by free when it is not used anymore.
+      // Wrap get_current_dir_name in a unique ptr, as it needs to get deleted by free when it is not used anymore.
       const std::unique_ptr<char, void (*)(void*)> WrappedPwd = {get_current_dir_name(), free};
       if (WrappedPwd) {
-        Outpath = *WrappedPwd;
+        // Get the pointer captured in the WrappedPwd (not only the first char as would be with *WrappedPwd)
+        Outpath = WrappedPwd.get();
       } else {
         firestarter::log::warn() << "Could not find $PWD.";
         Outpath = "/tmp";

From f045530a65d634fb5ec0c0f9d52c6cd6f08e4289 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Sat, 16 Nov 2024 22:11:20 +0100
Subject: [PATCH 161/167] update comment in X86PlatformConfig.cpp

---
 .../Environment/X86/Platform/X86PlatformConfig.cpp            | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp b/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp
index cbf977df..fa4d4399 100644
--- a/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp
+++ b/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp
@@ -22,4 +22,6 @@
 // This file exists to get an entry in the compile commands database. Clangd will interpolate the include directories
 // for header files based on the source file with the best matching score. This file should be the best score for the
 // included header. Therefore we should not see any errors in this file for missing includes. For more infomation
-// look in the LLVM code base: clang/lib/Tooling/InterpolatingCompilationDatabase.cpp
\ No newline at end of file
+// look in the LLVM code base: clang/lib/Tooling/InterpolatingCompilationDatabase.cpp
+
+#include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
\ No newline at end of file

From b16aa6a2644cfe1537f3db85d35e9f79235996ab Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 19 Nov 2024 16:22:36 +0100
Subject: [PATCH 162/167] fix MiB print in cuda

---
 src/firestarter/Cuda/Cuda.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/firestarter/Cuda/Cuda.cpp b/src/firestarter/Cuda/Cuda.cpp
index 492011e7..bc8ba28a 100644
--- a/src/firestarter/Cuda/Cuda.cpp
+++ b/src/firestarter/Cuda/Cuda.cpp
@@ -224,7 +224,7 @@ void createLoad(std::condition_variable& WaitForInitCv, std::mutex& WaitForInitC
     firestarter::log::info() << "   GPU " << DeviceIndex << "\n"
                              << "    name:           " << Properties.name << "\n"
                              << "    memory:         " << ToMiB(MemoryAvail) << "/" << ToMiB(MemoryTotal)
-                             << " iB available (using " << ToMiB(UseBytes) << " iB)\n"
+                             << " MiB available (using " << ToMiB(UseBytes) << " MiB)\n"
                              << "    matrix size:    " << MatrixSize << "\n"
                              << "    used precision: "
                              << ((sizeof(FloatingPointType) == sizeof(double)) ? "double" : "single");

From 0fce1bb9173bc43f0b78f665c8942384a418d0ee Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Tue, 19 Nov 2024 16:38:19 +0100
Subject: [PATCH 163/167] unify firestarter includes for source files

---
 src/firestarter/Config.cpp                       |  7 ++++---
 src/firestarter/Cuda/Cuda.cpp                    |  9 +++++----
 src/firestarter/DumpRegisterWorker.cpp           |  3 +--
 src/firestarter/Environment/CPUTopology.cpp      |  6 +++---
 src/firestarter/Environment/Environment.cpp      |  5 +++--
 .../Environment/Payload/CompiledPayload.cpp      |  4 ++--
 .../Environment/Payload/PayloadSettings.cpp      |  3 ++-
 .../Environment/X86/Payload/AVX512Payload.cpp    |  4 ++--
 .../Environment/X86/Payload/AVXPayload.cpp       |  4 ++--
 .../Environment/X86/Payload/FMA4Payload.cpp      |  4 ++--
 .../Environment/X86/Payload/FMAPayload.cpp       |  4 ++--
 .../Environment/X86/Payload/SSE2Payload.cpp      |  5 ++---
 .../Environment/X86/Payload/X86Payload.cpp       |  7 ++++---
 .../Environment/X86/Payload/ZENFMAPayload.cpp    |  4 ++--
 .../Environment/X86/X86CPUTopology.cpp           |  4 ++--
 .../Environment/X86/X86Environment.cpp           |  4 ++--
 src/firestarter/Firestarter.cpp                  | 16 ++++++++--------
 src/firestarter/LoadWorker.cpp                   | 16 ++++++++--------
 src/firestarter/Main.cpp                         |  6 +++---
 .../Measurement/MeasurementWorker.cpp            |  5 +++--
 .../Measurement/Metric/IPCEstimate.cpp           |  3 ++-
 src/firestarter/Measurement/Metric/Perf.cpp      |  4 ++--
 src/firestarter/Measurement/Metric/RAPL.cpp      |  3 ++-
 src/firestarter/Measurement/Summary.cpp          |  2 +-
 src/firestarter/OneAPI/OneAPI.cpp                |  6 +++---
 src/firestarter/Optimizer/Algorithm/NSGA2.cpp    |  8 ++++----
 src/firestarter/Optimizer/OptimizerWorker.cpp    |  2 +-
 src/firestarter/Optimizer/Population.cpp         |  7 ++++---
 .../Optimizer/Util/MultiObjective.cpp            |  2 +-
 src/firestarter/SafeExit.cpp                     |  3 ++-
 src/firestarter/WatchdogWorker.cpp               |  2 +-
 31 files changed, 85 insertions(+), 77 deletions(-)

diff --git a/src/firestarter/Config.cpp b/src/firestarter/Config.cpp
index 7842e8e6..356580d5 100644
--- a/src/firestarter/Config.cpp
+++ b/src/firestarter/Config.cpp
@@ -19,10 +19,11 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Config.hpp"
+#include "firestarter/Constants.hpp"
+#include "firestarter/Logging/Log.hpp"
+
 #include <cxxopts.hpp>
-#include <firestarter/Config.hpp>
-#include <firestarter/Constants.hpp>
-#include <firestarter/Logging/Log.hpp>
 
 namespace {
 
diff --git a/src/firestarter/Cuda/Cuda.cpp b/src/firestarter/Cuda/Cuda.cpp
index bc8ba28a..9469073a 100644
--- a/src/firestarter/Cuda/Cuda.cpp
+++ b/src/firestarter/Cuda/Cuda.cpp
@@ -24,12 +24,13 @@
  * http://wili.cc/blog/gpu-burn.html
  *****************************************************************************/
 
+#include "firestarter/Cuda/Cuda.hpp"
+#include "firestarter/Cuda/CudaHipCompat.hpp"
+#include "firestarter/Logging/Log.hpp"
+
 #include <atomic>
+#include <cmath>
 #include <cstddef>
-#include <firestarter/Cuda/Cuda.hpp>
-#include <firestarter/Cuda/CudaHipCompat.hpp>
-#include <firestarter/LoadWorkerData.hpp>
-#include <firestarter/Logging/Log.hpp>
 #include <type_traits>
 
 namespace firestarter::cuda {
diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp
index 9bef4a69..127d0f1d 100644
--- a/src/firestarter/DumpRegisterWorker.cpp
+++ b/src/firestarter/DumpRegisterWorker.cpp
@@ -19,8 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <firestarter/Firestarter.hpp>
-#include <firestarter/Logging/Log.hpp>
+#include "firestarter/Firestarter.hpp"
 
 #include <fstream>
 #include <sstream>
diff --git a/src/firestarter/Environment/CPUTopology.cpp b/src/firestarter/Environment/CPUTopology.cpp
index b0fabc52..a7acf3f2 100644
--- a/src/firestarter/Environment/CPUTopology.cpp
+++ b/src/firestarter/Environment/CPUTopology.cpp
@@ -19,10 +19,10 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <array>
-#include <firestarter/Environment/CPUTopology.hpp>
-#include <firestarter/Logging/Log.hpp>
+#include "firestarter/Environment/CPUTopology.hpp"
+#include "firestarter/Logging/Log.hpp"
 
+#include <array>
 #include <fstream>
 #include <regex>
 #include <utility>
diff --git a/src/firestarter/Environment/Environment.cpp b/src/firestarter/Environment/Environment.cpp
index 590483c4..9d3f81c7 100644
--- a/src/firestarter/Environment/Environment.cpp
+++ b/src/firestarter/Environment/Environment.cpp
@@ -19,8 +19,9 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <firestarter/Environment/Environment.hpp>
-#include <firestarter/Logging/Log.hpp>
+#include "firestarter/Environment/Environment.hpp"
+#include "firestarter/Logging/Log.hpp"
+
 #include <regex>
 #include <stdexcept>
 #include <string>
diff --git a/src/firestarter/Environment/Payload/CompiledPayload.cpp b/src/firestarter/Environment/Payload/CompiledPayload.cpp
index 9c7b5410..33183d7a 100644
--- a/src/firestarter/Environment/Payload/CompiledPayload.cpp
+++ b/src/firestarter/Environment/Payload/CompiledPayload.cpp
@@ -19,8 +19,8 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <firestarter/Environment/Payload/CompiledPayload.hpp>
-#include <firestarter/Environment/Payload/Payload.hpp>
+#include "firestarter/Environment/Payload/CompiledPayload.hpp"
+#include "firestarter/Environment/Payload/Payload.hpp"
 
 namespace firestarter::environment::payload {
 
diff --git a/src/firestarter/Environment/Payload/PayloadSettings.cpp b/src/firestarter/Environment/Payload/PayloadSettings.cpp
index bd8997f0..25ca4ea4 100644
--- a/src/firestarter/Environment/Payload/PayloadSettings.cpp
+++ b/src/firestarter/Environment/Payload/PayloadSettings.cpp
@@ -19,9 +19,10 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Environment/Payload/PayloadSettings.hpp"
+
 #include <algorithm>
 #include <cmath>
-#include <firestarter/Environment/Payload/PayloadSettings.hpp>
 
 namespace firestarter::environment::payload {
 
diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
index 7eecadaf..f52a5410 100644
--- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp
@@ -19,8 +19,8 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <firestarter/Environment/X86/Payload/AVX512Payload.hpp>
-#include <firestarter/Environment/X86/Payload/CompiledX86Payload.hpp>
+#include "firestarter/Environment/X86/Payload/AVX512Payload.hpp"
+#include "firestarter/Environment/X86/Payload/CompiledX86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 
diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
index 22e54843..b20a85f7 100644
--- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp
@@ -19,8 +19,8 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <firestarter/Environment/X86/Payload/AVXPayload.hpp>
-#include <firestarter/Environment/X86/Payload/CompiledX86Payload.hpp>
+#include "firestarter/Environment/X86/Payload/AVXPayload.hpp"
+#include "firestarter/Environment/X86/Payload/CompiledX86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 
diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
index 33a79f40..202d34c7 100644
--- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp
@@ -19,8 +19,8 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <firestarter/Environment/X86/Payload/CompiledX86Payload.hpp>
-#include <firestarter/Environment/X86/Payload/FMA4Payload.hpp>
+#include "firestarter/Environment/X86/Payload/FMA4Payload.hpp"
+#include "firestarter/Environment/X86/Payload/CompiledX86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 
diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
index ee807df8..cec0021a 100644
--- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp
@@ -19,8 +19,8 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <firestarter/Environment/X86/Payload/CompiledX86Payload.hpp>
-#include <firestarter/Environment/X86/Payload/FMAPayload.hpp>
+#include "firestarter/Environment/X86/Payload/FMAPayload.hpp"
+#include "firestarter/Environment/X86/Payload/CompiledX86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 
diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
index 10512f51..fc77c8e1 100644
--- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp
@@ -19,9 +19,8 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include "asmjit/core/environment.h"
-#include <firestarter/Environment/X86/Payload/CompiledX86Payload.hpp>
-#include <firestarter/Environment/X86/Payload/SSE2Payload.hpp>
+#include "firestarter/Environment/X86/Payload/SSE2Payload.hpp"
+#include "firestarter/Environment/X86/Payload/CompiledX86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 
diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
index fa18b837..296d1052 100644
--- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp
+++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp
@@ -19,11 +19,12 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Environment/X86/Payload/X86Payload.hpp"
+#include "firestarter/Constants.hpp"
+#include "firestarter/WindowsCompat.hpp"
+
 #include <cassert>
 #include <chrono>
-#include <firestarter/Constants.hpp>
-#include <firestarter/Environment/X86/Payload/X86Payload.hpp>
-#include <firestarter/WindowsCompat.hpp>
 #include <thread>
 
 namespace firestarter::environment::x86::payload {
diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
index 4f38d514..4857f82d 100644
--- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
+++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp
@@ -19,8 +19,8 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <firestarter/Environment/X86/Payload/CompiledX86Payload.hpp>
-#include <firestarter/Environment/X86/Payload/ZENFMAPayload.hpp>
+#include "firestarter/Environment/X86/Payload/ZENFMAPayload.hpp"
+#include "firestarter/Environment/X86/Payload/CompiledX86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 
diff --git a/src/firestarter/Environment/X86/X86CPUTopology.cpp b/src/firestarter/Environment/X86/X86CPUTopology.cpp
index a467be50..64d64cfb 100644
--- a/src/firestarter/Environment/X86/X86CPUTopology.cpp
+++ b/src/firestarter/Environment/X86/X86CPUTopology.cpp
@@ -19,8 +19,8 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <firestarter/Environment/X86/X86CPUTopology.hpp>
-#include <firestarter/Logging/Log.hpp>
+#include "firestarter/Environment/X86/X86CPUTopology.hpp"
+#include "firestarter/Logging/Log.hpp"
 
 #include <ctime>
 
diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp
index ea651aa9..3ecd89c1 100644
--- a/src/firestarter/Environment/X86/X86Environment.cpp
+++ b/src/firestarter/Environment/X86/X86Environment.cpp
@@ -19,8 +19,8 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <firestarter/Environment/X86/X86Environment.hpp>
-#include <firestarter/Logging/Log.hpp>
+#include "firestarter/Environment/X86/X86Environment.hpp"
+#include "firestarter/Logging/Log.hpp"
 
 #include <algorithm>
 #include <cstdio>
diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index b5239539..8ad20a25 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -19,16 +19,16 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Firestarter.hpp"
+#include "firestarter/Environment/X86/X86Environment.hpp"
+#include "firestarter/Logging/Log.hpp"
+#include "firestarter/Measurement/Metric/IPCEstimate.hpp"
+#include "firestarter/Optimizer/Algorithm/NSGA2.hpp"
+#include "firestarter/Optimizer/History.hpp"
+#include "firestarter/Optimizer/Problem/CLIArgumentProblem.hpp"
+
 #include <csignal>
 #include <cstdlib>
-#include <firestarter/Environment/X86/X86Environment.hpp>
-#include <firestarter/Firestarter.hpp>
-#include <firestarter/Logging/Log.hpp>
-#include <firestarter/Measurement/Metric/IPCEstimate.hpp>
-#include <firestarter/Optimizer/Algorithm/NSGA2.hpp>
-#include <firestarter/Optimizer/History.hpp>
-#include <firestarter/Optimizer/Problem/CLIArgumentProblem.hpp>
-#include <firestarter/WindowsCompat.hpp>
 #include <memory>
 
 namespace firestarter {
diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp
index a168a650..4d473832 100644
--- a/src/firestarter/LoadWorker.cpp
+++ b/src/firestarter/LoadWorker.cpp
@@ -21,17 +21,13 @@
 
 #include "firestarter/AlignedAlloc.hpp"
 #include "firestarter/Constants.hpp"
+#include "firestarter/ErrorDetectionStruct.hpp"
+#include "firestarter/Firestarter.hpp"
 #include "firestarter/LoadWorkerData.hpp"
-#include <cstdint>
-#include <firestarter/ErrorDetectionStruct.hpp>
-#include <firestarter/Firestarter.hpp>
-#include <firestarter/Logging/Log.hpp>
-#include <iomanip>
-#include <limits>
-#include <sstream>
+#include "firestarter/Logging/Log.hpp"
 
 #if defined(linux) || defined(__linux__)
-#include <firestarter/Measurement/Metric/IPCEstimate.hpp>
+#include "firestarter/Measurement/Metric/IPCEstimate.hpp"
 #endif
 
 #ifdef ENABLE_VTRACING
@@ -42,7 +38,11 @@
 #endif
 
 #include <cmath>
+#include <cstdint>
 #include <cstdlib>
+#include <iomanip>
+#include <limits>
+#include <sstream>
 #include <thread>
 
 namespace firestarter {
diff --git a/src/firestarter/Main.cpp b/src/firestarter/Main.cpp
index 8158c609..24269db3 100644
--- a/src/firestarter/Main.cpp
+++ b/src/firestarter/Main.cpp
@@ -19,9 +19,9 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <firestarter/Config.hpp>
-#include <firestarter/Firestarter.hpp>
-#include <firestarter/Logging/Log.hpp>
+#include "firestarter/Config.hpp"
+#include "firestarter/Firestarter.hpp"
+#include "firestarter/Logging/Log.hpp"
 
 auto main(int argc, const char** argv) -> int {
   firestarter::log::info() << "FIRESTARTER - A Processor Stress Test Utility, Version " << _FIRESTARTER_VERSION_STRING
diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp
index dbefe334..25294e04 100644
--- a/src/firestarter/Measurement/MeasurementWorker.cpp
+++ b/src/firestarter/Measurement/MeasurementWorker.cpp
@@ -19,9 +19,10 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Measurement/MeasurementWorker.hpp"
+#include "firestarter/Logging/Log.hpp"
+
 #include <cstdarg>
-#include <firestarter/Logging/Log.hpp>
-#include <firestarter/Measurement/MeasurementWorker.hpp>
 #include <queue>
 
 #ifndef FIRESTARTER_LINK_STATIC
diff --git a/src/firestarter/Measurement/Metric/IPCEstimate.cpp b/src/firestarter/Measurement/Metric/IPCEstimate.cpp
index d92d7ea7..5cd49b88 100644
--- a/src/firestarter/Measurement/Metric/IPCEstimate.cpp
+++ b/src/firestarter/Measurement/Metric/IPCEstimate.cpp
@@ -19,9 +19,10 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Measurement/Metric/IPCEstimate.hpp"
+
 #include <chrono>
 #include <cstdlib>
-#include <firestarter/Measurement/Metric/IPCEstimate.hpp>
 
 auto IpcEstimateMetricData::fini() -> int32_t {
   auto& Instance = instance();
diff --git a/src/firestarter/Measurement/Metric/Perf.cpp b/src/firestarter/Measurement/Metric/Perf.cpp
index af478757..92a09cf1 100644
--- a/src/firestarter/Measurement/Metric/Perf.cpp
+++ b/src/firestarter/Measurement/Metric/Perf.cpp
@@ -19,13 +19,13 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Measurement/Metric/Perf.hpp"
+
 #include <array>
 #include <cassert>
 #include <cstring>
 #include <string>
 
-#include <firestarter/Measurement/Metric/Perf.hpp>
-
 extern "C" {
 #include <linux/perf_event.h>
 #include <sys/ioctl.h>
diff --git a/src/firestarter/Measurement/Metric/RAPL.cpp b/src/firestarter/Measurement/Metric/RAPL.cpp
index 15869650..458b2643 100644
--- a/src/firestarter/Measurement/Metric/RAPL.cpp
+++ b/src/firestarter/Measurement/Metric/RAPL.cpp
@@ -19,9 +19,10 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Measurement/Metric/RAPL.hpp"
+
 #include <cstdio>
 #include <cstring>
-#include <firestarter/Measurement/Metric/RAPL.hpp>
 #include <fstream>
 #include <memory>
 #include <sstream>
diff --git a/src/firestarter/Measurement/Summary.cpp b/src/firestarter/Measurement/Summary.cpp
index 1f54f721..1fecb99f 100644
--- a/src/firestarter/Measurement/Summary.cpp
+++ b/src/firestarter/Measurement/Summary.cpp
@@ -19,7 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <firestarter/Measurement/Summary.hpp>
+#include "firestarter/Measurement/Summary.hpp"
 
 #include <cassert>
 #include <cmath>
diff --git a/src/firestarter/OneAPI/OneAPI.cpp b/src/firestarter/OneAPI/OneAPI.cpp
index 98ef6b4a..3a5cfc4d 100644
--- a/src/firestarter/OneAPI/OneAPI.cpp
+++ b/src/firestarter/OneAPI/OneAPI.cpp
@@ -22,15 +22,15 @@
 /* OneAPI for GPUs, based on CUDA component
  *****************************************************************************/
 
-#include <firestarter/LoadWorkerData.hpp>
-#include <firestarter/Logging/Log.hpp>
-#include <firestarter/OneAPI/OneAPI.hpp>
+#include "firestarter/OneAPI/OneAPI.hpp"
+#include "firestarter/Logging/Log.hpp"
 
 #include <oneapi/mkl.hpp>
 #include <sycl/sycl.hpp>
 
 #include <algorithm>
 #include <atomic>
+#include <cmath>
 #include <type_traits>
 
 namespace firestarter::oneapi {
diff --git a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
index dc04baa5..c515e429 100644
--- a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
+++ b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp
@@ -21,10 +21,10 @@
 
 // This file borrows a lot of code from https://github.com/esa/pagmo2
 
-#include <firestarter/Logging/Log.hpp>
-#include <firestarter/Optimizer/Algorithm/NSGA2.hpp>
-#include <firestarter/Optimizer/Individual.hpp>
-#include <firestarter/Optimizer/Util/MultiObjective.hpp>
+#include "firestarter/Optimizer/Algorithm/NSGA2.hpp"
+#include "firestarter/Logging/Log.hpp"
+#include "firestarter/Optimizer/Individual.hpp"
+#include "firestarter/Optimizer/Util/MultiObjective.hpp"
 
 #include <algorithm>
 #include <iomanip>
diff --git a/src/firestarter/Optimizer/OptimizerWorker.cpp b/src/firestarter/Optimizer/OptimizerWorker.cpp
index b3085ab0..a82c1fa8 100644
--- a/src/firestarter/Optimizer/OptimizerWorker.cpp
+++ b/src/firestarter/Optimizer/OptimizerWorker.cpp
@@ -19,8 +19,8 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Optimizer/OptimizerWorker.hpp"
 #include "firestarter/Optimizer/Algorithm/NSGA2.hpp"
-#include <firestarter/Optimizer/OptimizerWorker.hpp>
 
 #include <thread>
 #include <utility>
diff --git a/src/firestarter/Optimizer/Population.cpp b/src/firestarter/Optimizer/Population.cpp
index b709d74d..a5a21527 100644
--- a/src/firestarter/Optimizer/Population.cpp
+++ b/src/firestarter/Optimizer/Population.cpp
@@ -19,10 +19,11 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
+#include "firestarter/Optimizer/Population.hpp"
+#include "firestarter/Logging/Log.hpp"
+#include "firestarter/Optimizer/History.hpp"
+
 #include <cassert>
-#include <firestarter/Logging/Log.hpp>
-#include <firestarter/Optimizer/History.hpp>
-#include <firestarter/Optimizer/Population.hpp>
 #include <random>
 
 namespace firestarter::optimizer {
diff --git a/src/firestarter/Optimizer/Util/MultiObjective.cpp b/src/firestarter/Optimizer/Util/MultiObjective.cpp
index de7da71d..7cae260a 100644
--- a/src/firestarter/Optimizer/Util/MultiObjective.cpp
+++ b/src/firestarter/Optimizer/Util/MultiObjective.cpp
@@ -21,7 +21,7 @@
 
 // This file borrows a lot of code from https://github.com/esa/pagmo2
 
-#include <firestarter/Optimizer/Util/MultiObjective.hpp>
+#include "firestarter/Optimizer/Util/MultiObjective.hpp"
 
 #include <algorithm>
 #include <stdexcept>
diff --git a/src/firestarter/SafeExit.cpp b/src/firestarter/SafeExit.cpp
index c5f6e604..4aed7a50 100644
--- a/src/firestarter/SafeExit.cpp
+++ b/src/firestarter/SafeExit.cpp
@@ -19,7 +19,8 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <firestarter/SafeExit.hpp>
+#include "firestarter/SafeExit.hpp"
+
 #include <mutex>
 
 [[noreturn]] void firestarter::safeExit(const int Status) {
diff --git a/src/firestarter/WatchdogWorker.cpp b/src/firestarter/WatchdogWorker.cpp
index 570cbd91..b5a73787 100644
--- a/src/firestarter/WatchdogWorker.cpp
+++ b/src/firestarter/WatchdogWorker.cpp
@@ -19,7 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include <firestarter/Firestarter.hpp>
+#include "firestarter/Firestarter.hpp"
 
 #include <cerrno>
 #include <csignal>

From 9155ae2b90b31088879867e0bf842f0291904cda Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Wed, 20 Nov 2024 13:26:55 +0100
Subject: [PATCH 164/167] fix crash when optimization is stopped

---
 src/firestarter/Firestarter.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp
index 8ad20a25..379e2039 100644
--- a/src/firestarter/Firestarter.cpp
+++ b/src/firestarter/Firestarter.cpp
@@ -216,6 +216,7 @@ void Firestarter::mainThread() {
 
       // wait here until optimizer thread terminates
       Firestarter::Optimizer->join();
+      Firestarter::Optimizer.reset();
 
       auto PayloadItems = Environment->config().settings().instructionGroupItems();
 

From eb8eed787fbf7dcd17a5d2e115e76d15a2ff45d0 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 21 Nov 2024 19:17:48 +0100
Subject: [PATCH 165/167] unify firestarter includes in header files

---
 include/firestarter/Cuda/Cuda.hpp             |  1 +
 include/firestarter/Cuda/CudaHipCompat.hpp    |  3 +-
 include/firestarter/DumpRegisterStruct.hpp    |  1 +
 .../firestarter/DumpRegisterWorkerData.hpp    |  7 +++--
 .../firestarter/Environment/Environment.hpp   |  3 +-
 .../Environment/Payload/CompiledPayload.hpp   |  1 +
 .../Environment/Payload/Payload.hpp           |  3 +-
 .../Environment/Platform/PlatformConfig.hpp   |  3 +-
 .../Environment/X86/Payload/AVX512Payload.hpp |  2 +-
 .../Environment/X86/Payload/AVXPayload.hpp    |  2 +-
 .../X86/Payload/CompiledX86Payload.hpp        |  3 +-
 .../Environment/X86/Payload/FMA4Payload.hpp   |  2 +-
 .../Environment/X86/Payload/FMAPayload.hpp    |  2 +-
 .../Environment/X86/Payload/SSE2Payload.hpp   |  2 +-
 .../Environment/X86/Payload/X86Payload.hpp    | 11 ++++----
 .../Environment/X86/Payload/ZENFMAPayload.hpp |  2 +-
 .../X86/Platform/BulldozerConfig.hpp          |  4 +--
 .../X86/Platform/HaswellConfig.hpp            |  4 +--
 .../X86/Platform/HaswellEPConfig.hpp          |  4 +--
 .../X86/Platform/KnightsLandingConfig.hpp     |  4 +--
 .../Environment/X86/Platform/NaplesConfig.hpp |  4 +--
 .../X86/Platform/NehalemConfig.hpp            |  4 +--
 .../X86/Platform/NehalemEPConfig.hpp          |  4 +--
 .../Environment/X86/Platform/RomeConfig.hpp   |  4 +--
 .../X86/Platform/SandyBridgeConfig.hpp        |  4 +--
 .../X86/Platform/SandyBridgeEPConfig.hpp      |  4 +--
 .../X86/Platform/SkylakeConfig.hpp            |  4 +--
 .../X86/Platform/SkylakeSPConfig.hpp          |  4 +--
 .../X86/Platform/X86PlatformConfig.hpp        |  2 +-
 .../Environment/X86/X86CPUTopology.hpp        |  4 +--
 .../Environment/X86/X86Environment.hpp        | 28 +++++++++----------
 include/firestarter/ErrorDetectionStruct.hpp  |  1 +
 include/firestarter/Firestarter.hpp           | 20 ++++++-------
 include/firestarter/Json/Summary.hpp          |  2 +-
 include/firestarter/LoadWorkerData.hpp        |  7 +++--
 include/firestarter/LoadWorkerMemory.hpp      |  7 +++--
 include/firestarter/Logging/Log.hpp           |  3 +-
 .../Measurement/MeasurementWorker.hpp         | 13 +++++----
 .../Measurement/Metric/IPCEstimate.hpp        |  3 +-
 .../firestarter/Measurement/Metric/Perf.hpp   |  3 +-
 .../firestarter/Measurement/Metric/RAPL.hpp   |  3 +-
 .../firestarter/Measurement/MetricInterface.h |  2 +-
 include/firestarter/Measurement/Summary.hpp   |  5 ++--
 include/firestarter/OneAPI/OneAPI.hpp         |  1 +
 include/firestarter/Optimizer/Algorithm.hpp   |  2 +-
 .../firestarter/Optimizer/Algorithm/NSGA2.hpp |  2 +-
 include/firestarter/Optimizer/History.hpp     |  9 +++---
 .../firestarter/Optimizer/OptimizerWorker.hpp |  5 ++--
 include/firestarter/Optimizer/Population.hpp  |  5 ++--
 include/firestarter/Optimizer/Problem.hpp     |  5 ++--
 .../Optimizer/Problem/CLIArgumentProblem.hpp  |  5 ++--
 .../Optimizer/Util/MultiObjective.hpp         |  3 +-
 52 files changed, 130 insertions(+), 106 deletions(-)

diff --git a/include/firestarter/Cuda/Cuda.hpp b/include/firestarter/Cuda/Cuda.hpp
index cdd14017..396654c7 100644
--- a/include/firestarter/Cuda/Cuda.hpp
+++ b/include/firestarter/Cuda/Cuda.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include "firestarter/Constants.hpp"
+
 #include <condition_variable>
 #include <thread>
 
diff --git a/include/firestarter/Cuda/CudaHipCompat.hpp b/include/firestarter/Cuda/CudaHipCompat.hpp
index 7166ba30..f0543f4d 100644
--- a/include/firestarter/Cuda/CudaHipCompat.hpp
+++ b/include/firestarter/Cuda/CudaHipCompat.hpp
@@ -29,9 +29,10 @@
 
 #pragma once
 
+#include "firestarter/Logging/Log.hpp"
+
 #include <cassert>
 #include <cstddef>
-#include <firestarter/Logging/Log.hpp>
 #include <optional>
 #include <sstream>
 #include <type_traits>
diff --git a/include/firestarter/DumpRegisterStruct.hpp b/include/firestarter/DumpRegisterStruct.hpp
index c60a2666..63d4695e 100644
--- a/include/firestarter/DumpRegisterStruct.hpp
+++ b/include/firestarter/DumpRegisterStruct.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include "firestarter/Constants.hpp"
+
 #include <array>
 
 namespace firestarter {
diff --git a/include/firestarter/DumpRegisterWorkerData.hpp b/include/firestarter/DumpRegisterWorkerData.hpp
index 4236ae53..e0bf01d4 100644
--- a/include/firestarter/DumpRegisterWorkerData.hpp
+++ b/include/firestarter/DumpRegisterWorkerData.hpp
@@ -21,9 +21,10 @@
 
 #pragma once
 
-#include "LoadWorkerData.hpp"
-#include "Logging/Log.hpp"
-#include "WindowsCompat.hpp" // IWYU pragma: keep
+#include "firestarter/LoadWorkerData.hpp"
+#include "firestarter/Logging/Log.hpp"
+#include "firestarter/WindowsCompat.hpp" // IWYU pragma: keep
+
 #include <chrono>
 #include <utility>
 
diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp
index 0bf2f181..41446bde 100644
--- a/include/firestarter/Environment/Environment.hpp
+++ b/include/firestarter/Environment/Environment.hpp
@@ -21,8 +21,9 @@
 
 #pragma once
 
-#include "CPUTopology.hpp"
+#include "firestarter/Environment/CPUTopology.hpp"
 #include "firestarter/Environment/Platform/PlatformConfig.hpp"
+
 #include <cassert>
 #include <cstdint>
 #include <memory>
diff --git a/include/firestarter/Environment/Payload/CompiledPayload.hpp b/include/firestarter/Environment/Payload/CompiledPayload.hpp
index e275b180..488c6c8d 100644
--- a/include/firestarter/Environment/Payload/CompiledPayload.hpp
+++ b/include/firestarter/Environment/Payload/CompiledPayload.hpp
@@ -23,6 +23,7 @@
 
 #include "firestarter/Constants.hpp"
 #include "firestarter/Environment/Payload/PayloadStats.hpp"
+
 #include <chrono>
 #include <memory>
 #include <utility>
diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp
index 927940e3..b5b17199 100644
--- a/include/firestarter/Environment/Payload/Payload.hpp
+++ b/include/firestarter/Environment/Payload/Payload.hpp
@@ -21,10 +21,11 @@
 
 #pragma once
 
-#include "CompiledPayload.hpp"
 #include "firestarter/Constants.hpp"
 #include "firestarter/Environment/CPUTopology.hpp"
+#include "firestarter/Environment/Payload/CompiledPayload.hpp"
 #include "firestarter/Environment/Payload/PayloadSettings.hpp"
+
 #include <chrono>
 #include <list>
 #include <string>
diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp
index 81c1454a..40833b8c 100644
--- a/include/firestarter/Environment/Platform/PlatformConfig.hpp
+++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp
@@ -21,9 +21,8 @@
 
 #pragma once
 
-#include "../Payload/Payload.hpp"
 #include "firestarter/Environment/CPUTopology.hpp"
-#include "firestarter/Environment/Payload/PayloadSettings.hpp"
+#include "firestarter/Environment/Payload/Payload.hpp"
 #include "firestarter/Logging/Log.hpp"
 
 namespace firestarter::environment::platform {
diff --git a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
index 1372bf0f..20bfc491 100644
--- a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include "X86Payload.hpp"
+#include "firestarter/Environment/X86/Payload/X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 
diff --git a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
index aeb9c7f9..24ef7a15 100644
--- a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include "X86Payload.hpp"
+#include "firestarter/Environment/X86/Payload/X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 
diff --git a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
index 9a7ee0e2..776f83f4 100644
--- a/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/CompiledX86Payload.hpp
@@ -21,9 +21,10 @@
 
 #pragma once
 
-#include "asmjit/core/jitruntime.h"
 #include "firestarter/Environment/Payload/CompiledPayload.hpp"
 #include "firestarter/Logging/Log.hpp"
+
+#include <asmjit/asmjit.h>
 #include <memory>
 
 namespace firestarter::environment::x86::payload {
diff --git a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
index 2fe0d9de..f0e711f6 100644
--- a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include "X86Payload.hpp"
+#include "firestarter/Environment/X86/Payload/X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 
diff --git a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
index d6505b1e..8280a5b2 100644
--- a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include "X86Payload.hpp"
+#include "firestarter/Environment/X86/Payload/X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 
diff --git a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
index 98dc9055..557af0d4 100644
--- a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include "X86Payload.hpp"
+#include "firestarter/Environment/X86/Payload/X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 
diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
index 82ea7e0c..44d5bd4f 100644
--- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp
+++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp
@@ -21,12 +21,13 @@
 
 #pragma once
 
-#include "../../../Constants.hpp"          // IWYU pragma: keep
-#include "../../../DumpRegisterStruct.hpp" // IWYU pragma: keep
-#include "../../../Logging/Log.hpp"        // IWYU pragma: keep
-#include "../../Payload/Payload.hpp"
-#include "../X86CPUTopology.hpp"
+#include "firestarter/Constants.hpp"          // IWYU pragma: keep
+#include "firestarter/DumpRegisterStruct.hpp" // IWYU pragma: keep
+#include "firestarter/Environment/Payload/Payload.hpp"
+#include "firestarter/Environment/X86/X86CPUTopology.hpp"
 #include "firestarter/LoadWorkerMemory.hpp"
+#include "firestarter/Logging/Log.hpp" // IWYU pragma: keep
+
 #include <asmjit/x86.h>
 #include <cassert>
 #include <cstdint>
diff --git a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
index 8286de6c..5d624725 100644
--- a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
+++ b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include "X86Payload.hpp"
+#include "firestarter/Environment/X86/Payload/X86Payload.hpp"
 
 namespace firestarter::environment::x86::payload {
 
diff --git a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
index ee733b5d..936b3601 100644
--- a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp
@@ -21,8 +21,8 @@
 
 #pragma once
 
-#include "../Payload/FMA4Payload.hpp"
-#include "X86PlatformConfig.hpp"
+#include "firestarter/Environment/X86/Payload/FMA4Payload.hpp"
+#include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class BulldozerConfig final : public X86PlatformConfig {
diff --git a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
index 5800e82f..768d3597 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp
@@ -21,8 +21,8 @@
 
 #pragma once
 
-#include "../Payload/FMAPayload.hpp"
-#include "X86PlatformConfig.hpp"
+#include "firestarter/Environment/X86/Payload/FMAPayload.hpp"
+#include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class HaswellConfig final : public X86PlatformConfig {
diff --git a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
index ef54e5d2..23d2518f 100644
--- a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp
@@ -21,8 +21,8 @@
 
 #pragma once
 
-#include "../Payload/FMAPayload.hpp"
-#include "X86PlatformConfig.hpp"
+#include "firestarter/Environment/X86/Payload/FMAPayload.hpp"
+#include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class HaswellEPConfig final : public X86PlatformConfig {
diff --git a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
index bd2d1a26..f849c07b 100644
--- a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp
@@ -21,8 +21,8 @@
 
 #pragma once
 
-#include "../Payload/AVX512Payload.hpp"
-#include "X86PlatformConfig.hpp"
+#include "firestarter/Environment/X86/Payload/AVX512Payload.hpp"
+#include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class KnightsLandingConfig final : public X86PlatformConfig {
diff --git a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
index 7f303f94..abef11da 100644
--- a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp
@@ -21,8 +21,8 @@
 
 #pragma once
 
-#include "../Payload/ZENFMAPayload.hpp"
-#include "X86PlatformConfig.hpp"
+#include "firestarter/Environment/X86/Payload/ZENFMAPayload.hpp"
+#include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class NaplesConfig final : public X86PlatformConfig {
diff --git a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
index 0403edf2..31374061 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp
@@ -21,8 +21,8 @@
 
 #pragma once
 
-#include "../Payload/SSE2Payload.hpp"
-#include "X86PlatformConfig.hpp"
+#include "firestarter/Environment/X86/Payload/SSE2Payload.hpp"
+#include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class NehalemConfig final : public X86PlatformConfig {
diff --git a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
index 847e5b89..9a6a08bb 100644
--- a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp
@@ -21,8 +21,8 @@
 
 #pragma once
 
-#include "../Payload/SSE2Payload.hpp"
-#include "X86PlatformConfig.hpp"
+#include "firestarter/Environment/X86/Payload/SSE2Payload.hpp"
+#include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class NehalemEPConfig final : public X86PlatformConfig {
diff --git a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
index fe4c8fce..e70161d7 100644
--- a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp
@@ -21,8 +21,8 @@
 
 #pragma once
 
-#include "../Payload/FMAPayload.hpp"
-#include "X86PlatformConfig.hpp"
+#include "firestarter/Environment/X86/Payload/FMAPayload.hpp"
+#include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class RomeConfig final : public X86PlatformConfig {
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
index bf786979..b5c5b1c4 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp
@@ -21,8 +21,8 @@
 
 #pragma once
 
-#include "../Payload/AVXPayload.hpp"
-#include "X86PlatformConfig.hpp"
+#include "firestarter/Environment/X86/Payload/AVXPayload.hpp"
+#include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class SandyBridgeConfig final : public X86PlatformConfig {
diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
index ed61f304..944f069a 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
@@ -22,8 +22,8 @@
 #ifndef INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SANDYBRIDGEEPCONFIG_H
 #define INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SANDYBRIDGEEPCONFIG_H
 
-#include "../Payload/AVXPayload.hpp"
-#include "X86PlatformConfig.hpp"
+#include "firestarter/Environment/X86/Payload/AVXPayload.hpp"
+#include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class SandyBridgeEPConfig final : public X86PlatformConfig {
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
index 8e3f74fa..659c486a 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
@@ -22,8 +22,8 @@
 #ifndef INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SKYLAKECONFIG_H
 #define INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SKYLAKECONFIG_H
 
-#include "../Payload/FMAPayload.hpp"
-#include "X86PlatformConfig.hpp"
+#include "firestarter/Environment/X86/Payload/FMAPayload.hpp"
+#include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class SkylakeConfig final : public X86PlatformConfig {
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
index c5a69761..864ebec9 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp
@@ -21,8 +21,8 @@
 
 #pragma once
 
-#include "../Payload/AVX512Payload.hpp"
-#include "X86PlatformConfig.hpp"
+#include "firestarter/Environment/X86/Payload/AVX512Payload.hpp"
+#include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86::platform {
 class SkylakeSPConfig final : public X86PlatformConfig {
diff --git a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
index 788f9c29..15d54638 100644
--- a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp
@@ -21,8 +21,8 @@
 
 #pragma once
 
-#include "../../Platform/PlatformConfig.hpp"
 #include "firestarter/Environment/CPUTopology.hpp"
+#include "firestarter/Environment/Platform/PlatformConfig.hpp"
 #include "firestarter/Environment/X86/X86CPUTopology.hpp"
 
 namespace firestarter::environment::x86::platform {
diff --git a/include/firestarter/Environment/X86/X86CPUTopology.hpp b/include/firestarter/Environment/X86/X86CPUTopology.hpp
index 39d59278..0a85d040 100644
--- a/include/firestarter/Environment/X86/X86CPUTopology.hpp
+++ b/include/firestarter/Environment/X86/X86CPUTopology.hpp
@@ -21,9 +21,9 @@
 
 #pragma once
 
-#include <asmjit/asmjit.h>
+#include "firestarter/Environment/CPUTopology.hpp"
 
-#include "../CPUTopology.hpp"
+#include <asmjit/asmjit.h>
 
 namespace firestarter::environment::x86 {
 
diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp
index b9f9e039..f4760f7e 100644
--- a/include/firestarter/Environment/X86/X86Environment.hpp
+++ b/include/firestarter/Environment/X86/X86Environment.hpp
@@ -21,20 +21,20 @@
 
 #pragma once
 
-#include "../Environment.hpp"
-#include "Platform/BulldozerConfig.hpp"
-#include "Platform/HaswellConfig.hpp"
-#include "Platform/HaswellEPConfig.hpp"
-#include "Platform/KnightsLandingConfig.hpp"
-#include "Platform/NaplesConfig.hpp"
-#include "Platform/NehalemConfig.hpp"
-#include "Platform/NehalemEPConfig.hpp"
-#include "Platform/RomeConfig.hpp"
-#include "Platform/SandyBridgeConfig.hpp"
-#include "Platform/SandyBridgeEPConfig.hpp"
-#include "Platform/SkylakeConfig.hpp"
-#include "Platform/SkylakeSPConfig.hpp"
-#include "Platform/X86PlatformConfig.hpp"
+#include "firestarter/Environment/Environment.hpp"
+#include "firestarter/Environment/X86/Platform/BulldozerConfig.hpp"
+#include "firestarter/Environment/X86/Platform/HaswellConfig.hpp"
+#include "firestarter/Environment/X86/Platform/HaswellEPConfig.hpp"
+#include "firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp"
+#include "firestarter/Environment/X86/Platform/NaplesConfig.hpp"
+#include "firestarter/Environment/X86/Platform/NehalemConfig.hpp"
+#include "firestarter/Environment/X86/Platform/NehalemEPConfig.hpp"
+#include "firestarter/Environment/X86/Platform/RomeConfig.hpp"
+#include "firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp"
+#include "firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp"
+#include "firestarter/Environment/X86/Platform/SkylakeConfig.hpp"
+#include "firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp"
+#include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
 
 namespace firestarter::environment::x86 {
 
diff --git a/include/firestarter/ErrorDetectionStruct.hpp b/include/firestarter/ErrorDetectionStruct.hpp
index 4d26d9f0..1fc3ad24 100644
--- a/include/firestarter/ErrorDetectionStruct.hpp
+++ b/include/firestarter/ErrorDetectionStruct.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include <cstdint>
+
 namespace firestarter {
 
 /// This struct is used for the error detection feature. The error detection works between two threads. The current one
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp
index 51cb601a..a51feebb 100644
--- a/include/firestarter/Firestarter.hpp
+++ b/include/firestarter/Firestarter.hpp
@@ -21,16 +21,16 @@
 
 #pragma once
 
-#include "Config.hpp"
-#include "Constants.hpp"
-#include "Cuda/Cuda.hpp"
-#include "DumpRegisterWorkerData.hpp"
-#include "LoadWorkerData.hpp"
-#include "Measurement/MeasurementWorker.hpp"
-#include "OneAPI/OneAPI.hpp"
-#include "Optimizer/Algorithm.hpp"
-#include "Optimizer/OptimizerWorker.hpp"
-#include "Optimizer/Population.hpp"
+#include "firestarter/Config.hpp"
+#include "firestarter/Constants.hpp"
+#include "firestarter/Cuda/Cuda.hpp"
+#include "firestarter/DumpRegisterWorkerData.hpp"
+#include "firestarter/LoadWorkerData.hpp"
+#include "firestarter/Measurement/MeasurementWorker.hpp"
+#include "firestarter/OneAPI/OneAPI.hpp"
+#include "firestarter/Optimizer/Algorithm.hpp"
+#include "firestarter/Optimizer/OptimizerWorker.hpp"
+#include "firestarter/Optimizer/Population.hpp"
 
 #include <chrono>
 #include <condition_variable>
diff --git a/include/firestarter/Json/Summary.hpp b/include/firestarter/Json/Summary.hpp
index 87495fb5..e6f33e5d 100644
--- a/include/firestarter/Json/Summary.hpp
+++ b/include/firestarter/Json/Summary.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include "../Measurement/Summary.hpp"
+#include "firestarter/Measurement/Summary.hpp"
 
 /// Json serializer and deserializer for the firestarter::measurement::Summary struct
 namespace nlohmann {
diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp
index 413f4b7f..1cf3dac3 100644
--- a/include/firestarter/LoadWorkerData.hpp
+++ b/include/firestarter/LoadWorkerData.hpp
@@ -21,10 +21,11 @@
 
 #pragma once
 
-#include "Constants.hpp"
-#include "Environment/Environment.hpp"
-#include "LoadWorkerMemory.hpp"
+#include "firestarter/Constants.hpp"
+#include "firestarter/Environment/Environment.hpp"
 #include "firestarter/Environment/Platform/PlatformConfig.hpp"
+#include "firestarter/LoadWorkerMemory.hpp"
+
 #include <atomic>
 #include <cmath>
 #include <memory>
diff --git a/include/firestarter/LoadWorkerMemory.hpp b/include/firestarter/LoadWorkerMemory.hpp
index 2a5ea253..11493665 100644
--- a/include/firestarter/LoadWorkerMemory.hpp
+++ b/include/firestarter/LoadWorkerMemory.hpp
@@ -21,9 +21,10 @@
 
 #pragma once
 
-#include "AlignedAlloc.hpp"
-#include "DumpRegisterStruct.hpp"
-#include "ErrorDetectionStruct.hpp"
+#include "firestarter/AlignedAlloc.hpp"
+#include "firestarter/DumpRegisterStruct.hpp"
+#include "firestarter/ErrorDetectionStruct.hpp"
+
 #include <memory>
 
 namespace firestarter {
diff --git a/include/firestarter/Logging/Log.hpp b/include/firestarter/Logging/Log.hpp
index 912f76be..10090668 100644
--- a/include/firestarter/Logging/Log.hpp
+++ b/include/firestarter/Logging/Log.hpp
@@ -21,8 +21,9 @@
 
 #pragma once
 
-#include "FirstWorkerThreadFilter.hpp"
+#include "firestarter/Logging/FirstWorkerThreadFilter.hpp"
 #include "firestarter/SafeExit.hpp"
+
 #include <cstdlib>
 #include <iostream>
 #include <nitro/log/attribute/message.hpp>
diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp
index 14088e33..a25c8da3 100644
--- a/include/firestarter/Measurement/MeasurementWorker.hpp
+++ b/include/firestarter/Measurement/MeasurementWorker.hpp
@@ -21,13 +21,14 @@
 
 #pragma once
 
-#include "Metric/IPCEstimate.hpp"
-#include "Metric/Perf.hpp"
-#include "Metric/RAPL.hpp"
-#include "MetricInterface.h"
-#include "Summary.hpp"
-#include "TimeValue.hpp"
+#include "firestarter/Measurement/Metric/IPCEstimate.hpp"
+#include "firestarter/Measurement/Metric/Perf.hpp"
+#include "firestarter/Measurement/Metric/RAPL.hpp"
+#include "firestarter/Measurement/MetricInterface.h"
+#include "firestarter/Measurement/Summary.hpp"
+#include "firestarter/Measurement/TimeValue.hpp"
 #include "firestarter/WindowsCompat.hpp" // IWYU pragma: keep
+
 #include <chrono>
 #include <map>
 #include <mutex>
diff --git a/include/firestarter/Measurement/Metric/IPCEstimate.hpp b/include/firestarter/Measurement/Metric/IPCEstimate.hpp
index 3a299244..52bc9cdb 100644
--- a/include/firestarter/Measurement/Metric/IPCEstimate.hpp
+++ b/include/firestarter/Measurement/Metric/IPCEstimate.hpp
@@ -21,7 +21,8 @@
 
 #pragma once
 
-#include "../MetricInterface.h"
+#include "firestarter/Measurement/MetricInterface.h"
+
 #include <string>
 
 /// The wrapper for the C interface to the IpcEstimateMetric metric.
diff --git a/include/firestarter/Measurement/Metric/Perf.hpp b/include/firestarter/Measurement/Metric/Perf.hpp
index 32ef3fd6..8e0e14c7 100644
--- a/include/firestarter/Measurement/Metric/Perf.hpp
+++ b/include/firestarter/Measurement/Metric/Perf.hpp
@@ -21,7 +21,8 @@
 
 #pragma once
 
-#include "../MetricInterface.h"
+#include "firestarter/Measurement/MetricInterface.h"
+
 #include <array>
 #include <string>
 
diff --git a/include/firestarter/Measurement/Metric/RAPL.hpp b/include/firestarter/Measurement/Metric/RAPL.hpp
index 00f12019..59d4a822 100644
--- a/include/firestarter/Measurement/Metric/RAPL.hpp
+++ b/include/firestarter/Measurement/Metric/RAPL.hpp
@@ -21,7 +21,8 @@
 
 #pragma once
 
-#include "../MetricInterface.h"
+#include "firestarter/Measurement/MetricInterface.h"
+
 #include <memory>
 #include <string>
 #include <vector>
diff --git a/include/firestarter/Measurement/MetricInterface.h b/include/firestarter/Measurement/MetricInterface.h
index f3382cf2..e0c81432 100644
--- a/include/firestarter/Measurement/MetricInterface.h
+++ b/include/firestarter/Measurement/MetricInterface.h
@@ -27,7 +27,7 @@
 extern "C" {
 #endif
 
-#include <cstdint>
+#include <stdint.h>
 // NOLINTBEGIN(modernize-use-using)
 
 /// Describe the type of the metric and how values need to be accumulated. Per default metrics are of pulling type where
diff --git a/include/firestarter/Measurement/Summary.hpp b/include/firestarter/Measurement/Summary.hpp
index 085a482b..05c5a925 100644
--- a/include/firestarter/Measurement/Summary.hpp
+++ b/include/firestarter/Measurement/Summary.hpp
@@ -21,8 +21,9 @@
 
 #pragma once
 
-#include "MetricInterface.h"
-#include "TimeValue.hpp"
+#include "firestarter/Measurement/MetricInterface.h"
+#include "firestarter/Measurement/TimeValue.hpp"
+
 #include <chrono>
 #include <nlohmann/json.hpp>
 #include <vector>
diff --git a/include/firestarter/OneAPI/OneAPI.hpp b/include/firestarter/OneAPI/OneAPI.hpp
index a84e2c53..4022b8c4 100644
--- a/include/firestarter/OneAPI/OneAPI.hpp
+++ b/include/firestarter/OneAPI/OneAPI.hpp
@@ -22,6 +22,7 @@
 #pragma once
 
 #include "firestarter/Constants.hpp"
+
 #include <condition_variable>
 #include <thread>
 
diff --git a/include/firestarter/Optimizer/Algorithm.hpp b/include/firestarter/Optimizer/Algorithm.hpp
index 58850929..be5d5961 100644
--- a/include/firestarter/Optimizer/Algorithm.hpp
+++ b/include/firestarter/Optimizer/Algorithm.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include "Population.hpp"
+#include "firestarter/Optimizer/Population.hpp"
 
 namespace firestarter::optimizer {
 
diff --git a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
index 8ba6ec41..6b395823 100644
--- a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
+++ b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp
@@ -21,7 +21,7 @@
 
 #pragma once
 
-#include "../Algorithm.hpp"
+#include "firestarter/Optimizer/Algorithm.hpp"
 
 namespace firestarter::optimizer::algorithm {
 
diff --git a/include/firestarter/Optimizer/History.hpp b/include/firestarter/Optimizer/History.hpp
index 4a586514..10d635c1 100644
--- a/include/firestarter/Optimizer/History.hpp
+++ b/include/firestarter/Optimizer/History.hpp
@@ -21,11 +21,12 @@
 
 #pragma once
 
-#include "../Json/Summary.hpp" // IWYU pragma: keep
-#include "../Logging/Log.hpp"
-#include "../Measurement/Summary.hpp"
-#include "Individual.hpp"
+#include "firestarter/Json/Summary.hpp" // IWYU pragma: keep
+#include "firestarter/Logging/Log.hpp"
+#include "firestarter/Measurement/Summary.hpp"
+#include "firestarter/Optimizer/Individual.hpp"
 #include "firestarter/WindowsCompat.hpp" // IWYU pragma: keep
+
 #include <algorithm>
 #include <cassert>
 #include <cstring>
diff --git a/include/firestarter/Optimizer/OptimizerWorker.hpp b/include/firestarter/Optimizer/OptimizerWorker.hpp
index f534485c..17293ad3 100644
--- a/include/firestarter/Optimizer/OptimizerWorker.hpp
+++ b/include/firestarter/Optimizer/OptimizerWorker.hpp
@@ -19,9 +19,10 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#include "Algorithm.hpp"
-#include "Population.hpp"
+#include "firestarter/Optimizer/Algorithm.hpp"
+#include "firestarter/Optimizer/Population.hpp"
 #include "firestarter/WindowsCompat.hpp" // IWYU pragma: keep
+
 #include <chrono>
 #include <memory>
 
diff --git a/include/firestarter/Optimizer/Population.hpp b/include/firestarter/Optimizer/Population.hpp
index 61e3e9bd..ac857e30 100644
--- a/include/firestarter/Optimizer/Population.hpp
+++ b/include/firestarter/Optimizer/Population.hpp
@@ -21,8 +21,9 @@
 
 #pragma once
 
-#include "Individual.hpp"
-#include "Problem.hpp"
+#include "firestarter/Optimizer/Individual.hpp"
+#include "firestarter/Optimizer/Problem.hpp"
+
 #include <cstring>
 #include <memory>
 #include <vector>
diff --git a/include/firestarter/Optimizer/Problem.hpp b/include/firestarter/Optimizer/Problem.hpp
index d1b86301..bee3fdbb 100644
--- a/include/firestarter/Optimizer/Problem.hpp
+++ b/include/firestarter/Optimizer/Problem.hpp
@@ -21,8 +21,9 @@
 
 #pragma once
 
-#include "../Measurement/Summary.hpp"
-#include "Individual.hpp"
+#include "firestarter/Measurement/Summary.hpp"
+#include "firestarter/Optimizer/Individual.hpp"
+
 #include <cstring>
 #include <map>
 #include <tuple>
diff --git a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
index 36646cab..4335a4f9 100644
--- a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
+++ b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp
@@ -21,8 +21,9 @@
 
 #pragma once
 
-#include "../../Measurement/MeasurementWorker.hpp"
-#include "../Problem.hpp"
+#include "firestarter/Measurement/MeasurementWorker.hpp"
+#include "firestarter/Optimizer/Problem.hpp"
+
 #include <cassert>
 #include <functional>
 #include <thread>
diff --git a/include/firestarter/Optimizer/Util/MultiObjective.hpp b/include/firestarter/Optimizer/Util/MultiObjective.hpp
index 1b3a1873..049d7be3 100644
--- a/include/firestarter/Optimizer/Util/MultiObjective.hpp
+++ b/include/firestarter/Optimizer/Util/MultiObjective.hpp
@@ -21,7 +21,8 @@
 
 #pragma once
 
-#include "../Individual.hpp"
+#include "firestarter/Optimizer/Individual.hpp"
+
 #include <random>
 #include <utility>
 #include <vector>

From 92897b2d341acdfae0395672468d936efaa8a799 Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Thu, 21 Nov 2024 19:18:59 +0100
Subject: [PATCH 166/167] remove ifndefs

---
 .../Environment/X86/Platform/SandyBridgeEPConfig.hpp       | 7 ++-----
 .../firestarter/Environment/X86/Platform/SkylakeConfig.hpp | 7 ++-----
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
index 944f069a..67048ba5 100644
--- a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp
@@ -19,8 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#ifndef INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SANDYBRIDGEEPCONFIG_H
-#define INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SANDYBRIDGEEPCONFIG_H
+#pragma once
 
 #include "firestarter/Environment/X86/Payload/AVXPayload.hpp"
 #include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
@@ -38,6 +37,4 @@ class SandyBridgeEPConfig final : public X86PlatformConfig {
                 /*InstructionGroups=*/{{"RAM_L", 3}, {"L3_LS", 2}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 30}}),
             /*Payload=*/std::make_shared<const payload::AVXPayload>()) {}
 };
-} // namespace firestarter::environment::x86::platform
-
-#endif
+} // namespace firestarter::environment::x86::platform
\ No newline at end of file
diff --git a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
index 659c486a..8a109d11 100644
--- a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
+++ b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp
@@ -19,8 +19,7 @@
  * Contact: daniel.hackenberg@tu-dresden.de
  *****************************************************************************/
 
-#ifndef INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SKYLAKECONFIG_H
-#define INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SKYLAKECONFIG_H
+#pragma once
 
 #include "firestarter/Environment/X86/Payload/FMAPayload.hpp"
 #include "firestarter/Environment/X86/Platform/X86PlatformConfig.hpp"
@@ -38,6 +37,4 @@ class SkylakeConfig final : public X86PlatformConfig {
                               {{"RAM_L", 3}, {"L3_LS_256", 5}, {"L2_LS_256", 18}, {"L1_2LS_256", 78}, {"REG", 40}}),
                           /*Payload=*/std::make_shared<const payload::FMAPayload>()) {}
 };
-} // namespace firestarter::environment::x86::platform
-
-#endif
+} // namespace firestarter::environment::x86::platform
\ No newline at end of file

From f53b7364a373704ffcdb2a3bb847944f2869d3ce Mon Sep 17 00:00:00 2001
From: Markus Schmidl <markus.schmidl@mailbox.tu-dresden.de>
Date: Fri, 22 Nov 2024 10:27:02 +0100
Subject: [PATCH 167/167] silence modernize warning in c-style header

---
 include/firestarter/Measurement/MetricInterface.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/firestarter/Measurement/MetricInterface.h b/include/firestarter/Measurement/MetricInterface.h
index e0c81432..03f4872c 100644
--- a/include/firestarter/Measurement/MetricInterface.h
+++ b/include/firestarter/Measurement/MetricInterface.h
@@ -27,7 +27,9 @@
 extern "C" {
 #endif
 
+// NOLINTNEXTLINE(modernize-deprecated-headers)
 #include <stdint.h>
+
 // NOLINTBEGIN(modernize-use-using)
 
 /// Describe the type of the metric and how values need to be accumulated. Per default metrics are of pulling type where