From f67729b584020eb4459d54eb35f2efb00ed5076e Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Thu, 19 Sep 2024 16:12:18 +0200 Subject: [PATCH 001/167] enable compile commands generation --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c94e0144..8e91d9b2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,8 @@ cmake_minimum_required(VERSION 3.22) project(FIRESTARTER) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + include(cmake/GitSubmoduleUpdate.cmake) # set FIRESTARTER version From f24a0b2031546ff58611c0312d74161303398088 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Thu, 19 Sep 2024 18:27:54 +0200 Subject: [PATCH 002/167] add .clang-format file base on LLVM style --- .clang-format | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..656a3655 --- /dev/null +++ b/.clang-format @@ -0,0 +1,7 @@ +--- +BasedOnStyle: LLVM +Language: Cpp +BreakConstructorInitializersBeforeComma: 'true' +AllowShortFunctionsOnASingleLine: All +PointerAlignment: Left +ColumnLimit: 120 \ No newline at end of file From 9732bdb59717274f666e9c1497289d1f9a0d7858 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Thu, 19 Sep 2024 18:28:44 +0200 Subject: [PATCH 003/167] clang-format --- include/firestarter/Cuda/Cuda.hpp | 8 +- .../firestarter/DumpRegisterWorkerData.hpp | 10 +- .../firestarter/Environment/CPUTopology.hpp | 18 +- .../firestarter/Environment/Environment.hpp | 21 +- .../Environment/Payload/Payload.hpp | 56 +- .../Environment/Platform/PlatformConfig.hpp | 42 +- .../Environment/Platform/RuntimeConfig.hpp | 68 ++- .../Environment/X86/Payload/AVX512Payload.hpp | 24 +- .../Environment/X86/Payload/AVXPayload.hpp | 24 +- .../Environment/X86/Payload/FMA4Payload.hpp | 29 +- .../Environment/X86/Payload/FMAPayload.hpp | 30 +- .../Environment/X86/Payload/SSE2Payload.hpp | 24 +- .../Environment/X86/Payload/X86Payload.hpp | 
47 +- .../Environment/X86/Payload/ZENFMAPayload.hpp | 22 +- .../X86/Platform/BulldozerConfig.hpp | 13 +- .../X86/Platform/HaswellConfig.hpp | 13 +- .../X86/Platform/HaswellEPConfig.hpp | 20 +- .../X86/Platform/KnightsLandingConfig.hpp | 16 +- .../Environment/X86/Platform/NaplesConfig.hpp | 20 +- .../X86/Platform/NehalemConfig.hpp | 16 +- .../X86/Platform/NehalemEPConfig.hpp | 16 +- .../Environment/X86/Platform/RomeConfig.hpp | 21 +- .../X86/Platform/SandyBridgeConfig.hpp | 20 +- .../X86/Platform/SandyBridgeEPConfig.hpp | 20 +- .../X86/Platform/SkylakeConfig.hpp | 20 +- .../X86/Platform/SkylakeSPConfig.hpp | 13 +- .../X86/Platform/X86PlatformConfig.hpp | 27 +- .../Environment/X86/X86CPUTopology.hpp | 25 +- .../Environment/X86/X86Environment.hpp | 57 +- include/firestarter/ErrorDetectionStruct.hpp | 8 +- include/firestarter/Firestarter.hpp | 60 +-- include/firestarter/Json/Summary.hpp | 9 +- include/firestarter/LoadWorkerData.hpp | 62 +-- .../Logging/FirstWorkerThreadFilter.hpp | 10 +- include/firestarter/Logging/Log.hpp | 37 +- .../Measurement/MeasurementWorker.hpp | 41 +- .../firestarter/Measurement/MetricInterface.h | 12 +- include/firestarter/Measurement/Summary.hpp | 10 +- include/firestarter/Measurement/TimeValue.hpp | 7 +- include/firestarter/OneAPI/OneAPI.hpp | 8 +- include/firestarter/Optimizer/Algorithm.hpp | 5 +- .../firestarter/Optimizer/Algorithm/NSGA2.hpp | 6 +- include/firestarter/Optimizer/History.hpp | 107 ++-- .../firestarter/Optimizer/OptimizerWorker.hpp | 13 +- include/firestarter/Optimizer/Population.hpp | 30 +- include/firestarter/Optimizer/Problem.hpp | 13 +- .../Optimizer/Problem/CLIArgumentProblem.hpp | 60 +-- .../Optimizer/Util/MultiObjective.hpp | 40 +- src/firestarter/Cuda/Cuda.cpp | 500 ++++++++---------- src/firestarter/DumpRegisterWorker.cpp | 37 +- src/firestarter/Environment/CPUTopology.cpp | 73 +-- src/firestarter/Environment/Environment.cpp | 69 +-- .../Environment/Payload/Payload.cpp | 46 +- 
.../Environment/X86/Payload/AVX512Payload.cpp | 108 ++-- .../Environment/X86/Payload/AVXPayload.cpp | 133 ++--- .../Environment/X86/Payload/FMA4Payload.cpp | 185 +++---- .../Environment/X86/Payload/FMAPayload.cpp | 118 ++--- .../Environment/X86/Payload/SSE2Payload.cpp | 128 ++--- .../Environment/X86/Payload/X86Payload.cpp | 62 +-- .../Environment/X86/Payload/ZENFMAPayload.cpp | 120 ++--- .../Environment/X86/X86CPUTopology.cpp | 21 +- .../Environment/X86/X86Environment.cpp | 110 ++-- src/firestarter/Firestarter.cpp | 198 +++---- src/firestarter/LoadWorker.cpp | 150 +++--- src/firestarter/Main.cpp | 176 +++--- .../Measurement/MeasurementWorker.cpp | 161 +++--- .../Measurement/Metric/IPCEstimate.cpp | 18 +- src/firestarter/Measurement/Metric/Perf.cpp | 71 ++- src/firestarter/Measurement/Metric/RAPL.cpp | 43 +- src/firestarter/Measurement/Summary.cpp | 14 +- src/firestarter/OneAPI/OneAPI.cpp | 190 +++---- src/firestarter/Optimizer/Algorithm/NSGA2.cpp | 55 +- src/firestarter/Optimizer/OptimizerWorker.cpp | 26 +- src/firestarter/Optimizer/Population.cpp | 24 +- .../Optimizer/Util/MultiObjective.cpp | 112 ++-- src/firestarter/WatchdogWorker.cpp | 28 +- 76 files changed, 1657 insertions(+), 2597 deletions(-) diff --git a/include/firestarter/Cuda/Cuda.hpp b/include/firestarter/Cuda/Cuda.hpp index a2f281d9..d7911eb4 100644 --- a/include/firestarter/Cuda/Cuda.hpp +++ b/include/firestarter/Cuda/Cuda.hpp @@ -34,13 +34,11 @@ class Cuda { std::condition_variable _waitForInitCv; std::mutex _waitForInitCvMutex; - static void initGpus(std::condition_variable &cv, - volatile unsigned long long *loadVar, bool useFloat, - bool useDouble, unsigned matrixSize, int gpus); + static void initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble, + unsigned matrixSize, int gpus); public: - Cuda(volatile unsigned long long *loadVar, bool useFloat, bool useDouble, - unsigned matrixSize, int gpus); + Cuda(volatile unsigned long long* loadVar, bool 
useFloat, bool useDouble, unsigned matrixSize, int gpus); ~Cuda() { if (_initThread.joinable()) { diff --git a/include/firestarter/DumpRegisterWorkerData.hpp b/include/firestarter/DumpRegisterWorkerData.hpp index f7b721d4..14ccc95f 100644 --- a/include/firestarter/DumpRegisterWorkerData.hpp +++ b/include/firestarter/DumpRegisterWorkerData.hpp @@ -21,22 +21,20 @@ #pragma once +#include #include #include -#include - #ifdef FIRESTARTER_DEBUG_FEATURES namespace firestarter { class DumpRegisterWorkerData { public: - DumpRegisterWorkerData(std::shared_ptr loadWorkerData, - std::chrono::seconds dumpTimeDelta, + DumpRegisterWorkerData(std::shared_ptr loadWorkerData, std::chrono::seconds dumpTimeDelta, std::string dumpFilePath) - : loadWorkerData(loadWorkerData), dumpTimeDelta(dumpTimeDelta) { - + : loadWorkerData(loadWorkerData) + , dumpTimeDelta(dumpTimeDelta) { if (dumpFilePath.empty()) { char cwd[PATH_MAX]; if (getcwd(cwd, sizeof(cwd)) != NULL) { diff --git a/include/firestarter/Environment/CPUTopology.hpp b/include/firestarter/Environment/CPUTopology.hpp index dcb61e96..af749d78 100644 --- a/include/firestarter/Environment/CPUTopology.hpp +++ b/include/firestarter/Environment/CPUTopology.hpp @@ -37,18 +37,16 @@ class CPUTopology { CPUTopology(std::string architecture); virtual ~CPUTopology(); - unsigned numThreads() const { - return _numThreadsPerCore * _numCoresTotal; - } + unsigned numThreads() const { return _numThreadsPerCore * _numCoresTotal; } unsigned maxNumThreads() const; unsigned numThreadsPerCore() const { return _numThreadsPerCore; } unsigned numCoresTotal() const { return _numCoresTotal; } unsigned numPackages() const { return _numPackages; } - std::string const &architecture() const { return _architecture; } - virtual std::string const &vendor() const { return _vendor; } - virtual std::string const &processorName() const { return _processorName; } - virtual std::string const &model() const = 0; + std::string const& architecture() const { return 
_architecture; } + virtual std::string const& vendor() const { return _vendor; } + virtual std::string const& processorName() const { return _processorName; } + virtual std::string const& model() const = 0; // get the size of the L1i-cache in bytes unsigned instructionCacheSize() const { return _instructionCacheSize; } @@ -56,7 +54,7 @@ class CPUTopology { // return the cpu clockrate in Hz virtual unsigned long long clockrate() const { return _clockrate; } // return the cpu features - virtual std::list const &features() const = 0; + virtual std::list const& features() const = 0; // get a timestamp virtual unsigned long long timestamp() const = 0; @@ -66,10 +64,10 @@ class CPUTopology { protected: std::string scalingGovernor() const; - std::ostream &print(std::ostream &stream) const; + std::ostream& print(std::ostream& stream) const; private: - static std::stringstream getFileAsStream(std::string const &filePath); + static std::stringstream getFileAsStream(std::string const& filePath); unsigned _numThreadsPerCore; unsigned _numCoresTotal; diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp index c76dc073..24722dc3 100644 --- a/include/firestarter/Environment/Environment.hpp +++ b/include/firestarter/Environment/Environment.hpp @@ -21,18 +21,18 @@ #pragma once +#include #include #include #include - -#include #include namespace firestarter::environment { class Environment { public: - Environment(CPUTopology *topology) : _topology(topology) {} + Environment(CPUTopology* topology) + : _topology(topology) {} ~Environment() { delete this->_topology; if (_selectedConfig != nullptr) { @@ -45,15 +45,14 @@ class Environment { void printThreadSummary(); virtual void evaluateFunctions() = 0; - virtual int selectFunction(unsigned functionId, - bool allowUnavailablePayload) = 0; + virtual int selectFunction(unsigned functionId, bool allowUnavailablePayload) = 0; virtual int selectInstructionGroups(std::string groups) = 0; 
virtual void printAvailableInstructionGroups() = 0; virtual void setLineCount(unsigned lineCount) = 0; virtual void printSelectedCodePathSummary() = 0; virtual void printFunctionSummary() = 0; - platform::RuntimeConfig &selectedConfig() const { + platform::RuntimeConfig& selectedConfig() const { #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-value" @@ -68,18 +67,16 @@ class Environment { return *_selectedConfig; } - unsigned long long requestedNumThreads() const { - return _requestedNumThreads; - } + unsigned long long requestedNumThreads() const { return _requestedNumThreads; } - CPUTopology const &topology() const { + CPUTopology const& topology() const { assert(_topology != nullptr); return *_topology; } protected: - platform::RuntimeConfig *_selectedConfig = nullptr; - CPUTopology *_topology = nullptr; + platform::RuntimeConfig* _selectedConfig = nullptr; + CPUTopology* _topology = nullptr; private: unsigned long long _requestedNumThreads; diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp index 40246ac0..f16d6879 100644 --- a/include/firestarter/Environment/Payload/Payload.hpp +++ b/include/firestarter/Environment/Payload/Payload.hpp @@ -31,8 +31,7 @@ namespace firestarter::environment::payload { class Payload { private: std::string _name; - unsigned getSequenceStartCount(const std::vector &sequence, - const std::string start); + unsigned getSequenceStartCount(const std::vector& sequence, const std::string start); protected: unsigned _flops; @@ -44,44 +43,39 @@ class Payload { // number of used simd registers unsigned _registerCount; - std::vector generateSequence( - const std::vector> &proportion); - unsigned getL2SequenceCount(const std::vector &sequence) { + std::vector generateSequence(const std::vector>& proportion); + unsigned getL2SequenceCount(const std::vector& sequence) { return getSequenceStartCount(sequence, "L2"); }; - unsigned 
getL3SequenceCount(const std::vector &sequence) { + unsigned getL3SequenceCount(const std::vector& sequence) { return getSequenceStartCount(sequence, "L3"); }; - unsigned getRAMSequenceCount(const std::vector &sequence) { + unsigned getRAMSequenceCount(const std::vector& sequence) { return getSequenceStartCount(sequence, "RAM"); }; - unsigned - getNumberOfSequenceRepetitions(const std::vector &sequence, - const unsigned numberOfLines) { + unsigned getNumberOfSequenceRepetitions(const std::vector& sequence, const unsigned numberOfLines) { if (sequence.size() == 0) { return 0; } return numberOfLines / sequence.size(); }; - unsigned getL2LoopCount(const std::vector &sequence, - const unsigned numberOfLines, const unsigned size, + unsigned getL2LoopCount(const std::vector& sequence, const unsigned numberOfLines, const unsigned size, const unsigned threads); - unsigned getL3LoopCount(const std::vector &sequence, - const unsigned numberOfLines, const unsigned size, + unsigned getL3LoopCount(const std::vector& sequence, const unsigned numberOfLines, const unsigned size, const unsigned threads); - unsigned getRAMLoopCount(const std::vector &sequence, - const unsigned numberOfLines, const unsigned size, + unsigned getRAMLoopCount(const std::vector& sequence, const unsigned numberOfLines, const unsigned size, const unsigned threads); public: Payload(std::string name, unsigned registerSize, unsigned registerCount) - : _name(name), _registerSize(registerSize), - _registerCount(registerCount) {} + : _name(name) + , _registerSize(registerSize) + , _registerCount(registerCount) {} virtual ~Payload() {} - const std::string &name() const { return _name; } + const std::string& name() const { return _name; } unsigned flops() const { return _flops; } unsigned bytes() const { return _bytes; } unsigned instructions() const { return _instructions; } @@ -90,24 +84,18 @@ class Payload { virtual bool isAvailable() const = 0; - virtual void lowLoadFunction(volatile unsigned long long 
*addrHigh, - unsigned long long period) = 0; + virtual void lowLoadFunction(volatile unsigned long long* addrHigh, unsigned long long period) = 0; - virtual int compilePayload( - std::vector> const &proportion, - unsigned instructionCacheSize, - std::list const &dataCacheBufferSize, unsigned ramBufferSize, - unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) = 0; + virtual int compilePayload(std::vector> const& proportion, + unsigned instructionCacheSize, std::list const& dataCacheBufferSize, + unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters, + bool errorDetection) = 0; virtual std::list getAvailableInstructions() const = 0; - virtual void init(unsigned long long *memoryAddr, - unsigned long long bufferSize) = 0; - virtual unsigned long long - highLoadFunction(unsigned long long *addrMem, - volatile unsigned long long *addrHigh, - unsigned long long iterations) = 0; + virtual void init(unsigned long long* memoryAddr, unsigned long long bufferSize) = 0; + virtual unsigned long long highLoadFunction(unsigned long long* addrMem, volatile unsigned long long* addrHigh, + unsigned long long iterations) = 0; - virtual Payload *clone() const = 0; + virtual Payload* clone() const = 0; }; } // namespace firestarter::environment::payload diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp index cbde3c68..b396d134 100644 --- a/include/firestarter/Environment/Platform/PlatformConfig.hpp +++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp @@ -21,10 +21,9 @@ #pragma once +#include #include #include - -#include #include #include #include @@ -36,7 +35,7 @@ class PlatformConfig { private: std::string _name; std::list _threads; - payload::Payload *_payload; + payload::Payload* _payload; protected: unsigned _instructionCacheSize; @@ -45,33 +44,31 @@ class PlatformConfig { unsigned _lines; public: - 
PlatformConfig(std::string name, std::list threads, - unsigned instructionCacheSize, - std::initializer_list dataCacheBufferSize, - unsigned ramBufferSize, unsigned lines, - payload::Payload *payload) - : _name(name), _threads(threads), _payload(payload), - _instructionCacheSize(instructionCacheSize), - _dataCacheBufferSize(dataCacheBufferSize), - _ramBufferSize(ramBufferSize), _lines(lines) {} + PlatformConfig(std::string name, std::list threads, unsigned instructionCacheSize, + std::initializer_list dataCacheBufferSize, unsigned ramBufferSize, unsigned lines, + payload::Payload* payload) + : _name(name) + , _threads(threads) + , _payload(payload) + , _instructionCacheSize(instructionCacheSize) + , _dataCacheBufferSize(dataCacheBufferSize) + , _ramBufferSize(ramBufferSize) + , _lines(lines) {} virtual ~PlatformConfig() { delete _payload; } - const std::string &name() const { return _name; } + const std::string& name() const { return _name; } unsigned instructionCacheSize() const { return _instructionCacheSize; } - const std::list &dataCacheBufferSize() const { - return _dataCacheBufferSize; - } + const std::list& dataCacheBufferSize() const { return _dataCacheBufferSize; } unsigned ramBufferSize() const { return _ramBufferSize; } unsigned lines() const { return _lines; } - payload::Payload const &payload() const { return *_payload; } + payload::Payload const& payload() const { return *_payload; } std::map getThreadMap() const { std::map threadMap; - for (auto const &thread : _threads) { + for (auto const& thread : _threads) { std::stringstream functionName; - functionName << "FUNC_" << name() << "_" << payload().name() << "_" - << thread << "T"; + functionName << "FUNC_" << name() << "_" << payload().name() << "_" << thread << "T"; threadMap[thread] = functionName.str(); } @@ -82,13 +79,12 @@ class PlatformConfig { virtual bool isDefault() const = 0; - virtual std::vector> - getDefaultPayloadSettings() const = 0; + virtual std::vector> getDefaultPayloadSettings() 
const = 0; std::string getDefaultPayloadSettingsString() const { std::stringstream ss; - for (auto const &[name, value] : this->getDefaultPayloadSettings()) { + for (auto const& [name, value] : this->getDefaultPayloadSettings()) { ss << name << ":" << value << ","; } diff --git a/include/firestarter/Environment/Platform/RuntimeConfig.hpp b/include/firestarter/Environment/Platform/RuntimeConfig.hpp index 2ed821ea..86946877 100644 --- a/include/firestarter/Environment/Platform/RuntimeConfig.hpp +++ b/include/firestarter/Environment/Platform/RuntimeConfig.hpp @@ -21,15 +21,14 @@ #pragma once -#include - #include +#include namespace firestarter::environment::platform { class RuntimeConfig { private: - PlatformConfig const &_platformConfig; + PlatformConfig const& _platformConfig; std::unique_ptr _payload; unsigned _thread; std::vector> _payloadSettings; @@ -39,31 +38,34 @@ class RuntimeConfig { unsigned _lines; public: - RuntimeConfig(PlatformConfig const &platformConfig, unsigned thread, - unsigned detectedInstructionCacheSize) - : _platformConfig(platformConfig), _payload(nullptr), _thread(thread), - _payloadSettings(platformConfig.getDefaultPayloadSettings()), - _instructionCacheSize(platformConfig.instructionCacheSize()), - _dataCacheBufferSize(platformConfig.dataCacheBufferSize()), - _ramBufferSize(platformConfig.ramBufferSize()), - _lines(platformConfig.lines()) { + RuntimeConfig(PlatformConfig const& platformConfig, unsigned thread, unsigned detectedInstructionCacheSize) + : _platformConfig(platformConfig) + , _payload(nullptr) + , _thread(thread) + , _payloadSettings(platformConfig.getDefaultPayloadSettings()) + , _instructionCacheSize(platformConfig.instructionCacheSize()) + , _dataCacheBufferSize(platformConfig.dataCacheBufferSize()) + , _ramBufferSize(platformConfig.ramBufferSize()) + , _lines(platformConfig.lines()) { if (detectedInstructionCacheSize != 0) { this->_instructionCacheSize = detectedInstructionCacheSize; } }; - RuntimeConfig(const RuntimeConfig 
&c) - : _platformConfig(c.platformConfig()), - _payload(c.platformConfig().payload().clone()), _thread(c.thread()), - _payloadSettings(c.payloadSettings()), - _instructionCacheSize(c.instructionCacheSize()), - _dataCacheBufferSize(c.dataCacheBufferSize()), - _ramBufferSize(c.ramBufferSize()), _lines(c.lines()) {} + RuntimeConfig(const RuntimeConfig& c) + : _platformConfig(c.platformConfig()) + , _payload(c.platformConfig().payload().clone()) + , _thread(c.thread()) + , _payloadSettings(c.payloadSettings()) + , _instructionCacheSize(c.instructionCacheSize()) + , _dataCacheBufferSize(c.dataCacheBufferSize()) + , _ramBufferSize(c.ramBufferSize()) + , _lines(c.lines()) {} ~RuntimeConfig() { _payload.reset(); } - PlatformConfig const &platformConfig() const { return _platformConfig; } - payload::Payload &payload() const { + PlatformConfig const& platformConfig() const { return _platformConfig; } + payload::Payload& payload() const { #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-value" @@ -80,26 +82,21 @@ class RuntimeConfig { return *_payload; } unsigned thread() const { return _thread; } - const std::vector> &payloadSettings() const { - return _payloadSettings; - } + const std::vector>& payloadSettings() const { return _payloadSettings; } std::vector payloadItems() const { std::vector items; - for (auto const &pair : _payloadSettings) { + for (auto const& pair : _payloadSettings) { items.push_back(pair.first); } return items; } unsigned instructionCacheSize() const { return _instructionCacheSize; } - const std::list &dataCacheBufferSize() const { - return _dataCacheBufferSize; - } + const std::list& dataCacheBufferSize() const { return _dataCacheBufferSize; } unsigned ramBufferSize() const { return _ramBufferSize; } unsigned lines() const { return _lines; } - void setPayloadSettings( - std::vector> const &payloadSettings) { + void setPayloadSettings(std::vector> const& payloadSettings) { this->_payloadSettings = 
payloadSettings; } @@ -107,20 +104,17 @@ class RuntimeConfig { void printCodePathSummary() const { log::info() << "\n" - << " Taking " << platformConfig().payload().name() - << " path optimized for " << platformConfig().name() << " - " - << thread() << " thread(s) per core\n" + << " Taking " << platformConfig().payload().name() << " path optimized for " << platformConfig().name() + << " - " << thread() << " thread(s) per core\n" << " Used buffersizes per thread:"; if (instructionCacheSize() != 0) { - log::info() << " - L1i-Cache: " << instructionCacheSize() / thread() - << " Bytes"; + log::info() << " - L1i-Cache: " << instructionCacheSize() / thread() << " Bytes"; } unsigned i = 1; - for (auto const &bytes : dataCacheBufferSize()) { - log::info() << " - L" << i << "d-Cache: " << bytes / thread() - << " Bytes"; + for (auto const& bytes : dataCacheBufferSize()) { + log::info() << " - L" << i << "d-Cache: " << bytes / thread() << " Bytes"; i++; } diff --git a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp index b23f1b97..e5fa736f 100644 --- a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp @@ -26,29 +26,23 @@ namespace firestarter::environment::x86::payload { class AVX512Payload final : public X86Payload { public: - AVX512Payload(asmjit::CpuFeatures const &supportedFeatures) - : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX512_F}, - "AVX512", 8, 32) {} + AVX512Payload(asmjit::CpuFeatures const& supportedFeatures) + : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX512_F}, "AVX512", 8, 32) {} - int compilePayload( - std::vector> const &proportion, - unsigned instructionCacheSize, - std::list const &dataCacheBufferSize, unsigned ramBufferSize, - unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) override; + int compilePayload(std::vector> const& 
proportion, unsigned instructionCacheSize, + std::list const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread, + unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override; std::list getAvailableInstructions() const override; - void init(unsigned long long *memoryAddr, - unsigned long long bufferSize) override; + void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override; - firestarter::environment::payload::Payload *clone() const override { + firestarter::environment::payload::Payload* clone() const override { return new AVX512Payload(this->supportedFeatures()); }; private: const std::map instructionFlops = { - {"REG", 32}, {"L1_L", 32}, {"L1_BROADCAST", 16}, {"L1_S", 16}, - {"L1_LS", 16}, {"L2_L", 32}, {"L2_S", 16}, {"L2_LS", 16}, - {"L3_L", 32}, {"L3_S", 16}, {"L3_LS", 16}, {"L3_P", 16}, + {"REG", 32}, {"L1_L", 32}, {"L1_BROADCAST", 16}, {"L1_S", 16}, {"L1_LS", 16}, {"L2_L", 32}, + {"L2_S", 16}, {"L2_LS", 16}, {"L3_L", 32}, {"L3_S", 16}, {"L3_LS", 16}, {"L3_P", 16}, {"RAM_L", 32}, {"RAM_S", 16}, {"RAM_LS", 16}, {"RAM_P", 16}}; const std::map instructionMemory = { diff --git a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp index 0a6e8014..d0e7b381 100644 --- a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp +++ b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp @@ -26,29 +26,23 @@ namespace firestarter::environment::x86::payload { class AVXPayload final : public X86Payload { public: - AVXPayload(asmjit::CpuFeatures const &supportedFeatures) - : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX}, "AVX", - 4, 16) {} + AVXPayload(asmjit::CpuFeatures const& supportedFeatures) + : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX}, "AVX", 4, 16) {} - int compilePayload( - std::vector> const &proportion, - unsigned instructionCacheSize, - std::list const &dataCacheBufferSize, unsigned ramBufferSize, - 
unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) override; + int compilePayload(std::vector> const& proportion, unsigned instructionCacheSize, + std::list const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread, + unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override; std::list getAvailableInstructions() const override; - void init(unsigned long long *memoryAddr, - unsigned long long bufferSize) override; + void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override; - firestarter::environment::payload::Payload *clone() const override { + firestarter::environment::payload::Payload* clone() const override { return new AVXPayload(this->supportedFeatures()); }; private: const std::map instructionFlops = { - {"REG", 4}, {"L1_L", 4}, {"L1_S", 4}, {"L1_LS", 4}, {"L2_L", 4}, - {"L2_S", 4}, {"L2_LS", 4}, {"L3_L", 4}, {"L3_S", 4}, {"L3_LS", 4}, - {"L3_P", 4}, {"RAM_L", 4}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}}; + {"REG", 4}, {"L1_L", 4}, {"L1_S", 4}, {"L1_LS", 4}, {"L2_L", 4}, {"L2_S", 4}, {"L2_LS", 4}, {"L3_L", 4}, + {"L3_S", 4}, {"L3_LS", 4}, {"L3_P", 4}, {"RAM_L", 4}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}}; const std::map instructionMemory = { {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}}; diff --git a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp index 47d8a778..6a1d3ee5 100644 --- a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp @@ -27,31 +27,24 @@ namespace firestarter::environment::x86::payload { class FMA4Payload final : public X86Payload { public: - FMA4Payload(asmjit::CpuFeatures const &supportedFeatures) - : X86Payload( - supportedFeatures, - {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4}, - "FMA4", 4, 16) {} - - int compilePayload( - std::vector> const &proportion, - unsigned 
instructionCacheSize, - std::list const &dataCacheBufferSize, unsigned ramBufferSize, - unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) override; + FMA4Payload(asmjit::CpuFeatures const& supportedFeatures) + : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4}, "FMA4", 4, + 16) {} + + int compilePayload(std::vector> const& proportion, unsigned instructionCacheSize, + std::list const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread, + unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override; std::list getAvailableInstructions() const override; - void init(unsigned long long *memoryAddr, - unsigned long long bufferSize) override; + void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override; - firestarter::environment::payload::Payload *clone() const override { + firestarter::environment::payload::Payload* clone() const override { return new FMA4Payload(this->supportedFeatures()); }; private: const std::map instructionFlops = { - {"REG", 8}, {"L1_L", 12}, {"L1_S", 8}, {"L1_LS", 8}, {"L2_L", 8}, - {"L2_S", 4}, {"L2_LS", 4}, {"L3_L", 8}, {"L3_S", 4}, {"L3_LS", 4}, - {"L3_P", 4}, {"RAM_L", 8}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}}; + {"REG", 8}, {"L1_L", 12}, {"L1_S", 8}, {"L1_LS", 8}, {"L2_L", 8}, {"L2_S", 4}, {"L2_LS", 4}, {"L3_L", 8}, + {"L3_S", 4}, {"L3_LS", 4}, {"L3_P", 4}, {"RAM_L", 8}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}}; const std::map instructionMemory = { {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}}; diff --git a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp index 57ab455d..da6c2b5a 100644 --- a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp +++ b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp @@ -26,33 +26,25 @@ namespace firestarter::environment::x86::payload { class FMAPayload final : public 
X86Payload { public: - FMAPayload(asmjit::CpuFeatures const &supportedFeatures) - : X86Payload(supportedFeatures, - {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA}, - "FMA", 4, 16) {} + FMAPayload(asmjit::CpuFeatures const& supportedFeatures) + : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA}, "FMA", 4, 16) {} - int compilePayload( - std::vector> const &proportion, - unsigned instructionCacheSize, - std::list const &dataCacheBufferSize, unsigned ramBufferSize, - unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) override; + int compilePayload(std::vector> const& proportion, unsigned instructionCacheSize, + std::list const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread, + unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override; std::list getAvailableInstructions() const override; - void init(unsigned long long *memoryAddr, - unsigned long long bufferSize) override; + void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override; - firestarter::environment::payload::Payload *clone() const override { + firestarter::environment::payload::Payload* clone() const override { return new FMAPayload(this->supportedFeatures()); }; private: const std::map instructionFlops = { - {"REG", 16}, {"L1_L", 16}, {"L1_2L", 16}, {"L1_S", 8}, - {"L1_LS", 8}, {"L1_LS_256", 8}, {"L1_2LS_256", 16}, {"L2_L", 16}, - {"L2_S", 8}, {"L2_LS", 8}, {"L2_LS_256", 8}, {"L2_2LS_256", 16}, - {"L3_L", 16}, {"L3_S", 8}, {"L3_LS", 8}, {"L3_LS_256", 8}, - {"L3_P", 8}, {"RAM_L", 16}, {"RAM_S", 8}, {"RAM_LS", 8}, - {"RAM_P", 8}}; + {"REG", 16}, {"L1_L", 16}, {"L1_2L", 16}, {"L1_S", 8}, {"L1_LS", 8}, {"L1_LS_256", 8}, + {"L1_2LS_256", 16}, {"L2_L", 16}, {"L2_S", 8}, {"L2_LS", 8}, {"L2_LS_256", 8}, {"L2_2LS_256", 16}, + {"L3_L", 16}, {"L3_S", 8}, {"L3_LS", 8}, {"L3_LS_256", 8}, {"L3_P", 8}, {"RAM_L", 16}, + {"RAM_S", 8}, {"RAM_LS", 8}, {"RAM_P", 8}}; const 
std::map instructionMemory = { {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}}; diff --git a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp index d02a28e9..d923c9b3 100644 --- a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp @@ -26,29 +26,23 @@ namespace firestarter::environment::x86::payload { class SSE2Payload final : public X86Payload { public: - SSE2Payload(asmjit::CpuFeatures const &supportedFeatures) - : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kSSE2}, - "SSE2", 2, 16) {} + SSE2Payload(asmjit::CpuFeatures const& supportedFeatures) + : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kSSE2}, "SSE2", 2, 16) {} - int compilePayload( - std::vector> const &proportion, - unsigned instructionCacheSize, - std::list const &dataCacheBufferSize, unsigned ramBufferSize, - unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) override; + int compilePayload(std::vector> const& proportion, unsigned instructionCacheSize, + std::list const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread, + unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override; std::list getAvailableInstructions() const override; - void init(unsigned long long *memoryAddr, - unsigned long long bufferSize) override; + void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override; - firestarter::environment::payload::Payload *clone() const override { + firestarter::environment::payload::Payload* clone() const override { return new SSE2Payload(this->supportedFeatures()); }; private: const std::map instructionFlops = { - {"REG", 2}, {"L1_L", 2}, {"L1_S", 2}, {"L1_LS", 2}, {"L2_L", 2}, - {"L2_S", 2}, {"L2_LS", 2}, {"L3_L", 2}, {"L3_S", 2}, {"L3_LS", 2}, - {"L3_P", 2}, {"RAM_L", 2}, {"RAM_S", 2}, {"RAM_LS", 2}, {"RAM_P", 2}}; + {"REG", 2}, 
{"L1_L", 2}, {"L1_S", 2}, {"L1_LS", 2}, {"L2_L", 2}, {"L2_S", 2}, {"L2_LS", 2}, {"L3_L", 2}, + {"L3_S", 2}, {"L3_LS", 2}, {"L3_P", 2}, {"RAM_L", 2}, {"RAM_S", 2}, {"RAM_LS", 2}, {"RAM_P", 2}}; const std::map instructionMemory = { {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}}; diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp index c0ebadc5..87d5e0be 100644 --- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp @@ -21,13 +21,12 @@ #pragma once -#include -#include +#include #include +#include #include - -#include +#include #define INIT_BLOCKSIZE 1024 @@ -36,41 +35,34 @@ namespace firestarter::environment::x86::payload { class X86Payload : public environment::payload::Payload { private: // we can use this to check, if our platform support this payload - asmjit::CpuFeatures const &_supportedFeatures; + asmjit::CpuFeatures const& _supportedFeatures; std::list featureRequests; protected: // asmjit::CodeHolder code; asmjit::JitRuntime rt; // typedef int (*LoadFunction)(firestarter::ThreadData *); - typedef unsigned long long (*LoadFunction)(unsigned long long *, - volatile unsigned long long *, - unsigned long long); + typedef unsigned long long (*LoadFunction)(unsigned long long*, volatile unsigned long long*, unsigned long long); LoadFunction loadFunction = nullptr; - asmjit::CpuFeatures const &supportedFeatures() const { - return this->_supportedFeatures; - } + asmjit::CpuFeatures const& supportedFeatures() const { return this->_supportedFeatures; } template - void emitErrorDetectionCode(asmjit::x86::Builder &cb, IterReg iter_reg, - asmjit::x86::Gpq addrHigh_reg, - asmjit::x86::Gpq pointer_reg, - asmjit::x86::Gpq temp_reg, - asmjit::x86::Gpq temp_reg2); + void emitErrorDetectionCode(asmjit::x86::Builder& cb, IterReg iter_reg, asmjit::x86::Gpq addrHigh_reg, + asmjit::x86::Gpq pointer_reg, 
asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2); public: - X86Payload(asmjit::CpuFeatures const &supportedFeatures, - std::initializer_list featureRequests, - std::string name, unsigned registerSize, unsigned registerCount) - : Payload(name, registerSize, registerCount), - _supportedFeatures(supportedFeatures), - featureRequests(featureRequests) {} + X86Payload(asmjit::CpuFeatures const& supportedFeatures, + std::initializer_list featureRequests, std::string name, + unsigned registerSize, unsigned registerCount) + : Payload(name, registerSize, registerCount) + , _supportedFeatures(supportedFeatures) + , featureRequests(featureRequests) {} bool isAvailable() const override { bool available = true; - for (auto const &feature : featureRequests) { + for (auto const& feature : featureRequests) { available &= this->_supportedFeatures.has(feature); } @@ -84,18 +76,15 @@ class X86Payload : public environment::payload::Payload { #endif #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Woverloaded-virtual" - void init(unsigned long long *memoryAddr, unsigned long long bufferSize, - double firstValue, double lastValue); + void init(unsigned long long* memoryAddr, unsigned long long bufferSize, double firstValue, double lastValue); #pragma GCC diagnostic pop #if defined(__clang__) #pragma clang diagnostic pop #endif // use cpuid and usleep as low load - void lowLoadFunction(volatile unsigned long long *addrHigh, - unsigned long long period) override; + void lowLoadFunction(volatile unsigned long long* addrHigh, unsigned long long period) override; - unsigned long long highLoadFunction(unsigned long long *addrMem, - volatile unsigned long long *addrHigh, + unsigned long long highLoadFunction(unsigned long long* addrMem, volatile unsigned long long* addrHigh, unsigned long long iterations) override; }; diff --git a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp index 
a1776f37..7254cb55 100644 --- a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp +++ b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp @@ -26,23 +26,17 @@ namespace firestarter::environment::x86::payload { class ZENFMAPayload final : public X86Payload { public: - ZENFMAPayload(asmjit::CpuFeatures const &supportedFeatures) - : X86Payload( - supportedFeatures, - {asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA}, - "ZENFMA", 4, 16) {} + ZENFMAPayload(asmjit::CpuFeatures const& supportedFeatures) + : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA}, + "ZENFMA", 4, 16) {} - int compilePayload( - std::vector> const &proportion, - unsigned instructionCacheSize, - std::list const &dataCacheBufferSize, unsigned ramBufferSize, - unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) override; + int compilePayload(std::vector> const& proportion, unsigned instructionCacheSize, + std::list const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread, + unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override; std::list getAvailableInstructions() const override; - void init(unsigned long long *memoryAddr, - unsigned long long bufferSize) override; + void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override; - firestarter::environment::payload::Payload *clone() const override { + firestarter::environment::payload::Payload* clone() const override { return new ZENFMAPayload(this->supportedFeatures()); }; diff --git a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp index 12a922b9..4cc4b811 100644 --- a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp @@ -26,17 +26,12 @@ namespace firestarter::environment::x86::platform { class 
BulldozerConfig final : public X86PlatformConfig { - public: - BulldozerConfig(asmjit::CpuFeatures const &supportedFeatures, - unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("BLD_OPTERON", 21, {1, 2, 3}, {1}, 0, - {16384, 1048576, 786432}, 104857600, 1536, family, - model, threads, - new payload::FMA4Payload(supportedFeatures)) {} + BulldozerConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) + : X86PlatformConfig("BLD_OPTERON", 21, {1, 2, 3}, {1}, 0, {16384, 1048576, 786432}, 104857600, 1536, family, + model, threads, new payload::FMA4Payload(supportedFeatures)) {} - std::vector> - getDefaultPayloadSettings() const override { + std::vector> getDefaultPayloadSettings() const override { return std::vector>( {{"RAM_L", 1}, {"L3_L", 1}, {"L2_LS", 5}, {"L1_L", 90}, {"REG", 45}}); } diff --git a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp index f079ec18..5b30d6a0 100644 --- a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp @@ -26,17 +26,12 @@ namespace firestarter::environment::x86::platform { class HaswellConfig final : public X86PlatformConfig { - public: - HaswellConfig(asmjit::CpuFeatures const &supportedFeatures, unsigned family, - unsigned model, unsigned threads) - : X86PlatformConfig("HSW_COREI", 6, {60, 61, 69, 70, 71}, {1, 2}, 0, - {32768, 262144, 1572864}, 104857600, 1536, family, - model, threads, - new payload::FMAPayload(supportedFeatures)) {} + HaswellConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) + : X86PlatformConfig("HSW_COREI", 6, {60, 61, 69, 70, 71}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, + family, model, threads, new payload::FMAPayload(supportedFeatures)) {} - std::vector> - getDefaultPayloadSettings() const override { + std::vector> 
getDefaultPayloadSettings() const override { return std::vector>( {{"RAM_L", 2}, {"L3_LS", 3}, {"L2_LS", 9}, {"L1_LS", 90}, {"REG", 40}}); } diff --git a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp index df5a1927..106dd0e3 100644 --- a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp @@ -26,22 +26,14 @@ namespace firestarter::environment::x86::platform { class HaswellEPConfig final : public X86PlatformConfig { - public: - HaswellEPConfig(asmjit::CpuFeatures const &supportedFeatures, - unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("HSW_XEONEP", 6, {63, 79}, {1, 2}, 0, - {32768, 262144, 2621440}, 104857600, 1536, family, - model, threads, - new payload::FMAPayload(supportedFeatures)) {} + HaswellEPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) + : X86PlatformConfig("HSW_XEONEP", 6, {63, 79}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, family, + model, threads, new payload::FMAPayload(supportedFeatures)) {} - std::vector> - getDefaultPayloadSettings() const override { - return std::vector>({{"RAM_L", 8}, - {"L3_LS", 1}, - {"L2_LS", 29}, - {"L1_LS", 100}, - {"REG", 100}}); + std::vector> getDefaultPayloadSettings() const override { + return std::vector>( + {{"RAM_L", 8}, {"L3_LS", 1}, {"L2_LS", 29}, {"L1_LS", 100}, {"REG", 100}}); } }; } // namespace firestarter::environment::x86::platform diff --git a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp index de520c56..709ef934 100644 --- a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp @@ -26,19 +26,13 @@ namespace firestarter::environment::x86::platform { class 
KnightsLandingConfig final : public X86PlatformConfig { - public: - KnightsLandingConfig(asmjit::CpuFeatures const &supportedFeatures, - unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("KNL_XEONPHI", 6, {87}, {4}, 0, - {32768, 524288, 236279125}, 26214400, 1536, family, - model, threads, - new payload::AVX512Payload(supportedFeatures)) {} + KnightsLandingConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) + : X86PlatformConfig("KNL_XEONPHI", 6, {87}, {4}, 0, {32768, 524288, 236279125}, 26214400, 1536, family, model, + threads, new payload::AVX512Payload(supportedFeatures)) {} - std::vector> - getDefaultPayloadSettings() const override { - return std::vector>( - {{"RAM_P", 3}, {"L2_S", 8}, {"L1_L", 40}, {"REG", 10}}); + std::vector> getDefaultPayloadSettings() const override { + return std::vector>({{"RAM_P", 3}, {"L2_S", 8}, {"L1_L", 40}, {"REG", 10}}); } }; } // namespace firestarter::environment::x86::platform diff --git a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp index 0ad94682..5ad0a065 100644 --- a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp @@ -26,22 +26,14 @@ namespace firestarter::environment::x86::platform { class NaplesConfig final : public X86PlatformConfig { - public: - NaplesConfig(asmjit::CpuFeatures const &supportedFeatures, unsigned family, - unsigned model, unsigned threads) - : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24}, {1, 2}, 0, - {65536, 524288, 2097152}, 104857600, 1536, family, - model, threads, - new payload::ZENFMAPayload(supportedFeatures)) {} + NaplesConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) + : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24}, {1, 2}, 0, {65536, 524288, 2097152}, 104857600, 1536, family, + model, threads, new 
payload::ZENFMAPayload(supportedFeatures)) {} - std::vector> - getDefaultPayloadSettings() const override { - return std::vector>({{"RAM_L", 3}, - {"L3_L", 14}, - {"L2_L", 75}, - {"L1_LS", 81}, - {"REG", 100}}); + std::vector> getDefaultPayloadSettings() const override { + return std::vector>( + {{"RAM_L", 3}, {"L3_L", 14}, {"L2_L", 75}, {"L1_LS", 81}, {"REG", 100}}); } }; } // namespace firestarter::environment::x86::platform diff --git a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp index da7764d4..3f0748de 100644 --- a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp @@ -26,19 +26,13 @@ namespace firestarter::environment::x86::platform { class NehalemConfig final : public X86PlatformConfig { - public: - NehalemConfig(asmjit::CpuFeatures const &supportedFeatures, unsigned family, - unsigned model, unsigned threads) - : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23}, {1, 2}, 0, - {32768, 262144, 1572864}, 104857600, 1536, family, - model, threads, - new payload::SSE2Payload(supportedFeatures)) {} + NehalemConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) + : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, family, + model, threads, new payload::SSE2Payload(supportedFeatures)) {} - std::vector> - getDefaultPayloadSettings() const override { - return std::vector>( - {{"RAM_P", 1}, {"L1_LS", 70}, {"REG", 2}}); + std::vector> getDefaultPayloadSettings() const override { + return std::vector>({{"RAM_P", 1}, {"L1_LS", 70}, {"REG", 2}}); } }; } // namespace firestarter::environment::x86::platform diff --git a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp index 06ac2f64..a738fb7f 100644 --- 
a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp @@ -26,19 +26,13 @@ namespace firestarter::environment::x86::platform { class NehalemEPConfig final : public X86PlatformConfig { - public: - NehalemEPConfig(asmjit::CpuFeatures const &supportedFeatures, - unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("NHM_XEONEP", 6, {26, 44}, {1, 2}, 0, - {32768, 262144, 2097152}, 104857600, 1536, family, - model, threads, - new payload::SSE2Payload(supportedFeatures)) {} + NehalemEPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) + : X86PlatformConfig("NHM_XEONEP", 6, {26, 44}, {1, 2}, 0, {32768, 262144, 2097152}, 104857600, 1536, family, + model, threads, new payload::SSE2Payload(supportedFeatures)) {} - std::vector> - getDefaultPayloadSettings() const override { - return std::vector>( - {{"RAM_P", 1}, {"L1_LS", 60}, {"REG", 2}}); + std::vector> getDefaultPayloadSettings() const override { + return std::vector>({{"RAM_P", 1}, {"L1_LS", 60}, {"REG", 2}}); } }; } // namespace firestarter::environment::x86::platform diff --git a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp index f7569bf4..230d91ba 100644 --- a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp @@ -26,23 +26,14 @@ namespace firestarter::environment::x86::platform { class RomeConfig final : public X86PlatformConfig { - public: - RomeConfig(asmjit::CpuFeatures const &supportedFeatures, unsigned family, - unsigned model, unsigned threads) - : X86PlatformConfig("ZEN_2_EPYC", 23, {49}, {1, 2}, 0, - {32768, 524288, 2097152}, 104857600, 1536, family, - model, threads, - new payload::FMAPayload(supportedFeatures)) {} + RomeConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, 
unsigned threads) + : X86PlatformConfig("ZEN_2_EPYC", 23, {49}, {1, 2}, 0, {32768, 524288, 2097152}, 104857600, 1536, family, model, + threads, new payload::FMAPayload(supportedFeatures)) {} - std::vector> - getDefaultPayloadSettings() const override { - return std::vector>({{"RAM_L", 10}, - {"L3_L", 25}, - {"L2_L", 91}, - {"L1_2LS_256", 72}, - {"L1_LS_256", 82}, - {"REG", 75}}); + std::vector> getDefaultPayloadSettings() const override { + return std::vector>( + {{"RAM_L", 10}, {"L3_L", 25}, {"L2_L", 91}, {"L1_2LS_256", 72}, {"L1_LS_256", 82}, {"REG", 75}}); } }; } // namespace firestarter::environment::x86::platform diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp index 7e928c1f..a58e193a 100644 --- a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp @@ -26,22 +26,14 @@ namespace firestarter::environment::x86::platform { class SandyBridgeConfig final : public X86PlatformConfig { - public: - SandyBridgeConfig(asmjit::CpuFeatures const &supportedFeatures, - unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("SNB_COREI", 6, {42, 58}, {1, 2}, 0, - {32768, 262144, 1572864}, 104857600, 1536, family, - model, threads, - new payload::AVXPayload(supportedFeatures)) {} + SandyBridgeConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) + : X86PlatformConfig("SNB_COREI", 6, {42, 58}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, family, model, + threads, new payload::AVXPayload(supportedFeatures)) {} - std::vector> - getDefaultPayloadSettings() const override { - return std::vector>({{"RAM_L", 2}, - {"L3_LS", 4}, - {"L2_LS", 10}, - {"L1_LS", 90}, - {"REG", 45}}); + std::vector> getDefaultPayloadSettings() const override { + return std::vector>( + {{"RAM_L", 2}, {"L3_LS", 4}, {"L2_LS", 10}, {"L1_LS", 90}, 
{"REG", 45}}); } }; } // namespace firestarter::environment::x86::platform diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp index cb7fcb43..3f4f6303 100644 --- a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp @@ -27,22 +27,14 @@ namespace firestarter::environment::x86::platform { class SandyBridgeEPConfig final : public X86PlatformConfig { - public: - SandyBridgeEPConfig(asmjit::CpuFeatures const &supportedFeatures, - unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("SNB_XEONEP", 6, {45, 62}, {1, 2}, 0, - {32768, 262144, 2621440}, 104857600, 1536, family, - model, threads, - new payload::AVXPayload(supportedFeatures)) {} + SandyBridgeEPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) + : X86PlatformConfig("SNB_XEONEP", 6, {45, 62}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, family, + model, threads, new payload::AVXPayload(supportedFeatures)) {} - std::vector> - getDefaultPayloadSettings() const override { - return std::vector>({{"RAM_L", 3}, - {"L3_LS", 2}, - {"L2_LS", 10}, - {"L1_LS", 90}, - {"REG", 30}}); + std::vector> getDefaultPayloadSettings() const override { + return std::vector>( + {{"RAM_L", 3}, {"L3_LS", 2}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 30}}); } }; } // namespace firestarter::environment::x86::platform diff --git a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp index aec85be8..c533c3a5 100644 --- a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp @@ -27,22 +27,14 @@ namespace firestarter::environment::x86::platform { class SkylakeConfig final : public X86PlatformConfig { - public: - 
SkylakeConfig(asmjit::CpuFeatures const &supportedFeatures, unsigned family, - unsigned model, unsigned threads) - : X86PlatformConfig("SKL_COREI", 6, {78, 94}, {1, 2}, 0, - {32768, 262144, 1572864}, 104857600, 1536, family, - model, threads, - new payload::FMAPayload(supportedFeatures)) {} + SkylakeConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) + : X86PlatformConfig("SKL_COREI", 6, {78, 94}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, family, model, + threads, new payload::FMAPayload(supportedFeatures)) {} - std::vector> - getDefaultPayloadSettings() const override { - return std::vector>({{"RAM_L", 3}, - {"L3_LS_256", 5}, - {"L2_LS_256", 18}, - {"L1_2LS_256", 78}, - {"REG", 40}}); + std::vector> getDefaultPayloadSettings() const override { + return std::vector>( + {{"RAM_L", 3}, {"L3_LS_256", 5}, {"L2_LS_256", 18}, {"L1_2LS_256", 78}, {"REG", 40}}); } }; } // namespace firestarter::environment::x86::platform diff --git a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp index be767d0b..8243d9d6 100644 --- a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp @@ -26,17 +26,12 @@ namespace firestarter::environment::x86::platform { class SkylakeSPConfig final : public X86PlatformConfig { - public: - SkylakeSPConfig(asmjit::CpuFeatures const &supportedFeatures, - unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("SKL_XEONEP", 6, {85}, {1, 2}, 0, - {32768, 1048576, 1441792}, 1048576000, 1536, family, - model, threads, - new payload::AVX512Payload(supportedFeatures)) {} + SkylakeSPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) + : X86PlatformConfig("SKL_XEONEP", 6, {85}, {1, 2}, 0, {32768, 1048576, 1441792}, 1048576000, 1536, family, model, + threads, new 
payload::AVX512Payload(supportedFeatures)) {} - std::vector> - getDefaultPayloadSettings() const override { + std::vector> getDefaultPayloadSettings() const override { return std::vector>({{"RAM_S", 3}, {"RAM_P", 1}, {"L3_S", 1}, diff --git a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp index 45956f38..648346d8 100644 --- a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp @@ -35,23 +35,20 @@ class X86PlatformConfig : public environment::platform::PlatformConfig { unsigned _currentThreads; public: - X86PlatformConfig(std::string name, unsigned family, - std::initializer_list models, - std::initializer_list threads, - unsigned instructionCacheSize, - std::initializer_list dataCacheBufferSize, - unsigned ramBuffersize, unsigned lines, - unsigned currentFamily, unsigned currentModel, - unsigned currentThreads, payload::X86Payload *payload) - : PlatformConfig(name, threads, instructionCacheSize, dataCacheBufferSize, - ramBuffersize, lines, payload), - _family(family), _models(models), _currentFamily(currentFamily), - _currentModel(currentModel), _currentThreads(currentThreads) {} + X86PlatformConfig(std::string name, unsigned family, std::initializer_list models, + std::initializer_list threads, unsigned instructionCacheSize, + std::initializer_list dataCacheBufferSize, unsigned ramBuffersize, unsigned lines, + unsigned currentFamily, unsigned currentModel, unsigned currentThreads, + payload::X86Payload* payload) + : PlatformConfig(name, threads, instructionCacheSize, dataCacheBufferSize, ramBuffersize, lines, payload) + , _family(family) + , _models(models) + , _currentFamily(currentFamily) + , _currentModel(currentModel) + , _currentThreads(currentThreads) {} bool isDefault() const override { - return _family == _currentFamily && - (std::find(_models.begin(), _models.end(), _currentModel) 
!= - _models.end()) && + return _family == _currentFamily && (std::find(_models.begin(), _models.end(), _currentModel) != _models.end()) && isAvailable(); } }; diff --git a/include/firestarter/Environment/X86/X86CPUTopology.hpp b/include/firestarter/Environment/X86/X86CPUTopology.hpp index 44a02dc2..fa3b033f 100644 --- a/include/firestarter/Environment/X86/X86CPUTopology.hpp +++ b/include/firestarter/Environment/X86/X86CPUTopology.hpp @@ -21,28 +21,23 @@ #pragma once -#include - #include +#include + namespace firestarter::environment::x86 { class X86CPUTopology final : public CPUTopology { public: X86CPUTopology(); - friend std::ostream &operator<<(std::ostream &stream, - X86CPUTopology const &cpuTopology); + friend std::ostream& operator<<(std::ostream& stream, X86CPUTopology const& cpuTopology); - std::list const &features() const override { - return this->featureList; - } - const asmjit::CpuFeatures& featuresAsmjit() const{ - return this->cpuInfo.features(); - } + std::list const& features() const override { return this->featureList; } + const asmjit::CpuFeatures& featuresAsmjit() const { return this->cpuInfo.features(); } - std::string const &vendor() const override { return this->_vendor; } - std::string const &model() const override { return this->_model; } + std::string const& vendor() const override { return this->_vendor; } + std::string const& model() const override { return this->_model; } unsigned long long clockrate() const override; @@ -55,8 +50,7 @@ class X86CPUTopology final : public CPUTopology { private: bool hasRdtsc() const { return this->_hasRdtsc; } bool hasInvariantRdtsc() const { return this->_hasInvariantRdtsc; } - void cpuid(unsigned long long *a, unsigned long long *b, - unsigned long long *c, unsigned long long *d) const; + void cpuid(unsigned long long* a, unsigned long long* b, unsigned long long* c, unsigned long long* d) const; asmjit::CpuInfo cpuInfo; std::list featureList; @@ -67,8 +61,7 @@ class X86CPUTopology final : public 
CPUTopology { std::string _model; }; -inline std::ostream &operator<<(std::ostream &stream, - X86CPUTopology const &cpuTopology) { +inline std::ostream& operator<<(std::ostream& stream, X86CPUTopology const& cpuTopology) { return cpuTopology.print(stream); } diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp index 11ad940e..b0e3aa8d 100644 --- a/include/firestarter/Environment/X86/X86Environment.hpp +++ b/include/firestarter/Environment/X86/X86Environment.hpp @@ -21,9 +21,9 @@ #pragma once -#include -#include +#include +#include #include #include #include @@ -37,39 +37,35 @@ #include #include #include - -#include - +#include #include -#define REGISTER(NAME) \ - [](asmjit::CpuFeatures const &supportedFeatures, unsigned family, \ - unsigned model, unsigned threads) -> platform::X86PlatformConfig * { \ - return new platform::NAME(supportedFeatures, family, model, threads); \ +#define REGISTER(NAME) \ + [](asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, \ + unsigned threads) -> platform::X86PlatformConfig* { \ + return new platform::NAME(supportedFeatures, family, model, threads); \ } namespace firestarter::environment::x86 { class X86Environment final : public Environment { public: - X86Environment() : Environment(new X86CPUTopology()) {} + X86Environment() + : Environment(new X86CPUTopology()) {} ~X86Environment() { - for (auto const &config : platformConfigs) { + for (auto const& config : platformConfigs) { delete config; } - for (auto const &config : fallbackPlatformConfigs) { + for (auto const& config : fallbackPlatformConfigs) { delete config; } } - X86CPUTopology const &topology() { - return *reinterpret_cast(this->_topology); - } + X86CPUTopology const& topology() { return *reinterpret_cast(this->_topology); } void evaluateFunctions() override; - int selectFunction(unsigned functionId, - bool allowUnavailablePayload) override; + int selectFunction(unsigned 
functionId, bool allowUnavailablePayload) override; int selectInstructionGroups(std::string groups) override; void printAvailableInstructionGroups() override; void setLineCount(unsigned lineCount) override; @@ -77,24 +73,19 @@ class X86Environment final : public Environment { void printFunctionSummary() override; private: - // The available function IDs are generated by iterating through this list of - // PlatformConfig. Add new PlatformConfig at the bottom to maintain stable - // IDs. - const std::list> - platformConfigsCtor = { - REGISTER(KnightsLandingConfig), REGISTER(SkylakeConfig), - REGISTER(SkylakeSPConfig), REGISTER(HaswellConfig), - REGISTER(HaswellEPConfig), REGISTER(SandyBridgeConfig), - REGISTER(SandyBridgeEPConfig), REGISTER(NehalemConfig), - REGISTER(NehalemEPConfig), REGISTER(BulldozerConfig), - REGISTER(NaplesConfig), REGISTER(RomeConfig)}; + // The available function IDs are generated by iterating through this list + // of PlatformConfig. Add new PlatformConfig at the bottom to maintain + // stable IDs. + const std::list> + platformConfigsCtor = {REGISTER(KnightsLandingConfig), REGISTER(SkylakeConfig), REGISTER(SkylakeSPConfig), + REGISTER(HaswellConfig), REGISTER(HaswellEPConfig), REGISTER(SandyBridgeConfig), + REGISTER(SandyBridgeEPConfig), REGISTER(NehalemConfig), REGISTER(NehalemEPConfig), + REGISTER(BulldozerConfig), REGISTER(NaplesConfig), REGISTER(RomeConfig)}; - std::list platformConfigs; + std::list platformConfigs; // List of fallback PlatformConfig. Add one for each x86 extension. 
- const std::list> + const std::list> fallbackPlatformConfigsCtor = { REGISTER(SkylakeSPConfig), // AVX512 REGISTER(BulldozerConfig), // FMA4 @@ -103,7 +94,7 @@ class X86Environment final : public Environment { REGISTER(NehalemConfig) // SSE2 }; - std::list fallbackPlatformConfigs; + std::list fallbackPlatformConfigs; #undef REGISTER }; diff --git a/include/firestarter/ErrorDetectionStruct.hpp b/include/firestarter/ErrorDetectionStruct.hpp index 38bcbc6a..4ed2e9fa 100644 --- a/include/firestarter/ErrorDetectionStruct.hpp +++ b/include/firestarter/ErrorDetectionStruct.hpp @@ -28,14 +28,14 @@ struct ErrorDetectionStruct { // one ptr (8B) // the pointer to 16B of communication - volatile unsigned long long *communicationLeft; + volatile unsigned long long* communicationLeft; volatile unsigned long long localsLeft[4]; - // if this variable is not 0, an error occured in the comparison with the left - // thread. + // if this variable is not 0, an error occured in the comparison with the + // left thread. volatile unsigned long long errorLeft; volatile unsigned long long paddingLeft[2]; - volatile unsigned long long *communicationRight; + volatile unsigned long long* communicationRight; volatile unsigned long long localsRight[4]; // if this variable is not 0, an error occured in the comparison with the // right thread. 
diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp index 31347dd2..cb0218f0 100644 --- a/include/firestarter/Firestarter.hpp +++ b/include/firestarter/Firestarter.hpp @@ -29,8 +29,6 @@ #include #endif - - #include #if defined(linux) || defined(__linux__) @@ -43,8 +41,7 @@ #include #include -#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ - defined(_M_X64) +#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) #include #endif @@ -66,28 +63,18 @@ namespace firestarter { class Firestarter { public: - Firestarter(const int argc, const char **argv, - std::chrono::seconds const &timeout, unsigned loadPercent, - std::chrono::microseconds const &period, - unsigned requestedNumThreads, std::string const &cpuBind, - bool printFunctionSummary, unsigned functionId, - bool listInstructionGroups, std::string const &instructionGroups, - unsigned lineCount, bool allowUnavailablePayload, - bool dumpRegisters, - std::chrono::seconds const &dumpRegistersTimeDelta, - std::string const &dumpRegistersOutpath, bool errorDetection, - int gpus, unsigned gpuMatrixSize, bool gpuUseFloat, - bool gpuUseDouble, bool listMetrics, bool measurement, - std::chrono::milliseconds const &startDelta, - std::chrono::milliseconds const &stopDelta, - std::chrono::milliseconds const &measurementInterval, - std::vector const &metricPaths, - std::vector const &stdinMetrics, bool optimize, - std::chrono::seconds const &preheat, - std::string const &optimizationAlgorithm, - std::vector const &optimizationMetrics, - std::chrono::seconds const &evaluationDuration, - unsigned individuals, std::string const &optimizeOutfile, + Firestarter(const int argc, const char** argv, std::chrono::seconds const& timeout, unsigned loadPercent, + std::chrono::microseconds const& period, unsigned requestedNumThreads, std::string const& cpuBind, + bool printFunctionSummary, unsigned functionId, bool listInstructionGroups, + std::string const& 
instructionGroups, unsigned lineCount, bool allowUnavailablePayload, + bool dumpRegisters, std::chrono::seconds const& dumpRegistersTimeDelta, + std::string const& dumpRegistersOutpath, bool errorDetection, int gpus, unsigned gpuMatrixSize, + bool gpuUseFloat, bool gpuUseDouble, bool listMetrics, bool measurement, + std::chrono::milliseconds const& startDelta, std::chrono::milliseconds const& stopDelta, + std::chrono::milliseconds const& measurementInterval, std::vector const& metricPaths, + std::vector const& stdinMetrics, bool optimize, std::chrono::seconds const& preheat, + std::string const& optimizationAlgorithm, std::vector const& optimizationMetrics, + std::chrono::seconds const& evaluationDuration, unsigned individuals, std::string const& optimizeOutfile, unsigned generations, double nsga2_cr, double nsga2_m); ~Firestarter(); @@ -96,7 +83,7 @@ class Firestarter { private: const int _argc; - const char **_argv; + const char** _argv; const std::chrono::seconds _timeout; const unsigned _loadPercent; std::chrono::microseconds _load; @@ -123,13 +110,10 @@ class Firestarter { const double _nsga2_cr; const double _nsga2_m; -#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ - defined(_M_X64) - environment::x86::X86Environment *_environment = nullptr; +#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) + environment::x86::X86Environment* _environment = nullptr; - environment::x86::X86Environment &environment() const { - return *_environment; - } + environment::x86::X86Environment& environment() const { return *_environment; } #else #error "FIRESTARTER is not implemented for this ISA" #endif @@ -158,14 +142,11 @@ class Firestarter { void signalWork() { signalLoadWorkers(THREAD_WORK); }; // WatchdogWorker.cpp - int watchdogWorker(std::chrono::microseconds period, - std::chrono::microseconds load, - std::chrono::seconds timeout); + int watchdogWorker(std::chrono::microseconds period, std::chrono::microseconds load, 
std::chrono::seconds timeout); #ifdef FIRESTARTER_DEBUG_FEATURES // DumpRegisterWorker.cpp - int initDumpRegisterWorker(std::chrono::seconds dumpTimeDelta, - std::string dumpFilePath); + int initDumpRegisterWorker(std::chrono::seconds dumpTimeDelta, std::string dumpFilePath); void joinDumpRegisterWorker(); #endif @@ -191,8 +172,7 @@ class Firestarter { // variable to control the load of the threads inline static volatile unsigned long long loadVar = LOAD_LOW; - std::vector>> - loadThreads; + std::vector>> loadThreads; std::vector> errorCommunication; diff --git a/include/firestarter/Json/Summary.hpp b/include/firestarter/Json/Summary.hpp index 540c4aed..d9a923cc 100644 --- a/include/firestarter/Json/Summary.hpp +++ b/include/firestarter/Json/Summary.hpp @@ -25,14 +25,13 @@ namespace nlohmann { template <> struct adl_serializer { - static firestarter::measurement::Summary from_json(const json &j) { + static firestarter::measurement::Summary from_json(const json& j) { return {j["num_timepoints"].get(), - std::chrono::milliseconds( - j["duration"].get()), - j["average"].get(), j["stddev"].get()}; + std::chrono::milliseconds(j["duration"].get()), j["average"].get(), + j["stddev"].get()}; } - static void to_json(json &j, firestarter::measurement::Summary s) { + static void to_json(json& j, firestarter::measurement::Summary s) { j = json::object(); j["num_timepoints"] = s.num_timepoints; diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp index ec70476f..78b11b80 100644 --- a/include/firestarter/LoadWorkerData.hpp +++ b/include/firestarter/LoadWorkerData.hpp @@ -21,17 +21,15 @@ #pragma once +#include #include #include #include #include - -#include #include #include -#define PAD_SIZE(size, align) \ - align *(int)std::ceil((double)size / (double)align) +#define PAD_SIZE(size, align) align*(int)std::ceil((double)size / (double)align) #if defined(__APPLE__) #define ALIGNED_MALLOC(size, align) aligned_alloc(align, PAD_SIZE(size, 
align)) @@ -40,12 +38,10 @@ #define ALIGNED_MALLOC(size, align) _mm_malloc(PAD_SIZE(size, align), align) #define ALIGNED_FREE _mm_free #elif defined(_MSC_VER) -#define ALIGNED_MALLOC(size, align) \ - _aligned_malloc(PAD_SIZE(size, align), align) +#define ALIGNED_MALLOC(size, align) _aligned_malloc(PAD_SIZE(size, align), align) #define ALIGNED_FREE _aligned_free #else -#define ALIGNED_MALLOC(size, align) \ - std::aligned_alloc(align, PAD_SIZE(size, align)) +#define ALIGNED_MALLOC(size, align) std::aligned_alloc(align, PAD_SIZE(size, align)) #define ALIGNED_FREE std::free #endif @@ -53,25 +49,22 @@ namespace firestarter { class LoadWorkerData { public: - LoadWorkerData(int id, environment::Environment &environment, - volatile unsigned long long *loadVar, - unsigned long long period, bool dumpRegisters, - bool errorDetection) - : addrHigh(loadVar), period(period), dumpRegisters(dumpRegisters), - errorDetection(errorDetection), _id(id), _environment(environment), - _config(new environment::platform::RuntimeConfig( - environment.selectedConfig())) { + LoadWorkerData(int id, environment::Environment& environment, volatile unsigned long long* loadVar, + unsigned long long period, bool dumpRegisters, bool errorDetection) + : addrHigh(loadVar) + , period(period) + , dumpRegisters(dumpRegisters) + , errorDetection(errorDetection) + , _id(id) + , _environment(environment) + , _config(new environment::platform::RuntimeConfig(environment.selectedConfig())) { // use REGISTER_MAX_NUM cache lines for the dumped registers // and another cache line for the control variable. - // as we are doing aligned moves we only have the option to waste a whole - // cacheline - addrOffset = dumpRegisters - ? sizeof(DumpRegisterStruct) / sizeof(unsigned long long) - : 0; + // as we are doing aligned moves we only have the option to waste a + // whole cacheline + addrOffset = dumpRegisters ? sizeof(DumpRegisterStruct) / sizeof(unsigned long long) : 0; - addrOffset += errorDetection ? 
sizeof(ErrorDetectionStruct) / - sizeof(unsigned long long) - : 0; + addrOffset += errorDetection ? sizeof(ErrorDetectionStruct) / sizeof(unsigned long long) : 0; } ~LoadWorkerData() { @@ -81,27 +74,26 @@ class LoadWorkerData { } } - void setErrorCommunication( - std::shared_ptr communicationLeft, - std::shared_ptr communicationRight) { + void setErrorCommunication(std::shared_ptr communicationLeft, + std::shared_ptr communicationRight) { this->communicationLeft = communicationLeft; this->communicationRight = communicationRight; } int id() const { return _id; } - environment::Environment &environment() const { return _environment; } - environment::platform::RuntimeConfig &config() const { return *_config; } + environment::Environment& environment() const { return _environment; } + environment::platform::RuntimeConfig& config() const { return *_config; } - const ErrorDetectionStruct *errorDetectionStruct() const { - return reinterpret_cast(addrMem - addrOffset); + const ErrorDetectionStruct* errorDetectionStruct() const { + return reinterpret_cast(addrMem - addrOffset); } int comm = THREAD_WAIT; bool ack = false; std::mutex mutex; - unsigned long long *addrMem = nullptr; + unsigned long long* addrMem = nullptr; unsigned long long addrOffset; - volatile unsigned long long *addrHigh; + volatile unsigned long long* addrHigh; unsigned long long buffersizeMem; unsigned long long iterations = 0; // save the last iteration count when switching payloads @@ -121,8 +113,8 @@ class LoadWorkerData { private: int _id; - environment::Environment &_environment; - environment::platform::RuntimeConfig *_config; + environment::Environment& _environment; + environment::platform::RuntimeConfig* _config; }; } // namespace firestarter diff --git a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp index af8b7ff1..a91e1228 100644 --- a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp +++ 
b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp @@ -23,7 +23,6 @@ #include #include - #include namespace firestarter { @@ -34,13 +33,10 @@ template class FirstWorkerThreadFilter { public: typedef Record record_type; - static void setFirstThread(std::thread::id newFirstThread) { - firstThread = newFirstThread; - } + static void setFirstThread(std::thread::id newFirstThread) { firstThread = newFirstThread; } - bool filter(Record &r) const { - return r.std_thread_id() == firstThread || - r.severity() >= nitro::log::severity_level::error; + bool filter(Record& r) const { + return r.std_thread_id() == firstThread || r.severity() >= nitro::log::severity_level::error; } private: diff --git a/include/firestarter/Logging/Log.hpp b/include/firestarter/Logging/Log.hpp index f5b613c0..74cc3e1a 100644 --- a/include/firestarter/Logging/Log.hpp +++ b/include/firestarter/Logging/Log.hpp @@ -22,21 +22,17 @@ #pragma once #include - -#include -#include - +#include +#include +#include #include #include #include #include - #include #include - -#include -#include -#include +#include +#include #include #include @@ -46,8 +42,7 @@ namespace logging { class StdOut { public: - void sink(nitro::log::severity_level severity, - const std::string &formatted_record) { + void sink(nitro::log::severity_level severity, const std::string& formatted_record) { switch (severity) { case nitro::log::severity_level::warn: case nitro::log::severity_level::error: @@ -61,13 +56,12 @@ class StdOut { } }; -using record = nitro::log::record< - nitro::log::severity_attribute, nitro::log::message_attribute, - nitro::log::timestamp_attribute, nitro::log::std_thread_id_attribute>; +using record = nitro::log::record; template class formater { public: - std::string format(Record &r) { + std::string format(Record& r) { std::stringstream s; switch (r.severity()) { @@ -93,21 +87,16 @@ template class formater { } }; -template -using filter = nitro::log::filter::severity_filter; +template using filter = 
nitro::log::filter::severity_filter; template -using workerFilter = - nitro::log::filter::and_filter, - FirstWorkerThreadFilter>; +using workerFilter = nitro::log::filter::and_filter, FirstWorkerThreadFilter>; } // namespace logging -using log = nitro::log::logger; +using log = nitro::log::logger; using workerLog = - nitro::log::logger; + nitro::log::logger; } // namespace firestarter diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp index 4fc8a6a1..c115a476 100644 --- a/include/firestarter/Measurement/MeasurementWorker.hpp +++ b/include/firestarter/Measurement/MeasurementWorker.hpp @@ -21,11 +21,10 @@ #pragma once +#include #include #include #include - -#include #include #include @@ -34,12 +33,10 @@ extern "C" { #include #include #include - #include } -void insertCallback(void *cls, const char *metricName, int64_t timeSinceEpoch, - double value); +void insertCallback(void* cls, const char* metricName, int64_t timeSinceEpoch, double value); namespace firestarter::measurement { @@ -48,17 +45,16 @@ class MeasurementWorker { pthread_t workerThread; pthread_t stdinThread; - std::vector metrics = { - &rapl_metric, &perf_ipc_metric, &perf_freq_metric, &ipc_estimate_metric}; + std::vector metrics = {&rapl_metric, &perf_ipc_metric, &perf_freq_metric, &ipc_estimate_metric}; std::mutex values_mutex; std::map> values = {}; - static int *dataAcquisitionWorker(void *measurementWorker); + static int* dataAcquisitionWorker(void* measurementWorker); - static int *stdinDataAcquisitionWorker(void *measurementWorker); + static int* stdinDataAcquisitionWorker(void* measurementWorker); - const metric_interface_t *findMetricByName(std::string metricName); + const metric_interface_t* findMetricByName(std::string metricName); std::chrono::milliseconds updateInterval; @@ -70,46 +66,39 @@ class MeasurementWorker { std::string availableMetricsString; #ifndef FIRESTARTER_LINK_STATIC - std::vector _metricDylibs = {}; + 
std::vector _metricDylibs = {}; #endif std::vector _stdinMetrics = {}; public: // creates the worker thread - MeasurementWorker(std::chrono::milliseconds updateInterval, - unsigned long long numThreads, - std::vector const &metricDylibs, - std::vector const &stdinMetrics); + MeasurementWorker(std::chrono::milliseconds updateInterval, unsigned long long numThreads, + std::vector const& metricDylibs, std::vector const& stdinMetrics); // stops the worker threads ~MeasurementWorker(); - std::string const &availableMetrics() const { - return this->availableMetricsString; - } + std::string const& availableMetrics() const { return this->availableMetricsString; } - std::vector const &stdinMetrics() { return _stdinMetrics; } + std::vector const& stdinMetrics() { return _stdinMetrics; } // returns a list of metrics std::vector metricNames(); // setup the selected metrics // returns a vector with the names of inialized metrics - std::vector - initMetrics(std::vector const &metricNames); + std::vector initMetrics(std::vector const& metricNames); // callback function for metrics - void insertCallback(const char *metricName, int64_t timeSinceEpoch, - double value); + void insertCallback(const char* metricName, int64_t timeSinceEpoch, double value); // start the measurement void startMeasurement(); // get the measurement values begining from measurement start until now. 
- std::map getValues( - std::chrono::milliseconds startDelta = std::chrono::milliseconds::zero(), - std::chrono::milliseconds stopDelta = std::chrono::milliseconds::zero()); + std::map getValues(std::chrono::milliseconds startDelta = std::chrono::milliseconds::zero(), + std::chrono::milliseconds stopDelta = std::chrono::milliseconds::zero()); }; } // namespace firestarter::measurement diff --git a/include/firestarter/Measurement/MetricInterface.h b/include/firestarter/Measurement/MetricInterface.h index dbea19e8..c0c1c58b 100644 --- a/include/firestarter/Measurement/MetricInterface.h +++ b/include/firestarter/Measurement/MetricInterface.h @@ -44,13 +44,13 @@ typedef struct { // load it during runtime. typedef struct { // the name of the metric - const char *name; + const char* name; // metric type with bitfield from metric_type_t metric_type_t type; // the unit of the metric - const char *unit; + const char* unit; uint64_t callback_time; @@ -69,17 +69,15 @@ typedef struct { // Get a reading of the metric // Return EXIT_SUCCESS if we got a new value. // Set this function pointer to NULL if METRIC_INSERT_CALLBACK is specified. - int32_t (*get_reading)(double *value); + int32_t (*get_reading)(double* value); // Get error in case return code not being EXIT_SUCCESS - const char *(*get_error)(void); + const char* (*get_error)(void); // If METRIC_INSERT_CALLBACK is set in the type, this function will be passed // a callback and the first argument for the callback. // Further arguments of callback are the metric name, an unix timestamp (time // since epoch) and a metric value. 
- int32_t (*register_insert_callback)(void (*)(void *, const char *, int64_t, - double), - void *); + int32_t (*register_insert_callback)(void (*)(void*, const char*, int64_t, double), void*); } metric_interface_t; diff --git a/include/firestarter/Measurement/Summary.hpp b/include/firestarter/Measurement/Summary.hpp index 23f819f0..7f0d7899 100644 --- a/include/firestarter/Measurement/Summary.hpp +++ b/include/firestarter/Measurement/Summary.hpp @@ -21,9 +21,8 @@ #pragma once -#include - #include +#include #include #include @@ -34,17 +33,14 @@ extern "C" { namespace firestarter::measurement { struct Summary { - size_t num_timepoints; std::chrono::milliseconds duration; double average; double stddev; - static Summary calculate(std::vector::iterator begin, - std::vector::iterator end, - metric_type_t metricType, - unsigned long long numThreads); + static Summary calculate(std::vector::iterator begin, std::vector::iterator end, + metric_type_t metricType, unsigned long long numThreads); }; } // namespace firestarter::measurement diff --git a/include/firestarter/Measurement/TimeValue.hpp b/include/firestarter/Measurement/TimeValue.hpp index eae3de23..bf9377c9 100644 --- a/include/firestarter/Measurement/TimeValue.hpp +++ b/include/firestarter/Measurement/TimeValue.hpp @@ -26,12 +26,11 @@ namespace firestarter::measurement { struct TimeValue { - TimeValue() = default; - constexpr TimeValue(std::chrono::high_resolution_clock::time_point t, - double v) - : time(t), value(v){}; + constexpr TimeValue(std::chrono::high_resolution_clock::time_point t, double v) + : time(t) + , value(v){}; std::chrono::high_resolution_clock::time_point time; double value; diff --git a/include/firestarter/OneAPI/OneAPI.hpp b/include/firestarter/OneAPI/OneAPI.hpp index cf939388..0ed1844c 100644 --- a/include/firestarter/OneAPI/OneAPI.hpp +++ b/include/firestarter/OneAPI/OneAPI.hpp @@ -34,13 +34,11 @@ class OneAPI { std::condition_variable _waitForInitCv; std::mutex _waitForInitCvMutex; - static 
void initGpus(std::condition_variable &cv, - volatile unsigned long long *loadVar, bool useFloat, - bool useDouble, unsigned matrixSize, int gpus); + static void initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble, + unsigned matrixSize, int gpus); public: - OneAPI(volatile unsigned long long *loadVar, bool useFloat, bool useDouble, - unsigned matrixSize, int gpus); + OneAPI(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus); ~OneAPI() { if (_initThread.joinable()) { diff --git a/include/firestarter/Optimizer/Algorithm.hpp b/include/firestarter/Optimizer/Algorithm.hpp index 14009183..d9186322 100644 --- a/include/firestarter/Optimizer/Algorithm.hpp +++ b/include/firestarter/Optimizer/Algorithm.hpp @@ -30,10 +30,9 @@ class Algorithm { Algorithm() {} virtual ~Algorithm() {} - virtual void checkPopulation(Population const &pop, - std::size_t populationSize) = 0; + virtual void checkPopulation(Population const& pop, std::size_t populationSize) = 0; - virtual Population evolve(Population &pop) = 0; + virtual Population evolve(Population& pop) = 0; }; } // namespace firestarter::optimizer diff --git a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp index c1825f73..a144bb05 100644 --- a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp +++ b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp @@ -30,11 +30,9 @@ class NSGA2 : public Algorithm { NSGA2(unsigned gen, double cr, double m); ~NSGA2() {} - void checkPopulation(firestarter::optimizer::Population const &pop, - std::size_t populationSize) override; + void checkPopulation(firestarter::optimizer::Population const& pop, std::size_t populationSize) override; - firestarter::optimizer::Population - evolve(firestarter::optimizer::Population &pop) override; + firestarter::optimizer::Population evolve(firestarter::optimizer::Population& pop) override; private: unsigned 
_gen; diff --git a/include/firestarter/Optimizer/History.hpp b/include/firestarter/Optimizer/History.hpp index 9dec066d..2922301f 100644 --- a/include/firestarter/Optimizer/History.hpp +++ b/include/firestarter/Optimizer/History.hpp @@ -21,15 +21,14 @@ #pragma once -#include -#include -#include -#include - #include #include #include #include +#include +#include +#include +#include #include #include #include @@ -48,18 +47,14 @@ struct History { private: // https://stackoverflow.com/questions/17074324/how-can-i-sort-two-vectors-in-the-same-way-with-criteria-that-uses-only-one-of/17074810#17074810 template - inline static std::vector - sortPermutation(const std::vector &vec, Compare &compare) { + inline static std::vector sortPermutation(const std::vector& vec, Compare& compare) { std::vector p(vec.size()); std::iota(p.begin(), p.end(), 0); - std::sort(p.begin(), p.end(), [&](std::size_t i, std::size_t j) { - return compare(vec[i], vec[j]); - }); + std::sort(p.begin(), p.end(), [&](std::size_t i, std::size_t j) { return compare(vec[i], vec[j]); }); return p; } - inline static void padding(std::stringstream &ss, std::size_t width, - std::size_t taken, char c) { + inline static void padding(std::stringstream& ss, std::size_t width, std::size_t taken, char c) { for (std::size_t i = 0; i < (std::max)(width, taken) - taken; ++i) { ss << c; } @@ -69,24 +64,18 @@ struct History { inline static std::size_t MIN_COLUMN_WIDTH = 10; inline static std::vector _x = {}; - inline static std::vector< - std::map> - _f = {}; + inline static std::vector> _f = {}; public: - inline static void append( - std::vector const &ind, - std::map const &metric) { + inline static void append(std::vector const& ind, + std::map const& metric) { _x.push_back(ind); _f.push_back(metric); } - inline static std::optional< - std::map> - find(std::vector const &individual) { - auto findEqual = [individual](auto const &ind) { - return ind == individual; - }; + inline static std::optional> + find(std::vector 
const& individual) { + auto findEqual = [individual](auto const& ind) { return ind == individual; }; auto ind = std::find_if(_x.begin(), _x.end(), findEqual); if (ind == _x.end()) { return {}; @@ -95,25 +84,22 @@ struct History { return _f[dist]; } - inline static void - printBest(std::vector const &optimizationMetrics, - std::vector const &payloadItems) { + inline static void printBest(std::vector const& optimizationMetrics, + std::vector const& payloadItems) { // TODO: print paretto front // print the best 20 individuals for each metric in a format // where the user can give it to --run-instruction-groups directly std::map columnWidth; - for (auto const &metric : optimizationMetrics) { + for (auto const& metric : optimizationMetrics) { columnWidth[metric] = (std::max)(metric.size(), MIN_COLUMN_WIDTH); firestarter::log::trace() << metric << ": " << columnWidth[metric]; } - for (auto const &metric : optimizationMetrics) { - using SummaryMap = - std::map; - auto compareIndividual = [&metric](SummaryMap const &mapA, - SummaryMap const &mapB) { + for (auto const& metric : optimizationMetrics) { + using SummaryMap = std::map; + auto compareIndividual = [&metric](SummaryMap const& mapA, SummaryMap const& mapB) { auto summaryA = mapA.find(metric); auto summaryB = mapB.find(metric); @@ -132,25 +118,24 @@ struct History { auto perm = sortPermutation(_f, compareIndividual); - auto formatIndividual = - [&payloadItems](std::vector const &individual) { - std::string result = ""; - assert(payloadItems.size() == individual.size()); + auto formatIndividual = [&payloadItems](std::vector const& individual) { + std::string result = ""; + assert(payloadItems.size() == individual.size()); - for (std::size_t i = 0; i < individual.size(); ++i) { - // skip zero values - if (individual[i] == 0) { - continue; - } + for (std::size_t i = 0; i < individual.size(); ++i) { + // skip zero values + if (individual[i] == 0) { + continue; + } - if (result.size() != 0) { - result += ","; - } - result 
+= payloadItems[i] + ":" + std::to_string(individual[i]); - } + if (result.size() != 0) { + result += ","; + } + result += payloadItems[i] + ":" + std::to_string(individual[i]); + } - return result; - }; + return result; + }; auto begin = perm.begin(); auto end = perm.end(); @@ -177,7 +162,7 @@ struct History { secondLine << " "; padding(secondLine, (std::max)(max, ind.size()), 0, '-'); - for (auto const &metric : optimizationMetrics) { + for (auto const& metric : optimizationMetrics) { auto width = columnWidth[metric]; firstLine << " | "; @@ -203,7 +188,7 @@ struct History { ss << " " << ind; padding(ss, max, ind.size(), ' '); - for (auto const &metric : optimizationMetrics) { + for (auto const& metric : optimizationMetrics) { auto width = columnWidth[metric]; std::string value; @@ -230,26 +215,24 @@ struct History { firestarter::log::info() << ss.str(); } - firestarter::log::info() - << "To run FIRESTARTER with the best individual of a given metric " - "use the command line argument " - "`--run-instruction-groups=INDIVIDUAL`"; + firestarter::log::info() << "To run FIRESTARTER with the best individual of a given metric " + "use the command line argument " + "`--run-instruction-groups=INDIVIDUAL`"; } - inline static void save(std::string const &path, std::string const &startTime, - std::vector const &payloadItems, - const int argc, const char **argv) { + inline static void save(std::string const& path, std::string const& startTime, + std::vector const& payloadItems, const int argc, const char** argv) { using json = nlohmann::json; json j = json::object(); j["individuals"] = json::array(); - for (auto const &ind : _x) { + for (auto const& ind : _x) { j["individuals"].push_back(ind); } j["metrics"] = json::array(); - for (auto const &eval : _f) { + for (auto const& eval : _f) { j["metrics"].push_back(eval); } @@ -269,7 +252,7 @@ struct History { // save the payload items j["payloadItems"] = json::array(); - for (auto const &item : payloadItems) { + for (auto const& 
item : payloadItems) { j["payloadItems"].push_back(item); } @@ -286,7 +269,7 @@ struct History { std::string outpath = path; if (outpath.empty()) { - char *pwd = get_current_dir_name(); + char* pwd = get_current_dir_name(); if (pwd) { outpath = pwd; free(pwd); diff --git a/include/firestarter/Optimizer/OptimizerWorker.hpp b/include/firestarter/Optimizer/OptimizerWorker.hpp index 90eb80a5..816f4882 100644 --- a/include/firestarter/Optimizer/OptimizerWorker.hpp +++ b/include/firestarter/Optimizer/OptimizerWorker.hpp @@ -19,10 +19,9 @@ * Contact: daniel.hackenberg@tu-dresden.de *****************************************************************************/ +#include #include #include - -#include #include extern "C" { @@ -33,11 +32,9 @@ namespace firestarter::optimizer { class OptimizerWorker { public: - OptimizerWorker( - std::unique_ptr &&algorithm, - firestarter::optimizer::Population &population, - std::string const &optimizationAlgorithm, unsigned individuals, - std::chrono::seconds const &preheat); + OptimizerWorker(std::unique_ptr&& algorithm, + firestarter::optimizer::Population& population, std::string const& optimizationAlgorithm, + unsigned individuals, std::chrono::seconds const& preheat); ~OptimizerWorker() {} @@ -46,7 +43,7 @@ class OptimizerWorker { void kill(); private: - static void *optimizerThread(void *optimizerWorker); + static void* optimizerThread(void* optimizerWorker); std::unique_ptr _algorithm; firestarter::optimizer::Population _population; diff --git a/include/firestarter/Optimizer/Population.hpp b/include/firestarter/Optimizer/Population.hpp index b02f451d..3bf3ac38 100644 --- a/include/firestarter/Optimizer/Population.hpp +++ b/include/firestarter/Optimizer/Population.hpp @@ -22,11 +22,10 @@ #ifndef FIRESTARTER_OPTIMIZER_POPULATION_HPP #define FIRESTARTER_OPTIMIZER_POPULATION_HPP +#include #include #include #include - -#include #include #include #include @@ -40,13 +39,17 @@ class Population { // Construct a population from a problem. 
Population() = default; - Population(std::shared_ptr &&problem) - : _problem(std::move(problem)), gen(rd()) {} + Population(std::shared_ptr&& problem) + : _problem(std::move(problem)) + , gen(rd()) {} - Population(Population &pop) - : _problem(pop._problem), _x(pop._x), _f(pop._f), gen(rd()) {} + Population(Population& pop) + : _problem(pop._problem) + , _x(pop._x) + , _f(pop._f) + , gen(rd()) {} - Population &operator=(Population const &pop) { + Population& operator=(Population const& pop) { _problem = std::move(pop._problem); _x = pop._x; _f = pop._f; @@ -62,10 +65,9 @@ class Population { std::size_t size() const; // add one individual to the population. fitness will be evaluated. - void append(Individual const &ind); + void append(Individual const& ind); - void insert(std::size_t idx, Individual const &ind, - std::vector const &fit); + void insert(std::size_t idx, Individual const& ind, std::vector const& fit); // get a random individual inside bounds of problem Individual getRandomIndividual(); @@ -74,14 +76,14 @@ class Population { // return nothing in case of mutli-objective. std::optional bestIndividual() const; - Problem const &problem() const { return *_problem; } + Problem const& problem() const { return *_problem; } - std::vector const &x() const { return _x; } - std::vector> const &f() const { return _f; } + std::vector const& x() const { return _x; } + std::vector> const& f() const { return _f; } private: // add one individual to the population with a fitness. - void append(Individual const &ind, std::vector const &fit); + void append(Individual const& ind, std::vector const& fit); // our problem. 
std::shared_ptr _problem; diff --git a/include/firestarter/Optimizer/Problem.hpp b/include/firestarter/Optimizer/Problem.hpp index f88b0bc3..009b4d01 100644 --- a/include/firestarter/Optimizer/Problem.hpp +++ b/include/firestarter/Optimizer/Problem.hpp @@ -21,10 +21,9 @@ #pragma once +#include #include #include - -#include #include #include #include @@ -33,16 +32,14 @@ namespace firestarter::optimizer { class Problem { public: - Problem() : _fevals(0) {} + Problem() + : _fevals(0) {} virtual ~Problem() {} // return the fitness for an individual - virtual std::map - metrics(Individual const &individual) = 0; + virtual std::map metrics(Individual const& individual) = 0; - virtual std::vector - fitness(std::map const - &summaries) = 0; + virtual std::vector fitness(std::map const& summaries) = 0; // get the bounds of the problem virtual std::vector> getBounds() const = 0; diff --git a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp index 1ca0de58..f24ae2f2 100644 --- a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp +++ b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp @@ -21,11 +21,10 @@ #pragma once -#include - #include #include #include +#include #include #include #include @@ -35,28 +34,26 @@ namespace firestarter::optimizer::problem { class CLIArgumentProblem final : public firestarter::optimizer::Problem { - public: - CLIArgumentProblem( - std::function> const &)> - &&changePayloadFunction, - std::shared_ptr const - &measurementWorker, - std::vector const &metrics, std::chrono::seconds timeout, - std::chrono::milliseconds startDelta, std::chrono::milliseconds stopDelta, - std::vector const &instructionGroups) - : _changePayloadFunction(changePayloadFunction), - _measurementWorker(measurementWorker), _metrics(metrics), - _timeout(timeout), _startDelta(startDelta), _stopDelta(stopDelta), - _instructionGroups(instructionGroups) { + CLIArgumentProblem(std::function> 
const&)>&& changePayloadFunction, + std::shared_ptr const& measurementWorker, + std::vector const& metrics, std::chrono::seconds timeout, + std::chrono::milliseconds startDelta, std::chrono::milliseconds stopDelta, + std::vector const& instructionGroups) + : _changePayloadFunction(changePayloadFunction) + , _measurementWorker(measurementWorker) + , _metrics(metrics) + , _timeout(timeout) + , _startDelta(startDelta) + , _stopDelta(stopDelta) + , _instructionGroups(instructionGroups) { assert(_metrics.size() != 0); } ~CLIArgumentProblem() {} // return all available metrics for the individual - std::map - metrics(std::vector const &individual) override { + std::map metrics(std::vector const& individual) override { // increment evaluation idx _fevals++; @@ -71,32 +68,29 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem { _changePayloadFunction(payload); // start the measurement - // NOTE: starting the measurement must happen after switching to not mess up - // ipc-estimate metric + // NOTE: starting the measurement must happen after switching to not + // mess up ipc-estimate metric _measurementWorker->startMeasurement(); // wait for the measurement to finish std::this_thread::sleep_for(_timeout); // FIXME: this is an ugly workaround for the ipc-estimate metric - // changeing the payload triggers a write of the iteration counter of the - // last payload, which we use to estimate the ipc. + // changeing the payload triggers a write of the iteration counter of + // the last payload, which we use to estimate the ipc. 
_changePayloadFunction(payload); // return the results return _measurementWorker->getValues(_startDelta, _stopDelta); } - std::vector fitness( - std::map const &summaries) - override { + std::vector fitness(std::map const& summaries) override { std::vector values = {}; - for (auto const &metricName : _metrics) { - auto findName = [metricName](auto const &summary) { + for (auto const& metricName : _metrics) { + auto findName = [metricName](auto const& summary) { auto invertedName = "-" + summary.first; - return metricName.compare(summary.first) == 0 || - metricName.compare(invertedName) == 0; + return metricName.compare(summary.first) == 0 || metricName.compare(invertedName) == 0; }; auto it = std::find_if(summaries.begin(), summaries.end(), findName); @@ -121,8 +115,8 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem { // get the bounds of the problem std::vector> getBounds() const override { - std::vector> vec( - _instructionGroups.size(), std::make_tuple(0, 100)); + std::vector> vec(_instructionGroups.size(), + std::make_tuple(0, 100)); return vec; } @@ -131,10 +125,8 @@ class CLIArgumentProblem final : public firestarter::optimizer::Problem { std::size_t getNobjs() const override { return _metrics.size(); } private: - std::function> const &)> - _changePayloadFunction; - std::shared_ptr - _measurementWorker; + std::function> const&)> _changePayloadFunction; + std::shared_ptr _measurementWorker; std::vector _metrics; std::chrono::seconds _timeout; std::chrono::milliseconds _startDelta; diff --git a/include/firestarter/Optimizer/Util/MultiObjective.hpp b/include/firestarter/Optimizer/Util/MultiObjective.hpp index 00701bfd..da61bf73 100644 --- a/include/firestarter/Optimizer/Util/MultiObjective.hpp +++ b/include/firestarter/Optimizer/Util/MultiObjective.hpp @@ -22,7 +22,6 @@ #pragma once #include - #include #include #include @@ -33,37 +32,28 @@ bool less_than_f(double a, double b); bool greater_than_f(double a, double b); -bool 
pareto_dominance(const std::vector &obj1, - const std::vector &obj2); +bool pareto_dominance(const std::vector& obj1, const std::vector& obj2); -std::tuple>, - std::vector>, std::vector, +std::tuple>, std::vector>, std::vector, std::vector> -fast_non_dominated_sorting(const std::vector> &points); +fast_non_dominated_sorting(const std::vector>& points); -std::vector -crowding_distance(const std::vector> &non_dom_front); +std::vector crowding_distance(const std::vector>& non_dom_front); -std::vector::size_type mo_tournament_selection( - std::vector::size_type idx1, std::vector::size_type idx2, - const std::vector::size_type> &non_domination_rank, - const std::vector &crowding_d, std::mt19937 &mt); +std::vector::size_type +mo_tournament_selection(std::vector::size_type idx1, std::vector::size_type idx2, + const std::vector::size_type>& non_domination_rank, + const std::vector& crowding_d, std::mt19937& mt); -std::pair -sbx_crossover(const firestarter::optimizer::Individual &parent1, - const firestarter::optimizer::Individual &parent2, - const double p_cr, std::mt19937 &mt); +std::pair +sbx_crossover(const firestarter::optimizer::Individual& parent1, const firestarter::optimizer::Individual& parent2, + const double p_cr, std::mt19937& mt); -void polynomial_mutation( - firestarter::optimizer::Individual &child, - const std::vector> &bounds, const double p_m, - std::mt19937 &mt); +void polynomial_mutation(firestarter::optimizer::Individual& child, + const std::vector>& bounds, const double p_m, std::mt19937& mt); -std::vector -select_best_N_mo(const std::vector> &input_f, - std::size_t N); +std::vector select_best_N_mo(const std::vector>& input_f, std::size_t N); -std::vector ideal(const std::vector> &points); +std::vector ideal(const std::vector>& points); } // namespace firestarter::optimizer::util diff --git a/src/firestarter/Cuda/Cuda.cpp b/src/firestarter/Cuda/Cuda.cpp index e5abece9..8a17021f 100644 --- a/src/firestarter/Cuda/Cuda.cpp +++ 
b/src/firestarter/Cuda/Cuda.cpp @@ -31,29 +31,29 @@ #include #ifdef FIRESTARTER_BUILD_CUDA - #include - #include - #include - #include - #define FS_ACCEL_PREFIX_LC_LONG cuda - #define FS_ACCEL_PREFIX_LC cu - #define FS_ACCEL_PREFIX_UC CU - #define FS_ACCEL_PREFIX_UC_LONG CUDA - #define FS_ACCEL_STRING "CUDA" +#include +#include +#include +#include +#define FS_ACCEL_PREFIX_LC_LONG cuda +#define FS_ACCEL_PREFIX_LC cu +#define FS_ACCEL_PREFIX_UC CU +#define FS_ACCEL_PREFIX_UC_LONG CUDA +#define FS_ACCEL_STRING "CUDA" #else - #ifdef FIRESTARTER_BUILD_HIP - #include - #include - #include - #include - #define FS_ACCEL_PREFIX_LC_LONG hip - #define FS_ACCEL_PREFIX_LC hip - #define FS_ACCEL_PREFIX_UC HIP - #define FS_ACCEL_PREFIX_UC_LONG HIP - #define FS_ACCEL_STRING "HIP" - #else - #error "Attempting to compile file but neither CUDA nor HIP is used" - #endif +#ifdef FIRESTARTER_BUILD_HIP +#include +#include +#include +#include +#define FS_ACCEL_PREFIX_LC_LONG hip +#define FS_ACCEL_PREFIX_LC hip +#define FS_ACCEL_PREFIX_UC HIP +#define FS_ACCEL_PREFIX_UC_LONG HIP +#define FS_ACCEL_STRING "HIP" +#else +#error "Attempting to compile file but neither CUDA nor HIP is used" +#endif #endif #define CONCAT_(prefix, suffix) prefix##suffix /// Concatenate `prefix, suffix` into `prefixsuffix` @@ -66,71 +66,66 @@ #include #include -#define ACCELL_SAFE_CALL(cuerr, dev_index) \ - accell_safe_call(cuerr, dev_index, __FILE__, __LINE__) +#define ACCELL_SAFE_CALL(cuerr, dev_index) accell_safe_call(cuerr, dev_index, __FILE__, __LINE__) #define SEED 123 using namespace firestarter::cuda; // CUDA error checking -static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC_LONG,Error_t) cuerr, int dev_index, - const char *file, const int line) { - if (cuerr != CONCAT(FS_ACCEL_PREFIX_LC_LONG,Success) && cuerr != 1) { - firestarter::log::error() - << FS_ACCEL_STRING" error at " << file << ":" << line << ": error code = " << cuerr - << " (" << CONCAT(FS_ACCEL_PREFIX_LC_LONG,GetErrorString)(cuerr) 
- << "), device index: " << dev_index; +static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC_LONG, Error_t) cuerr, int dev_index, const char* file, + const int line) { + if (cuerr != CONCAT(FS_ACCEL_PREFIX_LC_LONG, Success) && cuerr != 1) { + firestarter::log::error() << FS_ACCEL_STRING " error at " << file << ":" << line << ": error code = " << cuerr + << " (" << CONCAT(FS_ACCEL_PREFIX_LC_LONG, GetErrorString)(cuerr) + << "), device index: " << dev_index; exit(cuerr); } return; } -static const char *_accellGetErrorEnum(CONCAT(FS_ACCEL_PREFIX_LC,blasStatus_t) error) { +static const char* _accellGetErrorEnum(CONCAT(FS_ACCEL_PREFIX_LC, blasStatus_t) error) { switch (error) { - case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_SUCCESS): - return FS_ACCEL_STRING"blas status: success"; - case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_NOT_INITIALIZED): - return FS_ACCEL_STRING"blas status: not initialized"; - case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_ALLOC_FAILED): - return FS_ACCEL_STRING"blas status: alloc failed"; - case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_INVALID_VALUE): - return FS_ACCEL_STRING"blas status: invalid value"; - case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_ARCH_MISMATCH): - return FS_ACCEL_STRING"blas status: arch mismatch"; - case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_MAPPING_ERROR): - return FS_ACCEL_STRING"blas status: mapping error"; - case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_EXECUTION_FAILED): - return FS_ACCEL_STRING"blas status: execution failed"; - case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_INTERNAL_ERROR): - return FS_ACCEL_STRING"blas status: internal error"; - case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_NOT_SUPPORTED): - return FS_ACCEL_STRING"blas status: not supported"; + case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_SUCCESS): + return FS_ACCEL_STRING "blas status: success"; + case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_NOT_INITIALIZED): + return FS_ACCEL_STRING "blas status: not initialized"; + case CONCAT(FS_ACCEL_PREFIX_UC, 
BLAS_STATUS_ALLOC_FAILED): + return FS_ACCEL_STRING "blas status: alloc failed"; + case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_INVALID_VALUE): + return FS_ACCEL_STRING "blas status: invalid value"; + case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_ARCH_MISMATCH): + return FS_ACCEL_STRING "blas status: arch mismatch"; + case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_MAPPING_ERROR): + return FS_ACCEL_STRING "blas status: mapping error"; + case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_EXECUTION_FAILED): + return FS_ACCEL_STRING "blas status: execution failed"; + case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_INTERNAL_ERROR): + return FS_ACCEL_STRING "blas status: internal error"; + case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_NOT_SUPPORTED): + return FS_ACCEL_STRING "blas status: not supported"; #ifdef FIRESTARTER_BUILD_CUDA - case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_LICENSE_ERROR): - return FS_ACCEL_STRING"blas status: license error"; + case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_LICENSE_ERROR): + return FS_ACCEL_STRING "blas status: license error"; #endif #ifdef FIRESTARTER_BUILD_HIP - case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_UNKNOWN): - return FS_ACCEL_STRING"blas status: unknown"; - case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_HANDLE_IS_NULLPTR): - return FS_ACCEL_STRING"blas status: handle is null pointer"; - case CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_INVALID_ENUM): - return FS_ACCEL_STRING"blas status: invalid enum"; + case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_UNKNOWN): + return FS_ACCEL_STRING "blas status: unknown"; + case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_HANDLE_IS_NULLPTR): + return FS_ACCEL_STRING "blas status: handle is null pointer"; + case CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_INVALID_ENUM): + return FS_ACCEL_STRING "blas status: invalid enum"; #endif } - return ""; } -static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC,blasStatus_t) cuerr, int dev_index, - const char *file, const int line) { - if (cuerr != 
CONCAT(FS_ACCEL_PREFIX_UC,BLAS_STATUS_SUCCESS)) { - firestarter::log::error() - << FS_ACCEL_STRING"BLAS error at " << file << ":" << line - << ": error code = " << cuerr << " (" << _accellGetErrorEnum(cuerr) - << "), device index: " << dev_index; +static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC, blasStatus_t) cuerr, int dev_index, const char* file, + const int line) { + if (cuerr != CONCAT(FS_ACCEL_PREFIX_UC, BLAS_STATUS_SUCCESS)) { + firestarter::log::error() << FS_ACCEL_STRING "BLAS error at " << file << ":" << line << ": error code = " << cuerr + << " (" << _accellGetErrorEnum(cuerr) << "), device index: " << dev_index; exit(cuerr); } @@ -138,16 +133,15 @@ static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC,blasStatus_t) cuer } #ifdef FIRESTARTER_BUILD_CUDA -static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_UC,result) cuerr, int dev_index, - const char *file, const int line) { - if (cuerr != CONCAT(FS_ACCEL_PREFIX_UC_LONG,_SUCCESS)) { - const char *errorString; +static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_UC, result) cuerr, int dev_index, const char* file, + const int line) { + if (cuerr != CONCAT(FS_ACCEL_PREFIX_UC_LONG, _SUCCESS)) { + const char* errorString; - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,GetErrorName)(cuerr, &errorString), dev_index); + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, GetErrorName)(cuerr, &errorString), dev_index); - firestarter::log::error() - << FS_ACCEL_STRING" error at " << file << ":" << line << ": error code = " << cuerr - << " (" << errorString << "), device index: " << dev_index; + firestarter::log::error() << FS_ACCEL_STRING " error at " << file << ":" << line << ": error code = " << cuerr + << " (" << errorString << "), device index: " << dev_index; exit(cuerr); } @@ -155,50 +149,48 @@ static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_UC,result) cuerr, int } #endif -static const char *_accellrandGetErrorEnum(CONCAT(FS_ACCEL_PREFIX_LC,randStatus_t) cuerr) { +static 
const char* _accellrandGetErrorEnum(CONCAT(FS_ACCEL_PREFIX_LC, randStatus_t) cuerr) { switch (cuerr) { - case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_SUCCESS): - return FS_ACCEL_STRING"rand status: success"; - case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_VERSION_MISMATCH): - return FS_ACCEL_STRING"rand status: version mismatch"; - case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_NOT_INITIALIZED): - return FS_ACCEL_STRING"rand status: not initialized"; - case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_ALLOCATION_FAILED): - return FS_ACCEL_STRING"rand status: allocation failed"; - case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_TYPE_ERROR): - return FS_ACCEL_STRING"rand status: type error"; - case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_OUT_OF_RANGE): - return FS_ACCEL_STRING"rand status: out of range"; - case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_LENGTH_NOT_MULTIPLE): - return FS_ACCEL_STRING"rand status: length not multiple"; - case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_DOUBLE_PRECISION_REQUIRED): - return FS_ACCEL_STRING"rand status: double precision required"; - case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_LAUNCH_FAILURE): - return FS_ACCEL_STRING"rand status: launch failure"; - case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_PREEXISTING_FAILURE): - return FS_ACCEL_STRING"rand status: preexisting failure"; - case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_INITIALIZATION_FAILED): - return FS_ACCEL_STRING"rand status: initialization failed"; - case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_ARCH_MISMATCH): - return FS_ACCEL_STRING"rand status: arch mismatch"; - case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_INTERNAL_ERROR): - return FS_ACCEL_STRING"rand status: internal error"; + case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_SUCCESS): + return FS_ACCEL_STRING "rand status: success"; + case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_VERSION_MISMATCH): + return FS_ACCEL_STRING "rand status: version mismatch"; + case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_NOT_INITIALIZED): + return FS_ACCEL_STRING "rand status: not 
initialized"; + case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_ALLOCATION_FAILED): + return FS_ACCEL_STRING "rand status: allocation failed"; + case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_TYPE_ERROR): + return FS_ACCEL_STRING "rand status: type error"; + case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_OUT_OF_RANGE): + return FS_ACCEL_STRING "rand status: out of range"; + case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_LENGTH_NOT_MULTIPLE): + return FS_ACCEL_STRING "rand status: length not multiple"; + case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_DOUBLE_PRECISION_REQUIRED): + return FS_ACCEL_STRING "rand status: double precision required"; + case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_LAUNCH_FAILURE): + return FS_ACCEL_STRING "rand status: launch failure"; + case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_PREEXISTING_FAILURE): + return FS_ACCEL_STRING "rand status: preexisting failure"; + case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_INITIALIZATION_FAILED): + return FS_ACCEL_STRING "rand status: initialization failed"; + case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_ARCH_MISMATCH): + return FS_ACCEL_STRING "rand status: arch mismatch"; + case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_INTERNAL_ERROR): + return FS_ACCEL_STRING "rand status: internal error"; #ifdef FIRESTARTER_BUILD_HIP - case CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_NOT_IMPLEMENTED): - return FS_ACCEL_STRING"rand status: not implemented"; + case CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_NOT_IMPLEMENTED): + return FS_ACCEL_STRING "rand status: not implemented"; #endif } return ""; } -static inline void accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC,randStatus_t) cuerr, int dev_index, - const char *file, const int line) { - if (cuerr != CONCAT(FS_ACCEL_PREFIX_UC,RAND_STATUS_SUCCESS)) { - firestarter::log::error() - << FS_ACCEL_STRING"RAND error at " << file << ":" << line - << ": error code = " << cuerr << " (" << _accellrandGetErrorEnum(cuerr) - << "), device index: " << dev_index; +static inline void 
accell_safe_call(CONCAT(FS_ACCEL_PREFIX_LC, randStatus_t) cuerr, int dev_index, const char* file, + const int line) { + if (cuerr != CONCAT(FS_ACCEL_PREFIX_UC, RAND_STATUS_SUCCESS)) { + firestarter::log::error() << FS_ACCEL_STRING "RAND error at " << file << ":" << line << ": error code = " << cuerr + << " (" << _accellrandGetErrorEnum(cuerr) << "), device index: " << dev_index; exit(cuerr); } @@ -226,8 +218,8 @@ static int get_precision(int useDouble, struct hipDeviceProp_t properties) { #endif #endif #if (CUDART_VERSION >= 8000) -// read precision ratio (dp/sp) of GPU to choose the right variant for maximum -// workload + // read precision ratio (dp/sp) of GPU to choose the right variant for maximum + // workload if (useDouble == 2 && properties.singleToDoublePrecisionPerfRatio > 3) { return 0; } else if (useDouble) { @@ -237,7 +229,7 @@ static int get_precision(int useDouble, struct hipDeviceProp_t properties) { } } #else -// as precision ratio is not supported return default/user input value + // as precision ratio is not supported return default/user input value (void)properties; if (useDouble) { @@ -263,9 +255,8 @@ static int get_precision(int device_index, int useDouble) { ACCELL_SAFE_CALL(hipSetDevice(device_index), device_index); #endif #endif - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,MemGetInfo)(&memory_avail, &memory_total), device_index); - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG,GetDeviceProperties)(&properties, device_index), - device_index); + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, MemGetInfo)(&memory_avail, &memory_total), device_index); + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG, GetDeviceProperties)(&properties, device_index), device_index); useDouble = get_precision(useDouble, properties); @@ -273,14 +264,12 @@ static int get_precision(int device_index, int useDouble) { // the user wants to compute DP on a SP-only-Card. 
if (useDouble && properties.major <= 1 && properties.minor <= 2) { std::stringstream ss; - ss << FS_ACCEL_STRING" GPU " << device_index << ": " << properties.name << " "; + ss << FS_ACCEL_STRING " GPU " << device_index << ": " << properties.name << " "; - firestarter::log::error() - << ss.str() << "Doesn't support double precision.\n" - << ss.str() << "Compute Capability: " << properties.major << "." - << properties.minor << ". Requiered for double precision: >=1.3\n" - << ss.str() - << "Stressing with single precision instead. Maybe use -f parameter."; + firestarter::log::error() << ss.str() << "Doesn't support double precision.\n" + << ss.str() << "Compute Capability: " << properties.major << "." << properties.minor + << ". Requiered for double precision: >=1.3\n" + << ss.str() << "Stressing with single precision instead. Maybe use -f parameter."; useDouble = 0; } @@ -305,65 +294,45 @@ static int get_msize(int device_index, int useDouble) { ACCELL_SAFE_CALL(cuCtxDestroy(context), device_index); - return round_up( - (int)(0.8 * sqrt(((memory_avail) / - ((useDouble ? sizeof(double) : sizeof(float)) * 3)))), - 1024); // a multiple of 1024 works always well + return round_up((int)(0.8 * sqrt(((memory_avail) / ((useDouble ? 
sizeof(double) : sizeof(float)) * 3)))), + 1024); // a multiple of 1024 works always well } #endif -static CONCAT(FS_ACCEL_PREFIX_LC,blasStatus_t) gemm( - CONCAT(FS_ACCEL_PREFIX_LC,blasHandle_t) handle, - CONCAT(FS_ACCEL_PREFIX_LC,blasOperation_t) transa, - CONCAT(FS_ACCEL_PREFIX_LC,blasOperation_t) transb, - int &m, int &n, int &k, - const float *alpha, const float *A, int &lda, - const float *B, int &ldb, const float *beta, - float *C, int &ldc) { - return CONCAT(FS_ACCEL_PREFIX_LC,blasSgemm)(handle, transa, transb, m, n, k, - alpha, A, lda, B, ldb, - beta, C, ldc); +static CONCAT(FS_ACCEL_PREFIX_LC, blasStatus_t) + gemm(CONCAT(FS_ACCEL_PREFIX_LC, blasHandle_t) handle, CONCAT(FS_ACCEL_PREFIX_LC, blasOperation_t) transa, + CONCAT(FS_ACCEL_PREFIX_LC, blasOperation_t) transb, int& m, int& n, int& k, const float* alpha, const float* A, + int& lda, const float* B, int& ldb, const float* beta, float* C, int& ldc) { + return CONCAT(FS_ACCEL_PREFIX_LC, blasSgemm)(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } -static CONCAT(FS_ACCEL_PREFIX_LC,blasStatus_t) gemm( - CONCAT(FS_ACCEL_PREFIX_LC,blasHandle_t) handle, - CONCAT(FS_ACCEL_PREFIX_LC,blasOperation_t) transa, - CONCAT(FS_ACCEL_PREFIX_LC,blasOperation_t) transb, - int &m, int &n, int &k, - const double *alpha, const double *A, int &lda, - const double *B, int &ldb, const double *beta, - double *C, int &ldc) { - return CONCAT(FS_ACCEL_PREFIX_LC,blasDgemm)(handle, transa, transb, m, n, k, - alpha, A, lda, B, ldb, - beta, C, ldc); +static CONCAT(FS_ACCEL_PREFIX_LC, blasStatus_t) + gemm(CONCAT(FS_ACCEL_PREFIX_LC, blasHandle_t) handle, CONCAT(FS_ACCEL_PREFIX_LC, blasOperation_t) transa, + CONCAT(FS_ACCEL_PREFIX_LC, blasOperation_t) transb, int& m, int& n, int& k, const double* alpha, + const double* A, int& lda, const double* B, int& ldb, const double* beta, double* C, int& ldc) { + return CONCAT(FS_ACCEL_PREFIX_LC, blasDgemm)(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); 
} -static CONCAT(FS_ACCEL_PREFIX_LC,randStatus_t) generateUniform( - CONCAT(FS_ACCEL_PREFIX_LC,randGenerator_t) generator, - float *outputPtr, size_t num) { - return CONCAT(FS_ACCEL_PREFIX_LC,randGenerateUniform)(generator, outputPtr, num); +static CONCAT(FS_ACCEL_PREFIX_LC, randStatus_t) + generateUniform(CONCAT(FS_ACCEL_PREFIX_LC, randGenerator_t) generator, float* outputPtr, size_t num) { + return CONCAT(FS_ACCEL_PREFIX_LC, randGenerateUniform)(generator, outputPtr, num); } -static CONCAT(FS_ACCEL_PREFIX_LC,randStatus_t) generateUniform( - CONCAT(FS_ACCEL_PREFIX_LC,randGenerator_t) generator, - double *outputPtr, size_t num) { - return CONCAT(FS_ACCEL_PREFIX_LC,randGenerateUniformDouble)(generator, outputPtr, num); +static CONCAT(FS_ACCEL_PREFIX_LC, randStatus_t) + generateUniform(CONCAT(FS_ACCEL_PREFIX_LC, randGenerator_t) generator, double* outputPtr, size_t num) { + return CONCAT(FS_ACCEL_PREFIX_LC, randGenerateUniformDouble)(generator, outputPtr, num); } // GPU index. Used to pin this thread to the GPU. 
template -static void create_load(std::condition_variable &waitForInitCv, - std::mutex &waitForInitCvMutex, int device_index, - std::atomic &initCount, - volatile unsigned long long *loadVar, int matrixSize) { - static_assert( - std::is_same::value || std::is_same::value, - "create_load: Template argument T must be either float or double"); +static void create_load(std::condition_variable& waitForInitCv, std::mutex& waitForInitCvMutex, int device_index, + std::atomic& initCount, volatile unsigned long long* loadVar, int matrixSize) { + static_assert(std::is_same::value || std::is_same::value, + "create_load: Template argument T must be either float or double"); int iterations, i; - firestarter::log::trace() << "Starting CUDA/HIP with given matrix size " - << matrixSize; + firestarter::log::trace() << "Starting CUDA/HIP with given matrix size " << matrixSize; size_t size_use = 0; if (matrixSize > 0) { @@ -387,43 +356,35 @@ static void create_load(std::condition_variable &waitForInitCv, // reserving the GPU and initializing cublas firestarter::log::trace() << "Getting " FS_ACCEL_STRING " device nr. " << device_index; - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,DeviceGet)(&device, device_index), device_index); + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, DeviceGet)(&device, device_index), device_index); #ifdef FIRESTARTER_BUILD_CUDA - firestarter::log::trace() << "Creating " FS_ACCEL_STRING " context for computation on device nr. " - << device_index; + firestarter::log::trace() << "Creating " FS_ACCEL_STRING " context for computation on device nr. " << device_index; ACCELL_SAFE_CALL(cuCtxCreate(&context, 0, device), device_index); - firestarter::log::trace() << "Set created " FS_ACCEL_STRING " context on device nr. " - << device_index; + firestarter::log::trace() << "Set created " FS_ACCEL_STRING " context on device nr. 
" << device_index; ACCELL_SAFE_CALL(cuCtxSetCurrent(context), device_index); #else #ifdef FIRESTARTER_BUILD_HIP - firestarter::log::trace() << "Creating " FS_ACCEL_STRING " Stream for computation on device nr. " - << device_index; + firestarter::log::trace() << "Creating " FS_ACCEL_STRING " Stream for computation on device nr. " << device_index; ACCELL_SAFE_CALL(hipSetDevice(device_index), device_index); ACCELL_SAFE_CALL(hipStreamCreate(&stream), device_index); #endif #endif - firestarter::log::trace() << "Create " FS_ACCEL_STRING " Blas on device nr. " - << device_index; - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,blasCreate)(&cublas), device_index); + firestarter::log::trace() << "Create " FS_ACCEL_STRING " Blas on device nr. " << device_index; + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, blasCreate)(&cublas), device_index); firestarter::log::trace() << "Get " FS_ACCEL_STRING " device properties (e.g., support for double)" - << " on device nr. " - << device_index; - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG,GetDeviceProperties)(&properties, device_index), - device_index); + << " on device nr. " << device_index; + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG, GetDeviceProperties)(&properties, device_index), device_index); // getting information about the GPU memory size_t memory_avail, memory_total; - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,MemGetInfo)(&memory_avail, &memory_total), device_index); + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, MemGetInfo)(&memory_avail, &memory_total), device_index); - firestarter::log::trace() << "Get " FS_ACCEL_STRING " Memory info on device nr. " - << device_index - <<": " << memory_avail << " B avail. from " - << memory_total << " B total"; + firestarter::log::trace() << "Get " FS_ACCEL_STRING " Memory info on device nr. " << device_index << ": " + << memory_avail << " B avail. 
from " << memory_total << " B total"; // defining memory pointers #ifdef FIRESTARTER_BUILD_CUDA @@ -449,81 +410,55 @@ static void create_load(std::condition_variable &waitForInitCv, memory_size = sizeof(T) * size_use * size_use; iterations = (use_bytes - 2 * memory_size) / memory_size; // = 1; - firestarter::log::trace() - << "Allocating " FS_ACCEL_STRING " memory on device nr. " - << device_index; + firestarter::log::trace() << "Allocating " FS_ACCEL_STRING " memory on device nr. " << device_index; // allocating memory on the GPU #ifdef FIRESTARTER_BUILD_CUDA ACCELL_SAFE_CALL(cuMemAlloc(&a_data_ptr, memory_size), device_index); ACCELL_SAFE_CALL(cuMemAlloc(&b_data_ptr, memory_size), device_index); - ACCELL_SAFE_CALL(cuMemAlloc(&c_data_ptr, iterations * memory_size), - device_index); + ACCELL_SAFE_CALL(cuMemAlloc(&c_data_ptr, iterations * memory_size), device_index); #else #ifdef FIRESTARTER_BUILD_HIP ACCELL_SAFE_CALL(hipMalloc(&a_data_ptr, memory_size), device_index); ACCELL_SAFE_CALL(hipMalloc(&b_data_ptr, memory_size), device_index); - ACCELL_SAFE_CALL(hipMalloc(&c_data_ptr, iterations * memory_size), - device_index); + ACCELL_SAFE_CALL(hipMalloc(&c_data_ptr, iterations * memory_size), device_index); #endif #endif - firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. " - << device_index - <<". A: " << a_data_ptr << "(Size: " - << memory_size << "B)" - << "\n"; - - firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. " - << device_index - <<". B: " << b_data_ptr << "(Size: " - << memory_size << "B)" - << "\n"; - firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. " - << device_index - <<". C: " << c_data_ptr << "(Size: " - << iterations * memory_size << "B)" - << "\n"; - - firestarter::log::trace() << "Initializing " FS_ACCEL_STRING " matrices a, b on device nr. " - << device_index - << ". 
Using " - << size_use * size_use - << " elements of size " - << sizeof(T) << " Byte"; + firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. " << device_index + << ". A: " << a_data_ptr << "(Size: " << memory_size << "B)" + << "\n"; + + firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. " << device_index + << ". B: " << b_data_ptr << "(Size: " << memory_size << "B)" + << "\n"; + firestarter::log::trace() << "Allocated " FS_ACCEL_STRING " memory on device nr. " << device_index + << ". C: " << c_data_ptr << "(Size: " << iterations * memory_size << "B)" + << "\n"; + + firestarter::log::trace() << "Initializing " FS_ACCEL_STRING " matrices a, b on device nr. " << device_index + << ". Using " << size_use * size_use << " elements of size " << sizeof(T) << " Byte"; // initialize matrix A and B on the GPU with random values - CONCAT(FS_ACCEL_PREFIX_LC,randGenerator_t) random_gen; - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,randCreateGenerator)( - &random_gen, - CONCAT(FS_ACCEL_PREFIX_UC,RAND_RNG_PSEUDO_DEFAULT)), - device_index); - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,randSetPseudoRandomGeneratorSeed)( - random_gen, SEED), - device_index); + CONCAT(FS_ACCEL_PREFIX_LC, randGenerator_t) random_gen; ACCELL_SAFE_CALL( - generateUniform(random_gen, (T *)a_data_ptr, size_use * size_use), + CONCAT(FS_ACCEL_PREFIX_LC, randCreateGenerator)(&random_gen, CONCAT(FS_ACCEL_PREFIX_UC, RAND_RNG_PSEUDO_DEFAULT)), device_index); - ACCELL_SAFE_CALL( - generateUniform(random_gen, (T *)b_data_ptr, size_use * size_use), - device_index); - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,randDestroyGenerator)(random_gen), - device_index); + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, randSetPseudoRandomGeneratorSeed)(random_gen, SEED), device_index); + ACCELL_SAFE_CALL(generateUniform(random_gen, (T*)a_data_ptr, size_use * size_use), device_index); + ACCELL_SAFE_CALL(generateUniform(random_gen, (T*)b_data_ptr, size_use * size_use), 
device_index); + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, randDestroyGenerator)(random_gen), device_index); // initialize c_data_ptr with copies of A for (i = 0; i < iterations; i++) { - firestarter::log::trace() << "Initializing " FS_ACCEL_STRING " matrix c-" - << i - << " by copying " - << memory_size - << " byte from " - << a_data_ptr - << " to " - << c_data_ptr + (size_t)(i * size_use * size_use * (float)sizeof(T)/(float)sizeof(c_data_ptr)) - << "\n"; - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,MemcpyDtoD)( - c_data_ptr + (size_t)(i * size_use * size_use * (float)sizeof(T)/(float)sizeof(c_data_ptr)), - a_data_ptr, memory_size), - device_index); + firestarter::log::trace() << "Initializing " FS_ACCEL_STRING " matrix c-" << i << " by copying " << memory_size + << " byte from " << a_data_ptr << " to " + << c_data_ptr + + (size_t)(i * size_use * size_use * (float)sizeof(T) / (float)sizeof(c_data_ptr)) + << "\n"; + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, MemcpyDtoD)( + c_data_ptr + (size_t)(i * size_use * size_use * (float)sizeof(T) / (float)sizeof(c_data_ptr)), + a_data_ptr, memory_size), + device_index); } // save gpuvar->init_count and sys.out @@ -531,15 +466,12 @@ static void create_load(std::condition_variable &waitForInitCv, std::lock_guard lk(waitForInitCvMutex); #define TO_MB(x) (unsigned long)(x / 1024 / 1024) - firestarter::log::info() - << " GPU " << device_index << "\n" - << " name: " << properties.name << "\n" - << " memory: " << TO_MB(memory_avail) << "/" - << TO_MB(memory_total) << " MiB available (using " << TO_MB(use_bytes) - << " MiB)\n" - << " matrix size: " << size_use << "\n" - << " used precision: " - << ((sizeof(T) == sizeof(double)) ? 
"double" : "single"); + firestarter::log::info() << " GPU " << device_index << "\n" + << " name: " << properties.name << "\n" + << " memory: " << TO_MB(memory_avail) << "/" << TO_MB(memory_total) + << " MiB available (using " << TO_MB(use_bytes) << " MiB)\n" + << " matrix size: " << size_use << "\n" + << " used precision: " << ((sizeof(T) == sizeof(double)) ? "double" : "single"); #undef TO_MB initCount++; @@ -553,17 +485,12 @@ static void create_load(std::condition_variable &waitForInitCv, // actual stress begins here while (*loadVar != LOAD_STOP) { for (i = 0; i < iterations; i++) { - ACCELL_SAFE_CALL(gemm( - cublas, - CONCAT(FS_ACCEL_PREFIX_UC,BLAS_OP_N), - CONCAT(FS_ACCEL_PREFIX_UC,BLAS_OP_N), - size_use_i, size_use_i, - size_use_i, &alpha, (const T *)a_data_ptr, size_use_i, - (const T *)b_data_ptr, size_use_i, &beta, - (T *)c_data_ptr + i * size_use * size_use, size_use_i), - device_index); - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG,DeviceSynchronize)(), + ACCELL_SAFE_CALL(gemm(cublas, CONCAT(FS_ACCEL_PREFIX_UC, BLAS_OP_N), CONCAT(FS_ACCEL_PREFIX_UC, BLAS_OP_N), + size_use_i, size_use_i, size_use_i, &alpha, (const T*)a_data_ptr, size_use_i, + (const T*)b_data_ptr, size_use_i, &beta, (T*)c_data_ptr + i * size_use * size_use, + size_use_i), device_index); + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC_LONG, DeviceSynchronize)(), device_index); } } @@ -578,20 +505,18 @@ static void create_load(std::condition_variable &waitForInitCv, ACCELL_SAFE_CALL(hipFree(c_data_ptr), device_index); #endif #endif - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,blasDestroy)(cublas), device_index); + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, blasDestroy)(cublas), device_index); #ifdef FIRESTARTER_BUILD_CUDA - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,CtxDestroy)(context), device_index); + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, CtxDestroy)(context), device_index); #else #ifdef FIRESTARTER_BUILD_HIP - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,StreamDestroy)(stream), 
device_index); + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, StreamDestroy)(stream), device_index); #endif #endif } -Cuda::Cuda(volatile unsigned long long *loadVar, bool useFloat, bool useDouble, - unsigned matrixSize, int gpus) { - std::thread t(Cuda::initGpus, std::ref(_waitForInitCv), loadVar, useFloat, - useDouble, matrixSize, gpus); +Cuda::Cuda(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) { + std::thread t(Cuda::initGpus, std::ref(_waitForInitCv), loadVar, useFloat, useDouble, matrixSize, gpus); _initThread = std::move(t); std::unique_lock lk(_waitForInitCvMutex); @@ -599,14 +524,13 @@ Cuda::Cuda(volatile unsigned long long *loadVar, bool useFloat, bool useDouble, _waitForInitCv.wait(lk); } -void Cuda::initGpus(std::condition_variable &cv, - volatile unsigned long long *loadVar, bool useFloat, - bool useDouble, unsigned matrixSize, int gpus) { +void Cuda::initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble, + unsigned matrixSize, int gpus) { std::condition_variable waitForInitCv; std::mutex waitForInitCvMutex; if (gpus) { - ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC,Init)(0), -1); + ACCELL_SAFE_CALL(CONCAT(FS_ACCEL_PREFIX_LC, Init)(0), -1); int devCount; #ifdef FIRESTARTER_BUILD_CUDA ACCELL_SAFE_CALL(cuDeviceGetCount(&devCount), -1); @@ -641,12 +565,10 @@ void Cuda::initGpus(std::condition_variable &cv, } if (gpus > devCount) { - firestarter::log::warn() - << "You requested more " FS_ACCEL_STRING " devices than available. " - "Maybe you set " FS_ACCEL_STRING "_VISIBLE_DEVICES?"; - firestarter::log::warn() - << "FIRESTARTER will use " << devCount << " of the requested " - << gpus << " " FS_ACCEL_STRING " device(s)"; + firestarter::log::warn() << "You requested more " FS_ACCEL_STRING " devices than available. 
" + "Maybe you set " FS_ACCEL_STRING "_VISIBLE_DEVICES?"; + firestarter::log::warn() << "FIRESTARTER will use " << devCount << " of the requested " << gpus + << " " FS_ACCEL_STRING " device(s)"; gpus = devCount; } @@ -659,14 +581,12 @@ void Cuda::initGpus(std::condition_variable &cv, int precision = get_precision(i, use_double); if (precision) { - std::thread t(create_load, std::ref(waitForInitCv), - std::ref(waitForInitCvMutex), i, std::ref(initCount), - loadVar, (int)matrixSize); + std::thread t(create_load, std::ref(waitForInitCv), std::ref(waitForInitCvMutex), i, + std::ref(initCount), loadVar, (int)matrixSize); gpuThreads.push_back(std::move(t)); } else { - std::thread t(create_load, std::ref(waitForInitCv), - std::ref(waitForInitCvMutex), i, std::ref(initCount), - loadVar, (int)matrixSize); + std::thread t(create_load, std::ref(waitForInitCv), std::ref(waitForInitCvMutex), i, + std::ref(initCount), loadVar, (int)matrixSize); gpuThreads.push_back(std::move(t)); } } @@ -682,19 +602,17 @@ void Cuda::initGpus(std::condition_variable &cv, cv.notify_all(); /* join computation threads */ - for (auto &t : gpuThreads) { + for (auto& t : gpuThreads) { t.join(); } } else { - firestarter::log::info() - << " - No " FS_ACCEL_STRING " devices. Just stressing CPU(s). Maybe use " - "FIRESTARTER instead of FIRESTARTER_" FS_ACCEL_STRING "?"; + firestarter::log::info() << " - No " FS_ACCEL_STRING " devices. Just stressing CPU(s). Maybe use " + "FIRESTARTER instead of FIRESTARTER_" FS_ACCEL_STRING "?"; cv.notify_all(); } } else { - firestarter::log::info() - << " --gpus 0 is set. Just stressing CPU(s). Maybe use " - "FIRESTARTER instead of FIRESTARTER_" FS_ACCEL_STRING "?"; + firestarter::log::info() << " --gpus 0 is set. Just stressing CPU(s). 
Maybe use " + "FIRESTARTER instead of FIRESTARTER_" FS_ACCEL_STRING "?"; cv.notify_all(); } } diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp index 3f7ab6a9..c5d7b34e 100644 --- a/src/firestarter/DumpRegisterWorker.cpp +++ b/src/firestarter/DumpRegisterWorker.cpp @@ -55,24 +55,18 @@ static std::string registerNameBySize(unsigned registerSize) { } } // namespace -int Firestarter::initDumpRegisterWorker(std::chrono::seconds dumpTimeDelta, - std::string dumpFilePath) { +int Firestarter::initDumpRegisterWorker(std::chrono::seconds dumpTimeDelta, std::string dumpFilePath) { - auto data = std::make_unique( - this->loadThreads.begin()->second, dumpTimeDelta, dumpFilePath); + auto data = std::make_unique(this->loadThreads.begin()->second, dumpTimeDelta, dumpFilePath); - this->dumpRegisterWorkerThread = - std::thread(Firestarter::dumpRegisterWorker, std::move(data)); + this->dumpRegisterWorkerThread = std::thread(Firestarter::dumpRegisterWorker, std::move(data)); return EXIT_SUCCESS; } -void Firestarter::joinDumpRegisterWorker() { - this->dumpRegisterWorkerThread.join(); -} +void Firestarter::joinDumpRegisterWorker() { this->dumpRegisterWorkerThread.join(); } -void Firestarter::dumpRegisterWorker( - std::unique_ptr data) { +void Firestarter::dumpRegisterWorker(std::unique_ptr data) { pthread_setname_np(pthread_self(), "DumpRegWorker"); @@ -81,21 +75,16 @@ void Firestarter::dumpRegisterWorker( std::string registerPrefix = registerNameBySize(registerSize); auto offset = sizeof(DumpRegisterStruct) / sizeof(unsigned long long); - auto dumpRegisterStruct = reinterpret_cast( - data->loadWorkerData->addrMem - offset); + auto dumpRegisterStruct = reinterpret_cast(data->loadWorkerData->addrMem - offset); - auto dumpVar = reinterpret_cast( - &dumpRegisterStruct->dumpVar); + auto dumpVar = reinterpret_cast(&dumpRegisterStruct->dumpVar); // memory of simd variables is before the padding - volatile unsigned long long *dumpMemAddr = - 
dumpRegisterStruct->padding - registerCount * registerSize; + volatile unsigned long long* dumpMemAddr = dumpRegisterStruct->padding - registerCount * registerSize; // TODO: maybe use aligned_malloc to make memcpy more efficient and don't // interrupt the workload as much? - unsigned long long *last = reinterpret_cast( - malloc(sizeof(unsigned long long) * offset)); - unsigned long long *current = reinterpret_cast( - malloc(sizeof(unsigned long long) * offset)); + unsigned long long* last = reinterpret_cast(malloc(sizeof(unsigned long long) * offset)); + unsigned long long* current = reinterpret_cast(malloc(sizeof(unsigned long long) * offset)); if (last == nullptr || current == nullptr) { log::error() << "Malloc failed in Firestarter::dumpRegisterWorker"; @@ -143,8 +132,7 @@ void Firestarter::dumpRegisterWorker( } // copy the register content to minimize the interruption of the load worker - std::memcpy(current, (void *)dumpMemAddr, - sizeof(unsigned long long) * offset); + std::memcpy(current, (void*)dumpMemAddr, sizeof(unsigned long long) * offset); // skip the first output, as we first have to get some valid values for last if (!skipFirst) { @@ -162,8 +150,7 @@ void Firestarter::dumpRegisterWorker( for (auto j = 0; j < registerSize; j++) { auto index = registerSize * i + j; - auto hd = static_cast( - hammingDistance(current[index], last[index])); + auto hd = static_cast(hammingDistance(current[index], last[index])); dumpFile << hd; if (j != registerSize - 1) { diff --git a/src/firestarter/Environment/CPUTopology.cpp b/src/firestarter/Environment/CPUTopology.cpp index d7fb4bf0..a21bd9b8 100644 --- a/src/firestarter/Environment/CPUTopology.cpp +++ b/src/firestarter/Environment/CPUTopology.cpp @@ -32,18 +32,18 @@ extern "C" { using namespace firestarter::environment; -std::ostream &CPUTopology::print(std::ostream &stream) const { +std::ostream& CPUTopology::print(std::ostream& stream) const { stream << " system summary:\n" << " number of processors: " << 
this->numPackages() << "\n" << " number of cores (total)): " << this->numCoresTotal() << "\n" - << " (this includes only cores in the cgroup)" << "\n" - << " number of threads per core: " << this->numThreadsPerCore() + << " (this includes only cores in the cgroup)" << "\n" + << " number of threads per core: " << this->numThreadsPerCore() << "\n" << " total number of threads: " << this->numThreads() << "\n\n"; std::stringstream ss; - for (auto const &ent : this->features()) { + for (auto const& ent : this->features()) { ss << ent << " "; } @@ -52,20 +52,18 @@ std::ostream &CPUTopology::print(std::ostream &stream) const { << " vendor: " << this->vendor() << "\n" << " processor-name: " << this->processorName() << "\n" << " model: " << this->model() << "\n" - << " frequency: " << this->clockrate() / 1000000 - << " MHz\n" + << " frequency: " << this->clockrate() / 1000000 << " MHz\n" << " supported features: " << ss.str() << "\n" << " Caches:"; std::vector caches = { - HWLOC_OBJ_L1CACHE, HWLOC_OBJ_L1ICACHE, HWLOC_OBJ_L2CACHE, - HWLOC_OBJ_L2ICACHE, HWLOC_OBJ_L3CACHE, HWLOC_OBJ_L3ICACHE, - HWLOC_OBJ_L4CACHE, HWLOC_OBJ_L5CACHE, + HWLOC_OBJ_L1CACHE, HWLOC_OBJ_L1ICACHE, HWLOC_OBJ_L2CACHE, HWLOC_OBJ_L2ICACHE, + HWLOC_OBJ_L3CACHE, HWLOC_OBJ_L3ICACHE, HWLOC_OBJ_L4CACHE, HWLOC_OBJ_L5CACHE, }; std::vector cacheStrings = {}; - for (hwloc_obj_type_t const &cache : caches) { + for (hwloc_obj_type_t const& cache : caches) { int width; char string[128]; int shared; @@ -93,8 +91,8 @@ std::ostream &CPUTopology::print(std::ostream &stream) const { break; } - ss << " Cache, " << cacheObj->attr->cache.size / 1024 << " KiB, " - << cacheObj->attr->cache.linesize << " B Cacheline, "; + ss << " Cache, " << cacheObj->attr->cache.size / 1024 << " KiB, " << cacheObj->attr->cache.linesize + << " B Cacheline, "; switch (cacheObj->attr->cache.associativity) { case -1: @@ -131,8 +129,7 @@ CPUTopology::CPUTopology(std::string architecture) hwloc_topology_init(&this->topology); // do not filter icaches 
- hwloc_topology_set_cache_types_filter(this->topology, - HWLOC_TYPE_FILTER_KEEP_ALL); + hwloc_topology_set_cache_types_filter(this->topology, HWLOC_TYPE_FILTER_KEEP_ALL); hwloc_topology_load(this->topology); @@ -162,7 +159,7 @@ CPUTopology::CPUTopology(std::string architecture) this->_numPackages = hwloc_get_nbobjs_by_depth(this->topology, depth); } - log::trace() << "Number of Packages:" << this->_numPackages; + log::trace() << "Number of Packages:" << this->_numPackages; // get number of cores per package depth = hwloc_get_type_depth(this->topology, HWLOC_OBJ_CORE); @@ -170,9 +167,8 @@ CPUTopology::CPUTopology(std::string architecture) this->_numCoresTotal = 1; log::warn() << "Could not get number of cores"; } else { - this->_numCoresTotal = - hwloc_get_nbobjs_by_depth(this->topology, depth); - if ( this->_numCoresTotal == 0 ) { + this->_numCoresTotal = hwloc_get_nbobjs_by_depth(this->topology, depth); + if (this->_numCoresTotal == 0) { log::warn() << "Could not get number of cores"; this->_numCoresTotal = 1; } @@ -186,10 +182,8 @@ CPUTopology::CPUTopology(std::string architecture) this->_numThreadsPerCore = 1; log::warn() << "Could not get number of threads"; } else { - this->_numThreadsPerCore = - hwloc_get_nbobjs_by_depth(this->topology, depth) / - this->_numCoresTotal ; - if ( this->_numThreadsPerCore == 0 ) { + this->_numThreadsPerCore = hwloc_get_nbobjs_by_depth(this->topology, depth) / this->_numCoresTotal; + if (this->_numThreadsPerCore == 0) { log::warn() << "Could not get number of threads per core"; this->_numThreadsPerCore = 1; } @@ -233,30 +227,17 @@ CPUTopology::CPUTopology(std::string architecture) if (clockrate == "0") { firestarter::log::warn() << "Can't determine clockrate from /proc/cpuinfo"; } else { - firestarter::log::trace() - << "Clockrate from /proc/cpuinfo is " << clockrate; + firestarter::log::trace() << "Clockrate from /proc/cpuinfo is " << clockrate; this->_clockrate = 1e6 * std::stoi(clockrate); } auto governor = 
this->scalingGovernor(); if (!governor.empty()) { - auto scalingCurFreq = - this->getFileAsStream( - "/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq") - .str(); - auto cpuinfoCurFreq = - this->getFileAsStream( - "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq") - .str(); - auto scalingMaxFreq = - this->getFileAsStream( - "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq") - .str(); - auto cpuinfoMaxFreq = - this->getFileAsStream( - "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq") - .str(); + auto scalingCurFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq").str(); + auto cpuinfoCurFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq").str(); + auto scalingMaxFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq").str(); + auto cpuinfoMaxFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq").str(); if (governor.compare("performance") || governor.compare("powersave")) { if (scalingCurFreq.empty()) { @@ -322,15 +303,14 @@ CPUTopology::CPUTopology(std::string architecture) int width = hwloc_get_nbobjs_by_type(this->topology, HWLOC_OBJ_L1ICACHE); if (width >= 1) { - hwloc_obj_t cacheObj = - hwloc_get_obj_by_type(this->topology, HWLOC_OBJ_L1ICACHE, 0); + hwloc_obj_t cacheObj = hwloc_get_obj_by_type(this->topology, HWLOC_OBJ_L1ICACHE, 0); this->_instructionCacheSize = cacheObj->attr->cache.size; } } CPUTopology::~CPUTopology() { hwloc_topology_destroy(this->topology); } -std::stringstream CPUTopology::getFileAsStream(std::string const &filePath) { +std::stringstream CPUTopology::getFileAsStream(std::string const& filePath) { std::ifstream file(filePath); std::stringstream ss; @@ -345,9 +325,7 @@ std::stringstream CPUTopology::getFileAsStream(std::string const &filePath) { } std::string CPUTopology::scalingGovernor() const { - return this - ->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor") - 
.str(); + return this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor").str(); } int CPUTopology::getCoreIdFromPU(unsigned pu) const { @@ -424,8 +402,7 @@ unsigned CPUTopology::maxNumThreads() const { // Find CPUs per kind for (int kind_index = 0; kind_index < nr_cpukinds; kind_index++) { - int result = hwloc_cpukinds_get_info(this->topology, kind_index, bitmap, - NULL, NULL, NULL, 0); + int result = hwloc_cpukinds_get_info(this->topology, kind_index, bitmap, NULL, NULL, NULL, 0); if (result) { log::warn() << "Could not get information for CPU kind " << kind_index; } diff --git a/src/firestarter/Environment/Environment.cpp b/src/firestarter/Environment/Environment.cpp index d827ee83..34022c93 100644 --- a/src/firestarter/Environment/Environment.cpp +++ b/src/firestarter/Environment/Environment.cpp @@ -28,8 +28,7 @@ using namespace firestarter::environment; -#if (defined(linux) || defined(__linux__)) && \ - defined(FIRESTARTER_THREAD_AFFINITY) +#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY) extern "C" { #include @@ -37,23 +36,23 @@ extern "C" { // this code is from the C version of FIRESTARTER // TODO: replace this with cpu affinity of hwloc -#define ADD_CPU_SET(cpu, cpuset) \ - do { \ - if (this->cpuAllowed(cpu)) { \ - CPU_SET(cpu, &cpuset); \ - } else { \ - if (cpu >= this->topology().numThreads()) { \ - log::error() << "The given bind argument (-b/--bind) includes CPU " \ - << cpu << " that is not available on this system."; \ - } else { \ - log::error() << "The given bind argument (-b/--bind) cannot " \ - "be implemented with the cpuset given from the OS\n" \ - << "This can be caused by the taskset tool, cgroups, " \ - "the batch system, or similar mechanisms.\n" \ - << "Please fix the argument to match the restrictions."; \ - } \ - return EACCES; \ - } \ +#define ADD_CPU_SET(cpu, cpuset) \ + do { \ + if (this->cpuAllowed(cpu)) { \ + CPU_SET(cpu, &cpuset); \ + } else { \ + if (cpu >= 
this->topology().numThreads()) { \ + log::error() << "The given bind argument (-b/--bind) includes CPU " << cpu \ + << " that is not available on this system."; \ + } else { \ + log::error() << "The given bind argument (-b/--bind) cannot " \ + "be implemented with the cpuset given from the OS\n" \ + << "This can be caused by the taskset tool, cgroups, " \ + "the batch system, or similar mechanisms.\n" \ + << "Please fix the argument to match the restrictions."; \ + } \ + return EACCES; \ + } \ } while (0) int Environment::cpuSet(unsigned id) { @@ -78,20 +77,16 @@ int Environment::cpuAllowed(unsigned id) { } #endif -int Environment::evaluateCpuAffinity(unsigned requestedNumThreads, - std::string cpuBind) { -#if not((defined(linux) || defined(__linux__)) && \ - defined(FIRESTARTER_THREAD_AFFINITY)) +int Environment::evaluateCpuAffinity(unsigned requestedNumThreads, std::string cpuBind) { +#if not((defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)) (void)cpuBind; #endif - if (requestedNumThreads > 0 && - requestedNumThreads > this->topology().numThreads()) { + if (requestedNumThreads > 0 && requestedNumThreads > this->topology().numThreads()) { log::warn() << "Not enough CPUs for requested number of threads"; } -#if (defined(linux) || defined(__linux__)) && \ - defined(FIRESTARTER_THREAD_AFFINITY) +#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY) cpu_set_t cpuset; CPU_ZERO(&cpuset); @@ -161,8 +156,7 @@ int Environment::evaluateCpuAffinity(unsigned requestedNumThreads, s = 1; } if (y < x) { - log::error() << "y has to be >= x in x-y expressions of CPU list: " - << token; + log::error() << "y has to be >= x in x-y expressions of CPU list: " << token; return EXIT_FAILURE; } for (unsigned long i = x; i <= y; i += s) { @@ -185,8 +179,7 @@ int Environment::evaluateCpuAffinity(unsigned requestedNumThreads, log::error() << "Found no usable CPUs!"; return 127; } -#if (defined(linux) || defined(__linux__)) && \ - 
defined(FIRESTARTER_THREAD_AFFINITY) +#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY) else { for (unsigned i = 0; i < this->topology().maxNumThreads(); i++) { if (CPU_ISSET(i, &cpuset)) { @@ -208,20 +201,18 @@ int Environment::evaluateCpuAffinity(unsigned requestedNumThreads, void Environment::printThreadSummary() { log::info() << "\n using " << this->requestedNumThreads() << " threads"; -#if (defined(linux) || defined(__linux__)) && \ - defined(FIRESTARTER_THREAD_AFFINITY) +#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY) bool printCoreIdInfo = false; size_t i = 0; std::vector cpuBind(this->cpuBind); cpuBind.resize(this->requestedNumThreads()); - for (auto const &bind : cpuBind) { + for (auto const& bind : cpuBind) { int coreId = this->topology().getCoreIdFromPU(bind); int pkgId = this->topology().getPkgIdFromPU(bind); if (coreId != -1 && pkgId != -1) { - log::info() << " - Thread " << i << " run on CPU " << bind << ", core " - << coreId << " in package: " << pkgId; + log::info() << " - Thread " << i << " run on CPU " << bind << ", core " << coreId << " in package: " << pkgId; printCoreIdInfo = true; } @@ -229,8 +220,7 @@ void Environment::printThreadSummary() { } if (printCoreIdInfo) { - log::info() - << " The cores are numbered using the logical_index from hwloc."; + log::info() << " The cores are numbered using the logical_index from hwloc."; } #endif } @@ -241,8 +231,7 @@ int Environment::setCpuAffinity(unsigned thread) { return EXIT_FAILURE; } -#if (defined(linux) || defined(__linux__)) && \ - defined(FIRESTARTER_THREAD_AFFINITY) +#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY) this->cpuSet(this->cpuBind.at(thread)); #endif diff --git a/src/firestarter/Environment/Payload/Payload.cpp b/src/firestarter/Environment/Payload/Payload.cpp index 68cfc547..5cda6abc 100644 --- a/src/firestarter/Environment/Payload/Payload.cpp +++ 
b/src/firestarter/Environment/Payload/Payload.cpp @@ -26,12 +26,10 @@ using namespace firestarter::environment::payload; -unsigned -Payload::getSequenceStartCount(const std::vector &sequence, - const std::string start) { +unsigned Payload::getSequenceStartCount(const std::vector& sequence, const std::string start) { unsigned i = 0; - for (const auto &item : sequence) { + for (const auto& item : sequence) { if (0 == item.rfind(start, 0)) { i++; } @@ -40,13 +38,10 @@ Payload::getSequenceStartCount(const std::vector &sequence, return i; } -std::vector Payload::generateSequence( - std::vector> const &proportions) { +std::vector Payload::generateSequence(std::vector> const& proportions) { std::vector> prop = proportions; - prop.erase(std::remove_if(prop.begin(), prop.end(), - [](auto const &pair) { return pair.second == 0; }), - prop.end()); + prop.erase(std::remove_if(prop.begin(), prop.end(), [](auto const& pair) { return pair.second == 0; }), prop.end()); std::vector sequence = {}; @@ -62,8 +57,7 @@ std::vector Payload::generateSequence( for (++it; it != prop.end(); ++it) { for (unsigned i = 0; i < it->second; i++) { insertIt = sequence.begin(); - std::advance(insertIt, 1 + floor(i * (sequence.size() + it->second - i) / - (float)it->second)); + std::advance(insertIt, 1 + floor(i * (sequence.size() + it->second - i) / (float)it->second)); sequence.insert(insertIt, it->first); } } @@ -71,38 +65,32 @@ std::vector Payload::generateSequence( return sequence; } -unsigned Payload::getL2LoopCount(const std::vector &sequence, - const unsigned numberOfLines, +unsigned Payload::getL2LoopCount(const std::vector& sequence, const unsigned numberOfLines, const unsigned size, const unsigned threads) { if (this->getL2SequenceCount(sequence) == 0) { return 0; } - return (0.8 * size / 64 / threads / - (this->getL2SequenceCount(sequence) * - this->getNumberOfSequenceRepetitions(sequence, - numberOfLines / threads))); + return ( + 0.8 * size / 64 / threads / + 
(this->getL2SequenceCount(sequence) * this->getNumberOfSequenceRepetitions(sequence, numberOfLines / threads))); } -unsigned Payload::getL3LoopCount(const std::vector &sequence, - const unsigned numberOfLines, +unsigned Payload::getL3LoopCount(const std::vector& sequence, const unsigned numberOfLines, const unsigned size, const unsigned threads) { if (this->getL3SequenceCount(sequence) == 0) { return 0; } - return (0.8 * size / 64 / threads / - (this->getL3SequenceCount(sequence) * - this->getNumberOfSequenceRepetitions(sequence, - numberOfLines / threads))); + return ( + 0.8 * size / 64 / threads / + (this->getL3SequenceCount(sequence) * this->getNumberOfSequenceRepetitions(sequence, numberOfLines / threads))); } -unsigned Payload::getRAMLoopCount(const std::vector &sequence, - const unsigned numberOfLines, +unsigned Payload::getRAMLoopCount(const std::vector& sequence, const unsigned numberOfLines, const unsigned size, const unsigned threads) { if (this->getRAMSequenceCount(sequence) == 0) { return 0; } - return (1.0 * size / 64 / threads / - (this->getRAMSequenceCount(sequence) * - this->getNumberOfSequenceRepetitions(sequence, - numberOfLines / threads))); + return ( + 1.0 * size / 64 / threads / + (this->getRAMSequenceCount(sequence) * this->getNumberOfSequenceRepetitions(sequence, numberOfLines / threads))); } diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp index 9316ed39..2c23d1c4 100644 --- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp @@ -25,29 +25,25 @@ using namespace firestarter::environment::x86::payload; using namespace asmjit; using namespace asmjit::x86; -int AVX512Payload::compilePayload( - std::vector> const &proportion, - unsigned instructionCacheSize, - std::list const &dataCacheBufferSize, unsigned ramBufferSize, - unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool 
errorDetection) { +int AVX512Payload::compilePayload(std::vector> const& proportion, + unsigned instructionCacheSize, std::list const& dataCacheBufferSize, + unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters, + bool errorDetection) { // Compute the sequence of instruction groups and the number of its repetions // to reach the desired size auto sequence = this->generateSequence(proportion); - auto repetitions = - this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); + auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); // compute count of flops and memory access for performance report unsigned flops = 0; unsigned bytes = 0; - for (const auto &item : sequence) { + for (const auto& item : sequence) { auto it = this->instructionFlops.find(item); if (it == this->instructionFlops.end()) { - workerLog::error() << "Instruction group " << item << " undefined in " - << name() << "."; + workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; return EXIT_FAILURE; } @@ -75,12 +71,9 @@ int AVX512Payload::compilePayload( auto ram_size = ramBufferSize / thread; // calculate the reset counters for the buffers - auto l2_loop_count = - getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); - auto l3_loop_count = - getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); - auto ram_loop_count = - getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); + auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); + auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); + auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); CodeHolder code; code.init(this->rt.environment()); @@ -90,9 +83,8 @@ int AVX512Payload::compilePayload( } Builder cb(&code); - cb.addDiagnosticOptions( - asmjit::DiagnosticOptions::kValidateAssembler | - 
asmjit::DiagnosticOptions::kValidateIntermediate ); + cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | + asmjit::DiagnosticOptions::kValidateIntermediate); auto pointer_reg = rax; auto l1_addr = rbx; @@ -116,8 +108,7 @@ int AVX512Payload::compilePayload( auto ram_reg = zmm30; FuncDetail func; - func.init(FuncSignatureT( + func.init(FuncSignatureT( CallConvId::kCDecl), this->rt.environment()); @@ -132,10 +123,9 @@ int AVX512Payload::compilePayload( frame.addDirtyRegs(Mm(i)); } // make all other used registers dirty except RAX - frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, - l3_count_reg, ram_count_reg, temp_reg, offset_reg, - addrHigh_reg, iter_reg, ram_addr); - for (const auto ® : shift_reg) { + frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg, + offset_reg, addrHigh_reg, iter_reg, ram_addr); + for (const auto& reg : shift_reg) { frame.addDirtyRegs(reg); } @@ -161,7 +151,7 @@ int AVX512Payload::compilePayload( cb.mov(offset_reg, Imm(64)); // increment after each cache/memory access // Initialize registers for shift operations - for (auto const ® : shift_reg32) { + for (auto const& reg : shift_reg32) { cb.mov(reg, Imm(0xAAAAAAAA)); } // Initialize AVX512-Registers for FMA Operations @@ -183,23 +173,14 @@ int AVX512Payload::compilePayload( cb.mov(ram_addr, pointer_reg); cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer cb.mov(l2_count_reg, Imm(l2_loop_count)); - workerLog::trace() << "reset counter for L2-buffer with " - << l2_loop_count - << " cache line accesses per loop (" - << l2_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop (" + << l2_size / 1024 << ") KiB"; cb.mov(l3_count_reg, Imm(l3_loop_count)); - workerLog::trace() << "reset counter for L3-buffer with " - << l3_loop_count - << " cache line accesses per loop (" - << l3_size/1024 - << ") KiB"; + 
workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop (" + << l3_size / 1024 << ") KiB"; cb.mov(ram_count_reg, Imm(ram_loop_count)); - workerLog::trace() << "reset counter for RAM-buffer with " - << ram_loop_count - << " cache line accesses per loop (" - << ram_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop (" + << ram_size / 1024 << ") KiB"; cb.align(AlignMode::kCode, 64); @@ -213,13 +194,13 @@ int AVX512Payload::compilePayload( auto mov_src = mov_dst + 1; unsigned l1_offset = 0; -#define L1_INCREMENT() \ - l1_offset += 64; \ - if (l1_offset < l1_size * 0.5) { \ - cb.add(l1_addr, offset_reg); \ - } else { \ - l1_offset = 0; \ - cb.mov(l1_addr, pointer_reg); \ +#define L1_INCREMENT() \ + l1_offset += 64; \ + if (l1_offset < l1_size * 0.5) { \ + cb.add(l1_addr, offset_reg); \ + } else { \ + l1_offset = 0; \ + cb.mov(l1_addr, pointer_reg); \ } #define L2_INCREMENT() cb.add(l2_addr, offset_reg) @@ -229,12 +210,11 @@ int AVX512Payload::compilePayload( #define RAM_INCREMENT() cb.add(ram_addr, offset_reg) for (unsigned count = 0; count < repetitions; count++) { - for (const auto &item : sequence) { + for (const auto& item : sequence) { if (item == "REG") { cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2); cb.vfmadd231pd(Zmm(mov_dst), zmm2, zmm1); - cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], - temp_reg); + cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], temp_reg); mov_dst++; } else if (item == "L1_L") { cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2); @@ -297,8 +277,7 @@ int AVX512Payload::compilePayload( cb.prefetcht2(ptr(ram_addr)); RAM_INCREMENT(); } else { - workerLog::error() << "Instruction group " << item << " not found in " - << this->name() << "."; + workerLog::error() << "Instruction group " << item << " not found in " << this->name() << "."; return EXIT_FAILURE; } @@ -378,9 +357,7 @@ int 
AVX512Payload::compilePayload( // dump all the ymm register for (int i = 0; i < (int)this->registerCount(); i++) { - cb.vmovapd( - zmmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), - Zmm(i)); + cb.vmovapd(zmmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Zmm(i)); } // set read flag @@ -390,8 +367,7 @@ int AVX512Payload::compilePayload( } if (errorDetection) { - this->emitErrorDetectionCode( - cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); + this->emitErrorDetectionCode(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); } cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH)); @@ -410,8 +386,7 @@ int AVX512Payload::compilePayload( Error err = this->rt.add(&this->loadFunction, &code); if (err) { - workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " - << __FILE__ << " at " << __LINE__; + workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; } @@ -424,8 +399,7 @@ int AVX512Payload::compilePayload( workerLog::warn() << "Work-loop is bigger than the L1i-Cache."; } - workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size - << " Bytes (" << instructionCachePercentage + workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage << "%) from the L1i-Cache for the work-loop."; workerLog::trace() << "Sequence size: " << sequence.size(); workerLog::trace() << "Repetition count: " << repetitions; @@ -437,14 +411,12 @@ int AVX512Payload::compilePayload( std::list AVX512Payload::getAvailableInstructions() const { std::list instructions; - transform(this->instructionFlops.begin(), this->instructionFlops.end(), - back_inserter(instructions), - [](const auto &item) { return item.first; }); + transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions), + [](const auto& item) { return item.first; }); return instructions; } -void 
AVX512Payload::init(unsigned long long *memoryAddr, - unsigned long long bufferSize) { +void AVX512Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) { X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4); } diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp index b6899025..c925f538 100644 --- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp +++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp @@ -29,28 +29,24 @@ using namespace firestarter::environment::x86::payload; using namespace asmjit; using namespace asmjit::x86; -int AVXPayload::compilePayload( - std::vector> const &proportion, - unsigned instructionCacheSize, - std::list const &dataCacheBufferSize, unsigned ramBufferSize, - unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) { +int AVXPayload::compilePayload(std::vector> const& proportion, + unsigned instructionCacheSize, std::list const& dataCacheBufferSize, + unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters, + bool errorDetection) { // Compute the sequence of instruction groups and the number of its repetions // to reach the desired size auto sequence = this->generateSequence(proportion); - auto repetitions = - this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); + auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); // compute count of flops and memory access for performance report unsigned flops = 0; unsigned bytes = 0; - for (const auto &item : sequence) { + for (const auto& item : sequence) { auto it = this->instructionFlops.find(item); if (it == this->instructionFlops.end()) { - workerLog::error() << "Instruction group " << item << " undefined in " - << name() << "."; + workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; return EXIT_FAILURE; } @@ -78,12 
+74,9 @@ int AVXPayload::compilePayload( auto ram_size = ramBufferSize / thread; // calculate the reset counters for the buffers - auto l2_loop_count = - getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); - auto l3_loop_count = - getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); - auto ram_loop_count = - getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); + auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); + auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); + auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); CodeHolder code; code.init(this->rt.environment()); @@ -93,9 +86,8 @@ int AVXPayload::compilePayload( } Builder cb(&code); - cb.addDiagnosticOptions( - asmjit::DiagnosticOptions::kValidateAssembler | - asmjit::DiagnosticOptions::kValidateIntermediate ); + cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | + asmjit::DiagnosticOptions::kValidateIntermediate); auto pointer_reg = rax; auto l1_addr = rbx; @@ -115,8 +107,7 @@ int AVXPayload::compilePayload( auto trans_regs = 6; FuncDetail func; - func.init(FuncSignatureT( + func.init(FuncSignatureT( CallConvId::kCDecl), this->rt.environment()); @@ -132,9 +123,8 @@ int AVXPayload::compilePayload( frame.addDirtyRegs(Mm(i)); } // make all other used registers dirty except RAX - frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, - l3_count_reg, ram_count_reg, temp_reg, temp_reg2, - offset_reg, addrHigh_reg, iter_reg); + frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg, + temp_reg2, offset_reg, addrHigh_reg, iter_reg); FuncArgsAssignment args(&func); args.assignAll(pointer_reg, addrHigh_reg, iter_reg); @@ -185,8 +175,7 @@ int AVXPayload::compilePayload( } cb.pinsrq(Xmm(trans_start), temp_reg, Imm(0)); cb.pinsrq(Xmm(trans_start), temp_reg, Imm(1)); - 
cb.vinsertf128(Ymm(trans_start), Ymm(trans_start), Xmm(trans_start), - Imm(1)); + cb.vinsertf128(Ymm(trans_start), Ymm(trans_start), Xmm(trans_start), Imm(1)); for (int i = trans_start + 1; i <= trans_end; i++) { if (i % 2 == 0) { cb.shr(temp_reg, Imm(4)); @@ -207,23 +196,14 @@ int AVXPayload::compilePayload( cb.mov(ram_addr, pointer_reg); cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer cb.mov(l2_count_reg, Imm(l2_loop_count)); - workerLog::trace() << "reset counter for L2-buffer with " - << l2_loop_count - << " cache line accesses per loop (" - << l2_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop (" + << l2_size / 1024 << ") KiB"; cb.mov(l3_count_reg, Imm(l3_loop_count)); - workerLog::trace() << "reset counter for L3-buffer with " - << l3_loop_count - << " cache line accesses per loop (" - << l3_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop (" + << l3_size / 1024 << ") KiB"; cb.mov(ram_count_reg, Imm(ram_loop_count)); - workerLog::trace() << "reset counter for RAM-buffer with " - << ram_loop_count - << " cache line accesses per loop (" - << ram_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop (" + << ram_size / 1024 << ") KiB"; cb.align(AlignMode::kCode, 64); @@ -237,13 +217,13 @@ int AVXPayload::compilePayload( auto mov_src = mov_dst + 1; unsigned l1_offset = 0; -#define L1_INCREMENT() \ - l1_offset += 64; \ - if (l1_offset < l1_size * 0.5) { \ - cb.add(l1_addr, offset_reg); \ - } else { \ - l1_offset = 0; \ - cb.mov(l1_addr, pointer_reg); \ +#define L1_INCREMENT() \ + l1_offset += 64; \ + if (l1_offset < l1_size * 0.5) { \ + cb.add(l1_addr, offset_reg); \ + } else { \ + l1_offset = 0; \ + cb.mov(l1_addr, pointer_reg); \ } #define L2_INCREMENT() cb.add(l2_addr, offset_reg); @@ -253,19 +233,15 
@@ int AVXPayload::compilePayload( #define RAM_INCREMENT() cb.add(ram_addr, offset_reg) for (unsigned count = 0; count < repetitions; count++) { - for (const auto &item : sequence) { + for (const auto& item : sequence) { if (item == "REG") { - cb.vaddpd( - Ymm(add_dest), Ymm(add_dest), - Ymm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); + cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); cb.vmovdqa(Ymm(mov_dst), Ymm(mov_src)); } else if (item == "L1_L") { cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32)); L1_INCREMENT(); } else if (item == "L1_S") { - cb.vaddpd( - Ymm(add_dest), Ymm(add_dest), - Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); + cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.vmovapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest)); L1_INCREMENT(); this->_instructions++; @@ -278,9 +254,7 @@ int AVXPayload::compilePayload( cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l2_addr, 64)); L2_INCREMENT(); } else if (item == "L2_S") { - cb.vaddpd( - Ymm(add_dest), Ymm(add_dest), - Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); + cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.vmovapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest)); L2_INCREMENT(); this->_instructions++; @@ -293,9 +267,7 @@ int AVXPayload::compilePayload( cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l3_addr, 64)); L3_INCREMENT(); } else if (item == "L3_S") { - cb.vaddpd( - Ymm(add_dest), Ymm(add_dest), - Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); + cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); L3_INCREMENT(); this->_instructions++; @@ -313,9 +285,7 @@ int AVXPayload::compilePayload( 
cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(ram_addr, 64)); RAM_INCREMENT(); } else if (item == "RAM_S") { - cb.vaddpd( - Ymm(add_dest), Ymm(add_dest), - Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); + cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); RAM_INCREMENT(); this->_instructions++; @@ -330,19 +300,16 @@ int AVXPayload::compilePayload( RAM_INCREMENT(); this->_instructions++; } else { - workerLog::error() << "Instruction group " << item << " not found in " - << this->name() << "."; + workerLog::error() << "Instruction group " << item << " not found in " << this->name() << "."; return EXIT_FAILURE; } if (shift_regs > 1) { this->_instructions++; if (left) { - cb.psrlw(Mm(shift_start + (shift_dst - shift_start + 3) % shift_regs), - Mm(shift_dst)); + cb.psrlw(Mm(shift_start + (shift_dst - shift_start + 3) % shift_regs), Mm(shift_dst)); } else { - cb.psllw(Mm(shift_start + (shift_dst - shift_start + 3) % shift_regs), - Mm(shift_dst)); + cb.psllw(Mm(shift_start + (shift_dst - shift_start + 3) % shift_regs), Mm(shift_dst)); } } @@ -420,9 +387,7 @@ int AVXPayload::compilePayload( // dump all the ymm register for (int i = 0; i < (int)this->registerCount(); i++) { - cb.vmovapd( - ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), - Ymm(i)); + cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i)); } // set read flag @@ -432,8 +397,7 @@ int AVXPayload::compilePayload( } if (errorDetection) { - this->emitErrorDetectionCode( - cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); + this->emitErrorDetectionCode(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); } cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH)); @@ -452,8 +416,7 @@ int AVXPayload::compilePayload( Error err = this->rt.add(&this->loadFunction, &code); if (err) { - workerLog::error() << "Asmjit adding 
Assembler to JitRuntime failed in " - << __FILE__ << " at " << __LINE__; + workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; } @@ -466,8 +429,7 @@ int AVXPayload::compilePayload( workerLog::warn() << "Work-loop is bigger than the L1i-Cache."; } - workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size - << " Bytes (" << instructionCachePercentage + workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage << "%) from the L1i-Cache for the work-loop."; workerLog::trace() << "Sequence size: " << sequence.size(); workerLog::trace() << "Repetition count: " << repetitions; @@ -479,15 +441,12 @@ int AVXPayload::compilePayload( std::list AVXPayload::getAvailableInstructions() const { std::list instructions; - transform(this->instructionFlops.begin(), this->instructionFlops.end(), - back_inserter(instructions), - [](const auto &item) { return item.first; }); + transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions), + [](const auto& item) { return item.first; }); return instructions; } -void AVXPayload::init(unsigned long long *memoryAddr, - unsigned long long bufferSize) { - X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10, - 1.654738925401e-15); +void AVXPayload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) { + X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10, 1.654738925401e-15); } diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp index 32e81752..1e5ffa85 100644 --- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp @@ -29,28 +29,24 @@ using namespace firestarter::environment::x86::payload; using namespace asmjit; using namespace asmjit::x86; -int FMA4Payload::compilePayload( - std::vector> 
const &proportion, - unsigned instructionCacheSize, - std::list const &dataCacheBufferSize, unsigned ramBufferSize, - unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) { +int FMA4Payload::compilePayload(std::vector> const& proportion, + unsigned instructionCacheSize, std::list const& dataCacheBufferSize, + unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters, + bool errorDetection) { // Compute the sequence of instruction groups and the number of its repetions // to reach the desired size auto sequence = this->generateSequence(proportion); - auto repetitions = - this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); + auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); // compute count of flops and memory access for performance report unsigned flops = 0; unsigned bytes = 0; - for (const auto &item : sequence) { + for (const auto& item : sequence) { auto it = this->instructionFlops.find(item); if (it == this->instructionFlops.end()) { - workerLog::error() << "Instruction group " << item << " undefined in " - << name() << "."; + workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; return EXIT_FAILURE; } @@ -78,12 +74,9 @@ int FMA4Payload::compilePayload( auto ram_size = ramBufferSize / thread; // calculate the reset counters for the buffers - auto l2_loop_count = - getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); - auto l3_loop_count = - getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); - auto ram_loop_count = - getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); + auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); + auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); + auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); CodeHolder code; 
code.init(this->rt.environment()); @@ -93,9 +86,8 @@ int FMA4Payload::compilePayload( } Builder cb(&code); - cb.addDiagnosticOptions( - asmjit::DiagnosticOptions::kValidateAssembler | - asmjit::DiagnosticOptions::kValidateIntermediate ); + cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | + asmjit::DiagnosticOptions::kValidateIntermediate); auto pointer_reg = rax; auto l1_addr = rbx; @@ -119,8 +111,7 @@ int FMA4Payload::compilePayload( auto ram_reg = xmm15; FuncDetail func; - func.init(FuncSignatureT( + func.init(FuncSignatureT( CallConvId::kCDecl), this->rt.environment()); @@ -135,10 +126,9 @@ int FMA4Payload::compilePayload( frame.addDirtyRegs(Mm(i)); } // make all other used registers dirty except RAX - frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, - l3_count_reg, ram_count_reg, temp_reg, temp_reg2, - offset_reg, addrHigh_reg, iter_reg, ram_addr); - for (const auto ® : shift_reg) { + frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg, + temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr); + for (const auto& reg : shift_reg) { frame.addDirtyRegs(reg); } @@ -164,7 +154,7 @@ int FMA4Payload::compilePayload( cb.mov(offset_reg, Imm(64)); // increment after each cache/memory access // Initialize registers for shift operations - for (auto const ® : shift_reg32) { + for (auto const& reg : shift_reg32) { cb.mov(reg, Imm(0xAAAAAAAA)); } // Initialize AVX-Registers for FMA4 Operations @@ -185,23 +175,14 @@ int FMA4Payload::compilePayload( cb.mov(ram_addr, pointer_reg); cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer cb.mov(l2_count_reg, Imm(l2_loop_count)); - workerLog::trace() << "reset counter for L2-buffer with " - << l2_loop_count - << " cache line accesses per loop (" - << l2_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop (" + << l2_size / 1024 << ") KiB"; 
cb.mov(l3_count_reg, Imm(l3_loop_count)); - workerLog::trace() << "reset counter for L3-buffer with " - << l3_loop_count - << " cache line accesses per loop (" - << l3_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop (" + << l3_size / 1024 << ") KiB"; cb.mov(ram_count_reg, Imm(ram_loop_count)); - workerLog::trace() << "reset counter for RAM-buffer with " - << ram_loop_count - << " cache line accesses per loop (" - << ram_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop (" + << ram_size / 1024 << ") KiB"; cb.align(AlignMode::kCode, 64); @@ -215,13 +196,13 @@ int FMA4Payload::compilePayload( auto mov_src = mov_dst + 1; unsigned l1_offset = 0; -#define L1_INCREMENT() \ - l1_offset += 64; \ - if (l1_offset < l1_size * 0.5) { \ - cb.add(l1_addr, offset_reg); \ - } else { \ - l1_offset = 0; \ - cb.mov(l1_addr, pointer_reg); \ +#define L1_INCREMENT() \ + l1_offset += 64; \ + if (l1_offset < l1_size * 0.5) { \ + cb.add(l1_addr, offset_reg); \ + } else { \ + l1_offset = 0; \ + cb.mov(l1_addr, pointer_reg); \ } #define L2_INCREMENT() cb.add(l2_addr, offset_reg); @@ -231,101 +212,80 @@ int FMA4Payload::compilePayload( #define RAM_INCREMENT() cb.add(ram_addr, offset_reg) for (unsigned count = 0; count < repetitions; count++) { - for (const auto &item : sequence) { + for (const auto& item : sequence) { if (item == "REG") { - cb.vfmaddpd( - Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - cb.vfmaddpd( - Xmm(mov_dst), Xmm(mov_dst), xmm1, - Xmm(add_start + (add_dest - add_start + add_regs + 2) % add_regs)); - cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], - temp_reg); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, + Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); + cb.vfmaddpd(Xmm(mov_dst), Xmm(mov_dst), 
xmm1, + Xmm(add_start + (add_dest - add_start + add_regs + 2) % add_regs)); + cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], temp_reg); mov_dst++; } else if (item == "L1_L") { - cb.vfmaddpd( - Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm1, - ymmword_ptr(l1_addr, 32)); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, + Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); + cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm1, ymmword_ptr(l1_addr, 32)); L1_INCREMENT(); } else if (item == "L1_S") { cb.vmovapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest)); - cb.vfmaddpd( - Ymm(add_dest), Ymm(add_dest), ymm0, - Ymm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); + cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm0, + Ymm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); L1_INCREMENT(); } else if (item == "L1_LS") { cb.vmovapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest)); - cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm0, - ymmword_ptr(l1_addr, 32)); + cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 32)); L1_INCREMENT(); } else if (item == "L2_L") { - cb.vfmaddpd( - Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm1, - xmmword_ptr(l2_addr, 64)); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, + Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm1, xmmword_ptr(l2_addr, 64)); L2_INCREMENT(); } else if (item == "L2_S") { cb.vmovapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest)); - cb.vfmaddpd( - Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, + Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); L2_INCREMENT(); } else 
if (item == "L2_LS") { cb.vmovapd(xmmword_ptr(l2_addr, 96), Xmm(add_dest)); - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, - xmmword_ptr(l2_addr, 64)); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l2_addr, 64)); L2_INCREMENT(); } else if (item == "L3_L") { - cb.vfmaddpd( - Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm1, - xmmword_ptr(l3_addr, 64)); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, + Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm1, xmmword_ptr(l3_addr, 64)); L3_INCREMENT(); } else if (item == "L3_S") { cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); - cb.vfmaddpd( - Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, + Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); L3_INCREMENT(); } else if (item == "L3_LS") { cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, - xmmword_ptr(l3_addr, 64)); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l3_addr, 64)); L3_INCREMENT(); } else if (item == "L3_P") { - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, - xmmword_ptr(l1_addr, 32)); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l1_addr, 32)); cb.prefetcht2(ptr(l3_addr)); L3_INCREMENT(); } else if (item == "RAM_L") { - cb.vfmaddpd( - Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, + Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); cb.vfmaddpd(ram_reg, ram_reg, xmm1, xmmword_ptr(ram_addr, 64)); RAM_INCREMENT(); } else if (item == "RAM_S") { cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); - cb.vfmaddpd( - Xmm(add_dest), 
Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, + Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); RAM_INCREMENT(); } else if (item == "RAM_LS") { cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, - xmmword_ptr(ram_addr, 32)); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(ram_addr, 32)); RAM_INCREMENT(); } else if (item == "RAM_P") { - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, - xmmword_ptr(l1_addr, 32)); + cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l1_addr, 32)); cb.prefetcht2(ptr(ram_addr)); RAM_INCREMENT(); } else { - workerLog::error() << "Instruction group " << item << " not found in " - << this->name() << "."; + workerLog::error() << "Instruction group " << item << " not found in " << this->name() << "."; return EXIT_FAILURE; } @@ -405,9 +365,7 @@ int FMA4Payload::compilePayload( // dump all the ymm register for (int i = 0; i < (int)this->registerCount(); i++) { - cb.vmovapd( - ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), - Ymm(i)); + cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i)); } // set read flag @@ -417,8 +375,7 @@ int FMA4Payload::compilePayload( } if (errorDetection) { - this->emitErrorDetectionCode( - cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); + this->emitErrorDetectionCode(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); } cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH)); @@ -437,8 +394,7 @@ int FMA4Payload::compilePayload( Error err = this->rt.add(&this->loadFunction, &code); if (err) { - workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " - << __FILE__ << " at " << __LINE__; + workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; } @@ -451,8 +407,7 @@ int 
FMA4Payload::compilePayload( workerLog::warn() << "Work-loop is bigger than the L1i-Cache."; } - workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size - << " Bytes (" << instructionCachePercentage + workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage << "%) from the L1i-Cache for the work-loop."; workerLog::trace() << "Sequence size: " << sequence.size(); workerLog::trace() << "Repetition count: " << repetitions; @@ -464,14 +419,12 @@ int FMA4Payload::compilePayload( std::list FMA4Payload::getAvailableInstructions() const { std::list instructions; - transform(this->instructionFlops.begin(), this->instructionFlops.end(), - back_inserter(instructions), - [](const auto &item) { return item.first; }); + transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions), + [](const auto& item) { return item.first; }); return instructions; } -void FMA4Payload::init(unsigned long long *memoryAddr, - unsigned long long bufferSize) { +void FMA4Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) { X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4); } diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp index e3087c01..3a432bfb 100644 --- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp +++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp @@ -29,28 +29,24 @@ using namespace firestarter::environment::x86::payload; using namespace asmjit; using namespace asmjit::x86; -int FMAPayload::compilePayload( - std::vector> const &proportion, - unsigned instructionCacheSize, - std::list const &dataCacheBufferSize, unsigned ramBufferSize, - unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) { +int FMAPayload::compilePayload(std::vector> const& proportion, + unsigned instructionCacheSize, std::list const& 
dataCacheBufferSize, + unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters, + bool errorDetection) { // Compute the sequence of instruction groups and the number of its repetions // to reach the desired size auto sequence = this->generateSequence(proportion); - auto repetitions = - this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); + auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); // compute count of flops and memory access for performance report unsigned flops = 0; unsigned bytes = 0; - for (const auto &item : sequence) { + for (const auto& item : sequence) { auto it = this->instructionFlops.find(item); if (it == this->instructionFlops.end()) { - workerLog::error() << "Instruction group " << item << " undefined in " - << name() << "."; + workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; return EXIT_FAILURE; } @@ -78,12 +74,9 @@ int FMAPayload::compilePayload( auto ram_size = ramBufferSize / thread; // calculate the reset counters for the buffers - auto l2_loop_count = - getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); - auto l3_loop_count = - getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); - auto ram_loop_count = - getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); + auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); + auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); + auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); CodeHolder code; code.init(this->rt.environment()); @@ -93,9 +86,8 @@ int FMAPayload::compilePayload( } Builder cb(&code); - cb.addDiagnosticOptions( - asmjit::DiagnosticOptions::kValidateAssembler | - asmjit::DiagnosticOptions::kValidateIntermediate ); + cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | + 
asmjit::DiagnosticOptions::kValidateIntermediate); auto pointer_reg = rax; auto l1_addr = rbx; @@ -119,8 +111,7 @@ int FMAPayload::compilePayload( auto ram_reg = ymm15; FuncDetail func; - func.init(FuncSignatureT( + func.init(FuncSignatureT( CallConvId::kCDecl), this->rt.environment()); @@ -135,10 +126,9 @@ int FMAPayload::compilePayload( frame.addDirtyRegs(Mm(i)); } // make all other used registers dirty except RAX - frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, - l3_count_reg, ram_count_reg, temp_reg, temp_reg2, - offset_reg, addrHigh_reg, iter_reg, ram_addr); - for (const auto ® : shift_reg) { + frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg, + temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr); + for (const auto& reg : shift_reg) { frame.addDirtyRegs(reg); } @@ -164,7 +154,7 @@ int FMAPayload::compilePayload( cb.mov(offset_reg, Imm(64)); // increment after each cache/memory access // Initialize registers for shift operations - for (auto const ® : shift_reg32) { + for (auto const& reg : shift_reg32) { cb.mov(reg, Imm(0xAAAAAAAA)); } // Initialize AVX-Registers for FMA Operations @@ -186,23 +176,14 @@ int FMAPayload::compilePayload( cb.mov(ram_addr, pointer_reg); cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer cb.mov(l2_count_reg, Imm(l2_loop_count)); - workerLog::trace() << "reset counter for L2-buffer with " - << l2_loop_count - << " cache line accesses per loop (" - << l2_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop (" + << l2_size / 1024 << ") KiB"; cb.mov(l3_count_reg, Imm(l3_loop_count)); - workerLog::trace() << "reset counter for L3-buffer with " - << l3_loop_count - << " cache line accesses per loop (" - << l3_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop (" + << l3_size / 1024 << 
") KiB"; cb.mov(ram_count_reg, Imm(ram_loop_count)); - workerLog::trace() << "reset counter for RAM-buffer with " - << ram_loop_count - << " cache line accesses per loop (" - << ram_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop (" + << ram_size / 1024 << ") KiB"; cb.align(AlignMode::kCode, 64); @@ -216,22 +197,22 @@ int FMAPayload::compilePayload( auto mov_src = mov_dst + 1; unsigned l1_offset = 0; -#define L1_INCREMENT_TIMES(n) \ - l1_offset += n * 64; \ - if (l1_offset < l1_size * 0.5) { \ - cb.add(l1_addr, offset_reg); \ - } else { \ - l1_offset = 0; \ - cb.mov(l1_addr, pointer_reg); \ +#define L1_INCREMENT_TIMES(n) \ + l1_offset += n * 64; \ + if (l1_offset < l1_size * 0.5) { \ + cb.add(l1_addr, offset_reg); \ + } else { \ + l1_offset = 0; \ + cb.mov(l1_addr, pointer_reg); \ } #define L1_INCREMENT() L1_INCREMENT_TIMES(1) -#define L2_INCREMENT_TIMES(n) \ - if (n == 1) { \ - cb.add(l2_addr, offset_reg); \ - } else { \ - cb.add(l2_addr, n * 64); \ +#define L2_INCREMENT_TIMES(n) \ + if (n == 1) { \ + cb.add(l2_addr, offset_reg); \ + } else { \ + cb.add(l2_addr, n * 64); \ } #define L2_INCREMENT() L2_INCREMENT_TIMES(1) @@ -241,12 +222,11 @@ int FMAPayload::compilePayload( #define RAM_INCREMENT() cb.add(ram_addr, offset_reg) for (unsigned count = 0; count < repetitions; count++) { - for (const auto &item : sequence) { + for (const auto& item : sequence) { if (item == "REG") { cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2); cb.vfmadd231pd(Ymm(mov_dst), ymm2, ymm1); - cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], - temp_reg); + cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], temp_reg); mov_dst++; } else if (item == "L1_L") { cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2); @@ -331,8 +311,7 @@ int FMAPayload::compilePayload( cb.prefetcht2(ptr(ram_addr)); RAM_INCREMENT(); } else { - workerLog::error() << "Instruction group " << item << " not found in 
" - << this->name() << "."; + workerLog::error() << "Instruction group " << item << " not found in " << this->name() << "."; return EXIT_FAILURE; } @@ -414,9 +393,7 @@ int FMAPayload::compilePayload( // dump all the ymm register for (int i = 0; i < (int)this->registerCount(); i++) { - cb.vmovapd( - ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), - Ymm(i)); + cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i)); } // set read flag @@ -426,8 +403,7 @@ int FMAPayload::compilePayload( } if (errorDetection) { - this->emitErrorDetectionCode( - cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); + this->emitErrorDetectionCode(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); } cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH)); @@ -446,8 +422,7 @@ int FMAPayload::compilePayload( Error err = this->rt.add(&this->loadFunction, &code); if (err) { - workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " - << __FILE__ << " at " << __LINE__; + workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; } @@ -460,8 +435,7 @@ int FMAPayload::compilePayload( workerLog::warn() << "Work-loop is bigger than the L1i-Cache."; } - workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size - << " Bytes (" << instructionCachePercentage + workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage << "%) from the L1i-Cache for the work-loop."; workerLog::trace() << "Sequence size: " << sequence.size(); workerLog::trace() << "Repetition count: " << repetitions; @@ -473,14 +447,12 @@ int FMAPayload::compilePayload( std::list FMAPayload::getAvailableInstructions() const { std::list instructions; - transform(this->instructionFlops.begin(), this->instructionFlops.end(), - back_inserter(instructions), - [](const auto &item) { return item.first; }); + 
transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions), + [](const auto& item) { return item.first; }); return instructions; } -void FMAPayload::init(unsigned long long *memoryAddr, - unsigned long long bufferSize) { +void FMAPayload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) { X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4); } diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp index d22880d1..d3d0147f 100644 --- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp @@ -29,28 +29,24 @@ using namespace firestarter::environment::x86::payload; using namespace asmjit; using namespace asmjit::x86; -int SSE2Payload::compilePayload( - std::vector> const &proportion, - unsigned instructionCacheSize, - std::list const &dataCacheBufferSize, unsigned ramBufferSize, - unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) { +int SSE2Payload::compilePayload(std::vector> const& proportion, + unsigned instructionCacheSize, std::list const& dataCacheBufferSize, + unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters, + bool errorDetection) { // Compute the sequence of instruction groups and the number of its repetions // to reach the desired size auto sequence = this->generateSequence(proportion); - auto repetitions = - this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); + auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); // compute count of flops and memory access for performance report unsigned flops = 0; unsigned bytes = 0; - for (const auto &item : sequence) { + for (const auto& item : sequence) { auto it = this->instructionFlops.find(item); if (it == this->instructionFlops.end()) { - workerLog::error() << "Instruction 
group " << item << " undefined in " - << name() << "."; + workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; return EXIT_FAILURE; } @@ -78,12 +74,9 @@ int SSE2Payload::compilePayload( auto ram_size = ramBufferSize / thread; // calculate the reset counters for the buffers - auto l2_loop_count = - getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); - auto l3_loop_count = - getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); - auto ram_loop_count = - getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); + auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); + auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); + auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); CodeHolder code; code.init(this->rt.environment()); @@ -93,9 +86,8 @@ int SSE2Payload::compilePayload( } Builder cb(&code); - cb.addDiagnosticOptions( - asmjit::DiagnosticOptions::kValidateAssembler | - asmjit::DiagnosticOptions::kValidateIntermediate ); + cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | + asmjit::DiagnosticOptions::kValidateIntermediate); auto pointer_reg = rax; auto l1_addr = rbx; @@ -115,8 +107,7 @@ int SSE2Payload::compilePayload( auto trans_regs = 2; FuncDetail func; - func.init(FuncSignatureT( + func.init(FuncSignatureT( CallConvId::kCDecl), this->rt.environment()); @@ -132,9 +123,8 @@ int SSE2Payload::compilePayload( frame.addDirtyRegs(Mm(i)); } // make all other used registers dirty except RAX - frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, - l3_count_reg, ram_count_reg, temp_reg, temp_reg2, - offset_reg, addrHigh_reg, iter_reg); + frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg, + temp_reg2, offset_reg, addrHigh_reg, iter_reg); FuncArgsAssignment args(&func); args.assignAll(pointer_reg, 
addrHigh_reg, iter_reg); @@ -204,23 +194,14 @@ int SSE2Payload::compilePayload( cb.mov(ram_addr, pointer_reg); cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer cb.mov(l2_count_reg, Imm(l2_loop_count)); - workerLog::trace() << "reset counter for L2-buffer with " - << l2_loop_count - << " cache line accesses per loop (" - << l2_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop (" + << l2_size / 1024 << ") KiB"; cb.mov(l3_count_reg, Imm(l3_loop_count)); - workerLog::trace() << "reset counter for L3-buffer with " - << l3_loop_count - << " cache line accesses per loop (" - << l3_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop (" + << l3_size / 1024 << ") KiB"; cb.mov(ram_count_reg, Imm(ram_loop_count)); - workerLog::trace() << "reset counter for RAM-buffer with " - << ram_loop_count - << " cache line accesses per loop (" - << ram_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop (" + << ram_size / 1024 << ") KiB"; cb.align(AlignMode::kCode, 64); @@ -233,13 +214,13 @@ int SSE2Payload::compilePayload( auto mov_src = mov_dst + 1; unsigned l1_offset = 0; -#define L1_INCREMENT() \ - l1_offset += 64; \ - if (l1_offset < l1_size * 0.5) { \ - cb.add(l1_addr, offset_reg); \ - } else { \ - l1_offset = 0; \ - cb.mov(l1_addr, pointer_reg); \ +#define L1_INCREMENT() \ + l1_offset += 64; \ + if (l1_offset < l1_size * 0.5) { \ + cb.add(l1_addr, offset_reg); \ + } else { \ + l1_offset = 0; \ + cb.mov(l1_addr, pointer_reg); \ } #define L2_INCREMENT() cb.add(l2_addr, offset_reg); @@ -249,19 +230,15 @@ int SSE2Payload::compilePayload( #define RAM_INCREMENT() cb.add(ram_addr, offset_reg) for (unsigned count = 0; count < repetitions; count++) { - for (const auto &item : sequence) { + for (const auto& item : sequence) { if (item 
== "REG") { - cb.addpd( - Xmm(add_dest), - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); + cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); cb.movdqa(Xmm(mov_dst), Xmm(mov_src)); } else if (item == "L1_L") { cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32)); L1_INCREMENT(); } else if (item == "L1_S") { - cb.addpd( - Xmm(add_dest), - Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); + cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.movapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest)); L1_INCREMENT(); this->_instructions++; @@ -274,9 +251,7 @@ int SSE2Payload::compilePayload( cb.addpd(Xmm(add_dest), xmmword_ptr(l2_addr, 64)); L2_INCREMENT(); } else if (item == "L2_S") { - cb.addpd( - Xmm(add_dest), - Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); + cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.movapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest)); L2_INCREMENT(); this->_instructions++; @@ -289,9 +264,7 @@ int SSE2Payload::compilePayload( cb.addpd(Xmm(add_dest), xmmword_ptr(l3_addr, 64)); L3_INCREMENT(); } else if (item == "L3_S") { - cb.addpd( - Xmm(add_dest), - Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); + cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.movapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); L3_INCREMENT(); this->_instructions++; @@ -309,9 +282,7 @@ int SSE2Payload::compilePayload( cb.addpd(Xmm(add_dest), xmmword_ptr(ram_addr, 64)); RAM_INCREMENT(); } else if (item == "RAM_S") { - cb.addpd( - Xmm(add_dest), - Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); + cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.movapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); RAM_INCREMENT(); this->_instructions++; @@ -326,16 +297,13 @@ int 
SSE2Payload::compilePayload( RAM_INCREMENT(); this->_instructions++; } else { - workerLog::error() << "Instruction group " << item << " not found in " - << this->name() << "."; + workerLog::error() << "Instruction group " << item << " not found in " << this->name() << "."; return EXIT_FAILURE; } if (mov_regs > 0) { this->_instructions++; - cb.movq( - Mm(mov_start + (movq_dst - mov_start + mov_regs - 1) % mov_regs), - Mm(movq_dst)); + cb.movq(Mm(mov_start + (movq_dst - mov_start + mov_regs - 1) % mov_regs), Mm(movq_dst)); } add_dest++; @@ -411,9 +379,7 @@ int SSE2Payload::compilePayload( // dump all the xmm register for (int i = 0; i < (int)this->registerCount(); i++) { - cb.movapd( - xmmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), - Xmm(i)); + cb.movapd(xmmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Xmm(i)); } // set read flag @@ -423,8 +389,7 @@ int SSE2Payload::compilePayload( } if (errorDetection) { - this->emitErrorDetectionCode( - cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); + this->emitErrorDetectionCode(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); } cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH)); @@ -443,8 +408,7 @@ int SSE2Payload::compilePayload( Error err = this->rt.add(&this->loadFunction, &code); if (err) { - workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " - << __FILE__ << " at " << __LINE__; + workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; } @@ -457,8 +421,7 @@ int SSE2Payload::compilePayload( workerLog::warn() << "Work-loop is bigger than the L1i-Cache."; } - workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size - << " Bytes (" << instructionCachePercentage + workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage << "%) from the L1i-Cache for the work-loop."; workerLog::trace() << "Sequence size: 
" << sequence.size(); workerLog::trace() << "Repetition count: " << repetitions; @@ -470,15 +433,12 @@ int SSE2Payload::compilePayload( std::list SSE2Payload::getAvailableInstructions() const { std::list instructions; - transform(this->instructionFlops.begin(), this->instructionFlops.end(), - back_inserter(instructions), - [](const auto &item) { return item.first; }); + transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions), + [](const auto& item) { return item.first; }); return instructions; } -void SSE2Payload::init(unsigned long long *memoryAddr, - unsigned long long bufferSize) { - X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10, - 1.654738925401e-15); +void SSE2Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) { + X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10, 1.654738925401e-15); } diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp index 42a2fa5b..8d85dc2d 100644 --- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp @@ -32,8 +32,7 @@ using namespace firestarter::environment::x86::payload; -void X86Payload::lowLoadFunction(volatile unsigned long long *addrHigh, - unsigned long long period) { +void X86Payload::lowLoadFunction(volatile unsigned long long* addrHigh, unsigned long long period) { int nap; #ifdef _MSC_VER std::array cpuid; @@ -70,46 +69,36 @@ void X86Payload::lowLoadFunction(volatile unsigned long long *addrHigh, } } -void X86Payload::init(unsigned long long *memoryAddr, - unsigned long long bufferSize, double firstValue, +void X86Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize, double firstValue, double lastValue) { unsigned long long i = 0; for (; i < INIT_BLOCKSIZE; i++) - *((double *)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * firstValue; + *((double*)(memoryAddr + i)) = 0.25 + 
(double)i * 8.0 * firstValue; for (; i <= bufferSize - INIT_BLOCKSIZE; i += INIT_BLOCKSIZE) - std::memcpy(memoryAddr + i, memoryAddr + i - INIT_BLOCKSIZE, - sizeof(unsigned long long) * INIT_BLOCKSIZE); + std::memcpy(memoryAddr + i, memoryAddr + i - INIT_BLOCKSIZE, sizeof(unsigned long long) * INIT_BLOCKSIZE); for (; i < bufferSize; i++) - *((double *)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * lastValue; + *((double*)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * lastValue; } -unsigned long long -X86Payload::highLoadFunction(unsigned long long *addrMem, - volatile unsigned long long *addrHigh, - unsigned long long iterations) { +unsigned long long X86Payload::highLoadFunction(unsigned long long* addrMem, volatile unsigned long long* addrHigh, + unsigned long long iterations) { return this->loadFunction(addrMem, addrHigh, iterations); } // add MM regs to dirty regs // zmm31 is used for backup if VectorReg is of type asmjit::x86::Zmm template -void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder &cb, - IterReg iter_reg, - asmjit::x86::Gpq addrHigh_reg, - asmjit::x86::Gpq pointer_reg, - asmjit::x86::Gpq temp_reg, +void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder& cb, IterReg iter_reg, asmjit::x86::Gpq addrHigh_reg, + asmjit::x86::Gpq pointer_reg, asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2) { // we don't want anything to break... 
so we use asserts for everything that // could break it - static_assert(std::is_base_of::value, - "VectorReg must be of asmjit::asmjit::x86::Vec"); - static_assert(std::is_same::value || - std::is_same::value || + static_assert(std::is_base_of::value, "VectorReg must be of asmjit::asmjit::x86::Vec"); + static_assert(std::is_same::value || std::is_same::value || std::is_same::value, "VectorReg ist not of any supported type"); - static_assert(std::is_same::value || - std::is_same::value, + static_assert(std::is_same::value || std::is_same::value, "IterReg is not of any supported type"); if constexpr (std::is_same::value) { @@ -281,8 +270,7 @@ void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder &cb, cb.movq(temp_reg2, asmjit::x86::Mm(4)); cb.pinsrq(asmjit::x86::xmm0, temp_reg2, asmjit::Imm(1)); - cb.vinsertf128(asmjit::x86::ymm0, asmjit::x86::ymm0, asmjit::x86::xmm0, - asmjit::Imm(1)); + cb.vinsertf128(asmjit::x86::ymm0, asmjit::x86::ymm0, asmjit::x86::xmm0, asmjit::Imm(1)); cb.movq(temp_reg2, asmjit::x86::Mm(7)); cb.movq(asmjit::x86::xmm0, temp_reg2); @@ -463,24 +451,16 @@ void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder &cb, cb.bind(SkipErrorDetection); } -template void -X86Payload::emitErrorDetectionCode( - asmjit::x86::Builder &cb, asmjit::x86::Gpq iter_reg, - asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg, +template void X86Payload::emitErrorDetectionCode( + asmjit::x86::Builder& cb, asmjit::x86::Gpq iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg, asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2); -template void -X86Payload::emitErrorDetectionCode( - asmjit::x86::Builder &cb, asmjit::x86::Gpq iter_reg, - asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg, +template void X86Payload::emitErrorDetectionCode( + asmjit::x86::Builder& cb, asmjit::x86::Gpq iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg, asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2); -template void 
-X86Payload::emitErrorDetectionCode( - asmjit::x86::Builder &cb, asmjit::x86::Mm iter_reg, - asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg, +template void X86Payload::emitErrorDetectionCode( + asmjit::x86::Builder& cb, asmjit::x86::Mm iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg, asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2); -template void -X86Payload::emitErrorDetectionCode( - asmjit::x86::Builder &cb, asmjit::x86::Mm iter_reg, - asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg, +template void X86Payload::emitErrorDetectionCode( + asmjit::x86::Builder& cb, asmjit::x86::Mm iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg, asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2); diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp index 9e99ca2d..b933dcd1 100644 --- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp +++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp @@ -29,28 +29,24 @@ using namespace firestarter::environment::x86::payload; using namespace asmjit; using namespace asmjit::x86; -int ZENFMAPayload::compilePayload( - std::vector> const &proportion, - unsigned instructionCacheSize, - std::list const &dataCacheBufferSize, unsigned ramBufferSize, - unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) { +int ZENFMAPayload::compilePayload(std::vector> const& proportion, + unsigned instructionCacheSize, std::list const& dataCacheBufferSize, + unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters, + bool errorDetection) { // Compute the sequence of instruction groups and the number of its repetions // to reach the desired size auto sequence = this->generateSequence(proportion); - auto repetitions = - this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); + auto repetitions = 
this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); // compute count of flops and memory access for performance report unsigned flops = 0; unsigned bytes = 0; - for (const auto &item : sequence) { + for (const auto& item : sequence) { auto it = this->instructionFlops.find(item); if (it == this->instructionFlops.end()) { - workerLog::error() << "Instruction group " << item << " undefined in " - << name() << "."; + workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; return EXIT_FAILURE; } @@ -78,12 +74,9 @@ int ZENFMAPayload::compilePayload( auto ram_size = ramBufferSize / thread; // calculate the reset counters for the buffers - auto l2_loop_count = - getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); - auto l3_loop_count = - getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); - auto ram_loop_count = - getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); + auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); + auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); + auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); CodeHolder code; code.init(this->rt.environment()); @@ -93,9 +86,8 @@ int ZENFMAPayload::compilePayload( } Builder cb(&code); - cb.addDiagnosticOptions( - asmjit::DiagnosticOptions::kValidateAssembler | - asmjit::DiagnosticOptions::kValidateIntermediate ); + cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | + asmjit::DiagnosticOptions::kValidateIntermediate); auto pointer_reg = rax; auto l1_addr = rbx; @@ -116,8 +108,7 @@ int ZENFMAPayload::compilePayload( auto ram_reg = ymm15; FuncDetail func; - func.init(FuncSignatureT( + func.init(FuncSignatureT( CallConvId::kCDecl), this->rt.environment()); @@ -132,10 +123,9 @@ int ZENFMAPayload::compilePayload( frame.addDirtyRegs(Mm(i)); } // make all other used registers dirty except 
RAX - frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, - l3_count_reg, ram_count_reg, temp_reg, temp_reg2, - offset_reg, addrHigh_reg, iter_reg, ram_addr); - for (const auto ® : shift_reg) { + frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg, + temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr); + for (const auto& reg : shift_reg) { frame.addDirtyRegs(reg); } @@ -161,7 +151,7 @@ int ZENFMAPayload::compilePayload( cb.mov(offset_reg, Imm(64)); // increment after each cache/memory access // Initialize registers for shift operations - for (auto const ® : shift_reg) { + for (auto const& reg : shift_reg) { cb.mov(reg, Imm(0xAAAAAAAAAAAAAAAA)); } // Initialize AVX-Registers for FMA Operations @@ -190,23 +180,14 @@ int ZENFMAPayload::compilePayload( cb.mov(ram_addr, pointer_reg); cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer cb.mov(l2_count_reg, Imm(l2_loop_count)); - workerLog::trace() << "reset counter for L2-buffer with " - << l2_loop_count - << " cache line accesses per loop (" - << l2_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop (" + << l2_size / 1024 << ") KiB"; cb.mov(l3_count_reg, Imm(l3_loop_count)); - workerLog::trace() << "reset counter for L3-buffer with " - << l3_loop_count - << " cache line accesses per loop (" - << l3_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop (" + << l3_size / 1024 << ") KiB"; cb.mov(ram_count_reg, Imm(ram_loop_count)); - workerLog::trace() << "reset counter for RAM-buffer with " - << ram_loop_count - << " cache line accesses per loop (" - << ram_size/1024 - << ") KiB"; + workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop (" + << ram_size / 1024 << ") KiB"; cb.align(AlignMode::kCode, 64); @@ -219,13 +200,13 
@@ int ZENFMAPayload::compilePayload( auto add_dest = add_regs_start; unsigned l1_offset = 0; -#define L1_INCREMENT() \ - l1_offset += 64; \ - if (l1_offset < l1_size * 0.5) { \ - cb.add(l1_addr, offset_reg); \ - } else { \ - l1_offset = 0; \ - cb.mov(l1_addr, pointer_reg); \ +#define L1_INCREMENT() \ + l1_offset += 64; \ + if (l1_offset < l1_size * 0.5) { \ + cb.add(l1_addr, offset_reg); \ + } else { \ + l1_offset = 0; \ + cb.mov(l1_addr, pointer_reg); \ } #define L2_INCREMENT() cb.add(l2_addr, offset_reg); @@ -235,7 +216,7 @@ int ZENFMAPayload::compilePayload( #define RAM_INCREMENT() cb.add(ram_addr, offset_reg) for (unsigned count = 0; count < repetitions; count++) { - for (const auto &item : sequence) { + for (const auto& item : sequence) { // swap second and third param of fma instruction to force bitchanges on // the pipes to its execution units @@ -251,8 +232,7 @@ int ZENFMAPayload::compilePayload( if (item == "REG") { cb.vfmadd231pd(Ymm(add_dest), secondParam, thirdParam); - cb.xor_(temp_reg, - shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]); + cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]); if (left) { cb.shr(shift_reg[shift_pos], Imm(1)); } else { @@ -264,28 +244,23 @@ int ZENFMAPayload::compilePayload( L1_INCREMENT(); } else if (item == "L2_L") { cb.vfmadd231pd(Ymm(add_dest), secondParam, ymmword_ptr(l2_addr, 64)); - cb.xor_(temp_reg, - shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]); + cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]); L2_INCREMENT(); } else if (item == "L3_L") { cb.vfmadd231pd(Ymm(add_dest), secondParam, ymmword_ptr(l3_addr, 64)); - cb.xor_(temp_reg, - shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]); + cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]); L3_INCREMENT(); } else if (item == "RAM_L") { cb.vfmadd231pd(Ymm(ram_reg), secondParam, ymmword_ptr(ram_addr, 32)); - cb.xor_(temp_reg, - shift_reg[(shift_pos + 
nr_shift_regs - 1) % nr_shift_regs]); + cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]); RAM_INCREMENT(); } else { - workerLog::error() << "Instruction group " << item << " not found in " - << this->name() << "."; + workerLog::error() << "Instruction group " << item << " not found in " << this->name() << "."; return EXIT_FAILURE; } // make sure the shifts do could end up shifting out the data one end. - if (itemCount < (int)(sequence.size() * repetitions - - (sequence.size() * repetitions) % 4)) { + if (itemCount < (int)(sequence.size() * repetitions - (sequence.size() * repetitions) % 4)) { switch (itemCount % 4) { case 0: cb.vpsrlq(Xmm(13), Xmm(13), Imm(1)); @@ -369,9 +344,7 @@ int ZENFMAPayload::compilePayload( // dump all the ymm register for (int i = 0; i < (int)this->registerCount(); i++) { - cb.vmovapd( - ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), - Ymm(i)); + cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i)); } // set read flag @@ -381,8 +354,7 @@ int ZENFMAPayload::compilePayload( } if (errorDetection) { - this->emitErrorDetectionCode( - cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); + this->emitErrorDetectionCode(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); } cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH)); @@ -401,8 +373,7 @@ int ZENFMAPayload::compilePayload( Error err = this->rt.add(&this->loadFunction, &code); if (err) { - workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " - << __FILE__ << " at " << __LINE__; + workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; } @@ -415,8 +386,7 @@ int ZENFMAPayload::compilePayload( workerLog::warn() << "Work-loop is bigger than the L1i-Cache."; } - workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size - << " Bytes (" << instructionCachePercentage + workerLog::trace() << "Using " 
<< loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage << "%) from the L1i-Cache for the work-loop."; workerLog::trace() << "Sequence size: " << sequence.size(); workerLog::trace() << "Repetition count: " << repetitions; @@ -428,14 +398,12 @@ int ZENFMAPayload::compilePayload( std::list ZENFMAPayload::getAvailableInstructions() const { std::list instructions; - transform(this->instructionFlops.begin(), this->instructionFlops.end(), - back_inserter(instructions), - [](const auto &item) { return item.first; }); + transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions), + [](const auto& item) { return item.first; }); return instructions; } -void ZENFMAPayload::init(unsigned long long *memoryAddr, - unsigned long long bufferSize) { +void ZENFMAPayload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) { X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4); } diff --git a/src/firestarter/Environment/X86/X86CPUTopology.cpp b/src/firestarter/Environment/X86/X86CPUTopology.cpp index 8b8abe2b..6e7eb288 100644 --- a/src/firestarter/Environment/X86/X86CPUTopology.cpp +++ b/src/firestarter/Environment/X86/X86CPUTopology.cpp @@ -34,12 +34,12 @@ using namespace firestarter::environment::x86; X86CPUTopology::X86CPUTopology() - : CPUTopology("x86_64"), cpuInfo(asmjit::CpuInfo::host()), - _vendor(this->cpuInfo.vendor()) { + : CPUTopology("x86_64") + , cpuInfo(asmjit::CpuInfo::host()) + , _vendor(this->cpuInfo.vendor()) { std::stringstream ss; - ss << "Family " << this->familyId() << ", Model " << this->modelId() - << ", Stepping " << this->stepping(); + ss << "Family " << this->familyId() << ", Model " << this->modelId() << ", Stepping " << this->stepping(); this->_model = ss.str(); for (int i = 0; i <= (int)asmjit::CpuFeatures::X86::Id::kMaxValue; i++) { @@ -152,8 +152,7 @@ unsigned long long X86CPUTopology::clockrate() const { } /* non invariant TSCs can be used if 
CPUs run at fixed frequency */ - if (!this->hasInvariantRdtsc() && governor.compare("performance") && - governor.compare("powersave")) { + if (!this->hasInvariantRdtsc() && governor.compare("performance") && governor.compare("powersave")) { return CPUTopology::clockrate(); } @@ -181,8 +180,7 @@ unsigned long long X86CPUTopology::clockrate() const { end_time = Clock::now(); end2_tsc = this->timestamp(); - time_diff = - std::chrono::duration_cast(end_time - start_time).count(); + time_diff = std::chrono::duration_cast(end_time - start_time).count(); } while (0 == time_diff); clock_lower_bound = (((end1_tsc - start2_tsc) * 1000000) / (time_diff)); @@ -190,8 +188,7 @@ unsigned long long X86CPUTopology::clockrate() const { // if both values differ significantly, the measurement could have been // interrupted between 2 rdtsc's - if (((double)clock_lower_bound > (((double)clock_upper_bound) * 0.999)) && - ((time_diff) > 2000)) { + if (((double)clock_lower_bound > (((double)clock_upper_bound) * 0.999)) && ((time_diff) > 2000)) { num_measurements++; clock = (clock_lower_bound + clock_upper_bound) / 2; if (clockrate == 0) @@ -230,8 +227,8 @@ unsigned long long X86CPUTopology::timestamp() const { #endif } -void X86CPUTopology::cpuid(unsigned long long *a, unsigned long long *b, - unsigned long long *c, unsigned long long *d) const { +void X86CPUTopology::cpuid(unsigned long long* a, unsigned long long* b, unsigned long long* c, + unsigned long long* d) const { #ifndef _MSC_VER unsigned long long reg_a, reg_b, reg_c, reg_d; diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp index d981358d..b923fbf4 100644 --- a/src/firestarter/Environment/X86/X86Environment.cpp +++ b/src/firestarter/Environment/X86/X86Environment.cpp @@ -31,48 +31,42 @@ using namespace firestarter::environment::x86; void X86Environment::evaluateFunctions() { for (auto ctor : this->platformConfigsCtor) { // add asmjit for model and family 
detection - this->platformConfigs.push_back( - ctor(this->topology().featuresAsmjit(), this->topology().familyId(), - this->topology().modelId(), this->topology().numThreadsPerCore())); + this->platformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(), + this->topology().modelId(), this->topology().numThreadsPerCore())); } for (auto ctor : this->fallbackPlatformConfigsCtor) { - this->fallbackPlatformConfigs.push_back( - ctor(this->topology().featuresAsmjit(), this->topology().familyId(), - this->topology().modelId(), this->topology().numThreadsPerCore())); + this->fallbackPlatformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(), + this->topology().modelId(), this->topology().numThreadsPerCore())); } } -int X86Environment::selectFunction(unsigned functionId, - bool allowUnavailablePayload) { +int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePayload) { unsigned id = 1; std::string defaultPayloadName(""); // if functionId is 0 get the default or fallback for (auto config : this->platformConfigs) { - for (auto const &[thread, functionName] : config->getThreadMap()) { + for (auto const& [thread, functionName] : config->getThreadMap()) { // the selected function if (id == functionId) { if (!config->isAvailable()) { - log::error() << "Function " << functionId << " (\"" << functionName - << "\") requires " << config->payload().name() - << ", which is not supported by the processor."; + log::error() << "Function " << functionId << " (\"" << functionName << "\") requires " + << config->payload().name() << ", which is not supported by the processor."; if (!allowUnavailablePayload) { return EXIT_FAILURE; } } // found function - this->_selectedConfig = - new ::firestarter::environment::platform::RuntimeConfig( - *config, thread, this->topology().instructionCacheSize()); + this->_selectedConfig = new ::firestarter::environment::platform::RuntimeConfig( + *config, thread, 
this->topology().instructionCacheSize()); return EXIT_SUCCESS; } // default function if (0 == functionId && config->isDefault()) { if (thread == this->topology().numThreadsPerCore()) { - this->_selectedConfig = - new ::firestarter::environment::platform::RuntimeConfig( - *config, thread, this->topology().instructionCacheSize()); + this->_selectedConfig = new ::firestarter::environment::platform::RuntimeConfig( + *config, thread, this->topology().instructionCacheSize()); return EXIT_SUCCESS; } else { defaultPayloadName = config->payload().name(); @@ -88,8 +82,7 @@ int X86Environment::selectFunction(unsigned functionId, if (!defaultPayloadName.empty()) { // default payload available, but number of threads per core is not // supported - log::warn() << "No " << defaultPayloadName << " code path for " - << this->topology().numThreadsPerCore() + log::warn() << "No " << defaultPayloadName << " code path for " << this->topology().numThreadsPerCore() << " threads per core!"; } log::warn() << this->topology().vendor() << " " << this->topology().model() @@ -102,7 +95,7 @@ int X86Environment::selectFunction(unsigned functionId, if (config->isAvailable()) { auto selectedThread = 0; auto selectedFunctionName = std::string(""); - for (auto const &[thread, functionName] : config->getThreadMap()) { + for (auto const& [thread, functionName] : config->getThreadMap()) { if (thread == this->topology().numThreadsPerCore()) { selectedThread = thread; selectedFunctionName = functionName; @@ -112,12 +105,9 @@ int X86Environment::selectFunction(unsigned functionId, selectedThread = config->getThreadMap().begin()->first; selectedFunctionName = config->getThreadMap().begin()->second; } - this->_selectedConfig = - new ::firestarter::environment::platform::RuntimeConfig( - *config, selectedThread, - this->topology().instructionCacheSize()); - log::warn() << "Using function " << selectedFunctionName - << " as fallback.\n" + this->_selectedConfig = new 
::firestarter::environment::platform::RuntimeConfig( + *config, selectedThread, this->topology().instructionCacheSize()); + log::warn() << "Using function " << selectedFunctionName << " as fallback.\n" << "You can use the parameter --function to try other " "functions."; return EXIT_SUCCESS; @@ -130,18 +120,14 @@ int X86Environment::selectFunction(unsigned functionId, return EXIT_FAILURE; } - log::error() << "unknown function id: " << functionId - << ", see --avail for available ids"; + log::error() << "unknown function id: " << functionId << ", see --avail for available ids"; return EXIT_FAILURE; } int X86Environment::selectInstructionGroups(std::string groups) { const std::string delimiter = ","; const std::regex re("^(\\w+):(\\d+)$"); - const auto availableInstructionGroups = this->selectedConfig() - .platformConfig() - .payload() - .getAvailableInstructions(); + const auto availableInstructionGroups = this->selectedConfig().platformConfig().payload().getAvailableInstructions(); std::stringstream ss(groups); std::vector> payloadSettings = {}; @@ -152,29 +138,25 @@ int X86Environment::selectInstructionGroups(std::string groups) { std::getline(ss, token, ','); if (std::regex_match(token, m, re)) { - if (std::find(availableInstructionGroups.begin(), - availableInstructionGroups.end(), - m[1].str()) == availableInstructionGroups.end()) { - log::error() - << "Invalid instruction-group: " << m[1].str() - << "\n --run-instruction-groups format: multiple INST:VAL " - "pairs comma-seperated"; + if (std::find(availableInstructionGroups.begin(), availableInstructionGroups.end(), m[1].str()) == + availableInstructionGroups.end()) { + log::error() << "Invalid instruction-group: " << m[1].str() + << "\n --run-instruction-groups format: multiple INST:VAL " + "pairs comma-seperated"; return EXIT_FAILURE; } int num = std::stoul(m[2].str()); if (num == 0) { - log::error() - << "instruction-group VAL may not contain number 0" - << "\n --run-instruction-groups format: multiple 
INST:VAL " - "pairs comma-seperated"; + log::error() << "instruction-group VAL may not contain number 0" + << "\n --run-instruction-groups format: multiple INST:VAL " + "pairs comma-seperated"; return EXIT_FAILURE; } payloadSettings.push_back(std::make_pair(m[1].str(), num)); } else { - log::error() - << "Invalid symbols in instruction-group: " << token - << "\n --run-instruction-groups format: multiple INST:VAL " - "pairs comma-seperated"; + log::error() << "Invalid symbols in instruction-group: " << token + << "\n --run-instruction-groups format: multiple INST:VAL " + "pairs comma-seperated"; return EXIT_FAILURE; } } @@ -189,10 +171,7 @@ int X86Environment::selectInstructionGroups(std::string groups) { void X86Environment::printAvailableInstructionGroups() { std::stringstream ss; - for (auto const &item : this->selectedConfig() - .platformConfig() - .payload() - .getAvailableInstructions()) { + for (auto const& item : this->selectedConfig().platformConfig().payload().getAvailableInstructions()) { ss << item << ","; } @@ -202,18 +181,13 @@ void X86Environment::printAvailableInstructionGroups() { } log::info() << " available instruction-groups for payload " - << this->selectedConfig().platformConfig().payload().name() - << ":\n" + << this->selectedConfig().platformConfig().payload().name() << ":\n" << " " << s; } -void X86Environment::setLineCount(unsigned lineCount) { - this->selectedConfig().setLineCount(lineCount); -} +void X86Environment::setLineCount(unsigned lineCount) { this->selectedConfig().setLineCount(lineCount); } -void X86Environment::printSelectedCodePathSummary() { - this->selectedConfig().printCodePathSummary(); -} +void X86Environment::printSelectedCodePathSummary() { this->selectedConfig().printCodePathSummary(); } void X86Environment::printFunctionSummary() { log::info() << " available load-functions:\n" @@ -226,16 +200,14 @@ void X86Environment::printFunctionSummary() { unsigned id = 1; - for (auto const &config : this->platformConfigs) { - for 
(auto const &[thread, functionName] : config->getThreadMap()) { - const char *available = config->isAvailable() ? "yes" : "no"; - const char *fmt = " %4u | %-30s | %-24s | %s"; - int sz = - std::snprintf(nullptr, 0, fmt, id, functionName.c_str(), available, - config->getDefaultPayloadSettingsString().c_str()); + for (auto const& config : this->platformConfigs) { + for (auto const& [thread, functionName] : config->getThreadMap()) { + const char* available = config->isAvailable() ? "yes" : "no"; + const char* fmt = " %4u | %-30s | %-24s | %s"; + int sz = std::snprintf(nullptr, 0, fmt, id, functionName.c_str(), available, + config->getDefaultPayloadSettingsString().c_str()); std::vector buf(sz + 1); - std::snprintf(&buf[0], buf.size(), fmt, id, functionName.c_str(), - available, + std::snprintf(&buf[0], buf.size(), fmt, id, functionName.c_str(), available, config->getDefaultPayloadSettingsString().c_str()); log::info() << std::string(&buf[0]); id++; diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp index 5fb58ad4..7dd511f5 100644 --- a/src/firestarter/Firestarter.cpp +++ b/src/firestarter/Firestarter.cpp @@ -40,41 +40,47 @@ extern "C" { using namespace firestarter; -Firestarter::Firestarter( - const int argc, const char **argv, std::chrono::seconds const &timeout, - unsigned loadPercent, std::chrono::microseconds const &period, - unsigned requestedNumThreads, std::string const &cpuBind, - bool printFunctionSummary, unsigned functionId, bool listInstructionGroups, - std::string const &instructionGroups, unsigned lineCount, - bool allowUnavailablePayload, bool dumpRegisters, - std::chrono::seconds const &dumpRegistersTimeDelta, - std::string const &dumpRegistersOutpath, bool errorDetection, int gpus, - unsigned gpuMatrixSize, bool gpuUseFloat, bool gpuUseDouble, - bool listMetrics, bool measurement, - std::chrono::milliseconds const &startDelta, - std::chrono::milliseconds const &stopDelta, - std::chrono::milliseconds const 
&measurementInterval, - std::vector const &metricPaths, - std::vector const &stdinMetrics, bool optimize, - std::chrono::seconds const &preheat, - std::string const &optimizationAlgorithm, - std::vector const &optimizationMetrics, - std::chrono::seconds const &evaluationDuration, unsigned individuals, - std::string const &optimizeOutfile, unsigned generations, double nsga2_cr, - double nsga2_m) - : _argc(argc), _argv(argv), _timeout(timeout), _loadPercent(loadPercent), - _period(period), _dumpRegisters(dumpRegisters), - _dumpRegistersTimeDelta(dumpRegistersTimeDelta), - _dumpRegistersOutpath(dumpRegistersOutpath), - _errorDetection(errorDetection), _gpus(gpus), - _gpuMatrixSize(gpuMatrixSize), _gpuUseFloat(gpuUseFloat), - _gpuUseDouble(gpuUseDouble), _startDelta(startDelta), - _stopDelta(stopDelta), _measurement(measurement), _optimize(optimize), - _preheat(preheat), _optimizationAlgorithm(optimizationAlgorithm), - _optimizationMetrics(optimizationMetrics), - _evaluationDuration(evaluationDuration), _individuals(individuals), - _optimizeOutfile(optimizeOutfile), _generations(generations), - _nsga2_cr(nsga2_cr), _nsga2_m(nsga2_m) { +Firestarter::Firestarter(const int argc, const char** argv, std::chrono::seconds const& timeout, unsigned loadPercent, + std::chrono::microseconds const& period, unsigned requestedNumThreads, + std::string const& cpuBind, bool printFunctionSummary, unsigned functionId, + bool listInstructionGroups, std::string const& instructionGroups, unsigned lineCount, + bool allowUnavailablePayload, bool dumpRegisters, + std::chrono::seconds const& dumpRegistersTimeDelta, std::string const& dumpRegistersOutpath, + bool errorDetection, int gpus, unsigned gpuMatrixSize, bool gpuUseFloat, bool gpuUseDouble, + bool listMetrics, bool measurement, std::chrono::milliseconds const& startDelta, + std::chrono::milliseconds const& stopDelta, + std::chrono::milliseconds const& measurementInterval, + std::vector const& metricPaths, std::vector const& 
stdinMetrics, + bool optimize, std::chrono::seconds const& preheat, std::string const& optimizationAlgorithm, + std::vector const& optimizationMetrics, + std::chrono::seconds const& evaluationDuration, unsigned individuals, + std::string const& optimizeOutfile, unsigned generations, double nsga2_cr, double nsga2_m) + : _argc(argc) + , _argv(argv) + , _timeout(timeout) + , _loadPercent(loadPercent) + , _period(period) + , _dumpRegisters(dumpRegisters) + , _dumpRegistersTimeDelta(dumpRegistersTimeDelta) + , _dumpRegistersOutpath(dumpRegistersOutpath) + , _errorDetection(errorDetection) + , _gpus(gpus) + , _gpuMatrixSize(gpuMatrixSize) + , _gpuUseFloat(gpuUseFloat) + , _gpuUseDouble(gpuUseDouble) + , _startDelta(startDelta) + , _stopDelta(stopDelta) + , _measurement(measurement) + , _optimize(optimize) + , _preheat(preheat) + , _optimizationAlgorithm(optimizationAlgorithm) + , _optimizationMetrics(optimizationMetrics) + , _evaluationDuration(evaluationDuration) + , _individuals(individuals) + , _optimizeOutfile(optimizeOutfile) + , _generations(generations) + , _nsga2_cr(nsga2_cr) + , _nsga2_m(nsga2_m) { int returnCode; _load = (_period * _loadPercent) / 100; @@ -90,18 +96,15 @@ Firestarter::Firestarter( (void)stdinMetrics; #endif -#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ - defined(_M_X64) +#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) this->_environment = new environment::x86::X86Environment(); #endif - if (EXIT_SUCCESS != (returnCode = this->environment().evaluateCpuAffinity( - requestedNumThreads, cpuBind))) { + if (EXIT_SUCCESS != (returnCode = this->environment().evaluateCpuAffinity(requestedNumThreads, cpuBind))) { std::exit(returnCode); } -#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ - defined(_M_X64) +#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) // Error detection uses crc32 instruction added by the SSE4.2 extension to x86 if 
(_errorDetection) { if (!_environment->topology().featuresAsmjit().has(asmjit::CpuFeatures::X86::kSSE4_2)) { @@ -112,10 +115,9 @@ Firestarter::Firestarter( #endif if (_errorDetection && this->environment().requestedNumThreads() < 2) { - throw std::invalid_argument( - "Option --error-detection must run with 2 or more threads. Number of " - "threads is " + - std::to_string(this->environment().requestedNumThreads()) + "\n"); + throw std::invalid_argument("Option --error-detection must run with 2 or more threads. Number of " + "threads is " + + std::to_string(this->environment().requestedNumThreads()) + "\n"); } this->environment().evaluateFunctions(); @@ -125,8 +127,7 @@ Firestarter::Firestarter( std::exit(EXIT_SUCCESS); } - if (EXIT_SUCCESS != (returnCode = this->environment().selectFunction( - functionId, allowUnavailablePayload))) { + if (EXIT_SUCCESS != (returnCode = this->environment().selectFunction(functionId, allowUnavailablePayload))) { std::exit(returnCode); } @@ -136,9 +137,7 @@ Firestarter::Firestarter( } if (!instructionGroups.empty()) { - if (EXIT_SUCCESS != - (returnCode = - this->environment().selectInstructionGroups(instructionGroups))) { + if (EXIT_SUCCESS != (returnCode = this->environment().selectInstructionGroups(instructionGroups))) { std::exit(returnCode); } } @@ -150,8 +149,7 @@ Firestarter::Firestarter( #if defined(linux) || defined(__linux__) if (_measurement || listMetrics || _optimize) { _measurementWorker = std::make_shared( - measurementInterval, this->environment().requestedNumThreads(), - metricPaths, stdinMetrics); + measurementInterval, this->environment().requestedNumThreads(), metricPaths, stdinMetrics); if (listMetrics) { log::info() << _measurementWorker->availableMetrics(); @@ -168,23 +166,19 @@ Firestarter::Firestarter( } // check if selected metrics are initialized - for (auto const &optimizationMetric : optimizationMetrics) { - auto nameEqual = [optimizationMetric](auto const &name) { + for (auto const& optimizationMetric : 
optimizationMetrics) { + auto nameEqual = [optimizationMetric](auto const& name) { auto invertedName = "-" + name; - return name.compare(optimizationMetric) == 0 || - invertedName.compare(optimizationMetric) == 0; + return name.compare(optimizationMetric) == 0 || invertedName.compare(optimizationMetric) == 0; }; // metric name is not found if (std::find_if(all.begin(), all.end(), nameEqual) == all.end()) { - log::error() << "Metric \"" << optimizationMetric - << "\" does not exist."; + log::error() << "Metric \"" << optimizationMetric << "\" does not exist."; std::exit(EXIT_FAILURE); } // metric has not initialized properly - if (std::find_if(initialized.begin(), initialized.end(), nameEqual) == - initialized.end()) { - log::error() << "Metric \"" << optimizationMetric - << "\" failed to initialize."; + if (std::find_if(initialized.begin(), initialized.end(), nameEqual) == initialized.end()) { + log::error() << "Metric \"" << optimizationMetric << "\" failed to initialize."; std::exit(EXIT_FAILURE); } } @@ -192,23 +186,23 @@ Firestarter::Firestarter( if (_optimize) { auto applySettings = std::bind( - [this](std::vector> const &setting) { + [this](std::vector> const& setting) { using Clock = std::chrono::high_resolution_clock; auto start = Clock::now(); - for (auto &thread : this->loadThreads) { + for (auto& thread : this->loadThreads) { auto td = thread.second; td->config().setPayloadSettings(setting); } - for (auto const &thread : this->loadThreads) { + for (auto const& thread : this->loadThreads) { auto td = thread.second; td->mutex.lock(); } - for (auto const &thread : this->loadThreads) { + for (auto const& thread : this->loadThreads) { auto td = thread.second; td->comm = THREAD_SWITCH; @@ -217,7 +211,7 @@ Firestarter::Firestarter( this->loadVar = LOAD_SWITCH; - for (auto const &thread : this->loadThreads) { + for (auto const& thread : this->loadThreads) { auto td = thread.second; bool ack; @@ -239,7 +233,7 @@ Firestarter::Firestarter( unsigned long long 
startTimestamp = 0xffffffffffffffff; unsigned long long stopTimestamp = 0; - for (auto const &thread : this->loadThreads) { + for (auto const& thread : this->loadThreads) { auto td = thread.second; if (startTimestamp > td->lastStartTsc) { @@ -250,46 +244,33 @@ Firestarter::Firestarter( } } - for (auto const &thread : this->loadThreads) { + for (auto const& thread : this->loadThreads) { auto td = thread.second; - ipc_estimate_metric_insert( - (double)td->lastIterations * - (double)this->loadThreads.front() - .second->config() - .payload() - .instructions() / - (double)(stopTimestamp - startTimestamp)); + ipc_estimate_metric_insert((double)td->lastIterations * + (double)this->loadThreads.front().second->config().payload().instructions() / + (double)(stopTimestamp - startTimestamp)); } auto end = Clock::now(); log::trace() << "Switching payload took " - << std::chrono::duration_cast( - end - start) - .count() - << "ms"; + << std::chrono::duration_cast(end - start).count() << "ms"; }, std::placeholders::_1); - auto prob = - std::make_shared( - std::move(applySettings), _measurementWorker, _optimizationMetrics, - _evaluationDuration, _startDelta, _stopDelta, - this->environment().selectedConfig().payloadItems()); + auto prob = std::make_shared( + std::move(applySettings), _measurementWorker, _optimizationMetrics, _evaluationDuration, _startDelta, + _stopDelta, this->environment().selectedConfig().payloadItems()); _population = firestarter::optimizer::Population(std::move(prob)); if (_optimizationAlgorithm == "NSGA2") { - _algorithm = std::make_unique( - _generations, _nsga2_cr, _nsga2_m); + _algorithm = std::make_unique(_generations, _nsga2_cr, _nsga2_m); } else { - throw std::invalid_argument("Algorithm " + _optimizationAlgorithm + - " unknown."); + throw std::invalid_argument("Algorithm " + _optimizationAlgorithm + " unknown."); } - _algorithm->checkPopulation( - static_cast(_population), - _individuals); + _algorithm->checkPopulation(static_cast(_population), 
_individuals); } #endif @@ -299,8 +280,7 @@ Firestarter::Firestarter( // setup thread with either high or low load configured at the start // low loads has to know the length of the period - if (EXIT_SUCCESS != (returnCode = this->initLoadWorkers((_loadPercent == 0), - _period.count()))) { + if (EXIT_SUCCESS != (returnCode = this->initLoadWorkers((_loadPercent == 0), _period.count()))) { std::exit(returnCode); } @@ -328,16 +308,13 @@ void Firestarter::mainThread() { this->environment().printThreadSummary(); #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP) - _cuda = std::make_unique(&this->loadVar, _gpuUseFloat, - _gpuUseDouble, _gpuMatrixSize, _gpus); + _cuda = std::make_unique(&this->loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus); #endif #ifdef FIRESTARTER_BUILD_ONEAPI - _oneapi = std::make_unique(&this->loadVar, _gpuUseFloat, - _gpuUseDouble, _gpuMatrixSize, _gpus); + _oneapi = std::make_unique(&this->loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus); #endif - #if defined(linux) || defined(__linux__) // if measurement is enabled, start it here if (_measurement) { @@ -350,8 +327,7 @@ void Firestarter::mainThread() { #ifdef FIRESTARTER_DEBUG_FEATURES if (_dumpRegisters) { int returnCode; - if (EXIT_SUCCESS != (returnCode = this->initDumpRegisterWorker( - _dumpRegistersTimeDelta, _dumpRegistersOutpath))) { + if (EXIT_SUCCESS != (returnCode = this->initDumpRegisterWorker(_dumpRegistersTimeDelta, _dumpRegistersOutpath))) { std::exit(returnCode); } } @@ -366,20 +342,17 @@ void Firestarter::mainThread() { auto startTime = optimizer::History::getTime(); Firestarter::_optimizer = std::make_unique( - std::move(_algorithm), _population, _optimizationAlgorithm, - _individuals, _preheat); + std::move(_algorithm), _population, _optimizationAlgorithm, _individuals, _preheat); // wait here until optimizer thread terminates Firestarter::_optimizer->join(); auto payloadItems = this->environment().selectedConfig().payloadItems(); - 
firestarter::optimizer::History::save(_optimizeOutfile, startTime, - payloadItems, _argc, _argv); + firestarter::optimizer::History::save(_optimizeOutfile, startTime, payloadItems, _argc, _argv); // print the best 20 according to each metric - firestarter::optimizer::History::printBest(_optimizationMetrics, - payloadItems); + firestarter::optimizer::History::printBest(_optimizationMetrics, payloadItems); // stop all the load threads std::raise(SIGTERM); @@ -403,11 +376,9 @@ void Firestarter::mainThread() { if (_measurement) { // TODO: clear this up log::info() << "metric,num_timepoints,duration_ms,average,stddev"; - for (auto const &[name, sum] : - _measurementWorker->getValues(_startDelta, _stopDelta)) { - log::info() << std::quoted(name) << "," << sum.num_timepoints << "," - << sum.duration.count() << "," << sum.average << "," - << sum.stddev; + for (auto const& [name, sum] : _measurementWorker->getValues(_startDelta, _stopDelta)) { + log::info() << std::quoted(name) << "," << sum.num_timepoints << "," << sum.duration.count() << "," << sum.average + << "," << sum.stddev; } } #endif @@ -420,8 +391,7 @@ void Firestarter::mainThread() { void Firestarter::setLoad(unsigned long long value) { // signal load change to workers Firestarter::loadVar = value; -#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ - defined(_M_X64) +#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) #ifndef _MSC_VER __asm__ __volatile__("mfence;"); #else diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp index 3c922cf6..53323187 100644 --- a/src/firestarter/LoadWorker.cpp +++ b/src/firestarter/LoadWorker.cpp @@ -43,7 +43,7 @@ extern "C" { using namespace firestarter; -auto aligned_free_deleter = [](void *p) { ALIGNED_FREE(p); }; +auto aligned_free_deleter = [](void* p) { ALIGNED_FREE(p); }; int Firestarter::initLoadWorkers(bool lowLoad, unsigned long long period) { int returnCode; @@ -62,38 +62,32 @@ int 
Firestarter::initLoadWorkers(bool lowLoad, unsigned long long period) { // communication pointers and add these to the threaddata if (_errorDetection) { for (unsigned long long i = 0; i < numThreads; i++) { - auto commPtr = reinterpret_cast( - ALIGNED_MALLOC(2 * sizeof(unsigned long long), 64)); + auto commPtr = reinterpret_cast(ALIGNED_MALLOC(2 * sizeof(unsigned long long), 64)); assert(commPtr); - this->errorCommunication.push_back( - std::shared_ptr(commPtr, aligned_free_deleter)); - log::debug() << "Threads " << (i + numThreads - 1) % numThreads << " and " - << i << " commPtr = 0x" << std::setfill('0') - << std::setw(sizeof(unsigned long long) * 2) << std::hex + this->errorCommunication.push_back(std::shared_ptr(commPtr, aligned_free_deleter)); + log::debug() << "Threads " << (i + numThreads - 1) % numThreads << " and " << i << " commPtr = 0x" + << std::setfill('0') << std::setw(sizeof(unsigned long long) * 2) << std::hex << (unsigned long long)commPtr; } } for (unsigned long long i = 0; i < numThreads; i++) { - auto td = std::make_shared(i, this->environment(), - &this->loadVar, period, - _dumpRegisters, _errorDetection); + auto td = std::make_shared(i, this->environment(), &this->loadVar, period, _dumpRegisters, + _errorDetection); if (_errorDetection) { // distribute pointers for error deteciton. (set threads in a ring) // give this thread the left pointer i and right pointer (i+1) % // requestedNumThreads(). 
- td->setErrorCommunication(this->errorCommunication[i], - this->errorCommunication[(i + 1) % numThreads]); + td->setErrorCommunication(this->errorCommunication[i], this->errorCommunication[(i + 1) % numThreads]); } - auto dataCacheSizeIt = - td->config().platformConfig().dataCacheBufferSize().begin(); + auto dataCacheSizeIt = td->config().platformConfig().dataCacheBufferSize().begin(); auto ramBufferSize = td->config().platformConfig().ramBufferSize(); - td->buffersizeMem = (*dataCacheSizeIt + *std::next(dataCacheSizeIt, 1) + - *std::next(dataCacheSizeIt, 2) + ramBufferSize) / - td->config().thread() / sizeof(unsigned long long); + td->buffersizeMem = + (*dataCacheSizeIt + *std::next(dataCacheSizeIt, 1) + *std::next(dataCacheSizeIt, 2) + ramBufferSize) / + td->config().thread() / sizeof(unsigned long long); // create the thread std::thread t(Firestarter::loadThreadWorker, td); @@ -102,8 +96,7 @@ int Firestarter::initLoadWorkers(bool lowLoad, unsigned long long period) { if (i == 0) { // only show error for all worker threads except first. 
- firestarter::logging::FirstWorkerThreadFilter< - firestarter::logging::record>::setFirstThread(t.get_id()); + firestarter::logging::FirstWorkerThreadFilter::setFirstThread(t.get_id()); } this->loadThreads.push_back(std::make_pair(std::move(t), td)); @@ -118,20 +111,20 @@ void Firestarter::signalLoadWorkers(int comm) { bool ack; // start the work - for (auto const &thread : this->loadThreads) { + for (auto const& thread : this->loadThreads) { auto td = thread.second; td->mutex.lock(); } - for (auto const &thread : this->loadThreads) { + for (auto const& thread : this->loadThreads) { auto td = thread.second; td->comm = comm; td->mutex.unlock(); } - for (auto const &thread : this->loadThreads) { + for (auto const& thread : this->loadThreads) { auto td = thread.second; do { @@ -148,7 +141,7 @@ void Firestarter::signalLoadWorkers(int comm) { void Firestarter::joinLoadWorkers() { // wait for threads after watchdog has requested termination - for (auto &thread : this->loadThreads) { + for (auto& thread : this->loadThreads) { thread.first.join(); } } @@ -160,8 +153,7 @@ void Firestarter::printThreadErrorReport() { std::vector errors(maxSize, false); for (decltype(maxSize) i = 0; i < maxSize; i++) { - auto errorDetectionStruct = - this->loadThreads[i].second->errorDetectionStruct(); + auto errorDetectionStruct = this->loadThreads[i].second->errorDetectionStruct(); if (errorDetectionStruct->errorLeft) { errors[(i + maxSize - 1) % maxSize] = true; @@ -173,10 +165,8 @@ void Firestarter::printThreadErrorReport() { for (decltype(maxSize) i = 0; i < maxSize; i++) { if (errors[i]) { - log::fatal() - << "Data mismatch between Threads " << i << " and " - << (i + 1) % maxSize - << ".\n This may be caused by bit-flips in the hardware."; + log::fatal() << "Data mismatch between Threads " << i << " and " << (i + 1) % maxSize + << ".\n This may be caused by bit-flips in the hardware."; } } } @@ -191,7 +181,7 @@ void Firestarter::printPerformanceReport() { log::debug() << "\nperformance 
report:\n"; - for (auto const &thread : this->loadThreads) { + for (auto const& thread : this->loadThreads) { auto td = thread.second; log::debug() << "Thread " << td->id() << ": " << td->iterations @@ -207,39 +197,33 @@ void Firestarter::printPerformanceReport() { iterations += td->iterations; } - double runtime = (double)(stopTimestamp - startTimestamp) / - (double)this->environment().topology().clockrate(); + double runtime = (double)(stopTimestamp - startTimestamp) / (double)this->environment().topology().clockrate(); double gFlops = - (double)this->loadThreads.front().second->config().payload().flops() * - 0.000000001 * (double)iterations / runtime; + (double)this->loadThreads.front().second->config().payload().flops() * 0.000000001 * (double)iterations / runtime; double bandwidth = - (double)this->loadThreads.front().second->config().payload().bytes() * - 0.000000001 * (double)iterations / runtime; + (double)this->loadThreads.front().second->config().payload().bytes() * 0.000000001 * (double)iterations / runtime; // insert values for ipc-estimate metric // if we are on linux #if defined(linux) || defined(__linux__) if (_measurement) { - for (auto const &thread : this->loadThreads) { + for (auto const& thread : this->loadThreads) { auto td = thread.second; ipc_estimate_metric_insert((double)td->iterations * - (double)this->loadThreads.front() - .second->config() - .payload() - .instructions() / + (double)this->loadThreads.front().second->config().payload().instructions() / (double)(stopTimestamp - startTimestamp)); } } #endif // format runtime, gflops and bandwidth %.2f - const char *fmt = "%.2f"; + const char* fmt = "%.2f"; int size; -#define FORMAT(input) \ - size = std::snprintf(nullptr, 0, fmt, input); \ - std::vector input##Vector(size + 1); \ - std::snprintf(&input##Vector[0], input##Vector.size(), fmt, input); \ +#define FORMAT(input) \ + size = std::snprintf(nullptr, 0, fmt, input); \ + std::vector input##Vector(size + 1); \ + 
std::snprintf(&input##Vector[0], input##Vector.size(), fmt, input); \ auto input##String = std::string(&input##Vector[0]) FORMAT(runtime); @@ -248,21 +232,19 @@ void Firestarter::printPerformanceReport() { #undef FORMAT - log::debug() - << "\n" - << "total iterations: " << iterations << "\n" - << "runtime: " << runtimeString << " seconds (" - << stopTimestamp - startTimestamp << " cycles)\n" - << "\n" - << "estimated floating point performance: " << gFlopsString << " GFLOPS\n" - << "estimated memory bandwidth*: " << bandwidthString << " GB/s\n" - << "\n" - << "* this estimate is highly unreliable if --function is used in order " - "to " - "select\n" - << " a function that is not optimized for your architecture, or if " - "FIRESTARTER is\n" - << " executed on an unsupported architecture!"; + log::debug() << "\n" + << "total iterations: " << iterations << "\n" + << "runtime: " << runtimeString << " seconds (" << stopTimestamp - startTimestamp << " cycles)\n" + << "\n" + << "estimated floating point performance: " << gFlopsString << " GFLOPS\n" + << "estimated memory bandwidth*: " << bandwidthString << " GB/s\n" + << "\n" + << "* this estimate is highly unreliable if --function is used in order " + "to " + "select\n" + << " a function that is not optimized for your architecture, or if " + "FIRESTARTER is\n" + << " executed on an unsupported architecture!"; } void Firestarter::loadThreadWorker(std::shared_ptr td) { @@ -296,36 +278,30 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { td->environment().setCpuAffinity(td->id()); // compile payload - td->config().payload().compilePayload( - td->config().payloadSettings(), td->config().instructionCacheSize(), - td->config().dataCacheBufferSize(), td->config().ramBufferSize(), - td->config().thread(), td->config().lines(), td->dumpRegisters, - td->errorDetection); + td->config().payload().compilePayload(td->config().payloadSettings(), td->config().instructionCacheSize(), + td->config().dataCacheBufferSize(), 
td->config().ramBufferSize(), + td->config().thread(), td->config().lines(), td->dumpRegisters, + td->errorDetection); // allocate memory // if we should dump some registers, we use the first part of the memory // for them. - td->addrMem = - reinterpret_cast(ALIGNED_MALLOC( - (td->buffersizeMem + td->addrOffset) * sizeof(unsigned long long), - 64)) + - td->addrOffset; + td->addrMem = reinterpret_cast( + ALIGNED_MALLOC((td->buffersizeMem + td->addrOffset) * sizeof(unsigned long long), 64)) + + td->addrOffset; // exit application on error if (td->addrMem - td->addrOffset == nullptr) { - workerLog::error() << "Could not allocate memory for CPU load thread " - << td->id() << "\n"; + workerLog::error() << "Could not allocate memory for CPU load thread " << td->id() << "\n"; exit(ENOMEM); } if (td->dumpRegisters) { - reinterpret_cast(td->addrMem - td->addrOffset) - ->dumpVar = DumpVariable::Wait; + reinterpret_cast(td->addrMem - td->addrOffset)->dumpVar = DumpVariable::Wait; } if (td->errorDetection) { - auto errorDetectionStruct = reinterpret_cast( - td->addrMem - td->addrOffset); + auto errorDetectionStruct = reinterpret_cast(td->addrMem - td->addrOffset); std::memset(errorDetectionStruct, 0, sizeof(ErrorDetectionStruct)); @@ -334,8 +310,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { errorDetectionStruct->communicationRight = td->communicationRight.get(); // do first touch memset 0 for the communication pointers - std::memset((void *)errorDetectionStruct->communicationLeft, 0, - sizeof(unsigned long long) * 2); + std::memset((void*)errorDetectionStruct->communicationLeft, 0, sizeof(unsigned long long) * 2); } // call init function @@ -354,11 +329,9 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { VT_USER_START("HIGH_LOAD_FUNC"); #endif #ifdef ENABLE_SCOREP - SCOREP_USER_REGION_BY_NAME_BEGIN("HIGH", - SCOREP_USER_REGION_TYPE_COMMON); + SCOREP_USER_REGION_BY_NAME_BEGIN("HIGH", SCOREP_USER_REGION_TYPE_COMMON); #endif - td->iterations = 
td->config().payload().highLoadFunction( - td->addrMem, td->addrHigh, td->iterations); + td->iterations = td->config().payload().highLoadFunction(td->addrMem, td->addrHigh, td->iterations); // call low load function #ifdef ENABLE_VTRACING @@ -393,11 +366,10 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { break; case THREAD_SWITCH: // compile payload - td->config().payload().compilePayload( - td->config().payloadSettings(), td->config().instructionCacheSize(), - td->config().dataCacheBufferSize(), td->config().ramBufferSize(), - td->config().thread(), td->config().lines(), td->dumpRegisters, - td->errorDetection); + td->config().payload().compilePayload(td->config().payloadSettings(), td->config().instructionCacheSize(), + td->config().dataCacheBufferSize(), td->config().ramBufferSize(), + td->config().thread(), td->config().lines(), td->dumpRegisters, + td->errorDetection); // call init function td->config().payload().init(td->addrMem, td->buffersizeMem); diff --git a/src/firestarter/Main.cpp b/src/firestarter/Main.cpp index fa96740f..5076d812 100644 --- a/src/firestarter/Main.cpp +++ b/src/firestarter/Main.cpp @@ -27,16 +27,16 @@ #include struct Config { - inline static const std::vector> - optionsMap = {{"information", "Information Options:\n"}, - {"general", "General Options:\n"}, - {"specialized-workloads", "Specialized workloads:\n"}, + inline static const std::vector> optionsMap = { + {"information", "Information Options:\n"}, + {"general", "General Options:\n"}, + {"specialized-workloads", "Specialized workloads:\n"}, #ifdef FIRESTARTER_DEBUG_FEATURES - {"debug", "Debugging:\n"}, + {"debug", "Debugging:\n"}, #endif #if defined(linux) || defined(__linux__) - {"measurement", "Measurement:\n"}, - {"optimization", "Optimization:\n"} + {"measurement", "Measurement:\n"}, + {"optimization", "Optimization:\n"} #endif }; @@ -84,48 +84,41 @@ struct Config { double nsga2_cr; double nsga2_m; - Config(int argc, const char **argv); + Config(int argc, const 
char** argv); }; void print_copyright() { - firestarter::log::info() - << "This program is free software: you can redistribute it and/or " - "modify\n" - << "it under the terms of the GNU General Public License as published " - "by\n" - << "the Free Software Foundation, either version 3 of the License, or\n" - << "(at your option) any later version.\n" - << "\n" - << "You should have received a copy of the GNU General Public License\n" - << "along with this program. If not, see " - ".\n"; + firestarter::log::info() << "This program is free software: you can redistribute it and/or " + "modify\n" + << "it under the terms of the GNU General Public License as published " + "by\n" + << "the Free Software Foundation, either version 3 of the License, or\n" + << "(at your option) any later version.\n" + << "\n" + << "You should have received a copy of the GNU General Public License\n" + << "along with this program. If not, see " + ".\n"; } void print_warranty() { - firestarter::log::info() - << "This program is distributed in the hope that it will be useful,\n" - << "but WITHOUT ANY WARRANTY; without even the implied warranty of\n" - << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" - << "GNU General Public License for more details.\n" - << "\n" - << "You should have received a copy of the GNU General Public License\n" - << "along with this program. If not, see " - ".\n"; + firestarter::log::info() << "This program is distributed in the hope that it will be useful,\n" + << "but WITHOUT ANY WARRANTY; without even the implied warranty of\n" + << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" + << "GNU General Public License for more details.\n" + << "\n" + << "You should have received a copy of the GNU General Public License\n" + << "along with this program. 
If not, see " + ".\n"; } -void print_help(cxxopts::Options const &parser, std::string const §ion) { - std::vector> options( - Config::optionsMap.size()); +void print_help(cxxopts::Options const& parser, std::string const& section) { + std::vector> options(Config::optionsMap.size()); if (section.size() == 0) { - std::copy(Config::optionsMap.begin(), Config::optionsMap.end(), - options.begin()); + std::copy(Config::optionsMap.begin(), Config::optionsMap.end(), options.begin()); } else { - auto findSection = [&](std::pair const &pair) { - return pair.first == section; - }; - auto it = std::copy_if(Config::optionsMap.begin(), Config::optionsMap.end(), - options.begin(), findSection); + auto findSection = [&](std::pair const& pair) { return pair.first == section; }; + auto it = std::copy_if(Config::optionsMap.begin(), Config::optionsMap.end(), options.begin(), findSection); options.resize(std::distance(options.begin(), it)); } @@ -162,7 +155,7 @@ void print_help(cxxopts::Options const &parser, std::string const §ion) { // clang-format on } -Config::Config(int argc, const char **argv) { +Config::Config(int argc, const char** argv) { cxxopts::Options parser(argv[0]); @@ -270,17 +263,13 @@ Config::Config(int argc, const char **argv) { auto options = parser.parse(argc, argv); if (options.count("quiet")) { - firestarter::logging::filter::set_severity( - nitro::log::severity_level::warn); + firestarter::logging::filter::set_severity(nitro::log::severity_level::warn); } else if (options.count("report")) { - firestarter::logging::filter::set_severity( - nitro::log::severity_level::debug); + firestarter::logging::filter::set_severity(nitro::log::severity_level::debug); } else if (options.count("debug")) { - firestarter::logging::filter::set_severity( - nitro::log::severity_level::trace); + firestarter::logging::filter::set_severity(nitro::log::severity_level::trace); } else { - firestarter::logging::filter::set_severity( - nitro::log::severity_level::info); + 
firestarter::logging::filter::set_severity(nitro::log::severity_level::info); } if (options.count("version")) { @@ -297,25 +286,18 @@ Config::Config(int argc, const char **argv) { std::exit(EXIT_SUCCESS); } - firestarter::log::info() - << "This program comes with ABSOLUTELY NO WARRANTY; for details run `" - << argv[0] << " -w`.\n" - << "This is free software, and you are welcome to redistribute it\n" - << "under certain conditions; run `" << argv[0] - << " -c` for details.\n"; + firestarter::log::info() << "This program comes with ABSOLUTELY NO WARRANTY; for details run `" << argv[0] + << " -w`.\n" + << "This is free software, and you are welcome to redistribute it\n" + << "under certain conditions; run `" << argv[0] << " -c` for details.\n"; if (options.count("help")) { auto section = options["help"].as(); // section not found - auto findSection = [&](std::pair const &pair) { - return pair.first == section; - }; - if (std::find_if(optionsMap.begin(), optionsMap.end(), findSection) == - optionsMap.end() && - section.size() != 0) { - throw std::invalid_argument("Section \"" + section + - "\" not found in help."); + auto findSection = [&](std::pair const& pair) { return pair.first == section; }; + if (std::find_if(optionsMap.begin(), optionsMap.end(), findSection) == optionsMap.end() && section.size() != 0) { + throw std::invalid_argument("Section \"" + section + "\" not found in help."); } print_help(parser, section); @@ -340,29 +322,25 @@ Config::Config(int argc, const char **argv) { allowUnavailablePayload = options.count("allow-unavailable-payload"); dumpRegisters = options.count("dump-registers"); if (dumpRegisters) { - dumpRegistersTimeDelta = - std::chrono::seconds(options["dump-registers"].as()); + dumpRegistersTimeDelta = std::chrono::seconds(options["dump-registers"].as()); if (timeout != std::chrono::microseconds::zero() && loadPercent != 100) { throw std::invalid_argument("Option --dump-registers may only be used " "without a timeout and full load."); } 
if (errorDetection) { - throw std::invalid_argument( - "Options --dump-registers and --error-detection cannot be used " - "together."); + throw std::invalid_argument("Options --dump-registers and --error-detection cannot be used " + "together."); } } #endif requestedNumThreads = options["threads"].as(); -#if (defined(linux) || defined(__linux__)) && \ - defined(FIRESTARTER_THREAD_AFFINITY) +#if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY) cpuBind = options["bind"].as(); if (!cpuBind.empty()) { if (requestedNumThreads != 0) { - throw std::invalid_argument( - "Options -b/--bind and -n/--threads cannot be used together."); + throw std::invalid_argument("Options -b/--bind and -n/--threads cannot be used together."); } } #endif @@ -378,8 +356,7 @@ Config::Config(int argc, const char **argv) { gpuMatrixSize = options["matrixsize"].as(); if (gpuMatrixSize > 0 && gpuMatrixSize < 64) { - throw std::invalid_argument( - "Option -m/--matrixsize may not be below 64."); + throw std::invalid_argument("Option -m/--matrixsize may not be below 64."); } gpus = options["gpus"].as(); @@ -396,17 +373,14 @@ Config::Config(int argc, const char **argv) { } #if defined(linux) || defined(__linux__) - startDelta = - std::chrono::milliseconds(options["start-delta"].as()); + startDelta = std::chrono::milliseconds(options["start-delta"].as()); stopDelta = std::chrono::milliseconds(options["stop-delta"].as()); - measurementInterval = std::chrono::milliseconds( - options["measurement-interval"].as()); + measurementInterval = std::chrono::milliseconds(options["measurement-interval"].as()); #ifndef FIRESTARTER_LINK_STATIC metricPaths = options["metric-path"].as>(); #endif if (options.count("metric-from-stdin")) { - stdinMetrics = - options["metric-from-stdin"].as>(); + stdinMetrics = options["metric-from-stdin"].as>(); } measurement = options.count("measurement"); listMetrics = options.count("list-metrics"); @@ -417,22 +391,19 @@ Config::Config(int argc, const char 
**argv) { "cannot be used together."); } if (measurement) { - throw std::invalid_argument( - "Options --measurement and --optimize cannot be used together."); + throw std::invalid_argument("Options --measurement and --optimize cannot be used together."); } preheat = std::chrono::seconds(options["preheat"].as()); optimizationAlgorithm = options["optimize"].as(); if (options.count("optimization-metric")) { - optimizationMetrics = - options["optimization-metric"].as>(); + optimizationMetrics = options["optimization-metric"].as>(); } if (loadPercent != 100) { throw std::invalid_argument("Options -p | --period and -l | --load are " "not compatible with --optimize."); } if (timeout == std::chrono::seconds::zero()) { - throw std::invalid_argument( - "Option -t | --timeout must be specified for optimization."); + throw std::invalid_argument("Option -t | --timeout must be specified for optimization."); } evaluationDuration = timeout; // this will deactivate the watchdog worker @@ -451,42 +422,37 @@ Config::Config(int argc, const char **argv) { } #endif - } catch (std::exception &e) { + } catch (std::exception& e) { firestarter::log::error() << e.what() << "\n"; print_help(parser, ""); std::exit(EXIT_FAILURE); } } -int main(int argc, const char **argv) { +int main(int argc, const char** argv) { - firestarter::log::info() - << "FIRESTARTER - A Processor Stress Test Utility, Version " - << _FIRESTARTER_VERSION_STRING << "\n" - << "Copyright (C) " << _FIRESTARTER_BUILD_YEAR - << " TU Dresden, Center for Information Services and High Performance " - "Computing" - << "\n"; + firestarter::log::info() << "FIRESTARTER - A Processor Stress Test Utility, Version " << _FIRESTARTER_VERSION_STRING + << "\n" + << "Copyright (C) " << _FIRESTARTER_BUILD_YEAR + << " TU Dresden, Center for Information Services and High Performance " + "Computing" + << "\n"; Config cfg{argc, argv}; try { firestarter::Firestarter firestarter( - argc, argv, cfg.timeout, cfg.loadPercent, cfg.period, - 
cfg.requestedNumThreads, cfg.cpuBind, cfg.printFunctionSummary, - cfg.functionId, cfg.listInstructionGroups, cfg.instructionGroups, - cfg.lineCount, cfg.allowUnavailablePayload, cfg.dumpRegisters, - cfg.dumpRegistersTimeDelta, cfg.dumpRegistersOutpath, - cfg.errorDetection, cfg.gpus, cfg.gpuMatrixSize, cfg.gpuUseFloat, - cfg.gpuUseDouble, cfg.listMetrics, cfg.measurement, cfg.startDelta, - cfg.stopDelta, cfg.measurementInterval, cfg.metricPaths, - cfg.stdinMetrics, cfg.optimize, cfg.preheat, cfg.optimizationAlgorithm, - cfg.optimizationMetrics, cfg.evaluationDuration, cfg.individuals, - cfg.optimizeOutfile, cfg.generations, cfg.nsga2_cr, cfg.nsga2_m); + argc, argv, cfg.timeout, cfg.loadPercent, cfg.period, cfg.requestedNumThreads, cfg.cpuBind, + cfg.printFunctionSummary, cfg.functionId, cfg.listInstructionGroups, cfg.instructionGroups, cfg.lineCount, + cfg.allowUnavailablePayload, cfg.dumpRegisters, cfg.dumpRegistersTimeDelta, cfg.dumpRegistersOutpath, + cfg.errorDetection, cfg.gpus, cfg.gpuMatrixSize, cfg.gpuUseFloat, cfg.gpuUseDouble, cfg.listMetrics, + cfg.measurement, cfg.startDelta, cfg.stopDelta, cfg.measurementInterval, cfg.metricPaths, cfg.stdinMetrics, + cfg.optimize, cfg.preheat, cfg.optimizationAlgorithm, cfg.optimizationMetrics, cfg.evaluationDuration, + cfg.individuals, cfg.optimizeOutfile, cfg.generations, cfg.nsga2_cr, cfg.nsga2_m); firestarter.mainThread(); - } catch (std::exception const &e) { + } catch (std::exception const& e) { firestarter::log::error() << e.what(); return EXIT_FAILURE; } diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp index 498330ab..efd7a4bc 100644 --- a/src/firestarter/Measurement/MeasurementWorker.cpp +++ b/src/firestarter/Measurement/MeasurementWorker.cpp @@ -30,27 +30,25 @@ extern "C" { } #endif -void insertCallback(void *cls, const char *metricName, int64_t timeSinceEpoch, - double value) { - static_cast(cls) - ->insertCallback(metricName, timeSinceEpoch, 
value); +void insertCallback(void* cls, const char* metricName, int64_t timeSinceEpoch, double value) { + static_cast(cls)->insertCallback(metricName, timeSinceEpoch, value); } using namespace firestarter::measurement; -MeasurementWorker::MeasurementWorker( - std::chrono::milliseconds updateInterval, unsigned long long numThreads, - std::vector const &metricDylibs, - std::vector const &stdinMetrics) - : updateInterval(updateInterval), numThreads(numThreads) { +MeasurementWorker::MeasurementWorker(std::chrono::milliseconds updateInterval, unsigned long long numThreads, + std::vector const& metricDylibs, + std::vector const& stdinMetrics) + : updateInterval(updateInterval) + , numThreads(numThreads) { #ifndef FIRESTARTER_LINK_STATIC // open dylibs and find metric symbol. // create an entry in _metricDylibs with handle from dlopen and // metric_interface_t structure. add this structe as a pointer to metrics. - for (auto const &dylib : metricDylibs) { - void *handle; - const char *filename = dylib.c_str(); + for (auto const& dylib : metricDylibs) { + void* handle; + const char* filename = dylib.c_str(); handle = dlopen(dylib.c_str(), RTLD_NOW | RTLD_LOCAL); @@ -62,11 +60,11 @@ MeasurementWorker::MeasurementWorker( // clear existing error dlerror(); - metric_interface_t *metric = nullptr; + metric_interface_t* metric = nullptr; - metric = (metric_interface_t *)dlsym(handle, "metric"); + metric = (metric_interface_t*)dlsym(handle, "metric"); - char *error; + char* error; if ((error = dlerror()) != NULL) { firestarter::log::error() << filename << ": " << error; dlclose(handle); @@ -74,8 +72,7 @@ MeasurementWorker::MeasurementWorker( } if (this->findMetricByName(metric->name) != nullptr) { - firestarter::log::error() - << "A metric named \"" << metric->name << "\" is already loaded."; + firestarter::log::error() << "A metric named \"" << metric->name << "\" is already loaded."; dlclose(handle); continue; } @@ -89,10 +86,9 @@ MeasurementWorker::MeasurementWorker( #endif // 
setup metric objects for metric names passed from stdin. - for (auto const &name : stdinMetrics) { + for (auto const& name : stdinMetrics) { if (this->findMetricByName(name) != nullptr) { - firestarter::log::error() - << "A metric named \"" << name << "\" is already loaded."; + firestarter::log::error() << "A metric named \"" << name << "\" is already loaded."; continue; } @@ -103,7 +99,7 @@ MeasurementWorker::MeasurementWorker( unsigned maxLength = 0; std::map available; - for (auto const &metric : this->metrics) { + for (auto const& metric : this->metrics) { std::string name(metric->name); maxLength = maxLength < name.size() ? name.size() : maxLength; int returnCode = metric->init(); @@ -114,7 +110,7 @@ MeasurementWorker::MeasurementWorker( unsigned padding = maxLength > 6 ? maxLength - 6 : 0; ss << " METRIC" << std::string(padding + 1, ' ') << "| available\n"; ss << " " << std::string(padding + 7, '-') << "-----------\n"; - for (auto const &[key, value] : available) { + for (auto const& [key, value] : available) { ss << " " << key << std::string(padding + 7 - key.size(), ' ') << "| "; ss << (value ? 
"yes" : "no") << "\n"; } @@ -122,16 +118,12 @@ MeasurementWorker::MeasurementWorker( this->availableMetricsString = ss.str(); pthread_create(&this->workerThread, NULL, - reinterpret_cast( - MeasurementWorker::dataAcquisitionWorker), - this); + reinterpret_cast(MeasurementWorker::dataAcquisitionWorker), this); // create a worker for getting metric values from stdin if (this->_stdinMetrics.size() > 0) { pthread_create(&this->stdinThread, NULL, - reinterpret_cast( - MeasurementWorker::stdinDataAcquisitionWorker), - this); + reinterpret_cast(MeasurementWorker::stdinDataAcquisitionWorker), this); } } @@ -146,7 +138,7 @@ MeasurementWorker::~MeasurementWorker() { pthread_join(this->stdinThread, NULL); } - for (auto const &[key, value] : this->values) { + for (auto const& [key, value] : this->values) { auto metric = this->findMetricByName(key); if (metric == nullptr) { continue; @@ -164,48 +156,39 @@ MeasurementWorker::~MeasurementWorker() { std::vector MeasurementWorker::metricNames() { std::vector metrics; - std::transform( - this->metrics.begin(), this->metrics.end(), std::back_inserter(metrics), - [](auto &metric) -> std::string { return std::string(metric->name); }); - for (auto const &name : this->_stdinMetrics) { + std::transform(this->metrics.begin(), this->metrics.end(), std::back_inserter(metrics), + [](auto& metric) -> std::string { return std::string(metric->name); }); + for (auto const& name : this->_stdinMetrics) { metrics.push_back(name); } return metrics; } -const metric_interface_t * -MeasurementWorker::findMetricByName(std::string metricName) { - auto name_equal = [metricName](auto &metricInterface) { - return metricName.compare(metricInterface->name) == 0; - }; - auto metric = - std::find_if(this->metrics.begin(), this->metrics.end(), name_equal); +const metric_interface_t* MeasurementWorker::findMetricByName(std::string metricName) { + auto name_equal = [metricName](auto& metricInterface) { return metricName.compare(metricInterface->name) == 0; }; + 
auto metric = std::find_if(this->metrics.begin(), this->metrics.end(), name_equal); // metric not found if (metric == this->metrics.end()) { return nullptr; } // metric found - return const_cast(*metric); + return const_cast(*metric); } // this must be called by the main thread. // if not done so things like perf_event_attr.inherit might not work as expected -std::vector -MeasurementWorker::initMetrics(std::vector const &metricNames) { +std::vector MeasurementWorker::initMetrics(std::vector const& metricNames) { this->values_mutex.lock(); std::vector initialized = {}; // try to find each metric and initialize it - for (auto const &metricName : metricNames) { + for (auto const& metricName : metricNames) { // init values map with empty vector - auto name_equal = [metricName](auto const &pair) { - return metricName.compare(pair.first) == 0; - }; - auto pair = - std::find_if(this->values.begin(), this->values.end(), name_equal); + auto name_equal = [metricName](auto const& pair) { return metricName.compare(pair.first) == 0; }; + auto pair = std::find_if(this->values.begin(), this->values.end(), name_equal); if (pair != this->values.end()) { pair->second.clear(); } else { @@ -213,8 +196,7 @@ MeasurementWorker::initMetrics(std::vector const &metricNames) { if (metric != nullptr) { int returnValue = metric->init(); if (returnValue != EXIT_SUCCESS) { - log::error() << "Metric " << metric->name << ": " - << metric->get_error(); + log::error() << "Metric " << metric->name << ": " << metric->get_error(); continue; } } @@ -233,19 +215,13 @@ MeasurementWorker::initMetrics(std::vector const &metricNames) { return initialized; } -void MeasurementWorker::insertCallback(const char *metricName, - int64_t timeSinceEpoch, double value) { +void MeasurementWorker::insertCallback(const char* metricName, int64_t timeSinceEpoch, double value) { this->values_mutex.lock(); using Duration = std::chrono::duration; - auto time = - std::chrono::time_point( - Duration(timeSinceEpoch)); - auto 
name_equal = [metricName](auto const &pair) { - return std::string(metricName).compare(pair.first) == 0; - }; - auto pair = - std::find_if(this->values.begin(), this->values.end(), name_equal); + auto time = std::chrono::time_point(Duration(timeSinceEpoch)); + auto name_equal = [metricName](auto const& pair) { return std::string(metricName).compare(pair.first) == 0; }; + auto pair = std::find_if(this->values.begin(), this->values.end(), name_equal); if (pair != this->values.end()) { pair->second.push_back(TimeValue(time, value)); @@ -254,18 +230,15 @@ void MeasurementWorker::insertCallback(const char *metricName, this->values_mutex.unlock(); } -void MeasurementWorker::startMeasurement() { - this->startTime = std::chrono::high_resolution_clock::now(); -} +void MeasurementWorker::startMeasurement() { this->startTime = std::chrono::high_resolution_clock::now(); } -std::map -MeasurementWorker::getValues(std::chrono::milliseconds startDelta, - std::chrono::milliseconds stopDelta) { +std::map MeasurementWorker::getValues(std::chrono::milliseconds startDelta, + std::chrono::milliseconds stopDelta) { std::map measurment = {}; this->values_mutex.lock(); - for (auto &[key, values] : this->values) { + for (auto& [key, values] : this->values) { auto startTime = this->startTime; auto endTime = std::chrono::high_resolution_clock::now(); auto metric = this->findMetricByName(key); @@ -288,15 +261,11 @@ MeasurementWorker::getValues(std::chrono::milliseconds startDelta, decltype(values) croppedValues(values.size()); - auto findAll = [startTime, endTime](auto const &tv) { - return startTime <= tv.time && tv.time <= endTime; - }; - auto it = std::copy_if(values.begin(), values.end(), croppedValues.begin(), - findAll); + auto findAll = [startTime, endTime](auto const& tv) { return startTime <= tv.time && tv.time <= endTime; }; + auto it = std::copy_if(values.begin(), values.end(), croppedValues.begin(), findAll); croppedValues.resize(std::distance(croppedValues.begin(), it)); - Summary 
sum = Summary::calculate(croppedValues.begin(), croppedValues.end(), - type, this->numThreads); + Summary sum = Summary::calculate(croppedValues.begin(), croppedValues.end(), type, this->numThreads); measurment[key] = sum; } @@ -306,11 +275,11 @@ MeasurementWorker::getValues(std::chrono::milliseconds startDelta, return measurment; } -int *MeasurementWorker::dataAcquisitionWorker(void *measurementWorker) { +int* MeasurementWorker::dataAcquisitionWorker(void* measurementWorker) { pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - auto _this = reinterpret_cast(measurementWorker); + auto _this = reinterpret_cast(measurementWorker); #ifndef __APPLE__ pthread_setname_np(pthread_self(), "DataAcquisition"); @@ -319,8 +288,7 @@ int *MeasurementWorker::dataAcquisitionWorker(void *measurementWorker) { using clock = std::chrono::high_resolution_clock; using callbackTuple = - std::tuple; + std::tuple; auto callbackTupleComparator = [](callbackTuple left, callbackTuple right) { return std::get<2>(left) > std::get<2>(right); }; @@ -328,29 +296,26 @@ int *MeasurementWorker::dataAcquisitionWorker(void *measurementWorker) { // this datastructure holds a tuple of our callback, the callback frequency // and the next timepoint. 
it will be sorted, so the pop function will give // back the next callback - std::priority_queue, - decltype(callbackTupleComparator)> - callbackQueue(callbackTupleComparator); + std::priority_queue, decltype(callbackTupleComparator)> callbackQueue( + callbackTupleComparator); _this->values_mutex.lock(); - for (auto const &[key, value] : _this->values) { + for (auto const& [key, value] : _this->values) { auto metric_interface = _this->findMetricByName(key); if (metric_interface == nullptr) { continue; } - auto callbackTime = - std::chrono::microseconds(metric_interface->callback_time); + auto callbackTime = std::chrono::microseconds(metric_interface->callback_time); if (callbackTime.count() == 0) { continue; } auto currentTime = clock::now(); - callbackQueue.push( - std::make_tuple(metric_interface->callback, callbackTime, currentTime)); + callbackQueue.push(std::make_tuple(metric_interface->callback, callbackTime, currentTime)); } _this->values_mutex.unlock(); @@ -363,7 +328,7 @@ int *MeasurementWorker::dataAcquisitionWorker(void *measurementWorker) { if (nextFetch <= now) { _this->values_mutex.lock(); - for (auto &[metricName, values] : _this->values) { + for (auto& [metricName, values] : _this->values) { auto metric_interface = _this->findMetricByName(metricName); if (metric_interface == nullptr) { @@ -372,11 +337,9 @@ int *MeasurementWorker::dataAcquisitionWorker(void *measurementWorker) { double value; - if (!metric_interface->type.insert_callback && - metric_interface->get_reading != nullptr) { + if (!metric_interface->type.insert_callback && metric_interface->get_reading != nullptr) { if (EXIT_SUCCESS == metric_interface->get_reading(&value)) { - auto tv = - TimeValue(std::chrono::high_resolution_clock::now(), value); + auto tv = TimeValue(std::chrono::high_resolution_clock::now(), value); values.push_back(tv); } } @@ -401,8 +364,7 @@ int *MeasurementWorker::dataAcquisitionWorker(void *measurementWorker) { // add it with the updated callback time to the 
queue again nextCallback = now + callbackTime; - callbackQueue.push( - std::make_tuple(callbackFunction, callbackTime, nextCallback)); + callbackQueue.push(std::make_tuple(callbackFunction, callbackTime, nextCallback)); } nextWake = nextCallback < nextWake ? nextCallback : nextWake; @@ -412,11 +374,11 @@ int *MeasurementWorker::dataAcquisitionWorker(void *measurementWorker) { } } -int *MeasurementWorker::stdinDataAcquisitionWorker(void *measurementWorker) { +int* MeasurementWorker::stdinDataAcquisitionWorker(void* measurementWorker) { pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - auto _this = reinterpret_cast(measurementWorker); + auto _this = reinterpret_cast(measurementWorker); #ifndef __APPLE__ pthread_setname_np(pthread_self(), "StdinDataAcquis"); @@ -427,11 +389,8 @@ int *MeasurementWorker::stdinDataAcquisitionWorker(void *measurementWorker) { double value; char name[128]; if (std::sscanf(line.c_str(), "%127s %ld %lf", name, &time, &value) == 3) { - auto name_equal = [name](auto const &allowedName) { - return allowedName.compare(std::string(name)) == 0; - }; - auto item = std::find_if(_this->stdinMetrics().begin(), - _this->stdinMetrics().end(), name_equal); + auto name_equal = [name](auto const& allowedName) { return allowedName.compare(std::string(name)) == 0; }; + auto item = std::find_if(_this->stdinMetrics().begin(), _this->stdinMetrics().end(), name_equal); // metric name is allowed if (item != _this->stdinMetrics().end()) { _this->insertCallback(name, time, value); diff --git a/src/firestarter/Measurement/Metric/IPCEstimate.cpp b/src/firestarter/Measurement/Metric/IPCEstimate.cpp index a58f91bb..145f02ae 100644 --- a/src/firestarter/Measurement/Metric/IPCEstimate.cpp +++ b/src/firestarter/Measurement/Metric/IPCEstimate.cpp @@ -30,8 +30,8 @@ extern "C" { static std::string errorString = ""; -static void (*callback)(void *, const char *, int64_t, double) = nullptr; -static void *callback_arg = nullptr; +static void (*callback)(void*, const 
char*, int64_t, double) = nullptr; +static void* callback_arg = nullptr; static int32_t fini(void) { callback = nullptr; @@ -46,14 +46,12 @@ static int32_t init(void) { return EXIT_SUCCESS; } -static const char *get_error(void) { - const char *errorCString = errorString.c_str(); +static const char* get_error(void) { + const char* errorCString = errorString.c_str(); return errorCString; } -static int32_t register_insert_callback(void (*c)(void *, const char *, int64_t, - double), - void *arg) { +static int32_t register_insert_callback(void (*c)(void*, const char*, int64_t, double), void* arg) { callback = c; callback_arg = arg; return EXIT_SUCCESS; @@ -64,9 +62,9 @@ void ipc_estimate_metric_insert(double value) { return; } - int64_t t = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now().time_since_epoch()) - .count(); + int64_t t = + std::chrono::duration_cast(std::chrono::high_resolution_clock::now().time_since_epoch()) + .count(); callback(callback_arg, "ipc-estimate", t, value); } diff --git a/src/firestarter/Measurement/Metric/Perf.cpp b/src/firestarter/Measurement/Metric/Perf.cpp index 48f3120b..a7266db2 100644 --- a/src/firestarter/Measurement/Metric/Perf.cpp +++ b/src/firestarter/Measurement/Metric/Perf.cpp @@ -52,8 +52,7 @@ static int32_t init_value; static struct read_format last; -static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, - int cpu, int group_fd, unsigned long flags) { +static long perf_event_open(struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); } @@ -80,9 +79,7 @@ static int32_t init(void) { // The official way of knowing if perf_event_open() support is enabled // is checking for the existence of the file // /proc/sys/kernel/perf_event_paranoid. 
- errorString = - "syscall perf_event_open not supported or file " PERF_EVENT_PARANOID - " does not exist"; + errorString = "syscall perf_event_open not supported or file " PERF_EVENT_PARANOID " does not exist"; init_value = EXIT_FAILURE; init_done = true; return EXIT_FAILURE; @@ -117,17 +114,16 @@ static int32_t init(void) { cpu_cycles_attr.exclude_kernel = 1; cpu_cycles_attr.exclude_hv = 1; - if ((cpu_cycles_fd = perf_event_open( - &cpu_cycles_attr, - // pid == 0 and cpu == -1 - // This measures the calling process/thread on any CPU. - 0, -1, - // The group_fd argument allows event groups to be created. An event - // group has one event which is the group leader. The leader is - // created first, with group_fd = -1. The rest of the group members - // are created with subsequent perf_event_open() calls with group_fd - // being set to the file descriptor of the group leader. - -1, 0)) < 0) { + if ((cpu_cycles_fd = perf_event_open(&cpu_cycles_attr, + // pid == 0 and cpu == -1 + // This measures the calling process/thread on any CPU. + 0, -1, + // The group_fd argument allows event groups to be created. An event + // group has one event which is the group leader. The leader is + // created first, with group_fd = -1. The rest of the group members + // are created with subsequent perf_event_open() calls with group_fd + // being set to the file descriptor of the group leader. + -1, 0)) < 0) { fini(); errorString = "perf_event_open failed for PERF_COUNT_HW_CPU_CYCLES"; init_value = EXIT_FAILURE; @@ -147,17 +143,16 @@ static int32_t init(void) { instructions_attr.exclude_kernel = 1; instructions_attr.exclude_hv = 1; - if ((instructions_fd = perf_event_open( - &instructions_attr, - // pid == 0 and cpu == -1 - // This measures the calling process/thread on any CPU. - 0, -1, - // The group_fd argument allows event groups to be created. An event - // group has one event which is the group leader. The leader is - // created first, with group_fd = -1. 
The rest of the group members - // are created with subsequent perf_event_open() calls with group_fd - // being set to the file descriptor of the group leader. - cpu_cycles_fd, 0)) < 0) { + if ((instructions_fd = perf_event_open(&instructions_attr, + // pid == 0 and cpu == -1 + // This measures the calling process/thread on any CPU. + 0, -1, + // The group_fd argument allows event groups to be created. An event + // group has one event which is the group leader. The leader is + // created first, with group_fd = -1. The rest of the group members + // are created with subsequent perf_event_open() calls with group_fd + // being set to the file descriptor of the group leader. + cpu_cycles_fd, 0)) < 0) { fini(); errorString = "perf_event_open failed for PERF_COUNT_HW_INSTRUCTIONS"; init_value = EXIT_FAILURE; @@ -183,7 +178,7 @@ static int32_t init(void) { return EXIT_SUCCESS; } -static uint64_t value_from_id(struct read_format *values, uint64_t id) { +static uint64_t value_from_id(struct read_format* values, uint64_t id) { for (decltype(values->nr) i = 0; i < values->nr; ++i) { if (id == values->values[i].id) { return values->values[i].value; @@ -193,7 +188,7 @@ static uint64_t value_from_id(struct read_format *values, uint64_t id) { return 0; } -static int32_t get_reading(double *ipc_value, double *freq_value) { +static int32_t get_reading(double* ipc_value, double* freq_value) { if (cpu_cycles_fd < 0 || instructions_fd < 0) { fini(); @@ -210,10 +205,8 @@ static int32_t get_reading(double *ipc_value, double *freq_value) { if (ipc_value != nullptr) { uint64_t diff[2]; - diff[0] = value_from_id(&read_values, instructions_id) - - value_from_id(&last, instructions_id); - diff[1] = value_from_id(&read_values, cpu_cycles_id) - - value_from_id(&last, cpu_cycles_id); + diff[0] = value_from_id(&read_values, instructions_id) - value_from_id(&last, instructions_id); + diff[1] = value_from_id(&read_values, cpu_cycles_id) - value_from_id(&last, cpu_cycles_id); std::memcpy(&last, 
&read_values, sizeof(last)); @@ -227,16 +220,12 @@ static int32_t get_reading(double *ipc_value, double *freq_value) { return EXIT_SUCCESS; } -static int32_t get_reading_ipc(double *value) { - return get_reading(value, nullptr); -} +static int32_t get_reading_ipc(double* value) { return get_reading(value, nullptr); } -static int32_t get_reading_freq(double *value) { - return get_reading(nullptr, value); -} +static int32_t get_reading_freq(double* value) { return get_reading(nullptr, value); } -static const char *get_error(void) { - const char *errorCString = errorString.c_str(); +static const char* get_error(void) { + const char* errorCString = errorString.c_str(); return errorCString; } } diff --git a/src/firestarter/Measurement/Metric/RAPL.cpp b/src/firestarter/Measurement/Metric/RAPL.cpp index 5f6b4bd7..e6d28f1d 100644 --- a/src/firestarter/Measurement/Metric/RAPL.cpp +++ b/src/firestarter/Measurement/Metric/RAPL.cpp @@ -37,19 +37,19 @@ extern "C" { static std::string errorString = ""; struct reader_def { - char *path; + char* path; long long int last_reading; long long int overflow; long long int max; }; struct reader_def_free { - void operator()(struct reader_def *def) { + void operator()(struct reader_def* def) { if (def != nullptr) { - if (((void *)def->path) != nullptr) { - free((void *)def->path); + if (((void*)def->path) != nullptr) { + free((void*)def->path); } - free((void *)def); + free((void*)def); } } }; @@ -65,7 +65,7 @@ static int32_t fini(void) { static int32_t init(void) { errorString = ""; - DIR *raplDir = opendir(RAPL_PATH); + DIR* raplDir = opendir(RAPL_PATH); if (raplDir == NULL) { errorString = "Could not open " RAPL_PATH; return EXIT_FAILURE; @@ -81,7 +81,7 @@ static int32_t init(void) { // a vector of all paths to package and dram std::vector paths = {}; - struct dirent *dir; + struct dirent* dir; while ((dir = readdir(raplDir)) != NULL) { std::stringstream path; std::stringstream namePath; @@ -120,7 +120,7 @@ static int32_t init(void) { 
return EXIT_FAILURE; } - for (auto const &path : paths) { + for (auto const& path : paths) { std::stringstream energyUjPath; energyUjPath << path << "/energy_uj"; std::ifstream energyReadingStream(energyUjPath.str()); @@ -147,8 +147,7 @@ static int32_t init(void) { if (read == 0) { std::stringstream ss; - ss << "Contents in file " << energyUjPath.str() - << " do not conform to mask (unsigned long long)"; + ss << "Contents in file " << energyUjPath.str() << " do not conform to mask (unsigned long long)"; errorString = ss.str(); break; } @@ -158,21 +157,18 @@ static int32_t init(void) { if (read == 0) { std::stringstream ss; - ss << "Contents in file " << maxEnergyUjRangePath.str() - << " do not conform to mask (unsigned long long)"; + ss << "Contents in file " << maxEnergyUjRangePath.str() << " do not conform to mask (unsigned long long)"; errorString = ss.str(); break; } - std::shared_ptr def( - reinterpret_cast( - malloc(sizeof(struct reader_def))), - reader_def_free()); + std::shared_ptr def(reinterpret_cast(malloc(sizeof(struct reader_def))), + reader_def_free()); auto pathName = path.c_str(); size_t size = (strlen(pathName) + 1) * sizeof(char); - void *name = malloc(size); + void* name = malloc(size); memcpy(name, pathName, size); - def->path = (char *)name; + def->path = (char*)name; def->max = max; def->last_reading = reading; def->overflow = 0; @@ -188,10 +184,10 @@ static int32_t init(void) { return EXIT_SUCCESS; } -static int32_t get_reading(double *value) { +static int32_t get_reading(double* value) { double finalReading = 0.0; - for (auto &def : readers) { + for (auto& def : readers) { long long int reading; std::string buffer; @@ -207,8 +203,7 @@ static int32_t get_reading(double *value) { def->last_reading = reading; - finalReading += - 1.0E-6 * (double)(def->overflow * def->max + def->last_reading); + finalReading += 1.0E-6 * (double)(def->overflow * def->max + def->last_reading); } if (value != nullptr) { @@ -218,8 +213,8 @@ static int32_t 
get_reading(double *value) { return EXIT_SUCCESS; } -static const char *get_error(void) { - const char *errorCString = errorString.c_str(); +static const char* get_error(void) { + const char* errorCString = errorString.c_str(); return errorCString; } diff --git a/src/firestarter/Measurement/Summary.cpp b/src/firestarter/Measurement/Summary.cpp index 590c4e01..2d1bd8f4 100644 --- a/src/firestarter/Measurement/Summary.cpp +++ b/src/firestarter/Measurement/Summary.cpp @@ -28,10 +28,8 @@ using namespace firestarter::measurement; // this functions borows a lot of code from // https://github.com/metricq/metricq-cpp/blob/master/tools/metricq-summary/src/summary.cpp -Summary Summary::calculate(std::vector::iterator begin, - std::vector::iterator end, - metric_type_t metricType, - unsigned long long numThreads) { +Summary Summary::calculate(std::vector::iterator begin, std::vector::iterator end, + metric_type_t metricType, unsigned long long numThreads) { std::vector values = {}; // TODO: i would really like to make this code a bit more readable, but i @@ -43,10 +41,7 @@ Summary Summary::calculate(std::vector::iterator begin, prev = *begin++; for (auto it = begin; it != end; ++it) { auto time_diff = - 1e-6 * - (double)std::chrono::duration_cast( - it->time - prev.time) - .count(); + 1e-6 * (double)std::chrono::duration_cast(it->time - prev.time).count(); auto value_diff = it->value - prev.value; double value = value_diff / time_diff; @@ -84,8 +79,7 @@ Summary Summary::calculate(std::vector::iterator begin, auto last = begin; std::advance(last, summary.num_timepoints - 1); - summary.duration = std::chrono::duration_cast( - last->time - begin->time); + summary.duration = std::chrono::duration_cast(last->time - begin->time); auto sum_over_nths = [&begin, end, summary](auto fn) { double acc = 0.0; diff --git a/src/firestarter/OneAPI/OneAPI.cpp b/src/firestarter/OneAPI/OneAPI.cpp index f09f79b0..c31ae6cd 100644 --- a/src/firestarter/OneAPI/OneAPI.cpp +++ 
b/src/firestarter/OneAPI/OneAPI.cpp @@ -22,14 +22,12 @@ /* OneAPI for GPUs, based on CUDA component *****************************************************************************/ -#include #include #include +#include - -#include #include - +#include #include #include @@ -37,34 +35,28 @@ using namespace firestarter::oneapi; - /* Random number generation helpers */ -template -void generate_random_data(size_t elems, T *v) -{ - for (size_t i = 0; i < elems; i++) - v[i] = double(std::rand()) / RAND_MAX; +template void generate_random_data(size_t elems, T* v) { + for (size_t i = 0; i < elems; i++) + v[i] = double(std::rand()) / RAND_MAX; } -template -void replicate_data(sycl::queue &Q, T *dst, size_t dst_elems, const T *src, size_t src_elems) -{ - firestarter::log::trace() << "replicate_data " << dst_elems << " elements from " << - src << " to " << dst ; - while (dst_elems > 0) { - auto copy_elems = std::min(dst_elems, src_elems); - Q.copy(src, dst, copy_elems); - dst += copy_elems; - dst_elems -= copy_elems; - } - Q.wait(); +template void replicate_data(sycl::queue& Q, T* dst, size_t dst_elems, const T* src, size_t src_elems) { + firestarter::log::trace() << "replicate_data " << dst_elems << " elements from " << src << " to " << dst; + while (dst_elems > 0) { + auto copy_elems = std::min(dst_elems, src_elems); + Q.copy(src, dst, copy_elems); + dst += copy_elems; + dst_elems -= copy_elems; + } + Q.wait(); } static int get_precision(int device_index, int useDouble) { firestarter::log::trace() << "Checking useDouble " << useDouble; - if (!useDouble){ + if (!useDouble) { return 0; } @@ -79,11 +71,11 @@ static int get_precision(int device_index, int useDouble) { // Choose a platform based on specific criteria (e.g., device type) sycl::platform chosenPlatform; auto nr_gpus = 0; - for (const auto &platform : platforms) { + for (const auto& platform : platforms) { firestarter::log::trace() << "Checking SYCL platform " << platform.get_info(); auto devices = 
platform.get_devices(); nr_gpus = 0; - for (const auto &device : devices) { + for (const auto& device : devices) { firestarter::log::trace() << "Checking SYCL device " << device.get_info(); if (device.is_gpu()) { // Choose GPU, you can use other criteria firestarter::log::trace() << " ... is GPU"; @@ -99,13 +91,11 @@ static int get_precision(int device_index, int useDouble) { } // Get a list of devices for the chosen platform - firestarter::log::trace() << "Get support for double" - << " on device nr. " - << device_index; + << " on device nr. " << device_index; auto devices = chosenPlatform.get_devices(); if (devices[device_index].has(sycl::aspect::fp64)) - supports_double=1; + supports_double = 1; return supports_double; } @@ -123,19 +113,14 @@ static int round_up(int num_to_round, int multiple) { return num_to_round + multiple - remainder; } - // GPU index. Used to pin this thread to the GPU. template -static void create_load(std::condition_variable &waitForInitCv, - std::mutex &waitForInitCvMutex, int device_index, - std::atomic &initCount, - volatile unsigned long long *loadVar, int matrixSize) { - static_assert( - std::is_same::value || std::is_same::value, - "create_load: Template argument T must be either float or double"); +static void create_load(std::condition_variable& waitForInitCv, std::mutex& waitForInitCvMutex, int device_index, + std::atomic& initCount, volatile unsigned long long* loadVar, int matrixSize) { + static_assert(std::is_same::value || std::is_same::value, + "create_load: Template argument T must be either float or double"); - firestarter::log::trace() << "Starting OneAPI with given matrix size " - << matrixSize; + firestarter::log::trace() << "Starting OneAPI with given matrix size " << matrixSize; size_t size_use = 0; if (matrixSize > 0) { @@ -158,14 +143,14 @@ static void create_load(std::condition_variable &waitForInitCv, // Choose a platform based on specific criteria (e.g., device type) sycl::platform chosenPlatform; auto nr_gpus = 
0; - for (const auto &platform : platforms) { + for (const auto& platform : platforms) { auto devices = platform.get_devices(); nr_gpus = 0; - for (const auto &device : devices) { - if (device.is_gpu()) { // Choose GPU, you can use other criteria - chosenPlatform = platform; - nr_gpus++; - } + for (const auto& device : devices) { + if (device.is_gpu()) { // Choose GPU, you can use other criteria + chosenPlatform = platform; + nr_gpus++; + } } } @@ -174,24 +159,20 @@ static void create_load(std::condition_variable &waitForInitCv, return; } - // Get a list of devices for the chosen platform + // Get a list of devices for the chosen platform auto devices = chosenPlatform.get_devices(); - - firestarter::log::trace() << "Creating SYCL queue for computation on device nr. " - << device_index; + firestarter::log::trace() << "Creating SYCL queue for computation on device nr. " << device_index; auto chosenDevice = devices[device_index]; sycl::queue device_queue(chosenDevice); firestarter::log::trace() << "Get memory size on device nr. " << device_index; - // getting information about the GPU memory size_t memory_total = devices[device_index].get_info(); - firestarter::log::trace() << "Get Memory info on device nr. " - << device_index - <<": has " << memory_total << " B global memory"; + firestarter::log::trace() << "Get Memory info on device nr. " << device_index << ": has " << memory_total + << " B global memory"; // check if the user has not set a matrix OR has set a too big matrixsite and // if this is true: set a good matrixsize @@ -201,27 +182,21 @@ static void create_load(std::condition_variable &waitForInitCv, } firestarter::log::trace() << "Set OneAPI matrix size in B: " << size_use; - use_bytes =sizeof(T) * size_use * size_use * 3; - - + use_bytes = sizeof(T) * size_use * size_use * 3; /* Allocate A/B/C matrices */ - firestarter::log::trace() - << "Allocating memory on device nr. " - << device_index; + firestarter::log::trace() << "Allocating memory on device nr. 
" << device_index; auto A = malloc_device(size_use * size_use, device_queue); auto B = malloc_device(size_use * size_use, device_queue); auto C = malloc_device(size_use * size_use, device_queue); /* Create 64 MB random data on Host */ - constexpr int rd_size = 1024*1024*64; + constexpr int rd_size = 1024 * 1024 * 64; auto random_data = malloc_host(rd_size, device_queue); generate_random_data(rd_size, random_data); - firestarter::log::trace() - << "Copy memory to device nr. " - << device_index; + firestarter::log::trace() << "Copy memory to device nr. " << device_index; /* fill A and B with random data */ replicate_data(device_queue, A, size_use * size_use, random_data, rd_size); replicate_data(device_queue, B, size_use * size_use, random_data, rd_size); @@ -230,14 +205,13 @@ static void create_load(std::condition_variable &waitForInitCv, std::lock_guard lk(waitForInitCvMutex); #define TO_MB(x) (unsigned long)(x / 1024 / 1024) - firestarter::log::info() - << " GPU " << device_index << "\n" - << " name: " << devices[device_index].get_info() << "\n" - << " memory: " << TO_MB(memory_total) << " MiB total (using " << TO_MB(use_bytes) - << " MiB)\n" - << " matrix size: " << size_use << "\n" - << " used precision: " - << ((sizeof(T) == sizeof(double)) ? "double" : "single"); + firestarter::log::info() << " GPU " << device_index << "\n" + << " name: " << devices[device_index].get_info() + << "\n" + << " memory: " << TO_MB(memory_total) << " MiB total (using " + << TO_MB(use_bytes) << " MiB)\n" + << " matrix size: " << size_use << "\n" + << " used precision: " << ((sizeof(T) == sizeof(double)) ? "double" : "single"); #undef TO_MB initCount++; @@ -246,26 +220,24 @@ static void create_load(std::condition_variable &waitForInitCv, firestarter::log::trace() << "Run gemm on device nr. 
" << device_index; /* With this, we could run multiple gemms ...*/ -/* auto run_gemms = [=, &device_queue](int runs) -> double { - using namespace oneapi::mkl; - for (int i = 0; i < runs; i++) - - return runs; - }; -*/ + /* auto run_gemms = [=, &device_queue](int runs) -> double { + using namespace oneapi::mkl; + for (int i = 0; i < runs; i++) + + return runs; + }; + */ while (*loadVar != LOAD_STOP) { - firestarter::log::trace() << "Run gemm on device nr. " << device_index; - oneapi::mkl::blas::gemm(device_queue, oneapi::mkl::transpose::N, oneapi::mkl::transpose::N, size_use, size_use, size_use, 1, A, size_use, B, size_use, 0, C, size_use); - firestarter::log::trace() << "wait gemm on device nr. " << device_index; + firestarter::log::trace() << "Run gemm on device nr. " << device_index; + oneapi::mkl::blas::gemm(device_queue, oneapi::mkl::transpose::N, oneapi::mkl::transpose::N, size_use, size_use, + size_use, 1, A, size_use, B, size_use, 0, C, size_use); + firestarter::log::trace() << "wait gemm on device nr. 
" << device_index; device_queue.wait_and_throw(); } - } -OneAPI::OneAPI(volatile unsigned long long *loadVar, bool useFloat, bool useDouble, - unsigned matrixSize, int gpus) { - std::thread t(OneAPI::initGpus, std::ref(_waitForInitCv), loadVar, useFloat, - useDouble, matrixSize, gpus); +OneAPI::OneAPI(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) { + std::thread t(OneAPI::initGpus, std::ref(_waitForInitCv), loadVar, useFloat, useDouble, matrixSize, gpus); _initThread = std::move(t); std::unique_lock lk(_waitForInitCvMutex); @@ -273,9 +245,8 @@ OneAPI::OneAPI(volatile unsigned long long *loadVar, bool useFloat, bool useDoub _waitForInitCv.wait(lk); } -void OneAPI::initGpus(std::condition_variable &cv, - volatile unsigned long long *loadVar, bool useFloat, - bool useDouble, unsigned matrixSize, int gpus) { +void OneAPI::initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble, + unsigned matrixSize, int gpus) { std::condition_variable waitForInitCv; std::mutex waitForInitCvMutex; @@ -291,14 +262,14 @@ void OneAPI::initGpus(std::condition_variable &cv, // Choose a platform based on specific criteria (e.g., device type) sycl::platform chosenPlatform; auto devCount = 0; - for (const auto &platform : platforms) { + for (const auto& platform : platforms) { auto devices = platform.get_devices(); devCount = 0; - for (const auto &device : devices) { - if (device.is_gpu()) { // Choose GPU, you can use other criteria - chosenPlatform = platform; - devCount++; - } + for (const auto& device : devices) { + if (device.is_gpu()) { // Choose GPU, you can use other criteria + chosenPlatform = platform; + devCount++; + } } } @@ -327,9 +298,8 @@ void OneAPI::initGpus(std::condition_variable &cv, } if (gpus > devCount) { firestarter::log::warn() << "You requested more OneAPI devices than available."; - firestarter::log::warn() - << "FIRESTARTER will use " << devCount << " of the 
requested " - << gpus << " OneAPI device(s)"; + firestarter::log::warn() << "FIRESTARTER will use " << devCount << " of the requested " << gpus + << " OneAPI device(s)"; gpus = devCount; } @@ -340,21 +310,19 @@ void OneAPI::initGpus(std::condition_variable &cv, // if there's a GPU in the system without Double Precision support, we // have to correct this. int precision = get_precision(i, use_double); - if (precision == -1){ + if (precision == -1) { firestarter::log::warn() << "This should not have happened. Could not get precision via SYCL."; } if (precision) { firestarter::log::trace() << "Starting OneAPI GPU double workload."; - std::thread t(create_load, std::ref(waitForInitCv), - std::ref(waitForInitCvMutex), i, std::ref(initCount), - loadVar, (int)matrixSize); + std::thread t(create_load, std::ref(waitForInitCv), std::ref(waitForInitCvMutex), i, + std::ref(initCount), loadVar, (int)matrixSize); gpuThreads.push_back(std::move(t)); } else { firestarter::log::trace() << "Starting OneAPI GPU float workload."; - std::thread t(create_load, std::ref(waitForInitCv), - std::ref(waitForInitCvMutex), i, std::ref(initCount), - loadVar, (int)matrixSize); + std::thread t(create_load, std::ref(waitForInitCv), std::ref(waitForInitCvMutex), i, + std::ref(initCount), loadVar, (int)matrixSize); gpuThreads.push_back(std::move(t)); } } @@ -370,19 +338,17 @@ void OneAPI::initGpus(std::condition_variable &cv, cv.notify_all(); /* join computation threads */ - for (auto &t : gpuThreads) { + for (auto& t : gpuThreads) { t.join(); } } else { - firestarter::log::info() - << " - No OneAPI devices. Just stressing CPU(s). Maybe use " - "FIRESTARTER instead of FIRESTARTER_OneAPI?"; + firestarter::log::info() << " - No OneAPI devices. Just stressing CPU(s). Maybe use " + "FIRESTARTER instead of FIRESTARTER_OneAPI?"; cv.notify_all(); } } else { - firestarter::log::info() - << " --gpus 0 is set. Just stressing CPU(s). 
Maybe use " - "FIRESTARTER instead of FIRESTARTER_OneAPI?"; + firestarter::log::info() << " --gpus 0 is set. Just stressing CPU(s). Maybe use " + "FIRESTARTER instead of FIRESTARTER_OneAPI?"; cv.notify_all(); } } diff --git a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp index 7c8a8146..972c0d0a 100644 --- a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp +++ b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp @@ -31,7 +31,10 @@ using namespace firestarter::optimizer::algorithm; NSGA2::NSGA2(unsigned gen, double cr, double m) - : Algorithm(), _gen(gen), _cr(cr), _m(m) { + : Algorithm() + , _gen(gen) + , _cr(cr) + , _m(m) { if (cr >= 1. || cr < 0.) { throw std::invalid_argument("The crossover probability must be in the " "[0,1[ range, while a value of " + @@ -44,14 +47,12 @@ NSGA2::NSGA2(unsigned gen, double cr, double m) } } -void NSGA2::checkPopulation(firestarter::optimizer::Population const &pop, - std::size_t populationSize) { - const auto &prob = pop.problem(); +void NSGA2::checkPopulation(firestarter::optimizer::Population const& pop, std::size_t populationSize) { + const auto& prob = pop.problem(); if (!prob.isMO()) { - throw std::invalid_argument( - "NSGA2 is a multiobjective algorithms, while number of objectives is " + - std::to_string(prob.getNobjs())); + throw std::invalid_argument("NSGA2 is a multiobjective algorithms, while number of objectives is " + + std::to_string(prob.getNobjs())); } if (populationSize < 5u || (populationSize % 4 != 0u)) { @@ -63,15 +64,13 @@ void NSGA2::checkPopulation(firestarter::optimizer::Population const &pop, } } -firestarter::optimizer::Population -NSGA2::evolve(firestarter::optimizer::Population &pop) { - const auto &prob = pop.problem(); +firestarter::optimizer::Population NSGA2::evolve(firestarter::optimizer::Population& pop) { + const auto& prob = pop.problem(); const auto bounds = prob.getBounds(); auto NP = pop.size(); auto fevals0 = prob.getFevals(); - 
this->checkPopulation( - const_cast(pop), NP); + this->checkPopulation(const_cast(pop), NP); std::random_device rd; std::mt19937 rng(rd()); @@ -117,15 +116,11 @@ NSGA2::evolve(firestarter::optimizer::Population &pop) { // We compute crowding distance and non dominated rank for the current // population auto fnds_res = util::fast_non_dominated_sorting(pop.f()); - auto ndf = - std::get<0>(fnds_res); // non dominated fronts [[0,3,2],[1,5,6],[4],...] - std::vector pop_cd( - NP); // crowding distances of the whole population - auto ndr = - std::get<3>(fnds_res); // non domination rank [0,1,0,0,2,1,1, ... ] - for (const auto &front_idxs : ndf) { - if (front_idxs.size() == - 1u) { // handles the case where the front has collapsed to one point + auto ndf = std::get<0>(fnds_res); // non dominated fronts [[0,3,2],[1,5,6],[4],...] + std::vector pop_cd(NP); // crowding distances of the whole population + auto ndr = std::get<3>(fnds_res); // non domination rank [0,1,0,0,2,1,1, ... ] + for (const auto& front_idxs : ndf) { + if (front_idxs.size() == 1u) { // handles the case where the front has collapsed to one point pop_cd[front_idxs[0]] = std::numeric_limits::infinity(); } else if (front_idxs.size() == 2u) { // handles the case where the front // has collapsed to one point @@ -147,12 +142,9 @@ NSGA2::evolve(firestarter::optimizer::Population &pop) { // of parents that will each create 2 new offspring for (decltype(NP) i = 0u; i < NP; i += 4) { // We create two offsprings using the shuffled list 1 - parent1_idx = util::mo_tournament_selection(shuffle1[i], shuffle1[i + 1], - ndr, pop_cd, rng); - parent2_idx = util::mo_tournament_selection( - shuffle1[i + 2], shuffle1[i + 3], ndr, pop_cd, rng); - children = util::sbx_crossover(pop.x()[parent1_idx], pop.x()[parent2_idx], - _cr, rng); + parent1_idx = util::mo_tournament_selection(shuffle1[i], shuffle1[i + 1], ndr, pop_cd, rng); + parent2_idx = util::mo_tournament_selection(shuffle1[i + 2], shuffle1[i + 3], ndr, pop_cd, rng); + 
children = util::sbx_crossover(pop.x()[parent1_idx], pop.x()[parent2_idx], _cr, rng); util::polynomial_mutation(children.first, bounds, _m, rng); util::polynomial_mutation(children.second, bounds, _m, rng); @@ -160,12 +152,9 @@ NSGA2::evolve(firestarter::optimizer::Population &pop) { popnew.append(children.second); // We repeat with the shuffled list 2 - parent1_idx = util::mo_tournament_selection(shuffle2[i], shuffle2[i + 1], - ndr, pop_cd, rng); - parent2_idx = util::mo_tournament_selection( - shuffle2[i + 2], shuffle2[i + 3], ndr, pop_cd, rng); - children = util::sbx_crossover(pop.x()[parent1_idx], pop.x()[parent2_idx], - _cr, rng); + parent1_idx = util::mo_tournament_selection(shuffle2[i], shuffle2[i + 1], ndr, pop_cd, rng); + parent2_idx = util::mo_tournament_selection(shuffle2[i + 2], shuffle2[i + 3], ndr, pop_cd, rng); + children = util::sbx_crossover(pop.x()[parent1_idx], pop.x()[parent2_idx], _cr, rng); util::polynomial_mutation(children.first, bounds, _m, rng); util::polynomial_mutation(children.second, bounds, _m, rng); diff --git a/src/firestarter/Optimizer/OptimizerWorker.cpp b/src/firestarter/Optimizer/OptimizerWorker.cpp index 48819fd5..7cb98cce 100644 --- a/src/firestarter/Optimizer/OptimizerWorker.cpp +++ b/src/firestarter/Optimizer/OptimizerWorker.cpp @@ -25,18 +25,16 @@ using namespace firestarter::optimizer; -OptimizerWorker::OptimizerWorker( - std::unique_ptr &&algorithm, - firestarter::optimizer::Population &population, - std::string const &optimizationAlgorithm, unsigned individuals, - std::chrono::seconds const &preheat) - : _algorithm(std::move(algorithm)), _population(population), - _optimizationAlgorithm(optimizationAlgorithm), _individuals(individuals), - _preheat(preheat) { - pthread_create( - &this->workerThread, NULL, - reinterpret_cast(OptimizerWorker::optimizerThread), - this); +OptimizerWorker::OptimizerWorker(std::unique_ptr&& algorithm, + firestarter::optimizer::Population& population, + std::string const& optimizationAlgorithm, 
unsigned individuals, + std::chrono::seconds const& preheat) + : _algorithm(std::move(algorithm)) + , _population(population) + , _optimizationAlgorithm(optimizationAlgorithm) + , _individuals(individuals) + , _preheat(preheat) { + pthread_create(&this->workerThread, NULL, reinterpret_cast(OptimizerWorker::optimizerThread), this); } void OptimizerWorker::kill() { @@ -49,10 +47,10 @@ void OptimizerWorker::join() { pthread_join(this->workerThread, NULL); } -void *OptimizerWorker::optimizerThread(void *optimizerWorker) { +void* OptimizerWorker::optimizerThread(void* optimizerWorker) { pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - auto _this = reinterpret_cast(optimizerWorker); + auto _this = reinterpret_cast(optimizerWorker); #ifndef __APPLE__ pthread_setname_np(pthread_self(), "Optimizer"); diff --git a/src/firestarter/Optimizer/Population.cpp b/src/firestarter/Optimizer/Population.cpp index 7d3a7e1a..d7915bd7 100644 --- a/src/firestarter/Optimizer/Population.cpp +++ b/src/firestarter/Optimizer/Population.cpp @@ -29,8 +29,7 @@ using namespace firestarter::optimizer; void Population::generateInitialPopulation(std::size_t populationSize) { - firestarter::log::trace() << "Generating " << populationSize - << " random individuals for initial population."; + firestarter::log::trace() << "Generating " << populationSize << " random individuals for initial population."; auto dims = this->problem().getDims(); auto remaining = populationSize; @@ -44,10 +43,8 @@ void Population::generateInitialPopulation(std::size_t populationSize) { remaining -= dims; } else { - firestarter::log::trace() - << "Population size (" << std::to_string(populationSize) - << ") is less than size of problem dimension (" << std::to_string(dims) - << ")"; + firestarter::log::trace() << "Population size (" << std::to_string(populationSize) + << ") is less than size of problem dimension (" << std::to_string(dims) << ")"; } for (decltype(remaining) i = 0; i < remaining; i++) { @@ -57,7 +54,7 @@ 
void Population::generateInitialPopulation(std::size_t populationSize) { std::size_t Population::size() const { return _x.size(); } -void Population::append(Individual const &ind) { +void Population::append(Individual const& ind) { assert(this->problem().getDims() == ind.size()); std::map metrics; @@ -79,10 +76,10 @@ void Population::append(Individual const &ind) { } } -void Population::append(Individual const &ind, std::vector const &fit) { +void Population::append(Individual const& ind, std::vector const& fit) { std::stringstream ss; ss << " - Fitness: "; - for (auto const &v : fit) { + for (auto const& v : fit) { ss << v << " "; } firestarter::log::trace() << ss.str(); @@ -94,8 +91,7 @@ void Population::append(Individual const &ind, std::vector const &fit) { this->_f.push_back(fit); } -void Population::insert(std::size_t idx, Individual const &ind, - std::vector const &fit) { +void Population::insert(std::size_t idx, Individual const& ind, std::vector const& fit) { // assert that population is big enough assert(_x.size() > idx); @@ -117,8 +113,7 @@ Individual Population::getRandomIndividual() { out[i] = std::uniform_int_distribution(lb, ub)(this->gen); - firestarter::log::trace() - << " - " << i << ": [" << lb << "," << ub << "]: " << out[i]; + firestarter::log::trace() << " - " << i << ": [" << lb << "," << ub << "]: " << out[i]; } return out; @@ -134,8 +129,7 @@ std::optional Population::bestIndividual() const { // assert that we have individuals assert(this->_x.size() > 0); - auto best = std::max_element(this->_x.begin(), this->_x.end(), - [](auto a, auto b) { return a < b; }); + auto best = std::max_element(this->_x.begin(), this->_x.end(), [](auto a, auto b) { return a < b; }); assert(best != this->_x.end()); diff --git a/src/firestarter/Optimizer/Util/MultiObjective.cpp b/src/firestarter/Optimizer/Util/MultiObjective.cpp index 2c87ba2f..df24effa 100644 --- a/src/firestarter/Optimizer/Util/MultiObjective.cpp +++ 
b/src/firestarter/Optimizer/Util/MultiObjective.cpp @@ -81,13 +81,11 @@ bool greater_than_f(double a, double b) { * @throws std::invalid_argument if the dimensions of the two objectives are * different */ -bool pareto_dominance(const std::vector &obj1, - const std::vector &obj2) { +bool pareto_dominance(const std::vector& obj1, const std::vector& obj2) { if (obj1.size() != obj2.size()) { throw std::invalid_argument( - "Different number of objectives found in input fitnesses: " + - std::to_string(obj1.size()) + " and " + std::to_string(obj2.size()) + - ". I cannot define dominance"); + "Different number of objectives found in input fitnesses: " + std::to_string(obj1.size()) + " and " + + std::to_string(obj2.size()) + ". I cannot define dominance"); } bool found_strictly_dominating_dimension = false; for (decltype(obj1.size()) i = 0u; i < obj1.size(); ++i) { @@ -130,16 +128,14 @@ bool pareto_dominance(const std::vector &obj1, * * @throws std::invalid_argument If the size of \p points is not at least 2 */ -std::tuple>, - std::vector>, std::vector, +std::tuple>, std::vector>, std::vector, std::vector> -fast_non_dominated_sorting(const std::vector> &points) { +fast_non_dominated_sorting(const std::vector>& points) { auto N = points.size(); // We make sure to have two points at least (one could also be allowed) if (N < 2u) { - throw std::invalid_argument( - "At least two points are needed for fast_non_dominated_sorting: " + - std::to_string(N) + " detected."); + throw std::invalid_argument("At least two points are needed for fast_non_dominated_sorting: " + std::to_string(N) + + " detected."); } // Initialize the return values std::vector> non_dom_fronts(1u); @@ -174,8 +170,7 @@ fast_non_dominated_sorting(const std::vector> &points) { while (current_front.size() != 0u) { std::vector next_front; for (decltype(current_front.size()) p = 0u; p < current_front.size(); ++p) { - for (decltype(dom_list[current_front[p]].size()) q = 0u; - q < dom_list[current_front[p]].size(); 
++q) { + for (decltype(dom_list[current_front[p]].size()) q = 0u; q < dom_list[current_front[p]].size(); ++q) { --dom_count_copy[dom_list[current_front[p]][q]]; if (dom_count_copy[dom_list[current_front[p]][q]] == 0u) { non_dom_rank[dom_list[current_front[p]][q]] = front_counter + 1u; @@ -189,8 +184,7 @@ fast_non_dominated_sorting(const std::vector> &points) { non_dom_fronts.push_back(current_front); } } - return std::make_tuple(std::move(non_dom_fronts), std::move(dom_list), - std::move(dom_count), std::move(non_dom_rank)); + return std::make_tuple(std::move(non_dom_fronts), std::move(dom_list), std::move(dom_count), std::move(non_dom_rank)); } /// Crowding distance @@ -218,14 +212,12 @@ fast_non_dominated_sorting(const std::vector> &points) { * @throws std::invalid_argument If points in \p non_dom_front do not all have * the same dimensionality */ -std::vector -crowding_distance(const std::vector> &non_dom_front) { +std::vector crowding_distance(const std::vector>& non_dom_front) { auto N = non_dom_front.size(); // We make sure to have two points at least if (N < 2u) { - throw std::invalid_argument( - "A non dominated front must contain at least two points: " + - std::to_string(N) + " detected."); + throw std::invalid_argument("A non dominated front must contain at least two points: " + std::to_string(N) + + " detected."); } auto M = non_dom_front[0].size(); // We make sure the first point of the input non dominated front contains at @@ -236,9 +228,8 @@ crowding_distance(const std::vector> &non_dom_front) { std::to_string(M) + " detected."); } // We make sure all points contain the same number of objectives - if (!std::all_of( - non_dom_front.begin(), non_dom_front.end(), - [M](const std::vector &item) { return item.size() == M; })) { + if (!std::all_of(non_dom_front.begin(), non_dom_front.end(), + [M](const std::vector& item) { return item.size() == M; })) { throw std::invalid_argument("A non dominated front must contain points of " "uniform dimensionality. 
Some " "different sizes were instead detected."); @@ -247,19 +238,14 @@ crowding_distance(const std::vector> &non_dom_front) { std::iota(indexes.begin(), indexes.end(), std::size_t(0u)); std::vector retval(N, 0.); for (decltype(M) i = 0u; i < M; ++i) { - std::sort(indexes.begin(), indexes.end(), - [i, &non_dom_front](std::size_t idx1, std::size_t idx2) { - return less_than_f(non_dom_front[idx1][i], - non_dom_front[idx2][i]); - }); + std::sort(indexes.begin(), indexes.end(), [i, &non_dom_front](std::size_t idx1, std::size_t idx2) { + return less_than_f(non_dom_front[idx1][i], non_dom_front[idx2][i]); + }); retval[indexes[0]] = std::numeric_limits::infinity(); retval[indexes[N - 1u]] = std::numeric_limits::infinity(); - double df = - non_dom_front[indexes[N - 1u]][i] - non_dom_front[indexes[0]][i]; + double df = non_dom_front[indexes[N - 1u]][i] - non_dom_front[indexes[0]][i]; for (decltype(N - 2u) j = 1u; j < N - 1u; ++j) { - retval[indexes[j]] += (non_dom_front[indexes[j + 1u]][i] - - non_dom_front[indexes[j - 1u]][i]) / - df; + retval[indexes[j]] += (non_dom_front[indexes[j + 1u]][i] - non_dom_front[indexes[j - 1u]][i]) / df; } } return retval; @@ -267,10 +253,10 @@ crowding_distance(const std::vector> &non_dom_front) { // Multi-objective tournament selection. Requires all sizes to be consistent. // Does not check if input is well formed. 
-std::vector::size_type mo_tournament_selection( - std::vector::size_type idx1, std::vector::size_type idx2, - const std::vector::size_type> &non_domination_rank, - const std::vector &crowding_d, std::mt19937 &mt) { +std::vector::size_type +mo_tournament_selection(std::vector::size_type idx1, std::vector::size_type idx2, + const std::vector::size_type>& non_domination_rank, + const std::vector& crowding_d, std::mt19937& mt) { if (non_domination_rank[idx1] < non_domination_rank[idx2]) return idx1; if (non_domination_rank[idx1] > non_domination_rank[idx2]) @@ -288,11 +274,9 @@ std::vector::size_type mo_tournament_selection( // otherwise Requires dimensions of the parent and bounds to be equal -> out of // bound reads. nix is the integer dimension (integer alleles assumed at the end // of the chromosome) -std::pair -sbx_crossover(const firestarter::optimizer::Individual &parent1, - const firestarter::optimizer::Individual &parent2, - const double p_cr, std::mt19937 &mt) { +std::pair +sbx_crossover(const firestarter::optimizer::Individual& parent1, const firestarter::optimizer::Individual& parent2, + const double p_cr, std::mt19937& mt) { // Decision vector dimensions auto nix = parent1.size(); firestarter::optimizer::Individual::size_type site1, site2; @@ -304,14 +288,11 @@ sbx_crossover(const firestarter::optimizer::Individual &parent1, 1.); // to generate a number in [0, 1) // This implements a Simulated Binary Crossover SBX - if (drng(mt) < - p_cr) { // No crossever at all will happen with probability p_cr + if (drng(mt) < p_cr) { // No crossever at all will happen with probability p_cr // This implements two-points crossover and applies it to the integer part // of the chromosome. 
if (nix > 0u) { - std::uniform_int_distribution< - firestarter::optimizer::Individual::size_type> - ra_num(0, nix - 1u); + std::uniform_int_distribution ra_num(0, nix - 1u); site1 = ra_num(mt); site2 = ra_num(mt); if (site1 > site2) { @@ -328,10 +309,9 @@ sbx_crossover(const firestarter::optimizer::Individual &parent1, // Performs polynomial mutation. Requires all sizes to be consistent. Does not // check if input is well formed. p_m is the mutation probability -void polynomial_mutation( - firestarter::optimizer::Individual &child, - const std::vector> &bounds, const double p_m, - std::mt19937 &mt) { +void polynomial_mutation(firestarter::optimizer::Individual& child, + const std::vector>& bounds, const double p_m, + std::mt19937& mt) { // Decision vector dimensions auto nix = child.size(); // Random distributions @@ -343,9 +323,7 @@ void polynomial_mutation( // We need to draw a random integer in [lb, ub]. auto lb = std::get<0>(bounds[j]); auto ub = std::get<1>(bounds[j]); - std::uniform_int_distribution< - firestarter::optimizer::Individual::size_type> - dist(lb, ub); + std::uniform_int_distribution dist(lb, ub); auto mutated = dist(mt); child[j] = mutated; } @@ -384,9 +362,7 @@ void polynomial_mutation( * @throws unspecified all exceptions thrown by * pagmo::fast_non_dominated_sorting and pagmo::crowding_distance */ -std::vector -select_best_N_mo(const std::vector> &input_f, - std::size_t N) { +std::vector select_best_N_mo(const std::vector>& input_f, std::size_t N) { if (N == 0u) { // corner case return {}; } @@ -406,7 +382,7 @@ select_best_N_mo(const std::vector> &input_f, // Run fast-non-dominated sorting auto tuple = fast_non_dominated_sorting(input_f); // Insert all non dominated fronts if not more than N - for (const auto &front : std::get<0>(tuple)) { + for (const auto& front : std::get<0>(tuple)) { if (retval.size() + front.size() <= N) { for (auto i : front) { retval.push_back(i); @@ -430,10 +406,9 @@ select_best_N_mo(const std::vector> &input_f, // 
crowding std::vector idxs(front.size()); std::iota(idxs.begin(), idxs.end(), std::size_t(0u)); - std::sort(idxs.begin(), idxs.end(), - [&cds](std::size_t idx1, std::size_t idx2) { - return greater_than_f(cds[idx1], cds[idx2]); - }); // Descending order1 + std::sort(idxs.begin(), idxs.end(), [&cds](std::size_t idx1, std::size_t idx2) { + return greater_than_f(cds[idx1], cds[idx2]); + }); // Descending order1 auto remaining = N - retval.size(); for (decltype(remaining) i = 0u; i < remaining; ++i) { retval.push_back(front[idxs[i]]); @@ -458,7 +433,7 @@ select_best_N_mo(const std::vector> &input_f, * @throws std::invalid_argument if the input objective vectors are not all of * the same size */ -std::vector ideal(const std::vector> &points) { +std::vector ideal(const std::vector>& points) { // Corner case if (points.size() == 0u) { return {}; @@ -466,7 +441,7 @@ std::vector ideal(const std::vector> &points) { // Sanity checks auto M = points[0].size(); - for (const auto &f : points) { + for (const auto& f : points) { if (f.size() != M) { throw std::invalid_argument("Input vector of objectives must contain " "fitness vector of equal dimension " + @@ -476,11 +451,10 @@ std::vector ideal(const std::vector> &points) { // Actual algorithm std::vector retval(M); for (decltype(M) i = 0u; i < M; ++i) { - retval[i] = (*std::min_element( - points.begin(), points.end(), - [i](const std::vector &f1, const std::vector &f2) { - return util::greater_than_f(f1[i], f2[i]); - }))[i]; + retval[i] = (*std::min_element(points.begin(), points.end(), + [i](const std::vector& f1, const std::vector& f2) { + return util::greater_than_f(f1[i], f2[i]); + }))[i]; } return retval; } diff --git a/src/firestarter/WatchdogWorker.cpp b/src/firestarter/WatchdogWorker.cpp index 6a3f6b95..f5091fa2 100644 --- a/src/firestarter/WatchdogWorker.cpp +++ b/src/firestarter/WatchdogWorker.cpp @@ -30,8 +30,7 @@ using namespace firestarter; -int Firestarter::watchdogWorker(std::chrono::microseconds period, - 
std::chrono::microseconds load, +int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::microseconds load, std::chrono::seconds timeout) { using clock = std::chrono::high_resolution_clock; @@ -58,14 +57,13 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, // get the time already advanced in the current timeslice // this can happen if a load function does not terminates just on time - nsec advance = std::chrono::duration_cast(currentTime - startTime) % - std::chrono::duration_cast(period); + nsec advance = + std::chrono::duration_cast(currentTime - startTime) % std::chrono::duration_cast(period); // subtract the advaned time from our timeslice by spilting it based on // the load level nsec load_reduction = - (std::chrono::duration_cast(load).count() * advance) / - std::chrono::duration_cast(period).count(); + (std::chrono::duration_cast(load).count() * advance) / std::chrono::duration_cast(period).count(); nsec idle_reduction = advance - load_reduction; // signal high load level @@ -79,14 +77,12 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, VT_USER_START("WD_HIGH"); #endif #ifdef ENABLE_SCOREP - SCOREP_USER_REGION_BY_NAME_BEGIN("WD_HIGH", - SCOREP_USER_REGION_TYPE_COMMON); + SCOREP_USER_REGION_BY_NAME_BEGIN("WD_HIGH", SCOREP_USER_REGION_TYPE_COMMON); #endif { std::unique_lock lk(this->_watchdogTerminateMutex); // abort waiting if we get the interrupt signal - this->_watchdogTerminateAlert.wait_for( - lk, load_nsec, [this]() { return this->_watchdog_terminate; }); + this->_watchdogTerminateAlert.wait_for(lk, load_nsec, [this]() { return this->_watchdog_terminate; }); // terminate on interrupt if (this->_watchdog_terminate) { return EXIT_SUCCESS; @@ -110,14 +106,12 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, VT_USER_START("WD_LOW"); #endif #ifdef ENABLE_SCOREP - SCOREP_USER_REGION_BY_NAME_BEGIN("WD_LOW", - SCOREP_USER_REGION_TYPE_COMMON); + 
SCOREP_USER_REGION_BY_NAME_BEGIN("WD_LOW", SCOREP_USER_REGION_TYPE_COMMON); #endif { std::unique_lock lk(this->_watchdogTerminateMutex); // abort waiting if we get the interrupt signal - this->_watchdogTerminateAlert.wait_for( - lk, idle_nsec, [this]() { return this->_watchdog_terminate; }); + this->_watchdogTerminateAlert.wait_for(lk, idle_nsec, [this]() { return this->_watchdog_terminate; }); // terminate on interrupt if (this->_watchdog_terminate) { return EXIT_SUCCESS; @@ -136,8 +130,7 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, // exit when termination signal is received or timeout is reached { std::lock_guard lk(this->_watchdogTerminateMutex); - if (this->_watchdog_terminate || - (timeout > sec::zero() && (time > timeout))) { + if (this->_watchdog_terminate || (timeout > sec::zero() && (time > timeout))) { this->setLoad(LOAD_STOP); return EXIT_SUCCESS; @@ -152,8 +145,7 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, { std::unique_lock lk(Firestarter::_watchdogTerminateMutex); // abort waiting if we get the interrupt signal - Firestarter::_watchdogTerminateAlert.wait_for( - lk, timeout, []() { return Firestarter::_watchdog_terminate; }); + Firestarter::_watchdogTerminateAlert.wait_for(lk, timeout, []() { return Firestarter::_watchdog_terminate; }); } this->setLoad(LOAD_STOP); From f4fd0eca8351e7dd5d493723e2b226b765ec3294 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Thu, 19 Sep 2024 18:34:33 +0200 Subject: [PATCH 004/167] ignore clang-format in blame --- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..d1806bac --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1 @@ +9732bdb59717274f666e9c1497289d1f9a0d7858 From de5ffd6b42e7c8fc28aecee204161568ab65dc49 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Thu, 19 Sep 2024 18:37:50 +0200 Subject: [PATCH 005/167] 
add clang-format ci job --- .github/workflows/clang-format.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/workflows/clang-format.yml diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml new file mode 100644 index 00000000..ef004c50 --- /dev/null +++ b/.github/workflows/clang-format.yml @@ -0,0 +1,19 @@ +name: clang-format-review + +# You can be more specific, but it currently only works on pull requests +on: [push, pull_request] + +jobs: + clang-format: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Install clang-tidy + run: | + sudo apt-get update + sudo apt-get install -y clang-tidy + - name: Analyze + run: | + clang-format --dry-run --Werror -style=file $(find ./src/ -name '*.cpp' -print) + clang-format --dry-run --Werror -style=file $(find ./include/ -name '*.hpp' -print) + clang-format --dry-run --Werror -style=file $(find ./include/ -name '*.h' -print) From 7473e5fdaad0fd30538246f7cc0ce94e99d57a0a Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Thu, 19 Sep 2024 18:38:25 +0200 Subject: [PATCH 006/167] add .cache folder to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index c4fde123..e157a461 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ result* *.swp *.swo build*/ +.cache/ From c513d311a45c0464c871677e143bdf3026c82122 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Thu, 19 Sep 2024 19:23:40 +0200 Subject: [PATCH 007/167] add .clang-tidy --- .clang-tidy | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 .clang-tidy diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 00000000..2c3f3334 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,45 @@ +--- +# Configure clang-tidy for this project. + +# -bugprone-narrowing-conversions: too many false positives around +# `std::size_t` vs. `*::difference_type`. 
+ +Checks: > + -*, + boost-*, + bugprone-*, + cert-*, + clang-analyzer-*, + concurrency-*, + cppcoreguidelines-*, + google-*, + misc-*, + modernize-*, + performance-*, + portability-*, + readability-*, + -bugprone-narrowing-conversions, + -cppcoreguidelines-special-member-functions + +# Turn all the warnings from the checks above into errors. +WarningsAsErrors: "*" + +HeaderFilterRegex: "(include/).*\\.(h|hpp)$" + +CheckOptions: + - { key: readability-identifier-naming.NamespaceCase, value: lower_cases } + - { key: readability-identifier-naming.ClassCase, value: CamelCase } + - { key: readability-identifier-naming.StructCase, value: CamelCase } + - { key: readability-identifier-naming.FunctionCase, value: camelBack } + - { key: readability-identifier-naming.MemberCase, value: CamelCase } + - { key: readability-identifier-naming.VariableCase, value: CamelCase } + - { key: readability-identifier-naming.EnumCase, value: CamelCase } + - { key: readability-identifier-naming.ParameterCase, value: CamelCase } + - { key: readability-identifier-naming.UnionCase, value: CamelCase } + - { key: readability-identifier-naming.IgnoreMainLikeFunctions, value: 1 } + - { key: readability-redundant-member-init.IgnoreBaseInCopyConstructors, value: 1 } + - { key: modernize-use-default-member-init.UseAssignment, value: 1 } + - { key: readability-implicit-bool-conversion.AllowIntegerConditions, value: 1 } + - { key: readability-implicit-bool-conversion.AllowPointerConditions, value: 1 } + - { key: readability-function-cognitive-complexity.IgnoreMacros, value: 1 } + - { key: misc-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic, value: "true" } \ No newline at end of file From 29df30a2e2645f2a02b81d7a9f24e4019d04ca55 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 24 Sep 2024 14:03:08 +0200 Subject: [PATCH 008/167] disable clang-tidy extension that causes crash --- .clang-tidy | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff 
--git a/.clang-tidy b/.clang-tidy index 2c3f3334..c3a4c174 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -4,6 +4,8 @@ # -bugprone-narrowing-conversions: too many false positives around # `std::size_t` vs. `*::difference_type`. +# -boost-use-ranges: crash of clangd https://github.com/llvm/llvm-project/issues/109037 + Checks: > -*, boost-*, @@ -19,7 +21,8 @@ Checks: > portability-*, readability-*, -bugprone-narrowing-conversions, - -cppcoreguidelines-special-member-functions + -cppcoreguidelines-special-member-functions, + -boost-use-ranges # Turn all the warnings from the checks above into errors. WarningsAsErrors: "*" From f3960af25f5a4566b2f87282ab2ba1fc6ca81697 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 24 Sep 2024 14:12:05 +0200 Subject: [PATCH 009/167] add clang-tidy workflow --- .github/workflows/clang-tidy.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/clang-tidy.yml diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml new file mode 100644 index 00000000..562b5079 --- /dev/null +++ b/.github/workflows/clang-tidy.yml @@ -0,0 +1,21 @@ +name: clang-tidy-review + +on: [pull_request] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - uses: ZedThree/clang-tidy-review@v0.14.0 + id: review + with: + split_workflow: true + + - uses: ZedThree/clang-tidy-review/upload@v0.14.0 + id: upload-review + + - if: steps.review.outputs.total_comments > 0 + run: exit 1 From b79754ef0abc37ff92d04dafbea957e5e3933e2f Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Tue, 24 Sep 2024 22:08:24 +0200 Subject: [PATCH 010/167] first pass of clang-tidy for the header files --- include/firestarter/Cuda/Cuda.hpp | 17 +- include/firestarter/DumpRegisterStruct.hpp | 9 +- .../firestarter/DumpRegisterWorkerData.hpp | 29 +- .../firestarter/Environment/CPUTopology.hpp | 6 +- .../firestarter/Environment/Environment.hpp | 51 +- .../Environment/Payload/Payload.hpp | 
108 ++-- .../Environment/Platform/PlatformConfig.hpp | 92 ++-- .../Environment/Platform/RuntimeConfig.hpp | 105 ++-- .../Environment/X86/Payload/AVX512Payload.hpp | 22 +- .../Environment/X86/Payload/AVXPayload.hpp | 22 +- .../Environment/X86/Payload/FMA4Payload.hpp | 24 +- .../Environment/X86/Payload/FMAPayload.hpp | 22 +- .../Environment/X86/Payload/SSE2Payload.hpp | 23 +- .../Environment/X86/Payload/X86Payload.hpp | 53 +- .../Environment/X86/Payload/ZENFMAPayload.hpp | 23 +- .../Environment/X86/X86CPUTopology.hpp | 44 +- .../Environment/X86/X86Environment.hpp | 24 +- include/firestarter/ErrorDetectionStruct.hpp | 17 +- include/firestarter/Firestarter.hpp | 126 ++--- include/firestarter/Json/Summary.hpp | 8 +- include/firestarter/LoadWorkerData.hpp | 91 ++-- .../Logging/FirstWorkerThreadFilter.hpp | 18 +- .../Measurement/MeasurementWorker.hpp | 55 +- .../Measurement/Metric/IPCEstimate.h | 7 +- include/firestarter/Measurement/Metric/Perf.h | 7 +- include/firestarter/Measurement/Metric/RAPL.h | 5 +- .../firestarter/Measurement/MetricInterface.h | 62 ++- include/firestarter/Measurement/Summary.hpp | 12 +- include/firestarter/Measurement/TimeValue.hpp | 10 +- include/firestarter/OneAPI/OneAPI.hpp | 17 +- include/firestarter/Optimizer/Algorithm.hpp | 8 +- .../firestarter/Optimizer/Algorithm/NSGA2.hpp | 14 +- include/firestarter/Optimizer/History.hpp | 182 ++++--- .../firestarter/Optimizer/OptimizerWorker.hpp | 22 +- include/firestarter/Optimizer/Population.hpp | 59 ++- include/firestarter/Optimizer/Problem.hpp | 22 +- .../Optimizer/Problem/CLIArgumentProblem.hpp | 106 ++-- .../Optimizer/Util/MultiObjective.hpp | 35 +- src/firestarter/Cuda/Cuda.cpp | 6 +- src/firestarter/DumpRegisterWorker.cpp | 38 +- src/firestarter/Environment/CPUTopology.cpp | 54 +- src/firestarter/Environment/Environment.cpp | 146 +++--- .../Environment/Payload/Payload.cpp | 83 +-- .../Environment/X86/Payload/AVX512Payload.cpp | 85 ++-- .../Environment/X86/Payload/AVXPayload.cpp | 60 +-- 
.../Environment/X86/Payload/FMA4Payload.cpp | 38 +- .../Environment/X86/Payload/FMAPayload.cpp | 37 +- .../Environment/X86/Payload/SSE2Payload.cpp | 58 ++- .../Environment/X86/Payload/X86Payload.cpp | 473 +++++++++--------- .../Environment/X86/Payload/ZENFMAPayload.cpp | 37 +- .../Environment/X86/X86CPUTopology.cpp | 52 +- .../Environment/X86/X86Environment.cpp | 20 +- src/firestarter/Firestarter.cpp | 258 +++++----- src/firestarter/LoadWorker.cpp | 171 ++++--- .../Measurement/MeasurementWorker.cpp | 130 ++--- .../Measurement/Metric/IPCEstimate.cpp | 36 +- src/firestarter/Measurement/Metric/Perf.cpp | 64 +-- src/firestarter/Measurement/Metric/RAPL.cpp | 46 +- src/firestarter/Measurement/Summary.cpp | 68 +-- src/firestarter/OneAPI/OneAPI.cpp | 6 +- src/firestarter/Optimizer/Algorithm/NSGA2.cpp | 75 ++- src/firestarter/Optimizer/OptimizerWorker.cpp | 24 +- src/firestarter/Optimizer/Population.cpp | 24 +- .../Optimizer/Util/MultiObjective.cpp | 45 +- src/firestarter/WatchdogWorker.cpp | 20 +- 65 files changed, 1873 insertions(+), 1838 deletions(-) diff --git a/include/firestarter/Cuda/Cuda.hpp b/include/firestarter/Cuda/Cuda.hpp index d7911eb4..1c6f234e 100644 --- a/include/firestarter/Cuda/Cuda.hpp +++ b/include/firestarter/Cuda/Cuda.hpp @@ -24,25 +24,24 @@ #include #include #include -#include namespace firestarter::cuda { class Cuda { private: - std::thread _initThread; - std::condition_variable _waitForInitCv; - std::mutex _waitForInitCvMutex; + std::thread InitThread; + std::condition_variable WaitForInitCv; + std::mutex WaitForInitCvMutex; - static void initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble, - unsigned matrixSize, int gpus); + static void initGpus(std::condition_variable& Cv, volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble, + unsigned MatrixSize, int Gpus); public: - Cuda(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus); + Cuda(volatile 
uint64_t* LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus); ~Cuda() { - if (_initThread.joinable()) { - _initThread.join(); + if (InitThread.joinable()) { + InitThread.join(); } } }; diff --git a/include/firestarter/DumpRegisterStruct.hpp b/include/firestarter/DumpRegisterStruct.hpp index 7e80c111..d5f162d3 100644 --- a/include/firestarter/DumpRegisterStruct.hpp +++ b/include/firestarter/DumpRegisterStruct.hpp @@ -21,20 +21,21 @@ #pragma once +#include namespace firestarter { /* DO NOT CHANGE! the asm load-loop tests if it should dump the current register * content */ -enum DumpVariable : unsigned long long { Start = 0, Wait = 1 }; +enum DumpVariable : uint64_t { Start = 0, Wait = 1 }; #define REGISTER_MAX_NUM 32 struct DumpRegisterStruct { // REGISTER_MAX_NUM cachelines - volatile double registerValues[REGISTER_MAX_NUM * 8]; + volatile double RegisterValues[REGISTER_MAX_NUM * 8]; // pad to use a whole cacheline - volatile unsigned long long padding[7]; - volatile DumpVariable dumpVar; + volatile uint64_t Padding[7]; + volatile DumpVariable DumpVar; }; #undef REGISTER_MAX_NUM diff --git a/include/firestarter/DumpRegisterWorkerData.hpp b/include/firestarter/DumpRegisterWorkerData.hpp index 14ccc95f..a05863d5 100644 --- a/include/firestarter/DumpRegisterWorkerData.hpp +++ b/include/firestarter/DumpRegisterWorkerData.hpp @@ -24,6 +24,7 @@ #include #include #include +#include #ifdef FIRESTARTER_DEBUG_FEATURES @@ -31,28 +32,28 @@ namespace firestarter { class DumpRegisterWorkerData { public: - DumpRegisterWorkerData(std::shared_ptr loadWorkerData, std::chrono::seconds dumpTimeDelta, - std::string dumpFilePath) - : loadWorkerData(loadWorkerData) - , dumpTimeDelta(dumpTimeDelta) { - if (dumpFilePath.empty()) { - char cwd[PATH_MAX]; - if (getcwd(cwd, sizeof(cwd)) != NULL) { - this->dumpFilePath = cwd; + DumpRegisterWorkerData(std::shared_ptr LoadWorkerDataPtr, std::chrono::seconds DumpTimeDelta, + const std::string& DumpFilePath) + : 
LoadWorkerDataPtr(std::move(LoadWorkerDataPtr)) + , DumpTimeDelta(DumpTimeDelta) { + if (DumpFilePath.empty()) { + char Cwd[PATH_MAX]; + if (getcwd(Cwd, sizeof(Cwd)) != nullptr) { + this->DumpFilePath = Cwd; } else { log::error() << "getcwd() failed. Set --dump-registers-outpath to /tmp"; - this->dumpFilePath = "/tmp"; + this->DumpFilePath = "/tmp"; } } else { - this->dumpFilePath = dumpFilePath; + this->DumpFilePath = DumpFilePath; } } - ~DumpRegisterWorkerData() {} + ~DumpRegisterWorkerData() = default; - std::shared_ptr loadWorkerData; - const std::chrono::seconds dumpTimeDelta; - std::string dumpFilePath; + std::shared_ptr LoadWorkerDataPtr; + const std::chrono::seconds DumpTimeDelta; + std::string DumpFilePath; }; } // namespace firestarter diff --git a/include/firestarter/Environment/CPUTopology.hpp b/include/firestarter/Environment/CPUTopology.hpp index af749d78..ba10df3c 100644 --- a/include/firestarter/Environment/CPUTopology.hpp +++ b/include/firestarter/Environment/CPUTopology.hpp @@ -52,12 +52,12 @@ class CPUTopology { unsigned instructionCacheSize() const { return _instructionCacheSize; } // return the cpu clockrate in Hz - virtual unsigned long long clockrate() const { return _clockrate; } + virtual uint64_t clockrate() const { return _clockrate; } // return the cpu features virtual std::list const& features() const = 0; // get a timestamp - virtual unsigned long long timestamp() const = 0; + virtual uint64_t timestamp() const = 0; int getPkgIdFromPU(unsigned pu) const; int getCoreIdFromPU(unsigned pu) const; @@ -76,7 +76,7 @@ class CPUTopology { std::string _vendor = ""; std::string _processorName = ""; unsigned _instructionCacheSize = 0; - unsigned long long _clockrate = 0; + uint64_t _clockrate = 0; hwloc_topology_t topology; }; diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp index 24722dc3..5f204ccc 100644 --- a/include/firestarter/Environment/Environment.hpp +++ 
b/include/firestarter/Environment/Environment.hpp @@ -22,6 +22,7 @@ #pragma once #include +#include #include #include #include @@ -31,61 +32,61 @@ namespace firestarter::environment { class Environment { public: - Environment(CPUTopology* topology) - : _topology(topology) {} - ~Environment() { - delete this->_topology; - if (_selectedConfig != nullptr) { - delete _selectedConfig; - } + Environment() = delete; + explicit Environment(CPUTopology* Topology) + : Topology(Topology) {} + virtual ~Environment() { + delete this->Topology; + + delete SelectedConfig; } - int evaluateCpuAffinity(unsigned requestedNumThreads, std::string cpuBind); - int setCpuAffinity(unsigned thread); + auto evaluateCpuAffinity(unsigned RequestedNumThreads, std::string CpuBind) -> int; + auto setCpuAffinity(unsigned Thread) -> int; void printThreadSummary(); virtual void evaluateFunctions() = 0; - virtual int selectFunction(unsigned functionId, bool allowUnavailablePayload) = 0; - virtual int selectInstructionGroups(std::string groups) = 0; + virtual auto selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) -> int = 0; + virtual auto selectInstructionGroups(std::string Groups) -> int = 0; virtual void printAvailableInstructionGroups() = 0; - virtual void setLineCount(unsigned lineCount) = 0; + virtual void setLineCount(unsigned LineCount) = 0; virtual void printSelectedCodePathSummary() = 0; virtual void printFunctionSummary() = 0; - platform::RuntimeConfig& selectedConfig() const { + [[nodiscard]] auto selectedConfig() const -> platform::RuntimeConfig& { #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-value" #endif #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-value" - assert(("No RuntimeConfig selected", _selectedConfig != nullptr)); + assert(("No RuntimeConfig selected", SelectedConfig != nullptr)); #pragma GCC diagnostic pop #if defined(__clang__) #pragma clang diagnostic pop #endif - return 
*_selectedConfig; + return *SelectedConfig; } - unsigned long long requestedNumThreads() const { return _requestedNumThreads; } + [[nodiscard]] auto requestedNumThreads() const -> uint64_t { return RequestedNumThreads; } - CPUTopology const& topology() const { - assert(_topology != nullptr); - return *_topology; + [[nodiscard]] auto topology() const -> CPUTopology const& { + assert(Topology != nullptr); + return *Topology; } protected: - platform::RuntimeConfig* _selectedConfig = nullptr; - CPUTopology* _topology = nullptr; + platform::RuntimeConfig* SelectedConfig = nullptr; + CPUTopology* Topology = nullptr; private: - unsigned long long _requestedNumThreads; + uint64_t RequestedNumThreads = 0; // TODO: replace these functions with the builtins one from hwloc - int cpuAllowed(unsigned id); - int cpuSet(unsigned id); + auto cpuAllowed(unsigned Id) -> int; + auto cpuSet(unsigned Id) -> int; - std::vector cpuBind; + std::vector CpuBind; }; } // namespace firestarter::environment diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp index f16d6879..3871400f 100644 --- a/include/firestarter/Environment/Payload/Payload.hpp +++ b/include/firestarter/Environment/Payload/Payload.hpp @@ -21,81 +21,87 @@ #pragma once -#include #include #include +#include #include namespace firestarter::environment::payload { class Payload { private: - std::string _name; - unsigned getSequenceStartCount(const std::vector& sequence, const std::string start); + std::string Name; + [[nodiscard]] static auto getSequenceStartCount(const std::vector& Sequence, const std::string& Start) + -> unsigned; protected: - unsigned _flops; - unsigned _bytes; + unsigned Flops = 0; + unsigned Bytes = 0; // number of instructions in load loop - unsigned _instructions; + unsigned Instructions = 0; // size of used simd registers in bytes - unsigned _registerSize; + unsigned RegisterSize = 0; // number of used simd registers - unsigned 
_registerCount; + unsigned RegisterCount = 0; - std::vector generateSequence(const std::vector>& proportion); - unsigned getL2SequenceCount(const std::vector& sequence) { - return getSequenceStartCount(sequence, "L2"); + [[nodiscard]] static auto generateSequence(const std::vector>& Proportion) + -> std::vector; + [[nodiscard]] static auto getL2SequenceCount(const std::vector& Sequence) -> unsigned { + return getSequenceStartCount(Sequence, "L2"); }; - unsigned getL3SequenceCount(const std::vector& sequence) { - return getSequenceStartCount(sequence, "L3"); + [[nodiscard]] static auto getL3SequenceCount(const std::vector& Sequence) -> unsigned { + return getSequenceStartCount(Sequence, "L3"); }; - unsigned getRAMSequenceCount(const std::vector& sequence) { - return getSequenceStartCount(sequence, "RAM"); + [[nodiscard]] static auto getRAMSequenceCount(const std::vector& Sequence) -> unsigned { + return getSequenceStartCount(Sequence, "RAM"); }; - unsigned getNumberOfSequenceRepetitions(const std::vector& sequence, const unsigned numberOfLines) { - if (sequence.size() == 0) { + [[nodiscard]] static auto getNumberOfSequenceRepetitions(const std::vector& Sequence, + const unsigned NumberOfLines) -> unsigned { + if (Sequence.size() == 0) { return 0; } - return numberOfLines / sequence.size(); + return NumberOfLines / Sequence.size(); }; - unsigned getL2LoopCount(const std::vector& sequence, const unsigned numberOfLines, const unsigned size, - const unsigned threads); - unsigned getL3LoopCount(const std::vector& sequence, const unsigned numberOfLines, const unsigned size, - const unsigned threads); - unsigned getRAMLoopCount(const std::vector& sequence, const unsigned numberOfLines, const unsigned size, - const unsigned threads); + [[nodiscard]] static auto getL2LoopCount(const std::vector& Sequence, unsigned NumberOfLines, + unsigned Size, unsigned Threads) -> unsigned; + [[nodiscard]] static auto getL3LoopCount(const std::vector& Sequence, unsigned NumberOfLines, + 
unsigned Size, unsigned Threads) -> unsigned; + [[nodiscard]] static auto getRAMLoopCount(const std::vector& Sequence, unsigned NumberOfLines, + unsigned Size, unsigned Threads) -> unsigned; public: - Payload(std::string name, unsigned registerSize, unsigned registerCount) - : _name(name) - , _registerSize(registerSize) - , _registerCount(registerCount) {} - virtual ~Payload() {} - - const std::string& name() const { return _name; } - unsigned flops() const { return _flops; } - unsigned bytes() const { return _bytes; } - unsigned instructions() const { return _instructions; } - unsigned registerSize() const { return _registerSize; } - unsigned registerCount() const { return _registerCount; } - - virtual bool isAvailable() const = 0; - - virtual void lowLoadFunction(volatile unsigned long long* addrHigh, unsigned long long period) = 0; - - virtual int compilePayload(std::vector> const& proportion, - unsigned instructionCacheSize, std::list const& dataCacheBufferSize, - unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) = 0; - virtual std::list getAvailableInstructions() const = 0; - virtual void init(unsigned long long* memoryAddr, unsigned long long bufferSize) = 0; - virtual unsigned long long highLoadFunction(unsigned long long* addrMem, volatile unsigned long long* addrHigh, - unsigned long long iterations) = 0; - - virtual Payload* clone() const = 0; + Payload() = delete; + + Payload(std::string Name, unsigned RegisterSize, unsigned RegisterCount) + : Name(std::move(Name)) + , RegisterSize(RegisterSize) + , RegisterCount(RegisterCount) {} + virtual ~Payload() = default; + + [[nodiscard]] auto name() const -> const std::string& { return Name; } + [[nodiscard]] auto flops() const -> unsigned { return Flops; } + [[nodiscard]] auto bytes() const -> unsigned { return Bytes; } + [[nodiscard]] auto instructions() const -> unsigned { return Instructions; } + [[nodiscard]] auto registerSize() const -> unsigned { 
return RegisterSize; } + [[nodiscard]] auto registerCount() const -> unsigned { return RegisterCount; } + + [[nodiscard]] virtual auto isAvailable() const -> bool = 0; + + virtual void lowLoadFunction(volatile uint64_t* AddrHigh, uint64_t Period) = 0; + + [[nodiscard]] virtual auto compilePayload(std::vector> const& Proportion, + unsigned InstructionCacheSize, + std::list const& DataCacheBufferSize, unsigned RamBufferSize, + unsigned Thread, unsigned NumberOfLines, bool DumpRegisters, + bool ErrorDetection) -> int = 0; + [[nodiscard]] virtual auto getAvailableInstructions() const -> std::list = 0; + virtual void init(uint64_t* MemoryAddr, uint64_t BufferSize) = 0; + [[nodiscard]] virtual auto highLoadFunction(uint64_t* AddrMem, volatile uint64_t* AddrHigh, uint64_t Iterations) + -> uint64_t = 0; + + [[nodiscard]] virtual auto clone() const -> Payload* = 0; }; } // namespace firestarter::environment::payload diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp index b396d134..5588cb8a 100644 --- a/include/firestarter/Environment/Platform/PlatformConfig.hpp +++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp @@ -21,79 +21,81 @@ #pragma once -#include #include #include #include #include #include #include +#include namespace firestarter::environment::platform { class PlatformConfig { private: - std::string _name; - std::list _threads; - payload::Payload* _payload; + std::string Name; + std::list Threads; + payload::Payload* Payload; protected: - unsigned _instructionCacheSize; - std::list _dataCacheBufferSize; - unsigned _ramBufferSize; - unsigned _lines; + unsigned InstructionCacheSize; + std::list DataCacheBufferSize; + unsigned RamBufferSize; + unsigned Lines; public: - PlatformConfig(std::string name, std::list threads, unsigned instructionCacheSize, - std::initializer_list dataCacheBufferSize, unsigned ramBufferSize, unsigned lines, - payload::Payload* payload) - : 
_name(name) - , _threads(threads) - , _payload(payload) - , _instructionCacheSize(instructionCacheSize) - , _dataCacheBufferSize(dataCacheBufferSize) - , _ramBufferSize(ramBufferSize) - , _lines(lines) {} - virtual ~PlatformConfig() { delete _payload; } - - const std::string& name() const { return _name; } - unsigned instructionCacheSize() const { return _instructionCacheSize; } - const std::list& dataCacheBufferSize() const { return _dataCacheBufferSize; } - unsigned ramBufferSize() const { return _ramBufferSize; } - unsigned lines() const { return _lines; } - payload::Payload const& payload() const { return *_payload; } - - std::map getThreadMap() const { - std::map threadMap; - - for (auto const& thread : _threads) { - std::stringstream functionName; - functionName << "FUNC_" << name() << "_" << payload().name() << "_" << thread << "T"; - threadMap[thread] = functionName.str(); + PlatformConfig() = delete; + + PlatformConfig(std::string Name, std::list Threads, unsigned InstructionCacheSize, + std::initializer_list DataCacheBufferSize, unsigned RamBufferSize, unsigned Lines, + payload::Payload* Payload) + : Name(std::move(Name)) + , Threads(std::move(Threads)) + , Payload(Payload) + , InstructionCacheSize(InstructionCacheSize) + , DataCacheBufferSize(DataCacheBufferSize) + , RamBufferSize(RamBufferSize) + , Lines(Lines) {} + virtual ~PlatformConfig() { delete Payload; } + + [[nodiscard]] auto name() const -> const std::string& { return Name; } + [[nodiscard]] auto instructionCacheSize() const -> unsigned { return InstructionCacheSize; } + [[nodiscard]] auto dataCacheBufferSize() const -> const std::list& { return DataCacheBufferSize; } + [[nodiscard]] auto ramBufferSize() const -> unsigned { return RamBufferSize; } + [[nodiscard]] auto lines() const -> unsigned { return Lines; } + [[nodiscard]] auto payload() const -> payload::Payload const& { return *Payload; } + + [[nodiscard]] auto getThreadMap() const -> std::map { + std::map ThreadMap; + + for (auto const& 
Thread : Threads) { + std::stringstream FunctionName; + FunctionName << "FUNC_" << name() << "_" << payload().name() << "_" << Thread << "T"; + ThreadMap[Thread] = FunctionName.str(); } - return threadMap; + return ThreadMap; } - bool isAvailable() const { return payload().isAvailable(); } + [[nodiscard]] auto isAvailable() const -> bool { return payload().isAvailable(); } - virtual bool isDefault() const = 0; + [[nodiscard]] virtual auto isDefault() const -> bool = 0; - virtual std::vector> getDefaultPayloadSettings() const = 0; + [[nodiscard]] virtual auto getDefaultPayloadSettings() const -> std::vector> = 0; - std::string getDefaultPayloadSettingsString() const { - std::stringstream ss; + [[nodiscard]] auto getDefaultPayloadSettingsString() const -> std::string { + std::stringstream Ss; for (auto const& [name, value] : this->getDefaultPayloadSettings()) { - ss << name << ":" << value << ","; + Ss << name << ":" << value << ","; } - auto str = ss.str(); - if (str.size() > 0) { - str.pop_back(); + auto Str = Ss.str(); + if (Str.size() > 0) { + Str.pop_back(); } - return str; + return Str; } }; diff --git a/include/firestarter/Environment/Platform/RuntimeConfig.hpp b/include/firestarter/Environment/Platform/RuntimeConfig.hpp index 86946877..3d1d1786 100644 --- a/include/firestarter/Environment/Platform/RuntimeConfig.hpp +++ b/include/firestarter/Environment/Platform/RuntimeConfig.hpp @@ -28,44 +28,44 @@ namespace firestarter::environment::platform { class RuntimeConfig { private: - PlatformConfig const& _platformConfig; - std::unique_ptr _payload; - unsigned _thread; - std::vector> _payloadSettings; - unsigned _instructionCacheSize; - std::list _dataCacheBufferSize; - unsigned _ramBufferSize; - unsigned _lines; + PlatformConfig const& PlatformConfigValue; + std::unique_ptr Payload; + unsigned Thread; + std::vector> PayloadSettings; + unsigned InstructionCacheSize; + std::list DataCacheBufferSize; + unsigned RamBufferSize; + unsigned Lines; public: - 
RuntimeConfig(PlatformConfig const& platformConfig, unsigned thread, unsigned detectedInstructionCacheSize) - : _platformConfig(platformConfig) - , _payload(nullptr) - , _thread(thread) - , _payloadSettings(platformConfig.getDefaultPayloadSettings()) - , _instructionCacheSize(platformConfig.instructionCacheSize()) - , _dataCacheBufferSize(platformConfig.dataCacheBufferSize()) - , _ramBufferSize(platformConfig.ramBufferSize()) - , _lines(platformConfig.lines()) { - if (detectedInstructionCacheSize != 0) { - this->_instructionCacheSize = detectedInstructionCacheSize; + RuntimeConfig(PlatformConfig const& PlatformConfigValue, unsigned Thread, unsigned DetectedInstructionCacheSize) + : PlatformConfigValue(PlatformConfigValue) + , Payload(nullptr) + , Thread(Thread) + , PayloadSettings(PlatformConfigValue.getDefaultPayloadSettings()) + , InstructionCacheSize(PlatformConfigValue.instructionCacheSize()) + , DataCacheBufferSize(PlatformConfigValue.dataCacheBufferSize()) + , RamBufferSize(PlatformConfigValue.ramBufferSize()) + , Lines(PlatformConfigValue.lines()) { + if (DetectedInstructionCacheSize != 0) { + this->InstructionCacheSize = DetectedInstructionCacheSize; } }; - RuntimeConfig(const RuntimeConfig& c) - : _platformConfig(c.platformConfig()) - , _payload(c.platformConfig().payload().clone()) - , _thread(c.thread()) - , _payloadSettings(c.payloadSettings()) - , _instructionCacheSize(c.instructionCacheSize()) - , _dataCacheBufferSize(c.dataCacheBufferSize()) - , _ramBufferSize(c.ramBufferSize()) - , _lines(c.lines()) {} + RuntimeConfig(const RuntimeConfig& Other) + : PlatformConfigValue(Other.platformConfig()) + , Payload(Other.platformConfig().payload().clone()) + , Thread(Other.thread()) + , PayloadSettings(Other.payloadSettings()) + , InstructionCacheSize(Other.instructionCacheSize()) + , DataCacheBufferSize(Other.dataCacheBufferSize()) + , RamBufferSize(Other.ramBufferSize()) + , Lines(Other.lines()) {} - ~RuntimeConfig() { _payload.reset(); } + ~RuntimeConfig() 
{ Payload.reset(); } - PlatformConfig const& platformConfig() const { return _platformConfig; } - payload::Payload& payload() const { + [[nodiscard]] auto platformConfig() const -> PlatformConfig const& { return PlatformConfigValue; } + [[nodiscard]] auto payload() const -> payload::Payload& { #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-value" @@ -74,33 +74,36 @@ class RuntimeConfig { #pragma GCC diagnostic ignored "-Wunused-value" assert(("Payload pointer is null. Each thread has to use it's own " "RuntimeConfig", - _payload != nullptr)); + Payload != nullptr)); #pragma GCC diagnostic pop #if defined(__clang__) #pragma clang diagnostic pop #endif - return *_payload; + return *Payload; } - unsigned thread() const { return _thread; } - const std::vector>& payloadSettings() const { return _payloadSettings; } - std::vector payloadItems() const { - std::vector items; - for (auto const& pair : _payloadSettings) { - items.push_back(pair.first); + [[nodiscard]] auto thread() const -> unsigned { return Thread; } + [[nodiscard]] auto payloadSettings() const -> const std::vector>& { + return PayloadSettings; + } + [[nodiscard]] auto payloadItems() const -> std::vector { + std::vector Items; + Items.reserve(PayloadSettings.size()); + for (auto const& Pair : PayloadSettings) { + Items.push_back(Pair.first); } - return items; + return Items; } - unsigned instructionCacheSize() const { return _instructionCacheSize; } - const std::list& dataCacheBufferSize() const { return _dataCacheBufferSize; } - unsigned ramBufferSize() const { return _ramBufferSize; } - unsigned lines() const { return _lines; } + [[nodiscard]] auto instructionCacheSize() const -> unsigned { return InstructionCacheSize; } + [[nodiscard]] auto dataCacheBufferSize() const -> const std::list& { return DataCacheBufferSize; } + [[nodiscard]] auto ramBufferSize() const -> unsigned { return RamBufferSize; } + [[nodiscard]] auto lines() const -> unsigned { return 
Lines; } - void setPayloadSettings(std::vector> const& payloadSettings) { - this->_payloadSettings = payloadSettings; + void setPayloadSettings(std::vector> const& PayloadSettings) { + this->PayloadSettings = PayloadSettings; } - void setLineCount(unsigned lineCount) { this->_lines = lineCount; } + void setLineCount(unsigned LineCount) { this->Lines = LineCount; } void printCodePathSummary() const { log::info() << "\n" @@ -112,10 +115,10 @@ class RuntimeConfig { log::info() << " - L1i-Cache: " << instructionCacheSize() / thread() << " Bytes"; } - unsigned i = 1; - for (auto const& bytes : dataCacheBufferSize()) { - log::info() << " - L" << i << "d-Cache: " << bytes / thread() << " Bytes"; - i++; + unsigned I = 1; + for (auto const& Bytes : dataCacheBufferSize()) { + log::info() << " - L" << I << "d-Cache: " << Bytes / thread() << " Bytes"; + I++; } log::info() << " - Memory: " << ramBufferSize() / thread() << " Bytes"; diff --git a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp index e5fa736f..317b8196 100644 --- a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp @@ -26,26 +26,28 @@ namespace firestarter::environment::x86::payload { class AVX512Payload final : public X86Payload { public: - AVX512Payload(asmjit::CpuFeatures const& supportedFeatures) - : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX512_F}, "AVX512", 8, 32) {} + AVX512Payload() = delete; - int compilePayload(std::vector> const& proportion, unsigned instructionCacheSize, - std::list const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread, - unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override; - std::list getAvailableInstructions() const override; - void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override; + explicit AVX512Payload(asmjit::CpuFeatures const& 
SupportedFeatures) + : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kAVX512_F}, "AVX512", 8, 32) {} - firestarter::environment::payload::Payload* clone() const override { + auto compilePayload(std::vector> const& Proportion, unsigned InstructionCacheSize, + std::list const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread, + unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override; + [[nodiscard]] auto getAvailableInstructions() const -> std::list override; + void init(uint64_t* MemoryAddr, uint64_t BufferSize) override; + + [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override { return new AVX512Payload(this->supportedFeatures()); }; private: - const std::map instructionFlops = { + const std::map InstructionFlops = { {"REG", 32}, {"L1_L", 32}, {"L1_BROADCAST", 16}, {"L1_S", 16}, {"L1_LS", 16}, {"L2_L", 32}, {"L2_S", 16}, {"L2_LS", 16}, {"L3_L", 32}, {"L3_S", 16}, {"L3_LS", 16}, {"L3_P", 16}, {"RAM_L", 32}, {"RAM_S", 16}, {"RAM_LS", 16}, {"RAM_P", 16}}; - const std::map instructionMemory = { + const std::map InstructionMemory = { {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}}; }; } // namespace firestarter::environment::x86::payload diff --git a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp index d0e7b381..6516c0de 100644 --- a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp +++ b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp @@ -26,25 +26,27 @@ namespace firestarter::environment::x86::payload { class AVXPayload final : public X86Payload { public: - AVXPayload(asmjit::CpuFeatures const& supportedFeatures) - : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX}, "AVX", 4, 16) {} + AVXPayload() = delete; - int compilePayload(std::vector> const& proportion, unsigned instructionCacheSize, - std::list const& dataCacheBufferSize, unsigned ramBufferSize, unsigned 
thread, - unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override; - std::list getAvailableInstructions() const override; - void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override; + explicit AVXPayload(asmjit::CpuFeatures const& SupportedFeatures) + : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kAVX}, "AVX", 4, 16) {} - firestarter::environment::payload::Payload* clone() const override { + auto compilePayload(std::vector> const& Proportion, unsigned InstructionCacheSize, + std::list const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread, + unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override; + [[nodiscard]] auto getAvailableInstructions() const -> std::list override; + void init(uint64_t* MemoryAddr, uint64_t BufferSize) override; + + [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override { return new AVXPayload(this->supportedFeatures()); }; private: - const std::map instructionFlops = { + const std::map InstructionFlops = { {"REG", 4}, {"L1_L", 4}, {"L1_S", 4}, {"L1_LS", 4}, {"L2_L", 4}, {"L2_S", 4}, {"L2_LS", 4}, {"L3_L", 4}, {"L3_S", 4}, {"L3_LS", 4}, {"L3_P", 4}, {"RAM_L", 4}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}}; - const std::map instructionMemory = { + const std::map InstructionMemory = { {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}}; }; } // namespace firestarter::environment::x86::payload diff --git a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp index 6a1d3ee5..bb623e68 100644 --- a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp @@ -27,26 +27,28 @@ namespace firestarter::environment::x86::payload { class FMA4Payload final : public X86Payload { public: - FMA4Payload(asmjit::CpuFeatures const& supportedFeatures) - : X86Payload(supportedFeatures, 
{asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4}, "FMA4", 4, + FMA4Payload() = delete; + + explicit FMA4Payload(asmjit::CpuFeatures const& SupportedFeatures) + : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA4}, "FMA4", 4, 16) {} - int compilePayload(std::vector> const& proportion, unsigned instructionCacheSize, - std::list const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread, - unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override; - std::list getAvailableInstructions() const override; - void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override; + auto compilePayload(std::vector> const& Proportion, unsigned InstructionCacheSize, + std::list const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread, + unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override; + [[nodiscard]] auto getAvailableInstructions() const -> std::list override; + void init(uint64_t* MemoryAddr, uint64_t BufferSize) override; - firestarter::environment::payload::Payload* clone() const override { + [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override { return new FMA4Payload(this->supportedFeatures()); }; private: - const std::map instructionFlops = { + const std::map InstructionFlops = { {"REG", 8}, {"L1_L", 12}, {"L1_S", 8}, {"L1_LS", 8}, {"L2_L", 8}, {"L2_S", 4}, {"L2_LS", 4}, {"L3_L", 8}, {"L3_S", 4}, {"L3_LS", 4}, {"L3_P", 4}, {"RAM_L", 8}, {"RAM_S", 4}, {"RAM_LS", 4}, {"RAM_P", 4}}; - const std::map instructionMemory = { + const std::map InstructionMemory = { {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}}; }; -} // namespace firestarter::environment::x86::payload +} // namespace firestarter::environment::x86::payload \ No newline at end of file diff --git a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp index 
da6c2b5a..b610a838 100644 --- a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp +++ b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp @@ -26,27 +26,29 @@ namespace firestarter::environment::x86::payload { class FMAPayload final : public X86Payload { public: - FMAPayload(asmjit::CpuFeatures const& supportedFeatures) - : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA}, "FMA", 4, 16) {} + FMAPayload() = delete; - int compilePayload(std::vector> const& proportion, unsigned instructionCacheSize, - std::list const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread, - unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override; - std::list getAvailableInstructions() const override; - void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override; + explicit FMAPayload(asmjit::CpuFeatures const& SupportedFeatures) + : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kAVX, asmjit::CpuFeatures::X86::kFMA}, "FMA", 4, 16) {} - firestarter::environment::payload::Payload* clone() const override { + auto compilePayload(std::vector> const& Proportion, unsigned InstructionCacheSize, + std::list const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread, + unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override; + [[nodiscard]] auto getAvailableInstructions() const -> std::list override; + void init(uint64_t* MemoryAddr, uint64_t BufferSize) override; + + [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override { return new FMAPayload(this->supportedFeatures()); }; private: - const std::map instructionFlops = { + const std::map InstructionFlops = { {"REG", 16}, {"L1_L", 16}, {"L1_2L", 16}, {"L1_S", 8}, {"L1_LS", 8}, {"L1_LS_256", 8}, {"L1_2LS_256", 16}, {"L2_L", 16}, {"L2_S", 8}, {"L2_LS", 8}, {"L2_LS_256", 8}, {"L2_2LS_256", 16}, {"L3_L", 16}, {"L3_S", 8}, {"L3_LS", 8}, {"L3_LS_256", 8}, {"L3_P", 
8}, {"RAM_L", 16}, {"RAM_S", 8}, {"RAM_LS", 8}, {"RAM_P", 8}}; - const std::map instructionMemory = { + const std::map InstructionMemory = { {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}}; }; } // namespace firestarter::environment::x86::payload diff --git a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp index d923c9b3..538837b4 100644 --- a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp @@ -21,30 +21,33 @@ #pragma once +#include #include namespace firestarter::environment::x86::payload { class SSE2Payload final : public X86Payload { public: - SSE2Payload(asmjit::CpuFeatures const& supportedFeatures) - : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::kSSE2}, "SSE2", 2, 16) {} + SSE2Payload() = delete; - int compilePayload(std::vector> const& proportion, unsigned instructionCacheSize, - std::list const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread, - unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override; - std::list getAvailableInstructions() const override; - void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override; + explicit SSE2Payload(asmjit::CpuFeatures const& SupportedFeatures) + : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::kSSE2}, "SSE2", 2, 16) {} - firestarter::environment::payload::Payload* clone() const override { + auto compilePayload(std::vector> const& Proportion, unsigned InstructionCacheSize, + std::list const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread, + unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override; + [[nodiscard]] auto getAvailableInstructions() const -> std::list override; + void init(uint64_t* MemoryAddr, uint64_t BufferSize) override; + + [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override { return new 
SSE2Payload(this->supportedFeatures()); }; private: - const std::map instructionFlops = { + const std::map InstructionFlops = { {"REG", 2}, {"L1_L", 2}, {"L1_S", 2}, {"L1_LS", 2}, {"L2_L", 2}, {"L2_S", 2}, {"L2_LS", 2}, {"L3_L", 2}, {"L3_S", 2}, {"L3_LS", 2}, {"L3_P", 2}, {"RAM_L", 2}, {"RAM_S", 2}, {"RAM_LS", 2}, {"RAM_P", 2}}; - const std::map instructionMemory = { + const std::map InstructionMemory = { {"RAM_L", 64}, {"RAM_S", 128}, {"RAM_LS", 128}, {"RAM_P", 64}}; }; } // namespace firestarter::environment::x86::payload diff --git a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp index 87d5e0be..9e947143 100644 --- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp @@ -23,10 +23,12 @@ #include +#include #include #include #include #include +#include #define INIT_BLOCKSIZE 1024 @@ -35,38 +37,38 @@ namespace firestarter::environment::x86::payload { class X86Payload : public environment::payload::Payload { private: // we can use this to check, if our platform support this payload - asmjit::CpuFeatures const& _supportedFeatures; - std::list featureRequests; + asmjit::CpuFeatures const& SupportedFeatures; + std::list FeatureRequests; protected: // asmjit::CodeHolder code; - asmjit::JitRuntime rt; + asmjit::JitRuntime Rt; // typedef int (*LoadFunction)(firestarter::ThreadData *); - typedef unsigned long long (*LoadFunction)(unsigned long long*, volatile unsigned long long*, unsigned long long); - LoadFunction loadFunction = nullptr; + using LoadFunctionType = uint64_t (*)(uint64_t*, volatile uint64_t*, uint64_t); + LoadFunctionType LoadFunction = nullptr; - asmjit::CpuFeatures const& supportedFeatures() const { return this->_supportedFeatures; } + [[nodiscard]] auto supportedFeatures() const -> asmjit::CpuFeatures const& { return this->SupportedFeatures; } - template - void emitErrorDetectionCode(asmjit::x86::Builder& cb, 
IterReg iter_reg, asmjit::x86::Gpq addrHigh_reg, - asmjit::x86::Gpq pointer_reg, asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2); + template + void emitErrorDetectionCode(asmjit::x86::Builder& Cb, IterRegT IterReg, asmjit::x86::Gpq AddrHighReg, + asmjit::x86::Gpq PointerReg, asmjit::x86::Gpq TempReg, asmjit::x86::Gpq TempReg2); public: - X86Payload(asmjit::CpuFeatures const& supportedFeatures, - std::initializer_list featureRequests, std::string name, - unsigned registerSize, unsigned registerCount) - : Payload(name, registerSize, registerCount) - , _supportedFeatures(supportedFeatures) - , featureRequests(featureRequests) {} - - bool isAvailable() const override { - bool available = true; - - for (auto const& feature : featureRequests) { - available &= this->_supportedFeatures.has(feature); + X86Payload(asmjit::CpuFeatures const& SupportedFeatures, + std::initializer_list FeatureRequests, std::string Name, + unsigned RegisterSize, unsigned RegisterCount) + : Payload(std::move(Name), RegisterSize, RegisterCount) + , SupportedFeatures(SupportedFeatures) + , FeatureRequests(FeatureRequests) {} + + [[nodiscard]] auto isAvailable() const -> bool override { + bool Available = true; + + for (auto const& Feature : FeatureRequests) { + Available &= this->SupportedFeatures.has(Feature); } - return available; + return Available; }; // A generic implemenation for all x86 payloads @@ -76,16 +78,15 @@ class X86Payload : public environment::payload::Payload { #endif #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Woverloaded-virtual" - void init(unsigned long long* memoryAddr, unsigned long long bufferSize, double firstValue, double lastValue); + void init(uint64_t* MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue); #pragma GCC diagnostic pop #if defined(__clang__) #pragma clang diagnostic pop #endif // use cpuid and usleep as low load - void lowLoadFunction(volatile unsigned long long* addrHigh, unsigned long long period) override; + void 
lowLoadFunction(volatile uint64_t* AddrHigh, uint64_t Period) override; - unsigned long long highLoadFunction(unsigned long long* addrMem, volatile unsigned long long* addrHigh, - unsigned long long iterations) override; + auto highLoadFunction(uint64_t* AddrMem, volatile uint64_t* AddrHigh, uint64_t Iterations) -> uint64_t override; }; } // namespace firestarter::environment::x86::payload diff --git a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp index 7254cb55..425dd600 100644 --- a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp +++ b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp @@ -21,29 +21,32 @@ #pragma once +#include #include namespace firestarter::environment::x86::payload { class ZENFMAPayload final : public X86Payload { public: - ZENFMAPayload(asmjit::CpuFeatures const& supportedFeatures) - : X86Payload(supportedFeatures, {asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA}, + ZENFMAPayload() = delete; + + explicit ZENFMAPayload(asmjit::CpuFeatures const& SupportedFeatures) + : X86Payload(SupportedFeatures, {asmjit::CpuFeatures::X86::Id::kAVX, asmjit::CpuFeatures::X86::Id::kFMA}, "ZENFMA", 4, 16) {} - int compilePayload(std::vector> const& proportion, unsigned instructionCacheSize, - std::list const& dataCacheBufferSize, unsigned ramBufferSize, unsigned thread, - unsigned numberOfLines, bool dumpRegisters, bool errorDetection) override; - std::list getAvailableInstructions() const override; - void init(unsigned long long* memoryAddr, unsigned long long bufferSize) override; + auto compilePayload(std::vector> const& Proportion, unsigned InstructionCacheSize, + std::list const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread, + unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int override; + [[nodiscard]] auto getAvailableInstructions() const -> std::list override; + void init(uint64_t* 
MemoryAddr, uint64_t BufferSize) override; - firestarter::environment::payload::Payload* clone() const override { + [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override { return new ZENFMAPayload(this->supportedFeatures()); }; private: - const std::map instructionFlops = { + const std::map InstructionFlops = { {"REG", 8}, {"L1_LS", 8}, {"L2_L", 8}, {"L3_L", 8}, {"RAM_L", 8}}; - const std::map instructionMemory = {{"RAM_L", 64}}; + const std::map InstructionMemory = {{"RAM_L", 64}}; }; } // namespace firestarter::environment::x86::payload diff --git a/include/firestarter/Environment/X86/X86CPUTopology.hpp b/include/firestarter/Environment/X86/X86CPUTopology.hpp index fa3b033f..d9ca6393 100644 --- a/include/firestarter/Environment/X86/X86CPUTopology.hpp +++ b/include/firestarter/Environment/X86/X86CPUTopology.hpp @@ -31,38 +31,38 @@ class X86CPUTopology final : public CPUTopology { public: X86CPUTopology(); - friend std::ostream& operator<<(std::ostream& stream, X86CPUTopology const& cpuTopology); + friend auto operator<<(std::ostream& Stream, X86CPUTopology const& CpuTopology) -> std::ostream&; - std::list const& features() const override { return this->featureList; } - const asmjit::CpuFeatures& featuresAsmjit() const { return this->cpuInfo.features(); } + [[nodiscard]] auto features() const -> std::list const& override { return this->FeatureList; } + [[nodiscard]] auto featuresAsmjit() const -> const asmjit::CpuFeatures& { return this->CpuInfo.features(); } - std::string const& vendor() const override { return this->_vendor; } - std::string const& model() const override { return this->_model; } + [[nodiscard]] auto vendor() const -> std::string const& override { return this->Vendor; } + [[nodiscard]] auto model() const -> std::string const& override { return this->Model; } - unsigned long long clockrate() const override; + [[nodiscard]] auto clockrate() const -> uint64_t override; - unsigned long long timestamp() const override; 
+ [[nodiscard]] auto timestamp() const -> uint64_t override; - unsigned familyId() const { return this->cpuInfo.familyId(); } - unsigned modelId() const { return this->cpuInfo.modelId(); } - unsigned stepping() const { return this->cpuInfo.stepping(); } + [[nodiscard]] auto familyId() const -> unsigned { return this->CpuInfo.familyId(); } + [[nodiscard]] auto modelId() const -> unsigned { return this->CpuInfo.modelId(); } + [[nodiscard]] auto stepping() const -> unsigned { return this->CpuInfo.stepping(); } private: - bool hasRdtsc() const { return this->_hasRdtsc; } - bool hasInvariantRdtsc() const { return this->_hasInvariantRdtsc; } - void cpuid(unsigned long long* a, unsigned long long* b, unsigned long long* c, unsigned long long* d) const; + [[nodiscard]] auto hasRdtsc() const -> bool { return this->HasRdtsc; } + [[nodiscard]] auto hasInvariantRdtsc() const -> bool { return this->HasInvariantRdtsc; } + void cpuid(uint64_t* A, uint64_t* B, uint64_t* C, uint64_t* D) const; - asmjit::CpuInfo cpuInfo; - std::list featureList; + asmjit::CpuInfo CpuInfo; + std::list FeatureList; - bool _hasRdtsc; - bool _hasInvariantRdtsc; - std::string _vendor; - std::string _model; + bool HasRdtsc; + bool HasInvariantRdtsc; + std::string Vendor; + std::string Model; }; -inline std::ostream& operator<<(std::ostream& stream, X86CPUTopology const& cpuTopology) { - return cpuTopology.print(stream); +inline auto operator<<(std::ostream& Stream, X86CPUTopology const& CpuTopology) -> std::ostream& { + return CpuTopology.print(Stream); } -} // namespace firestarter::environment::x86 +} // namespace firestarter::environment::x86 \ No newline at end of file diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp index b0e3aa8d..7873c9c4 100644 --- a/include/firestarter/Environment/X86/X86Environment.hpp +++ b/include/firestarter/Environment/X86/X86Environment.hpp @@ -54,21 +54,21 @@ class X86Environment final : public 
Environment { : Environment(new X86CPUTopology()) {} ~X86Environment() { - for (auto const& config : platformConfigs) { - delete config; + for (auto const& Config : PlatformConfigs) { + delete Config; } - for (auto const& config : fallbackPlatformConfigs) { - delete config; + for (auto const& Config : FallbackPlatformConfigs) { + delete Config; } } - X86CPUTopology const& topology() { return *reinterpret_cast(this->_topology); } + auto topology() -> X86CPUTopology const& { return *reinterpret_cast(this->Topology); } void evaluateFunctions() override; - int selectFunction(unsigned functionId, bool allowUnavailablePayload) override; - int selectInstructionGroups(std::string groups) override; + auto selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) -> int override; + auto selectInstructionGroups(std::string Groups) -> int override; void printAvailableInstructionGroups() override; - void setLineCount(unsigned lineCount) override; + void setLineCount(unsigned LineCount) override; void printSelectedCodePathSummary() override; void printFunctionSummary() override; @@ -77,16 +77,16 @@ class X86Environment final : public Environment { // of PlatformConfig. Add new PlatformConfig at the bottom to maintain // stable IDs. const std::list> - platformConfigsCtor = {REGISTER(KnightsLandingConfig), REGISTER(SkylakeConfig), REGISTER(SkylakeSPConfig), + PlatformConfigsCtor = {REGISTER(KnightsLandingConfig), REGISTER(SkylakeConfig), REGISTER(SkylakeSPConfig), REGISTER(HaswellConfig), REGISTER(HaswellEPConfig), REGISTER(SandyBridgeConfig), REGISTER(SandyBridgeEPConfig), REGISTER(NehalemConfig), REGISTER(NehalemEPConfig), REGISTER(BulldozerConfig), REGISTER(NaplesConfig), REGISTER(RomeConfig)}; - std::list platformConfigs; + std::list PlatformConfigs; // List of fallback PlatformConfig. Add one for each x86 extension. 
const std::list> - fallbackPlatformConfigsCtor = { + FallbackPlatformConfigsCtor = { REGISTER(SkylakeSPConfig), // AVX512 REGISTER(BulldozerConfig), // FMA4 REGISTER(HaswellConfig), // FMA @@ -94,7 +94,7 @@ class X86Environment final : public Environment { REGISTER(NehalemConfig) // SSE2 }; - std::list fallbackPlatformConfigs; + std::list FallbackPlatformConfigs; #undef REGISTER }; diff --git a/include/firestarter/ErrorDetectionStruct.hpp b/include/firestarter/ErrorDetectionStruct.hpp index 4ed2e9fa..598cc4ed 100644 --- a/include/firestarter/ErrorDetectionStruct.hpp +++ b/include/firestarter/ErrorDetectionStruct.hpp @@ -21,6 +21,7 @@ #pragma once +#include namespace firestarter { struct ErrorDetectionStruct { @@ -28,19 +29,19 @@ struct ErrorDetectionStruct { // one ptr (8B) // the pointer to 16B of communication - volatile unsigned long long* communicationLeft; - volatile unsigned long long localsLeft[4]; + volatile uint64_t* CommunicationLeft; + volatile uint64_t LocalsLeft[4]; // if this variable is not 0, an error occured in the comparison with the // left thread. - volatile unsigned long long errorLeft; - volatile unsigned long long paddingLeft[2]; + volatile uint64_t ErrorLeft; + volatile uint64_t PaddingLeft[2]; - volatile unsigned long long* communicationRight; - volatile unsigned long long localsRight[4]; + volatile uint64_t* CommunicationRight; + volatile uint64_t LocalsRight[4]; // if this variable is not 0, an error occured in the comparison with the // right thread. 
- volatile unsigned long long errorRight; - volatile unsigned long long paddingRight[2]; + volatile uint64_t ErrorRight; + volatile uint64_t PaddingRight[2]; }; } // namespace firestarter diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp index cb0218f0..8009c1c9 100644 --- a/include/firestarter/Firestarter.hpp +++ b/include/firestarter/Firestarter.hpp @@ -47,7 +47,6 @@ #include #include -#include #include #include #include @@ -63,57 +62,57 @@ namespace firestarter { class Firestarter { public: - Firestarter(const int argc, const char** argv, std::chrono::seconds const& timeout, unsigned loadPercent, - std::chrono::microseconds const& period, unsigned requestedNumThreads, std::string const& cpuBind, - bool printFunctionSummary, unsigned functionId, bool listInstructionGroups, - std::string const& instructionGroups, unsigned lineCount, bool allowUnavailablePayload, - bool dumpRegisters, std::chrono::seconds const& dumpRegistersTimeDelta, - std::string const& dumpRegistersOutpath, bool errorDetection, int gpus, unsigned gpuMatrixSize, - bool gpuUseFloat, bool gpuUseDouble, bool listMetrics, bool measurement, - std::chrono::milliseconds const& startDelta, std::chrono::milliseconds const& stopDelta, - std::chrono::milliseconds const& measurementInterval, std::vector const& metricPaths, - std::vector const& stdinMetrics, bool optimize, std::chrono::seconds const& preheat, - std::string const& optimizationAlgorithm, std::vector const& optimizationMetrics, - std::chrono::seconds const& evaluationDuration, unsigned individuals, std::string const& optimizeOutfile, - unsigned generations, double nsga2_cr, double nsga2_m); + Firestarter(int Argc, const char** Argv, std::chrono::seconds const& Timeout, unsigned LoadPercent, + std::chrono::microseconds const& Period, unsigned RequestedNumThreads, std::string const& CpuBind, + bool PrintFunctionSummary, unsigned FunctionId, bool ListInstructionGroups, + std::string const& InstructionGroups, 
unsigned LineCount, bool AllowUnavailablePayload, + bool DumpRegisters, std::chrono::seconds const& DumpRegistersTimeDelta, + std::string const& DumpRegistersOutpath, bool ErrorDetection, int Gpus, unsigned GpuMatrixSize, + bool GpuUseFloat, bool GpuUseDouble, bool ListMetrics, bool Measurement, + std::chrono::milliseconds const& StartDelta, std::chrono::milliseconds const& StopDelta, + std::chrono::milliseconds const& MeasurementInterval, std::vector const& MetricPaths, + std::vector const& StdinMetrics, bool Optimize, std::chrono::seconds const& Preheat, + std::string const& OptimizationAlgorithm, std::vector const& OptimizationMetrics, + std::chrono::seconds const& EvaluationDuration, unsigned Individuals, std::string const& OptimizeOutfile, + unsigned Generations, double Nsga2Cr, double Nsga2M); ~Firestarter(); void mainThread(); private: - const int _argc; - const char** _argv; - const std::chrono::seconds _timeout; - const unsigned _loadPercent; - std::chrono::microseconds _load; - std::chrono::microseconds _period; - const bool _dumpRegisters; - const std::chrono::seconds _dumpRegistersTimeDelta; - const std::string _dumpRegistersOutpath; - const bool _errorDetection; - const int _gpus; - const unsigned _gpuMatrixSize; - const bool _gpuUseFloat; - const bool _gpuUseDouble; - const std::chrono::milliseconds _startDelta; - const std::chrono::milliseconds _stopDelta; - const bool _measurement; - const bool _optimize; - const std::chrono::seconds _preheat; - const std::string _optimizationAlgorithm; - const std::vector _optimizationMetrics; - const std::chrono::seconds _evaluationDuration; - const unsigned _individuals; - const std::string _optimizeOutfile; - const unsigned _generations; - const double _nsga2_cr; - const double _nsga2_m; + const int Argc; + const char** Argv; + const std::chrono::seconds Timeout; + const unsigned LoadPercent; + std::chrono::microseconds Load; + std::chrono::microseconds Period; + const bool DumpRegisters; + const 
std::chrono::seconds DumpRegistersTimeDelta; + const std::string DumpRegistersOutpath; + const bool ErrorDetection; + const int Gpus; + const unsigned GpuMatrixSize; + const bool GpuUseFloat; + const bool GpuUseDouble; + const std::chrono::milliseconds StartDelta; + const std::chrono::milliseconds StopDelta; + const bool Measurement; + const bool Optimize; + const std::chrono::seconds Preheat; + const std::string OptimizationAlgorithm; + const std::vector OptimizationMetrics; + const std::chrono::seconds EvaluationDuration; + const unsigned Individuals; + const std::string OptimizeOutfile; + const unsigned Generations; + const double Nsga2Cr; + const double Nsga2M; #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) - environment::x86::X86Environment* _environment = nullptr; + environment::x86::X86Environment* Environment = nullptr; - environment::x86::X86Environment& environment() const { return *_environment; } + [[nodiscard]] auto environment() const -> environment::x86::X86Environment& { return *Environment; } #else #error "FIRESTARTER is not implemented for this ISA" #endif @@ -127,14 +126,14 @@ class Firestarter { #endif #if defined(linux) || defined(__linux__) - inline static std::unique_ptr _optimizer; - std::shared_ptr _measurementWorker; - std::unique_ptr _algorithm; - firestarter::optimizer::Population _population; + inline static std::unique_ptr Optimizer; + std::shared_ptr MeasurementWorker; + std::unique_ptr Algorithm; + firestarter::optimizer::Population Population; #endif // LoadThreadWorker.cpp - int initLoadWorkers(bool lowLoad, unsigned long long period); + auto initLoadWorkers(bool LowLoad, uint64_t Period) -> int; void joinLoadWorkers(); void printThreadErrorReport(); void printPerformanceReport(); @@ -142,42 +141,43 @@ class Firestarter { void signalWork() { signalLoadWorkers(THREAD_WORK); }; // WatchdogWorker.cpp - int watchdogWorker(std::chrono::microseconds period, std::chrono::microseconds load, 
std::chrono::seconds timeout); + auto watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load, std::chrono::seconds Timeout) + -> int; #ifdef FIRESTARTER_DEBUG_FEATURES // DumpRegisterWorker.cpp - int initDumpRegisterWorker(std::chrono::seconds dumpTimeDelta, std::string dumpFilePath); + auto initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, std::string DumpFilePath) -> int; void joinDumpRegisterWorker(); #endif // LoadThreadWorker.cpp - void signalLoadWorkers(int comm); - static void loadThreadWorker(std::shared_ptr td); + void signalLoadWorkers(int Comm); + static void loadThreadWorker(std::shared_ptr Td); #ifdef FIRESTARTER_DEBUG_FEATURES // DumpRegisterWorker.cpp - static void dumpRegisterWorker(std::unique_ptr data); + static void dumpRegisterWorker(std::unique_ptr Data); #endif - static void setLoad(unsigned long long value); + static void setLoad(uint64_t Value); - static void sigalrmHandler(int signum); - static void sigtermHandler(int signum); + static void sigalrmHandler(int Signum); + static void sigtermHandler(int Signum); // variables to control the termination of the watchdog - inline static bool _watchdog_terminate = false; - inline static std::condition_variable _watchdogTerminateAlert; - inline static std::mutex _watchdogTerminateMutex; + inline static bool WatchdogTerminate = false; + inline static std::condition_variable WatchdogTerminateAlert; + inline static std::mutex WatchdogTerminateMutex; // variable to control the load of the threads - inline static volatile unsigned long long loadVar = LOAD_LOW; + inline static volatile uint64_t LoadVar = LOAD_LOW; - std::vector>> loadThreads; + std::vector>> LoadThreads; - std::vector> errorCommunication; + std::vector> ErrorCommunication; #ifdef FIRESTARTER_DEBUG_FEATURES - std::thread dumpRegisterWorkerThread; + std::thread DumpRegisterWorkerThread; #endif }; diff --git a/include/firestarter/Json/Summary.hpp b/include/firestarter/Json/Summary.hpp index 
d9a923cc..a2e8e03a 100644 --- a/include/firestarter/Json/Summary.hpp +++ b/include/firestarter/Json/Summary.hpp @@ -34,10 +34,10 @@ template <> struct adl_serializer { static void to_json(json& j, firestarter::measurement::Summary s) { j = json::object(); - j["num_timepoints"] = s.num_timepoints; - j["duration"] = s.duration.count(); - j["average"] = s.average; - j["stddev"] = s.stddev; + j["num_timepoints"] = s.NumTimepoints; + j["duration"] = s.Duration.count(); + j["average"] = s.Average; + j["stddev"] = s.Stddev; } }; } // namespace nlohmann diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp index 78b11b80..eb7e0c3c 100644 --- a/include/firestarter/LoadWorkerData.hpp +++ b/include/firestarter/LoadWorkerData.hpp @@ -28,6 +28,7 @@ #include #include #include +#include #define PAD_SIZE(size, align) align*(int)std::ceil((double)size / (double)align) @@ -49,72 +50,72 @@ namespace firestarter { class LoadWorkerData { public: - LoadWorkerData(int id, environment::Environment& environment, volatile unsigned long long* loadVar, - unsigned long long period, bool dumpRegisters, bool errorDetection) - : addrHigh(loadVar) - , period(period) - , dumpRegisters(dumpRegisters) - , errorDetection(errorDetection) - , _id(id) - , _environment(environment) - , _config(new environment::platform::RuntimeConfig(environment.selectedConfig())) { + LoadWorkerData(int Id, environment::Environment& Environment, volatile uint64_t* LoadVar, uint64_t Period, + bool DumpRegisters, bool ErrorDetection) + : AddrHigh(LoadVar) + , Period(Period) + , DumpRegisters(DumpRegisters) + , ErrorDetection(ErrorDetection) + , Id(Id) + , Environment(Environment) + , Config(new environment::platform::RuntimeConfig(Environment.selectedConfig())) { // use REGISTER_MAX_NUM cache lines for the dumped registers // and another cache line for the control variable. 
// as we are doing aligned moves we only have the option to waste a // whole cacheline - addrOffset = dumpRegisters ? sizeof(DumpRegisterStruct) / sizeof(unsigned long long) : 0; + AddrOffset += DumpRegisters ? sizeof(DumpRegisterStruct) / sizeof(uint64_t) : 0; - addrOffset += errorDetection ? sizeof(ErrorDetectionStruct) / sizeof(unsigned long long) : 0; + AddrOffset += ErrorDetection ? sizeof(ErrorDetectionStruct) / sizeof(uint64_t) : 0; } ~LoadWorkerData() { - delete _config; - if (addrMem - addrOffset != nullptr) { - ALIGNED_FREE(addrMem - addrOffset); + delete Config; + if (AddrMem - AddrOffset != nullptr) { + ALIGNED_FREE(AddrMem - AddrOffset); } } - void setErrorCommunication(std::shared_ptr communicationLeft, - std::shared_ptr communicationRight) { - this->communicationLeft = communicationLeft; - this->communicationRight = communicationRight; + void setErrorCommunication(std::shared_ptr CommunicationLeft, + std::shared_ptr CommunicationRight) { + this->CommunicationLeft = std::move(CommunicationLeft); + this->CommunicationRight = std::move(CommunicationRight); } - int id() const { return _id; } - environment::Environment& environment() const { return _environment; } - environment::platform::RuntimeConfig& config() const { return *_config; } + [[nodiscard]] auto id() const -> int { return Id; } + [[nodiscard]] auto environment() const -> environment::Environment& { return Environment; } + [[nodiscard]] auto config() const -> environment::platform::RuntimeConfig& { return *Config; } - const ErrorDetectionStruct* errorDetectionStruct() const { - return reinterpret_cast(addrMem - addrOffset); + [[nodiscard]] auto errorDetectionStruct() const -> const ErrorDetectionStruct* { + return reinterpret_cast(AddrMem - AddrOffset); } - int comm = THREAD_WAIT; - bool ack = false; - std::mutex mutex; - unsigned long long* addrMem = nullptr; - unsigned long long addrOffset; - volatile unsigned long long* addrHigh; - unsigned long long buffersizeMem; - unsigned long long 
iterations = 0; + int Comm = THREAD_WAIT; + bool Ack = false; + std::mutex Mutex; + uint64_t* AddrMem = nullptr; + uint64_t AddrOffset = 0; + volatile uint64_t* AddrHigh; + uint64_t BuffersizeMem{}; + uint64_t Iterations = 0; // save the last iteration count when switching payloads - std::atomic lastIterations; - unsigned long long flops; - unsigned long long startTsc; - unsigned long long stopTsc; - std::atomic lastStartTsc; - std::atomic lastStopTsc; + std::atomic LastIterations{}; + uint64_t Flops{}; + uint64_t StartTsc{}; + uint64_t StopTsc{}; + std::atomic LastStartTsc{}; + std::atomic LastStopTsc{}; // period in usecs // used in low load routine to sleep 1/100th of this time - unsigned long long period; - bool dumpRegisters; - bool errorDetection; - std::shared_ptr communicationLeft; - std::shared_ptr communicationRight; + uint64_t Period; + bool DumpRegisters; + bool ErrorDetection; + std::shared_ptr CommunicationLeft; + std::shared_ptr CommunicationRight; private: - int _id; - environment::Environment& _environment; - environment::platform::RuntimeConfig* _config; + int Id; + environment::Environment& Environment; + environment::platform::RuntimeConfig* Config; }; } // namespace firestarter diff --git a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp index a91e1228..3a0e68fc 100644 --- a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp +++ b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp @@ -25,23 +25,19 @@ #include #include -namespace firestarter { - -namespace logging { +namespace firestarter::logging { template class FirstWorkerThreadFilter { public: - typedef Record record_type; + using record_type = Record; - static void setFirstThread(std::thread::id newFirstThread) { firstThread = newFirstThread; } + static void setFirstThread(std::thread::id NewFirstThread) { FirstThread = NewFirstThread; } - bool filter(Record& r) const { - return r.std_thread_id() == firstThread || 
r.severity() >= nitro::log::severity_level::error; + auto filter(Record& r) const -> bool { + return r.std_thread_id() == FirstThread || r.severity() >= nitro::log::severity_level::error; } private: - inline static std::thread::id firstThread{}; + inline static std::thread::id FirstThread{}; }; -} // namespace logging - -} // namespace firestarter +} // namespace firestarter::logging diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp index c115a476..2045bd43 100644 --- a/include/firestarter/Measurement/MeasurementWorker.hpp +++ b/include/firestarter/Measurement/MeasurementWorker.hpp @@ -23,82 +23,83 @@ #include #include +#include +#include +#include +#include #include #include #include #include extern "C" { -#include -#include -#include -#include #include } -void insertCallback(void* cls, const char* metricName, int64_t timeSinceEpoch, double value); +void insertCallback(void* Cls, const char* MetricName, int64_t TimeSinceEpoch, double Value); namespace firestarter::measurement { class MeasurementWorker { private: - pthread_t workerThread; - pthread_t stdinThread; + pthread_t WorkerThread; + pthread_t StdinThread; - std::vector metrics = {&rapl_metric, &perf_ipc_metric, &perf_freq_metric, &ipc_estimate_metric}; + std::vector Metrics = {&RaplMetric, &PerfIpcMetric, &PerfFreqMetric, &IpcEstimateMetric}; - std::mutex values_mutex; - std::map> values = {}; + std::mutex ValuesMutex; + std::map> Values; - static int* dataAcquisitionWorker(void* measurementWorker); + static auto dataAcquisitionWorker(void* MeasurementWorker) -> int*; - static int* stdinDataAcquisitionWorker(void* measurementWorker); + static auto stdinDataAcquisitionWorker(void* MeasurementWorker) -> int*; - const metric_interface_t* findMetricByName(std::string metricName); + auto findMetricByName(std::string MetricName) -> const MetricInterface*; - std::chrono::milliseconds updateInterval; + std::chrono::milliseconds UpdateInterval; 
- std::chrono::high_resolution_clock::time_point startTime; + std::chrono::high_resolution_clock::time_point StartTime; // some metric values have to be devided by this - const unsigned long long numThreads; + const uint64_t NumThreads; - std::string availableMetricsString; + std::string AvailableMetricsString; #ifndef FIRESTARTER_LINK_STATIC std::vector _metricDylibs = {}; #endif - std::vector _stdinMetrics = {}; + std::vector StdinMetrics; public: // creates the worker thread - MeasurementWorker(std::chrono::milliseconds updateInterval, unsigned long long numThreads, - std::vector const& metricDylibs, std::vector const& stdinMetrics); + MeasurementWorker(std::chrono::milliseconds UpdateInterval, uint64_t NumThreads, + std::vector const& MetricDylibs, std::vector const& StdinMetrics); // stops the worker threads ~MeasurementWorker(); - std::string const& availableMetrics() const { return this->availableMetricsString; } + [[nodiscard]] auto availableMetrics() const -> std::string const& { return this->AvailableMetricsString; } - std::vector const& stdinMetrics() { return _stdinMetrics; } + auto stdinMetrics() -> std::vector const& { return StdinMetrics; } // returns a list of metrics - std::vector metricNames(); + auto metricNames() -> std::vector; // setup the selected metrics // returns a vector with the names of inialized metrics - std::vector initMetrics(std::vector const& metricNames); + auto initMetrics(std::vector const& MetricNames) -> std::vector; // callback function for metrics - void insertCallback(const char* metricName, int64_t timeSinceEpoch, double value); + void insertCallback(const char* MetricName, int64_t TimeSinceEpoch, double Value); // start the measurement void startMeasurement(); // get the measurement values begining from measurement start until now. 
- std::map getValues(std::chrono::milliseconds startDelta = std::chrono::milliseconds::zero(), - std::chrono::milliseconds stopDelta = std::chrono::milliseconds::zero()); + auto getValues(std::chrono::milliseconds StartDelta = std::chrono::milliseconds::zero(), + std::chrono::milliseconds StopDelta = std::chrono::milliseconds::zero()) + -> std::map; }; } // namespace firestarter::measurement diff --git a/include/firestarter/Measurement/Metric/IPCEstimate.h b/include/firestarter/Measurement/Metric/IPCEstimate.h index 2c14bb0d..360c1d91 100644 --- a/include/firestarter/Measurement/Metric/IPCEstimate.h +++ b/include/firestarter/Measurement/Metric/IPCEstimate.h @@ -23,6 +23,9 @@ #include -extern metric_interface_t ipc_estimate_metric; +extern "C" { -extern void ipc_estimate_metric_insert(double value); +extern MetricInterface IpcEstimateMetric; + +extern void ipcEstimateMetricInsert(double Value); +}; \ No newline at end of file diff --git a/include/firestarter/Measurement/Metric/Perf.h b/include/firestarter/Measurement/Metric/Perf.h index 72221cca..39a070f6 100644 --- a/include/firestarter/Measurement/Metric/Perf.h +++ b/include/firestarter/Measurement/Metric/Perf.h @@ -23,6 +23,9 @@ #include -extern metric_interface_t perf_ipc_metric; +extern "C" { -extern metric_interface_t perf_freq_metric; +extern MetricInterface PerfIpcMetric; + +extern MetricInterface PerfFreqMetric; +}; \ No newline at end of file diff --git a/include/firestarter/Measurement/Metric/RAPL.h b/include/firestarter/Measurement/Metric/RAPL.h index d88e3d91..726ff61a 100644 --- a/include/firestarter/Measurement/Metric/RAPL.h +++ b/include/firestarter/Measurement/Metric/RAPL.h @@ -23,4 +23,7 @@ #include -extern metric_interface_t rapl_metric; +extern "C" { + +extern MetricInterface RaplMetric; +}; \ No newline at end of file diff --git a/include/firestarter/Measurement/MetricInterface.h b/include/firestarter/Measurement/MetricInterface.h index c0c1c58b..87352868 100644 --- 
a/include/firestarter/Measurement/MetricInterface.h +++ b/include/firestarter/Measurement/MetricInterface.h @@ -21,63 +21,73 @@ #pragma once +#ifdef __cplusplus +extern "C" { +#endif + #include -// clang-format off +// NOLINTBEGIN(modernize-use-using) typedef struct { - // Either set absolute or accumalative to specify the type of values from the - // metric. - uint32_t absolute : 1, - accumalative : 1, - // Set to divide metric values by thread count. - divide_by_thread_count : 1, - // Set to insert time-value pairs via callback function passed by - // register_insert_callback. - insert_callback : 1, - // ignore the start and stop delta set by the user - ignore_start_stop_delta : 1, - __reserved : 27; -} metric_type_t; -// clang-format on + uint32_t + // metric value is absolute + Absolute : 1, + // metric value accumulates + Accumalative : 1, + // Set to divide metric values by thread count. + DivideByThreadCount : 1, + // Set to insert time-value pairs via callback function passed by + // register_insert_callback. + InsertCallback : 1, + // ignore the start and stop delta set by the user + IgnoreStartStopDelta : 1, + // Reserved space to round up to 32 bits + Reserved : 27; +} MetricType; // Define `metric_interface_t metric` inside your shared library to be able to // load it during runtime. typedef struct { // the name of the metric - const char* name; + const char* Name; // metric type with bitfield from metric_type_t - metric_type_t type; + MetricType Type; // the unit of the metric - const char* unit; + const char* Unit; - uint64_t callback_time; + uint64_t CallbackTime; // This function will be called every `callback_time` usecs. Disable by // setting `callback_time` to 0. - void (*callback)(void); + void (*Callback)(); // init the metric. // returns EXIT_SUCCESS on success. - int32_t (*init)(void); + int32_t (*Init)(); // deinit the metric. // returns EXIT_SUCCESS on success. 
- int32_t (*fini)(void); + int32_t (*Fini)(); // Get a reading of the metric // Return EXIT_SUCCESS if we got a new value. // Set this function pointer to NULL if METRIC_INSERT_CALLBACK is specified. - int32_t (*get_reading)(double* value); + int32_t (*GetReading)(double* Value); // Get error in case return code not being EXIT_SUCCESS - const char* (*get_error)(void); + const char* (*GetError)(); // If METRIC_INSERT_CALLBACK is set in the type, this function will be passed // a callback and the first argument for the callback. // Further arguments of callback are the metric name, an unix timestamp (time // since epoch) and a metric value. - int32_t (*register_insert_callback)(void (*)(void*, const char*, int64_t, double), void*); + int32_t (*RegisterInsertCallback)(void (*)(void*, const char*, int64_t, double), void*); + +} MetricInterface; +// NOLINTEND(modernize-use-using) -} metric_interface_t; +#ifdef __cplusplus +}; +#endif \ No newline at end of file diff --git a/include/firestarter/Measurement/Summary.hpp b/include/firestarter/Measurement/Summary.hpp index 7f0d7899..09c91016 100644 --- a/include/firestarter/Measurement/Summary.hpp +++ b/include/firestarter/Measurement/Summary.hpp @@ -33,14 +33,14 @@ extern "C" { namespace firestarter::measurement { struct Summary { - size_t num_timepoints; - std::chrono::milliseconds duration; + size_t NumTimepoints; + std::chrono::milliseconds Duration; - double average; - double stddev; + double Average; + double Stddev; - static Summary calculate(std::vector::iterator begin, std::vector::iterator end, - metric_type_t metricType, unsigned long long numThreads); + static auto calculate(std::vector::iterator Begin, std::vector::iterator End, + MetricType MetricType, uint64_t NumThreads) -> Summary; }; } // namespace firestarter::measurement diff --git a/include/firestarter/Measurement/TimeValue.hpp b/include/firestarter/Measurement/TimeValue.hpp index bf9377c9..cc168ad2 100644 --- 
a/include/firestarter/Measurement/TimeValue.hpp +++ b/include/firestarter/Measurement/TimeValue.hpp @@ -28,12 +28,12 @@ namespace firestarter::measurement { struct TimeValue { TimeValue() = default; - constexpr TimeValue(std::chrono::high_resolution_clock::time_point t, double v) - : time(t) - , value(v){}; + constexpr TimeValue(std::chrono::high_resolution_clock::time_point Time, double Value) + : Time(Time) + , Value(Value){}; - std::chrono::high_resolution_clock::time_point time; - double value; + std::chrono::high_resolution_clock::time_point Time; + double Value; }; } // namespace firestarter::measurement diff --git a/include/firestarter/OneAPI/OneAPI.hpp b/include/firestarter/OneAPI/OneAPI.hpp index 0ed1844c..f6931e4d 100644 --- a/include/firestarter/OneAPI/OneAPI.hpp +++ b/include/firestarter/OneAPI/OneAPI.hpp @@ -24,25 +24,24 @@ #include #include #include -#include namespace firestarter::oneapi { class OneAPI { private: - std::thread _initThread; - std::condition_variable _waitForInitCv; - std::mutex _waitForInitCvMutex; + std::thread InitThread; + std::condition_variable WaitForInitCv; + std::mutex WaitForInitCvMutex; - static void initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble, - unsigned matrixSize, int gpus); + static void initGpus(std::condition_variable& Cv, volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble, + unsigned MatrixSize, int Gpus); public: - OneAPI(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus); + OneAPI(volatile uint64_t* LoadVar, bool UseFloat, bool UseDouble, unsigned MatrixSize, int Gpus); ~OneAPI() { - if (_initThread.joinable()) { - _initThread.join(); + if (InitThread.joinable()) { + InitThread.join(); } } }; diff --git a/include/firestarter/Optimizer/Algorithm.hpp b/include/firestarter/Optimizer/Algorithm.hpp index d9186322..4cdae1ec 100644 --- a/include/firestarter/Optimizer/Algorithm.hpp +++ 
b/include/firestarter/Optimizer/Algorithm.hpp @@ -27,12 +27,12 @@ namespace firestarter::optimizer { class Algorithm { public: - Algorithm() {} - virtual ~Algorithm() {} + Algorithm() = default; + virtual ~Algorithm() = default; - virtual void checkPopulation(Population const& pop, std::size_t populationSize) = 0; + virtual void checkPopulation(Population const& Pop, std::size_t PopulationSize) = 0; - virtual Population evolve(Population& pop) = 0; + virtual auto evolve(Population& Pop) -> Population = 0; }; } // namespace firestarter::optimizer diff --git a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp index a144bb05..e02e7e14 100644 --- a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp +++ b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp @@ -27,17 +27,17 @@ namespace firestarter::optimizer::algorithm { class NSGA2 : public Algorithm { public: - NSGA2(unsigned gen, double cr, double m); - ~NSGA2() {} + NSGA2(unsigned Gen, double Cr, double M); + ~NSGA2() override = default; - void checkPopulation(firestarter::optimizer::Population const& pop, std::size_t populationSize) override; + void checkPopulation(firestarter::optimizer::Population const& Pop, std::size_t PopulationSize) override; - firestarter::optimizer::Population evolve(firestarter::optimizer::Population& pop) override; + auto evolve(firestarter::optimizer::Population& Pop) -> firestarter::optimizer::Population override; private: - unsigned _gen; - double _cr; - double _m; + unsigned Gen; + double Cr; + double M; }; } // namespace firestarter::optimizer::algorithm diff --git a/include/firestarter/Optimizer/History.hpp b/include/firestarter/Optimizer/History.hpp index 2922301f..332b49c5 100644 --- a/include/firestarter/Optimizer/History.hpp +++ b/include/firestarter/Optimizer/History.hpp @@ -31,10 +31,8 @@ #include #include #include -#include #include #include -#include #include extern "C" { @@ -46,58 +44,58 @@ namespace firestarter::optimizer { 
struct History { private: // https://stackoverflow.com/questions/17074324/how-can-i-sort-two-vectors-in-the-same-way-with-criteria-that-uses-only-one-of/17074810#17074810 - template - inline static std::vector sortPermutation(const std::vector& vec, Compare& compare) { - std::vector p(vec.size()); - std::iota(p.begin(), p.end(), 0); - std::sort(p.begin(), p.end(), [&](std::size_t i, std::size_t j) { return compare(vec[i], vec[j]); }); - return p; + template + static auto sortPermutation(const std::vector& Vec, CompareT& Compare) -> std::vector { + std::vector P(Vec.size()); + std::iota(P.begin(), P.end(), 0); + std::sort(P.begin(), P.end(), [&](std::size_t I, std::size_t J) { return Compare(Vec[I], Vec[J]); }); + return P; } - inline static void padding(std::stringstream& ss, std::size_t width, std::size_t taken, char c) { - for (std::size_t i = 0; i < (std::max)(width, taken) - taken; ++i) { - ss << c; + static void padding(std::stringstream& Ss, std::size_t Width, std::size_t Taken, char C) { + for (std::size_t I = 0; I < (std::max)(Width, Taken) - Taken; ++I) { + Ss << C; } } - inline static int MAX_ELEMENT_PRINT_COUNT = 20; - inline static std::size_t MIN_COLUMN_WIDTH = 10; + inline static int MaxElementPrintCount = 20; + inline static std::size_t MinColumnWidth = 10; - inline static std::vector _x = {}; - inline static std::vector> _f = {}; + inline static std::vector X = {}; + inline static std::vector> F = {}; public: - inline static void append(std::vector const& ind, - std::map const& metric) { - _x.push_back(ind); - _f.push_back(metric); + static void append(std::vector const& Ind, + std::map const& Metric) { + X.push_back(Ind); + F.push_back(Metric); } - inline static std::optional> - find(std::vector const& individual) { - auto findEqual = [individual](auto const& ind) { return ind == individual; }; - auto ind = std::find_if(_x.begin(), _x.end(), findEqual); - if (ind == _x.end()) { + static auto find(std::vector const& Individual) + -> std::optional> { 
+ auto FindEqual = [Individual](auto const& ind) { return ind == Individual; }; + auto Ind = std::find_if(X.begin(), X.end(), FindEqual); + if (Ind == X.end()) { return {}; } - auto dist = std::distance(_x.begin(), ind); - return _f[dist]; + auto Dist = std::distance(X.begin(), Ind); + return F[Dist]; } - inline static void printBest(std::vector const& optimizationMetrics, - std::vector const& payloadItems) { + static void printBest(std::vector const& OptimizationMetrics, + std::vector const& PayloadItems) { // TODO: print paretto front // print the best 20 individuals for each metric in a format // where the user can give it to --run-instruction-groups directly std::map columnWidth; - for (auto const& metric : optimizationMetrics) { - columnWidth[metric] = (std::max)(metric.size(), MIN_COLUMN_WIDTH); + for (auto const& metric : OptimizationMetrics) { + columnWidth[metric] = (std::max)(metric.size(), MinColumnWidth); firestarter::log::trace() << metric << ": " << columnWidth[metric]; } - for (auto const& metric : optimizationMetrics) { + for (auto const& metric : OptimizationMetrics) { using SummaryMap = std::map; auto compareIndividual = [&metric](SummaryMap const& mapA, SummaryMap const& mapB) { auto summaryA = mapA.find(metric); @@ -108,19 +106,19 @@ struct History { summaryB = mapB.find(metric.substr(1)); assert(summaryA != mapA.end()); assert(summaryB != mapB.end()); - return summaryA->second.average < summaryB->second.average; + return summaryA->second.Average < summaryB->second.Average; } assert(summaryA != mapA.end()); assert(summaryB != mapB.end()); - return summaryA->second.average > summaryB->second.average; + return summaryA->second.Average > summaryB->second.Average; }; - auto perm = sortPermutation(_f, compareIndividual); + auto perm = sortPermutation(F, compareIndividual); - auto formatIndividual = [&payloadItems](std::vector const& individual) { + auto formatIndividual = [&PayloadItems](std::vector const& individual) { std::string result = ""; - 
assert(payloadItems.size() == individual.size()); + assert(PayloadItems.size() == individual.size()); for (std::size_t i = 0; i < individual.size(); ++i) { // skip zero values @@ -131,7 +129,7 @@ struct History { if (result.size() != 0) { result += ","; } - result += payloadItems[i] + ":" + std::to_string(individual[i]); + result += PayloadItems[i] + ":" + std::to_string(individual[i]); } return result; @@ -140,16 +138,16 @@ struct History { auto begin = perm.begin(); auto end = perm.end(); - // stop printing at a max of MAX_ELEMENT_PRINT_COUNT - if (std::distance(begin, end) > MAX_ELEMENT_PRINT_COUNT) { + // stop printing at a max of MaxElementPrintCount + if (std::distance(begin, end) > MaxElementPrintCount) { end = perm.begin(); - std::advance(end, MAX_ELEMENT_PRINT_COUNT); + std::advance(end, MaxElementPrintCount); } // print each of the best elements std::size_t max = 0; for (auto it = begin; it != end; ++it) { - max = (std::max)(max, formatIndividual(_x[*it]).size()); + max = (std::max)(max, formatIndividual(X[*it]).size()); } std::stringstream firstLine; @@ -162,7 +160,7 @@ struct History { secondLine << " "; padding(secondLine, (std::max)(max, ind.size()), 0, '-'); - for (auto const& metric : optimizationMetrics) { + for (auto const& metric : OptimizationMetrics) { auto width = columnWidth[metric]; firstLine << " | "; @@ -182,13 +180,13 @@ struct History { // print INDIVIDUAL | metric 1 | metric 2 | ... 
| metric N for (auto it = begin; it != end; ++it) { - auto const fitness = _f[*it]; - auto const ind = formatIndividual(_x[*it]); + auto const fitness = F[*it]; + auto const ind = formatIndividual(X[*it]); ss << " " << ind; padding(ss, max, ind.size(), ' '); - for (auto const& metric : optimizationMetrics) { + for (auto const& metric : OptimizationMetrics) { auto width = columnWidth[metric]; std::string value; @@ -197,9 +195,9 @@ struct History { auto fitnessOfInvertedMetric = fitness.find(invertedMetric); if (fitnessOfMetric != fitness.end()) { - value = std::to_string(fitnessOfMetric->second.average); + value = std::to_string(fitnessOfMetric->second.Average); } else if (fitnessOfInvertedMetric != fitness.end()) { - value = std::to_string(fitnessOfInvertedMetric->second.average); + value = std::to_string(fitnessOfInvertedMetric->second.Average); } else { assert(false); } @@ -220,86 +218,86 @@ struct History { "`--run-instruction-groups=INDIVIDUAL`"; } - inline static void save(std::string const& path, std::string const& startTime, - std::vector const& payloadItems, const int argc, const char** argv) { + static void save(std::string const& Path, std::string const& StartTime, std::vector const& PayloadItems, + const int Argc, const char** Argv) { using json = nlohmann::json; - json j = json::object(); + json J = json::object(); - j["individuals"] = json::array(); - for (auto const& ind : _x) { - j["individuals"].push_back(ind); + J["individuals"] = json::array(); + for (auto const& Ind : X) { + J["individuals"].push_back(Ind); } - j["metrics"] = json::array(); - for (auto const& eval : _f) { - j["metrics"].push_back(eval); + J["metrics"] = json::array(); + for (auto const& Eval : F) { + J["metrics"].push_back(Eval); } // get the hostname - char cHostname[256]; - std::string hostname; - if (0 != gethostname(cHostname, sizeof(cHostname))) { - hostname = "unknown"; + char CHostname[256]; + std::string Hostname; + if (0 != gethostname(CHostname, sizeof(CHostname))) { + 
Hostname = "unknown"; } else { - hostname = cHostname; + Hostname = CHostname; } - j["hostname"] = hostname; + J["hostname"] = Hostname; - j["startTime"] = startTime; - j["endTime"] = getTime(); + J["startTime"] = StartTime; + J["endTime"] = getTime(); // save the payload items - j["payloadItems"] = json::array(); - for (auto const& item : payloadItems) { - j["payloadItems"].push_back(item); + J["payloadItems"] = json::array(); + for (auto const& Item : PayloadItems) { + J["payloadItems"].push_back(Item); } // save the arguments - j["args"] = json::array(); - for (int i = 0; i < argc; ++i) { - j["args"].push_back(argv[i]); + J["args"] = json::array(); + for (int I = 0; I < Argc; ++I) { + J["args"].push_back(Argv[I]); } // dump the output - std::string s = j.dump(); + std::string S = J.dump(); - firestarter::log::trace() << s; + firestarter::log::trace() << S; - std::string outpath = path; - if (outpath.empty()) { - char* pwd = get_current_dir_name(); - if (pwd) { - outpath = pwd; - free(pwd); + std::string Outpath = Path; + if (Outpath.empty()) { + char* Pwd = get_current_dir_name(); + if (Pwd) { + Outpath = Pwd; + free(Pwd); } else { firestarter::log::warn() << "Could not find $PWD."; - outpath = "/tmp"; + Outpath = "/tmp"; } - outpath += "/" + hostname + "_" + startTime + ".json"; + Outpath += "/" + Hostname + "_" + StartTime + ".json"; } - firestarter::log::info() << "\nDumping output json in " << outpath; + firestarter::log::info() << "\nDumping output json in " << Outpath; - std::ofstream fp(outpath); + std::ofstream Fp(Outpath); - if (fp.bad()) { - firestarter::log::error() << "Could not open " << outpath; + if (Fp.bad()) { + firestarter::log::error() << "Could not open " << Outpath; return; } - fp << s; + Fp << S; - fp.close(); + Fp.close(); } - inline static std::string getTime() { - auto t = std::time(nullptr); - auto tm = *std::localtime(&t); - std::stringstream ss; - ss << std::put_time(&tm, "%F_%T%z"); - return ss.str(); + static auto getTime() -> 
std::string { + auto T = std::time(nullptr); + auto Tm = *std::localtime(&T); + std::stringstream Ss; + Ss << std::put_time(&Tm, "%F_%T%z"); + return Ss.str(); } }; } // namespace firestarter::optimizer diff --git a/include/firestarter/Optimizer/OptimizerWorker.hpp b/include/firestarter/Optimizer/OptimizerWorker.hpp index 816f4882..e98c25b9 100644 --- a/include/firestarter/Optimizer/OptimizerWorker.hpp +++ b/include/firestarter/Optimizer/OptimizerWorker.hpp @@ -32,26 +32,26 @@ namespace firestarter::optimizer { class OptimizerWorker { public: - OptimizerWorker(std::unique_ptr&& algorithm, - firestarter::optimizer::Population& population, std::string const& optimizationAlgorithm, - unsigned individuals, std::chrono::seconds const& preheat); + OptimizerWorker(std::unique_ptr&& Algorithm, + firestarter::optimizer::Population& Population, std::string const& OptimizationAlgorithm, + unsigned Individuals, std::chrono::seconds const& Preheat); - ~OptimizerWorker() {} + ~OptimizerWorker() = default; void join(); void kill(); private: - static void* optimizerThread(void* optimizerWorker); + static auto optimizerThread(void* OptimizerWorker) -> void*; - std::unique_ptr _algorithm; - firestarter::optimizer::Population _population; - std::string _optimizationAlgorithm; - unsigned _individuals; - std::chrono::seconds _preheat; + std::unique_ptr Algorithm; + firestarter::optimizer::Population Population; + std::string OptimizationAlgorithm; + unsigned Individuals; + std::chrono::seconds Preheat; - pthread_t workerThread; + pthread_t WorkerThread; }; } // namespace firestarter::optimizer diff --git a/include/firestarter/Optimizer/Population.hpp b/include/firestarter/Optimizer/Population.hpp index 3bf3ac38..757a2e46 100644 --- a/include/firestarter/Optimizer/Population.hpp +++ b/include/firestarter/Optimizer/Population.hpp @@ -29,7 +29,6 @@ #include #include #include -#include #include namespace firestarter::optimizer { @@ -39,60 +38,60 @@ class Population { // Construct a 
population from a problem. Population() = default; - Population(std::shared_ptr&& problem) - : _problem(std::move(problem)) - , gen(rd()) {} + explicit Population(std::shared_ptr&& ProblemPtr) + : ProblemPtr(std::move(ProblemPtr)) + , Gen(Rd()) {} - Population(Population& pop) - : _problem(pop._problem) - , _x(pop._x) - , _f(pop._f) - , gen(rd()) {} + Population(Population& Pop) + : ProblemPtr(Pop.ProblemPtr) + , X(Pop.X) + , F(Pop.F) + , Gen(Rd()) {} - Population& operator=(Population const& pop) { - _problem = std::move(pop._problem); - _x = pop._x; - _f = pop._f; - gen = pop.gen; + auto operator=(Population const& Pop) -> Population& { + ProblemPtr = Pop.ProblemPtr; + X = Pop.X; + F = Pop.F; + Gen = Pop.Gen; return *this; } - ~Population() {} + ~Population() = default; - void generateInitialPopulation(std::size_t populationSize = 0); + void generateInitialPopulation(std::size_t PopulationSize = 0); - std::size_t size() const; + [[nodiscard]] auto size() const -> std::size_t; // add one individual to the population. fitness will be evaluated. - void append(Individual const& ind); + void append(Individual const& Ind); - void insert(std::size_t idx, Individual const& ind, std::vector const& fit); + void insert(std::size_t Idx, Individual const& Ind, std::vector const& Fit); // get a random individual inside bounds of problem - Individual getRandomIndividual(); + auto getRandomIndividual() -> Individual; // returns the best individual in case of single-objective. // return nothing in case of mutli-objective. 
- std::optional bestIndividual() const; + [[nodiscard]] auto bestIndividual() const -> std::optional; - Problem const& problem() const { return *_problem; } + [[nodiscard]] auto problem() const -> Problem const& { return *ProblemPtr; } - std::vector const& x() const { return _x; } - std::vector> const& f() const { return _f; } + [[nodiscard]] auto x() const -> std::vector const& { return X; } + [[nodiscard]] auto f() const -> std::vector> const& { return F; } private: // add one individual to the population with a fitness. - void append(Individual const& ind, std::vector const& fit); + void append(Individual const& Ind, std::vector const& Fit); // our problem. - std::shared_ptr _problem; + std::shared_ptr ProblemPtr; - std::vector _x; - std::vector> _f; + std::vector X; + std::vector> F; - std::random_device rd; - std::mt19937 gen; + std::random_device Rd; + std::mt19937 Gen; }; } // namespace firestarter::optimizer diff --git a/include/firestarter/Optimizer/Problem.hpp b/include/firestarter/Optimizer/Problem.hpp index 009b4d01..df31ec98 100644 --- a/include/firestarter/Optimizer/Problem.hpp +++ b/include/firestarter/Optimizer/Problem.hpp @@ -32,33 +32,33 @@ namespace firestarter::optimizer { class Problem { public: - Problem() - : _fevals(0) {} - virtual ~Problem() {} + Problem() = default; + virtual ~Problem() = default; // return the fitness for an individual - virtual std::map metrics(Individual const& individual) = 0; + virtual auto metrics(Individual const& Individual) -> std::map = 0; - virtual std::vector fitness(std::map const& summaries) = 0; + virtual auto fitness(std::map const& Summaries) + -> std::vector = 0; // get the bounds of the problem - virtual std::vector> getBounds() const = 0; + [[nodiscard]] virtual auto getBounds() const -> std::vector> = 0; // get the number of dimensions of the problem - std::size_t getDims() const { return this->getBounds().size(); }; + [[nodiscard]] auto getDims() const -> std::size_t { return this->getBounds().size(); 
}; // get the number of objectives. - virtual std::size_t getNobjs() const = 0; + [[nodiscard]] virtual auto getNobjs() const -> std::size_t = 0; // is the problem multiobjective - bool isMO() const { return this->getNobjs() > 1; }; + [[nodiscard]] auto isMO() const -> bool { return this->getNobjs() > 1; }; // get the number of fitness evaluations - unsigned long long getFevals() const { return _fevals; }; + [[nodiscard]] auto getFevals() const -> uint64_t { return Fevals; }; protected: // number of fitness evaluations - unsigned long long _fevals; + uint64_t Fevals = 0; }; } // namespace firestarter::optimizer diff --git a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp index f24ae2f2..74346a74 100644 --- a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp +++ b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp @@ -24,9 +24,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -35,103 +35,105 @@ namespace firestarter::optimizer::problem { class CLIArgumentProblem final : public firestarter::optimizer::Problem { public: - CLIArgumentProblem(std::function> const&)>&& changePayloadFunction, - std::shared_ptr const& measurementWorker, - std::vector const& metrics, std::chrono::seconds timeout, - std::chrono::milliseconds startDelta, std::chrono::milliseconds stopDelta, - std::vector const& instructionGroups) - : _changePayloadFunction(changePayloadFunction) - , _measurementWorker(measurementWorker) - , _metrics(metrics) - , _timeout(timeout) - , _startDelta(startDelta) - , _stopDelta(stopDelta) - , _instructionGroups(instructionGroups) { - assert(_metrics.size() != 0); + CLIArgumentProblem(std::function> const&)>&& ChangePayloadFunction, + std::shared_ptr const& MeasurementWorker, + std::vector const& Metrics, std::chrono::seconds Timeout, + std::chrono::milliseconds StartDelta, std::chrono::milliseconds StopDelta, + 
std::vector const& InstructionGroups) + : ChangePayloadFunction(ChangePayloadFunction) + , MeasurementWorker(MeasurementWorker) + , Metrics(Metrics) + , Timeout(Timeout) + , StartDelta(StartDelta) + , StopDelta(StopDelta) + , InstructionGroups(InstructionGroups) { + assert(Metrics.size() != 0); } - ~CLIArgumentProblem() {} + ~CLIArgumentProblem() override = default; // return all available metrics for the individual - std::map metrics(std::vector const& individual) override { + auto metrics(std::vector const& Individual) + -> std::map override { // increment evaluation idx - _fevals++; + Fevals++; // change the payload - assert(_instructionGroups.size() == individual.size()); - std::vector> payload = {}; - auto it1 = _instructionGroups.begin(); - auto it2 = individual.begin(); - for (; it1 != _instructionGroups.end(); ++it1, ++it2) { - payload.push_back(std::make_pair(*it1, *it2)); + assert(InstructionGroups.size() == Individual.size()); + std::vector> Payload = {}; + auto It1 = InstructionGroups.begin(); + auto It2 = Individual.begin(); + for (; It1 != InstructionGroups.end(); ++It1, ++It2) { + Payload.emplace_back(*It1, *It2); } - _changePayloadFunction(payload); + ChangePayloadFunction(Payload); // start the measurement // NOTE: starting the measurement must happen after switching to not // mess up ipc-estimate metric - _measurementWorker->startMeasurement(); + MeasurementWorker->startMeasurement(); // wait for the measurement to finish - std::this_thread::sleep_for(_timeout); + std::this_thread::sleep_for(Timeout); // FIXME: this is an ugly workaround for the ipc-estimate metric // changeing the payload triggers a write of the iteration counter of // the last payload, which we use to estimate the ipc. 
- _changePayloadFunction(payload); + ChangePayloadFunction(Payload); // return the results - return _measurementWorker->getValues(_startDelta, _stopDelta); + return MeasurementWorker->getValues(StartDelta, StopDelta); } - std::vector fitness(std::map const& summaries) override { - std::vector values = {}; + auto fitness(std::map const& Summaries) + -> std::vector override { + std::vector Values = {}; - for (auto const& metricName : _metrics) { - auto findName = [metricName](auto const& summary) { - auto invertedName = "-" + summary.first; - return metricName.compare(summary.first) == 0 || metricName.compare(invertedName) == 0; + for (auto const& MetricName : Metrics) { + auto FindName = [MetricName](auto const& Summary) { + auto InvertedName = "-" + Summary.first; + return MetricName.compare(Summary.first) == 0 || MetricName.compare(InvertedName) == 0; }; - auto it = std::find_if(summaries.begin(), summaries.end(), findName); + auto It = std::find_if(Summaries.begin(), Summaries.end(), FindName); - if (it == summaries.end()) { + if (It == Summaries.end()) { continue; } // round to two decimal places after the comma - auto value = std::round(it->second.average * 100.0) / 100.0; + auto Value = std::round(It->second.Average * 100.0) / 100.0; // invert metric - if (metricName[0] == '-') { - value *= -1.0; + if (MetricName[0] == '-') { + Value *= -1.0; } - values.push_back(value); + Values.push_back(Value); } - return values; + return Values; } // get the bounds of the problem - std::vector> getBounds() const override { - std::vector> vec(_instructionGroups.size(), + [[nodiscard]] auto getBounds() const -> std::vector> override { + std::vector> Vec(InstructionGroups.size(), std::make_tuple(0, 100)); - return vec; + return Vec; } // get the number of objectives. 
- std::size_t getNobjs() const override { return _metrics.size(); } + [[nodiscard]] auto getNobjs() const -> std::size_t override { return Metrics.size(); } private: - std::function> const&)> _changePayloadFunction; - std::shared_ptr _measurementWorker; - std::vector _metrics; - std::chrono::seconds _timeout; - std::chrono::milliseconds _startDelta; - std::chrono::milliseconds _stopDelta; - std::vector _instructionGroups; + std::function> const&)> ChangePayloadFunction; + std::shared_ptr MeasurementWorker; + std::vector Metrics; + std::chrono::seconds Timeout; + std::chrono::milliseconds StartDelta; + std::chrono::milliseconds StopDelta; + std::vector InstructionGroups; }; } // namespace firestarter::optimizer::problem diff --git a/include/firestarter/Optimizer/Util/MultiObjective.hpp b/include/firestarter/Optimizer/Util/MultiObjective.hpp index da61bf73..fab62be8 100644 --- a/include/firestarter/Optimizer/Util/MultiObjective.hpp +++ b/include/firestarter/Optimizer/Util/MultiObjective.hpp @@ -28,32 +28,31 @@ namespace firestarter::optimizer::util { -bool less_than_f(double a, double b); +auto lessThanF(double A, double B) -> bool; -bool greater_than_f(double a, double b); +auto greaterThanF(double A, double B) -> bool; -bool pareto_dominance(const std::vector& obj1, const std::vector& obj2); +auto paretoDominance(const std::vector& Obj1, const std::vector& Obj2) -> bool; -std::tuple>, std::vector>, std::vector, - std::vector> -fast_non_dominated_sorting(const std::vector>& points); +auto fastNonDominatedSorting(const std::vector>& Points) + -> std::tuple>, std::vector>, + std::vector, std::vector>; -std::vector crowding_distance(const std::vector>& non_dom_front); +auto crowdingDistance(const std::vector>& NonDomFront) -> std::vector; -std::vector::size_type -mo_tournament_selection(std::vector::size_type idx1, std::vector::size_type idx2, - const std::vector::size_type>& non_domination_rank, - const std::vector& crowding_d, std::mt19937& mt); +auto 
moTournamentSelection(std::vector::size_type Idx1, std::vector::size_type Idx2, + const std::vector::size_type>& NonDominationRank, + const std::vector& CrowdingD, std::mt19937& Mt) -> std::vector::size_type; -std::pair -sbx_crossover(const firestarter::optimizer::Individual& parent1, const firestarter::optimizer::Individual& parent2, - const double p_cr, std::mt19937& mt); +auto sbxCrossover(const firestarter::optimizer::Individual& Parent1, const firestarter::optimizer::Individual& Parent2, + double PCr, std::mt19937& Mt) + -> std::pair; -void polynomial_mutation(firestarter::optimizer::Individual& child, - const std::vector>& bounds, const double p_m, std::mt19937& mt); +void polynomialMutation(firestarter::optimizer::Individual& Child, + const std::vector>& Bounds, double PM, std::mt19937& Mt); -std::vector select_best_N_mo(const std::vector>& input_f, std::size_t N); +auto selectBestNMo(const std::vector>& InputF, std::size_t N) -> std::vector; -std::vector ideal(const std::vector>& points); +auto ideal(const std::vector>& Points) -> std::vector; } // namespace firestarter::optimizer::util diff --git a/src/firestarter/Cuda/Cuda.cpp b/src/firestarter/Cuda/Cuda.cpp index 8a17021f..2e5290a2 100644 --- a/src/firestarter/Cuda/Cuda.cpp +++ b/src/firestarter/Cuda/Cuda.cpp @@ -326,7 +326,7 @@ static CONCAT(FS_ACCEL_PREFIX_LC, randStatus_t) // GPU index. Used to pin this thread to the GPU. 
template static void create_load(std::condition_variable& waitForInitCv, std::mutex& waitForInitCvMutex, int device_index, - std::atomic& initCount, volatile unsigned long long* loadVar, int matrixSize) { + std::atomic& initCount, volatile uint64_t* loadVar, int matrixSize) { static_assert(std::is_same::value || std::is_same::value, "create_load: Template argument T must be either float or double"); @@ -515,7 +515,7 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait #endif } -Cuda::Cuda(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) { +Cuda::Cuda(volatile uint64_t* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) { std::thread t(Cuda::initGpus, std::ref(_waitForInitCv), loadVar, useFloat, useDouble, matrixSize, gpus); _initThread = std::move(t); @@ -524,7 +524,7 @@ Cuda::Cuda(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, _waitForInitCv.wait(lk); } -void Cuda::initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble, +void Cuda::initGpus(std::condition_variable& cv, volatile uint64_t* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) { std::condition_variable waitForInitCv; std::mutex waitForInitCvMutex; diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp index c5d7b34e..06d7e417 100644 --- a/src/firestarter/DumpRegisterWorker.cpp +++ b/src/firestarter/DumpRegisterWorker.cpp @@ -31,10 +31,10 @@ using namespace firestarter; namespace { -static unsigned hammingDistance(unsigned long long x, unsigned long long y) { +static unsigned hammingDistance(uint64_t x, uint64_t y) { unsigned dist = 0; - for (unsigned long long val = x ^ y; val > 0; val >>= 1) { + for (uint64_t val = x ^ y; val > 0; val >>= 1) { dist += val & 1; } @@ -57,34 +57,34 @@ static std::string registerNameBySize(unsigned registerSize) { int 
Firestarter::initDumpRegisterWorker(std::chrono::seconds dumpTimeDelta, std::string dumpFilePath) { - auto data = std::make_unique(this->loadThreads.begin()->second, dumpTimeDelta, dumpFilePath); + auto data = std::make_unique(this->LoadThreads.begin()->second, dumpTimeDelta, dumpFilePath); - this->dumpRegisterWorkerThread = std::thread(Firestarter::dumpRegisterWorker, std::move(data)); + this->DumpRegisterWorkerThread = std::thread(Firestarter::dumpRegisterWorker, std::move(data)); return EXIT_SUCCESS; } -void Firestarter::joinDumpRegisterWorker() { this->dumpRegisterWorkerThread.join(); } +void Firestarter::joinDumpRegisterWorker() { this->DumpRegisterWorkerThread.join(); } void Firestarter::dumpRegisterWorker(std::unique_ptr data) { pthread_setname_np(pthread_self(), "DumpRegWorker"); - int registerCount = data->loadWorkerData->config().payload().registerCount(); - int registerSize = data->loadWorkerData->config().payload().registerSize(); + int registerCount = data->LoadWorkerDataPtr->config().payload().registerCount(); + int registerSize = data->LoadWorkerDataPtr->config().payload().registerSize(); std::string registerPrefix = registerNameBySize(registerSize); - auto offset = sizeof(DumpRegisterStruct) / sizeof(unsigned long long); + auto offset = sizeof(DumpRegisterStruct) / sizeof(uint64_t); - auto dumpRegisterStruct = reinterpret_cast(data->loadWorkerData->addrMem - offset); + auto dumpRegisterStruct = reinterpret_cast(data->LoadWorkerDataPtr->AddrMem - offset); - auto dumpVar = reinterpret_cast(&dumpRegisterStruct->dumpVar); + auto dumpVar = reinterpret_cast(&dumpRegisterStruct->DumpVar); // memory of simd variables is before the padding - volatile unsigned long long* dumpMemAddr = dumpRegisterStruct->padding - registerCount * registerSize; + volatile uint64_t* dumpMemAddr = dumpRegisterStruct->Padding - registerCount * registerSize; // TODO: maybe use aligned_malloc to make memcpy more efficient and don't // interrupt the workload as much? 
- unsigned long long* last = reinterpret_cast(malloc(sizeof(unsigned long long) * offset)); - unsigned long long* current = reinterpret_cast(malloc(sizeof(unsigned long long) * offset)); + uint64_t* last = reinterpret_cast(malloc(sizeof(uint64_t) * offset)); + uint64_t* current = reinterpret_cast(malloc(sizeof(uint64_t) * offset)); if (last == nullptr || current == nullptr) { log::error() << "Malloc failed in Firestarter::dumpRegisterWorker"; @@ -92,7 +92,7 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr dat } std::stringstream dumpFilePath; - dumpFilePath << data->dumpFilePath; + dumpFilePath << data->DumpFilePath; #if defined(__MINGW32__) || defined(__MINGW64__) dumpFilePath << "\\"; #else @@ -123,7 +123,7 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr dat // continue until stop and dump the registers every data->dumpTimeDelta // seconds - for (; *data->loadWorkerData->addrHigh != LOAD_STOP;) { + for (; *data->LoadWorkerDataPtr->AddrHigh != LOAD_STOP;) { // signal the thread to dump its largest SIMD registers *dumpVar = DumpVariable::Start; __asm__ __volatile__("mfence;"); @@ -132,7 +132,7 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr dat } // copy the register content to minimize the interruption of the load worker - std::memcpy(current, (void*)dumpMemAddr, sizeof(unsigned long long) * offset); + std::memcpy(current, (void*)dumpMemAddr, sizeof(uint64_t) * offset); // skip the first output, as we first have to get some valid values for last if (!skipFirst) { @@ -150,7 +150,7 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr dat for (auto j = 0; j < registerSize; j++) { auto index = registerSize * i + j; - auto hd = static_cast(hammingDistance(current[index], last[index])); + auto hd = static_cast(hammingDistance(current[index], last[index])); dumpFile << hd; if (j != registerSize - 1) { @@ -168,9 +168,9 @@ void Firestarter::dumpRegisterWorker(std::unique_ptr dat skipFirst = false; } - std::memcpy(last, current, sizeof(unsigned 
long long) * offset); + std::memcpy(last, current, sizeof(uint64_t) * offset); - std::this_thread::sleep_for(std::chrono::seconds(data->dumpTimeDelta)); + std::this_thread::sleep_for(std::chrono::seconds(data->DumpTimeDelta)); } dumpFile.close(); diff --git a/src/firestarter/Environment/CPUTopology.cpp b/src/firestarter/Environment/CPUTopology.cpp index a21bd9b8..b3e9a862 100644 --- a/src/firestarter/Environment/CPUTopology.cpp +++ b/src/firestarter/Environment/CPUTopology.cpp @@ -19,20 +19,21 @@ * Contact: daniel.hackenberg@tu-dresden.de *****************************************************************************/ +#include #include #include -#include #include #include +#include extern "C" { #include } -using namespace firestarter::environment; +namespace firestarter::environment { -std::ostream& CPUTopology::print(std::ostream& stream) const { +auto CPUTopology::print(std::ostream& stream) const -> std::ostream& { stream << " system summary:\n" << " number of processors: " << this->numPackages() << "\n" << " number of cores (total)): " << this->numCoresTotal() << "\n" @@ -43,8 +44,8 @@ std::ostream& CPUTopology::print(std::ostream& stream) const { std::stringstream ss; - for (auto const& ent : this->features()) { - ss << ent << " "; + for (auto const& Entry : this->features()) { + ss << Entry << " "; } stream << " processor characteristics:\n" @@ -56,45 +57,42 @@ std::ostream& CPUTopology::print(std::ostream& stream) const { << " supported features: " << ss.str() << "\n" << " Caches:"; - std::vector caches = { + std::vector Caches = { HWLOC_OBJ_L1CACHE, HWLOC_OBJ_L1ICACHE, HWLOC_OBJ_L2CACHE, HWLOC_OBJ_L2ICACHE, HWLOC_OBJ_L3CACHE, HWLOC_OBJ_L3ICACHE, HWLOC_OBJ_L4CACHE, HWLOC_OBJ_L5CACHE, }; - std::vector cacheStrings = {}; + std::vector CacheStrings = {}; - for (hwloc_obj_type_t const& cache : caches) { - int width; - char string[128]; - int shared; - hwloc_obj_t cacheObj; + for (hwloc_obj_type_t const& Cache : Caches) { std::stringstream ss; - width = 
hwloc_get_nbobjs_by_type(this->topology, cache); + auto Width = hwloc_get_nbobjs_by_type(this->topology, Cache); - if (width >= 1) { + if (Width >= 1) { ss << "\n - "; - cacheObj = hwloc_get_obj_by_type(this->topology, cache, 0); - hwloc_obj_type_snprintf(string, sizeof(string), cacheObj, 0); + auto* CacheObj = hwloc_get_obj_by_type(this->topology, Cache, 0); + std::array String{}; + hwloc_obj_type_snprintf(String.begin(), sizeof(String), CacheObj, 0); - switch (cacheObj->attr->cache.type) { + switch (CacheObj->attr->cache.type) { case HWLOC_OBJ_CACHE_DATA: - ss << "Level " << cacheObj->attr->cache.depth << " Data"; + ss << "Level " << CacheObj->attr->cache.depth << " Data"; break; case HWLOC_OBJ_CACHE_INSTRUCTION: - ss << "Level " << cacheObj->attr->cache.depth << " Instruction"; + ss << "Level " << CacheObj->attr->cache.depth << " Instruction"; break; case HWLOC_OBJ_CACHE_UNIFIED: default: - ss << "Unified Level " << cacheObj->attr->cache.depth; + ss << "Unified Level " << CacheObj->attr->cache.depth; break; } - ss << " Cache, " << cacheObj->attr->cache.size / 1024 << " KiB, " << cacheObj->attr->cache.linesize + ss << " Cache, " << CacheObj->attr->cache.size / 1024 << " KiB, " << CacheObj->attr->cache.linesize << " B Cacheline, "; - switch (cacheObj->attr->cache.associativity) { + switch (CacheObj->attr->cache.associativity) { case -1: ss << "full"; break; @@ -102,16 +100,16 @@ std::ostream& CPUTopology::print(std::ostream& stream) const { ss << "unknown"; break; default: - ss << cacheObj->attr->cache.associativity << "-way set"; + ss << CacheObj->attr->cache.associativity << "-way set"; break; } ss << " associative, "; - shared = this->numThreads() / width; + auto Shared = this->numThreads() / Width; - if (shared > 1) { - ss << "shared among " << shared << " threads."; + if (Shared > 1) { + ss << "shared among " << Shared << " threads."; } else { ss << "per thread."; } @@ -124,7 +122,7 @@ std::ostream& CPUTopology::print(std::ostream& stream) const { } 
CPUTopology::CPUTopology(std::string architecture) - : _architecture(architecture) { + : _architecture(std::move(architecture)) { hwloc_topology_init(&this->topology); @@ -413,3 +411,5 @@ unsigned CPUTopology::maxNumThreads() const { return max; } + +}; // namespace firestarter::environment \ No newline at end of file diff --git a/src/firestarter/Environment/Environment.cpp b/src/firestarter/Environment/Environment.cpp index 34022c93..67e62d9d 100644 --- a/src/firestarter/Environment/Environment.cpp +++ b/src/firestarter/Environment/Environment.cpp @@ -19,14 +19,14 @@ * Contact: daniel.hackenberg@tu-dresden.de *****************************************************************************/ +#include #include #include -#include #include #include -using namespace firestarter::environment; +namespace firestarter::environment { #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY) @@ -55,70 +55,70 @@ extern "C" { } \ } while (0) -int Environment::cpuSet(unsigned id) { - cpu_set_t mask; +auto Environment::cpuSet(unsigned Id) -> int { + cpu_set_t Mask; - CPU_ZERO(&mask); - CPU_SET(id, &mask); + CPU_ZERO(&Mask); + CPU_SET(Id, &Mask); - return sched_setaffinity(0, sizeof(cpu_set_t), &mask); + return sched_setaffinity(0, sizeof(cpu_set_t), &Mask); } -int Environment::cpuAllowed(unsigned id) { - cpu_set_t mask; +auto Environment::cpuAllowed(unsigned Id) -> int { + cpu_set_t Mask; - CPU_ZERO(&mask); + CPU_ZERO(&Mask); - if (!sched_getaffinity(0, sizeof(cpu_set_t), &mask)) { - return CPU_ISSET(id, &mask); + if (!sched_getaffinity(0, sizeof(cpu_set_t), &Mask)) { + return CPU_ISSET(Id, &Mask); } return 0; } #endif -int Environment::evaluateCpuAffinity(unsigned requestedNumThreads, std::string cpuBind) { +auto Environment::evaluateCpuAffinity(unsigned RequestedNumThreads, std::string cpuBind) -> int { #if not((defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY)) (void)cpuBind; #endif - if (requestedNumThreads > 0 && 
requestedNumThreads > this->topology().numThreads()) { + if (RequestedNumThreads > 0 && RequestedNumThreads > this->topology().numThreads()) { log::warn() << "Not enough CPUs for requested number of threads"; } #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY) - cpu_set_t cpuset; + cpu_set_t Cpuset; - CPU_ZERO(&cpuset); + CPU_ZERO(&Cpuset); if (cpuBind.empty()) { // no cpu binding defined // use all CPUs if not defined otherwise - if (requestedNumThreads == 0) { - for (unsigned i = 0; i < this->topology().maxNumThreads(); i++) { - if (this->cpuAllowed(i)) { - CPU_SET(i, &cpuset); - requestedNumThreads++; + if (RequestedNumThreads == 0) { + for (unsigned I = 0; I < this->topology().maxNumThreads(); I++) { + if (this->cpuAllowed(I)) { + CPU_SET(I, &Cpuset); + RequestedNumThreads++; } } } else { // if -n / --threads is set - unsigned cpu_count = 0; - for (unsigned i = 0; i < this->topology().maxNumThreads(); i++) { + unsigned CpuCount = 0; + for (unsigned I = 0; I < this->topology().maxNumThreads(); I++) { // skip if cpu is not available - if (!this->cpuAllowed(i)) { + if (!this->cpuAllowed(I)) { continue; } - ADD_CPU_SET(i, cpuset); - cpu_count++; + ADD_CPU_SET(I, Cpuset); + CpuCount++; // we reached the desired amounts of threads - if (cpu_count >= requestedNumThreads) { + if (CpuCount >= RequestedNumThreads) { break; } } // requested to many threads - if (cpu_count < requestedNumThreads) { + if (CpuCount < RequestedNumThreads) { log::error() << "You are requesting more threads than " "there are CPUs available in the given cpuset.\n" << "This can be caused by the taskset tool, cgrous, " @@ -130,41 +130,42 @@ int Environment::evaluateCpuAffinity(unsigned requestedNumThreads, std::string c } } else { // parse CPULIST for binding - const std::string delimiter = ","; - const std::regex re("^(?:(\\d+)(?:-([1-9]\\d*)(?:\\/([1-9]\\d*))?)?)$"); + const std::string Delimiter = ","; + const std::regex 
Re(R"(^(?:(\d+)(?:-([1-9]\d*)(?:\/([1-9]\d*))?)?)$)"); - std::stringstream ss(cpuBind); + std::stringstream Ss(cpuBind); - while (ss.good()) { - std::string token; - std::smatch m; - std::getline(ss, token, ','); + while (Ss.good()) { + std::string Token; + std::smatch M; + std::getline(Ss, Token, ','); ; - if (std::regex_match(token, m, re)) { - unsigned long x, y, s; + if (std::regex_match(Token, M, Re)) { + unsigned long Y; + unsigned long S; - x = std::stoul(m[1].str()); - if (m[2].matched) { - y = std::stoul(m[2].str()); + unsigned long X = std::stoul(M[1].str()); + if (M[2].matched) { + Y = std::stoul(M[2].str()); } else { - y = x; + Y = X; } - if (m[3].matched) { - s = std::stoul(m[3].str()); + if (M[3].matched) { + S = std::stoul(M[3].str()); } else { - s = 1; + S = 1; } - if (y < x) { - log::error() << "y has to be >= x in x-y expressions of CPU list: " << token; + if (Y < X) { + log::error() << "y has to be >= x in x-y expressions of CPU list: " << Token; return EXIT_FAILURE; } - for (unsigned long i = x; i <= y; i += s) { - ADD_CPU_SET(i, cpuset); - requestedNumThreads++; + for (unsigned long I = X; I <= Y; I += S) { + ADD_CPU_SET(I, Cpuset); + RequestedNumThreads++; } } else { - log::error() << "Invalid symbols in CPU list: " << token; + log::error() << "Invalid symbols in CPU list: " << Token; return EXIT_FAILURE; } } @@ -175,25 +176,22 @@ int Environment::evaluateCpuAffinity(unsigned requestedNumThreads, std::string c } #endif - if (requestedNumThreads == 0) { + if (RequestedNumThreads == 0) { log::error() << "Found no usable CPUs!"; return 127; } #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY) - else { - for (unsigned i = 0; i < this->topology().maxNumThreads(); i++) { - if (CPU_ISSET(i, &cpuset)) { - this->cpuBind.push_back(i); - } + for (unsigned I = 0; I < this->topology().maxNumThreads(); I++) { + if (CPU_ISSET(I, &Cpuset)) { + this->CpuBind.push_back(I); } } + #endif - if (requestedNumThreads > 
this->topology().maxNumThreads()) { - requestedNumThreads = this->topology().maxNumThreads(); - } + RequestedNumThreads = std::min(RequestedNumThreads, this->topology().maxNumThreads()); - this->_requestedNumThreads = requestedNumThreads; + this->RequestedNumThreads = RequestedNumThreads; return EXIT_SUCCESS; } @@ -202,38 +200,40 @@ void Environment::printThreadSummary() { log::info() << "\n using " << this->requestedNumThreads() << " threads"; #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY) - bool printCoreIdInfo = false; + bool PrintCoreIdInfo = false; size_t i = 0; - std::vector cpuBind(this->cpuBind); - cpuBind.resize(this->requestedNumThreads()); - for (auto const& bind : cpuBind) { - int coreId = this->topology().getCoreIdFromPU(bind); - int pkgId = this->topology().getPkgIdFromPU(bind); + std::vector CpuBind(this->CpuBind); + CpuBind.resize(this->requestedNumThreads()); + for (auto const& Bind : CpuBind) { + int CoreId = this->topology().getCoreIdFromPU(Bind); + int PkgId = this->topology().getPkgIdFromPU(Bind); - if (coreId != -1 && pkgId != -1) { - log::info() << " - Thread " << i << " run on CPU " << bind << ", core " << coreId << " in package: " << pkgId; - printCoreIdInfo = true; + if (CoreId != -1 && PkgId != -1) { + log::info() << " - Thread " << i << " run on CPU " << Bind << ", core " << CoreId << " in package: " << PkgId; + PrintCoreIdInfo = true; } i++; } - if (printCoreIdInfo) { + if (PrintCoreIdInfo) { log::info() << " The cores are numbered using the logical_index from hwloc."; } #endif } -int Environment::setCpuAffinity(unsigned thread) { - if (thread >= this->requestedNumThreads()) { +auto Environment::setCpuAffinity(unsigned Thread) -> int { + if (Thread >= this->requestedNumThreads()) { log::error() << "Trying to set more CPUs than available."; return EXIT_FAILURE; } #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY) - this->cpuSet(this->cpuBind.at(thread)); + 
this->cpuSet(this->CpuBind.at(Thread)); #endif return EXIT_SUCCESS; } + +}; // namespace firestarter::environment \ No newline at end of file diff --git a/src/firestarter/Environment/Payload/Payload.cpp b/src/firestarter/Environment/Payload/Payload.cpp index 5cda6abc..39c0e6a2 100644 --- a/src/firestarter/Environment/Payload/Payload.cpp +++ b/src/firestarter/Environment/Payload/Payload.cpp @@ -24,73 +24,76 @@ #include -using namespace firestarter::environment::payload; +namespace firestarter::environment::payload { -unsigned Payload::getSequenceStartCount(const std::vector& sequence, const std::string start) { - unsigned i = 0; +auto Payload::getSequenceStartCount(const std::vector& Sequence, const std::string& Start) -> unsigned { + unsigned I = 0; - for (const auto& item : sequence) { - if (0 == item.rfind(start, 0)) { - i++; + for (const auto& Item : Sequence) { + if (0 == Item.rfind(Start, 0)) { + I++; } } - return i; + return I; } -std::vector Payload::generateSequence(std::vector> const& proportions) { - std::vector> prop = proportions; +auto Payload::generateSequence(std::vector> const& Proportions) + -> std::vector { + std::vector> Prop = Proportions; - prop.erase(std::remove_if(prop.begin(), prop.end(), [](auto const& pair) { return pair.second == 0; }), prop.end()); + Prop.erase(std::remove_if(Prop.begin(), Prop.end(), [](auto const& Pair) { return Pair.second == 0; }), Prop.end()); - std::vector sequence = {}; + std::vector Sequence = {}; - if (prop.size() == 0) { - return sequence; + if (Prop.size() == 0) { + return Sequence; } - auto it = prop.begin(); - auto insertIt = sequence.begin(); + auto It = Prop.begin(); + auto InsertIt = Sequence.begin(); - sequence.insert(insertIt, it->second, it->first); + Sequence.insert(InsertIt, It->second, It->first); - for (++it; it != prop.end(); ++it) { - for (unsigned i = 0; i < it->second; i++) { - insertIt = sequence.begin(); - std::advance(insertIt, 1 + floor(i * (sequence.size() + it->second - i) / 
(float)it->second)); - sequence.insert(insertIt, it->first); + for (++It; It != Prop.end(); ++It) { + for (unsigned I = 0; I < It->second; I++) { + InsertIt = Sequence.begin(); + std::advance(InsertIt, 1 + std::floor(I * (Sequence.size() + It->second - I) / static_cast(It->second))); + Sequence.insert(InsertIt, It->first); } } - return sequence; + return Sequence; } -unsigned Payload::getL2LoopCount(const std::vector& sequence, const unsigned numberOfLines, - const unsigned size, const unsigned threads) { - if (this->getL2SequenceCount(sequence) == 0) { +auto Payload::getL2LoopCount(const std::vector& Sequence, const unsigned NumberOfLines, + const unsigned Size, const unsigned Threads) -> unsigned { + if (getL2SequenceCount(Sequence) == 0) { return 0; } - return ( - 0.8 * size / 64 / threads / - (this->getL2SequenceCount(sequence) * this->getNumberOfSequenceRepetitions(sequence, numberOfLines / threads))); + return static_cast( + (0.8 * Size / 64 / Threads / + (getL2SequenceCount(Sequence) * getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Threads)))); } -unsigned Payload::getL3LoopCount(const std::vector& sequence, const unsigned numberOfLines, - const unsigned size, const unsigned threads) { - if (this->getL3SequenceCount(sequence) == 0) { +auto Payload::getL3LoopCount(const std::vector& Sequence, const unsigned NumberOfLines, + const unsigned Size, const unsigned Threads) -> unsigned { + if (getL3SequenceCount(Sequence) == 0) { return 0; } - return ( - 0.8 * size / 64 / threads / - (this->getL3SequenceCount(sequence) * this->getNumberOfSequenceRepetitions(sequence, numberOfLines / threads))); + return static_cast( + (0.8 * Size / 64 / Threads / + (getL3SequenceCount(Sequence) * getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Threads)))); } -unsigned Payload::getRAMLoopCount(const std::vector& sequence, const unsigned numberOfLines, - const unsigned size, const unsigned threads) { - if (this->getRAMSequenceCount(sequence) == 0) { +auto 
Payload::getRAMLoopCount(const std::vector& Sequence, const unsigned NumberOfLines, + const unsigned Size, const unsigned Threads) -> unsigned { + if (getRAMSequenceCount(Sequence) == 0) { return 0; } - return ( - 1.0 * size / 64 / threads / - (this->getRAMSequenceCount(sequence) * this->getNumberOfSequenceRepetitions(sequence, numberOfLines / threads))); + return static_cast( + (1.0 * Size / 64 / Threads / + (getRAMSequenceCount(Sequence) * getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Threads)))); } + +}; // namespace firestarter::environment::payload \ No newline at end of file diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp index 2c23d1c4..8e29715f 100644 --- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp @@ -25,61 +25,61 @@ using namespace firestarter::environment::x86::payload; using namespace asmjit; using namespace asmjit::x86; -int AVX512Payload::compilePayload(std::vector> const& proportion, - unsigned instructionCacheSize, std::list const& dataCacheBufferSize, - unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) { +auto AVX512Payload::compilePayload(std::vector> const& Proportion, + unsigned InstructionCacheSize, std::list const& DataCacheBufferSize, + unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters, + bool ErrorDetection) -> int { // Compute the sequence of instruction groups and the number of its repetions // to reach the desired size - auto sequence = this->generateSequence(proportion); - auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); + auto sequence = this->generateSequence(Proportion); + auto repetitions = this->getNumberOfSequenceRepetitions(sequence, NumberOfLines / Thread); // compute count of flops and memory access for performance report 
unsigned flops = 0; unsigned bytes = 0; for (const auto& item : sequence) { - auto it = this->instructionFlops.find(item); + auto it = this->InstructionFlops.find(item); - if (it == this->instructionFlops.end()) { + if (it == this->InstructionFlops.end()) { workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; return EXIT_FAILURE; } flops += it->second; - it = this->instructionMemory.find(item); + it = this->InstructionMemory.find(item); - if (it != this->instructionMemory.end()) { + if (it != this->InstructionMemory.end()) { bytes += it->second; } } - this->_flops = repetitions * flops; - this->_bytes = repetitions * bytes; - this->_instructions = repetitions * sequence.size() * 4 + 6; + this->Flops = repetitions * flops; + this->Bytes = repetitions * bytes; + this->Instructions = repetitions * sequence.size() * 4 + 6; // calculate the buffer sizes - auto l1i_cache_size = instructionCacheSize / thread; - auto dataCacheBufferSizeIterator = dataCacheBufferSize.begin(); - auto l1_size = *dataCacheBufferSizeIterator / thread; + auto l1i_cache_size = InstructionCacheSize / Thread; + auto dataCacheBufferSizeIterator = DataCacheBufferSize.begin(); + auto l1_size = *dataCacheBufferSizeIterator / Thread; std::advance(dataCacheBufferSizeIterator, 1); - auto l2_size = *dataCacheBufferSizeIterator / thread; + auto l2_size = *dataCacheBufferSizeIterator / Thread; std::advance(dataCacheBufferSizeIterator, 1); - auto l3_size = *dataCacheBufferSizeIterator / thread; - auto ram_size = ramBufferSize / thread; + auto l3_size = *dataCacheBufferSizeIterator / Thread; + auto ram_size = RamBufferSize / Thread; // calculate the reset counters for the buffers - auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); - auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); - auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); + auto l2_loop_count = 
getL2LoopCount(sequence, NumberOfLines, l2_size * Thread, Thread); + auto l3_loop_count = getL3LoopCount(sequence, NumberOfLines, l3_size * Thread, Thread); + auto ram_loop_count = getRAMLoopCount(sequence, NumberOfLines, ram_size * Thread, Thread); CodeHolder code; - code.init(this->rt.environment()); + code.init(this->Rt.environment()); - if (nullptr != this->loadFunction) { - this->rt.release(&this->loadFunction); + if (nullptr != this->LoadFunction) { + this->Rt.release(&this->LoadFunction); } Builder cb(&code); @@ -108,9 +108,8 @@ int AVX512Payload::compilePayload(std::vector> auto ram_reg = zmm30; FuncDetail func; - func.init(FuncSignatureT( - CallConvId::kCDecl), - this->rt.environment()); + func.init(FuncSignatureT(CallConvId::kCDecl), + this->Rt.environment()); FuncFrame frame; frame.init(func); @@ -306,7 +305,7 @@ int AVX512Payload::compilePayload(std::vector> } cb.movq(temp_reg, iter_reg); // restore iteration counter - if (this->getRAMSequenceCount(sequence) > 0) { + if (getRAMSequenceCount(sequence) > 0) { // reset RAM counter auto NoRamReset = cb.newLabel(); @@ -317,10 +316,10 @@ int AVX512Payload::compilePayload(std::vector> cb.add(ram_addr, Imm(l3_size)); cb.bind(NoRamReset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } cb.inc(temp_reg); // increment iteration counter - if (this->getL2SequenceCount(sequence) > 0) { + if (getL2SequenceCount(sequence) > 0) { // reset L2-Cache counter auto NoL2Reset = cb.newLabel(); @@ -331,10 +330,10 @@ int AVX512Payload::compilePayload(std::vector> cb.add(l2_addr, Imm(l1_size)); cb.bind(NoL2Reset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } cb.movq(iter_reg, temp_reg); // store iteration counter - if (this->getL3SequenceCount(sequence) > 0) { + if (getL3SequenceCount(sequence) > 0) { // reset L3-Cache counter auto NoL3Reset = cb.newLabel(); @@ -345,11 +344,11 @@ int AVX512Payload::compilePayload(std::vector> cb.add(l3_addr, 
Imm(l2_size)); cb.bind(NoL3Reset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } cb.mov(l1_addr, pointer_reg); - if (dumpRegisters) { + if (DumpRegisters) { auto SkipRegistersDump = cb.newLabel(); cb.test(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait)); @@ -366,7 +365,7 @@ int AVX512Payload::compilePayload(std::vector> cb.bind(SkipRegistersDump); } - if (errorDetection) { + if (ErrorDetection) { this->emitErrorDetectionCode(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); } @@ -384,7 +383,7 @@ int AVX512Payload::compilePayload(std::vector> // String sb; // cb.dump(sb); - Error err = this->rt.add(&this->loadFunction, &code); + Error err = this->Rt.add(&this->LoadFunction, &code); if (err) { workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; @@ -408,15 +407,15 @@ int AVX512Payload::compilePayload(std::vector> return EXIT_SUCCESS; } -std::list AVX512Payload::getAvailableInstructions() const { - std::list instructions; +auto AVX512Payload::getAvailableInstructions() const -> std::list { + std::list Instructions; - transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions), + transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(Instructions), [](const auto& item) { return item.first; }); - return instructions; + return Instructions; } -void AVX512Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) { - X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4); +void AVX512Payload::init(uint64_t* MemoryAddr, uint64_t BufferSize) { + X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4); } diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp index c925f538..f3905ff0 100644 --- 
a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp +++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp @@ -19,6 +19,7 @@ * Contact: daniel.hackenberg@tu-dresden.de *****************************************************************************/ +#include #include #include @@ -43,25 +44,25 @@ int AVXPayload::compilePayload(std::vector> con unsigned bytes = 0; for (const auto& item : sequence) { - auto it = this->instructionFlops.find(item); + auto it = this->InstructionFlops.find(item); - if (it == this->instructionFlops.end()) { + if (it == this->InstructionFlops.end()) { workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; return EXIT_FAILURE; } flops += it->second; - it = this->instructionMemory.find(item); + it = this->InstructionMemory.find(item); - if (it != this->instructionMemory.end()) { + if (it != this->InstructionMemory.end()) { bytes += it->second; } } - this->_flops = repetitions * flops; - this->_bytes = repetitions * bytes; - this->_instructions = repetitions * sequence.size() * 2 + 4; + this->Flops = repetitions * flops; + this->Bytes = repetitions * bytes; + this->Instructions = repetitions * sequence.size() * 2 + 4; // calculate the buffer sizes auto l1i_cache_size = instructionCacheSize / thread; @@ -79,10 +80,10 @@ int AVXPayload::compilePayload(std::vector> con auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); CodeHolder code; - code.init(this->rt.environment()); + code.init(this->Rt.environment()); - if (nullptr != this->loadFunction) { - this->rt.release(&this->loadFunction); + if (nullptr != this->LoadFunction) { + this->Rt.release(&this->LoadFunction); } Builder cb(&code); @@ -107,9 +108,8 @@ int AVXPayload::compilePayload(std::vector> con auto trans_regs = 6; FuncDetail func; - func.init(FuncSignatureT( - CallConvId::kCDecl), - this->rt.environment()); + func.init(FuncSignatureT(CallConvId::kCDecl), + this->Rt.environment()); FuncFrame frame; 
frame.init(func); @@ -244,12 +244,12 @@ int AVXPayload::compilePayload(std::vector> con cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.vmovapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest)); L1_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "L1_LS") { cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32)); cb.vmovapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest)); L1_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "L2_L") { cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l2_addr, 64)); L2_INCREMENT(); @@ -257,12 +257,12 @@ int AVXPayload::compilePayload(std::vector> con cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.vmovapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest)); L2_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "L2_LS") { cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l2_addr, 64)); cb.vmovapd(xmmword_ptr(l2_addr, 96), Xmm(add_dest)); L2_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "L3_L") { cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l3_addr, 64)); L3_INCREMENT(); @@ -270,17 +270,17 @@ int AVXPayload::compilePayload(std::vector> con cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); L3_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "L3_LS") { cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l3_addr, 64)); cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); L3_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "L3_P") { cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32)); cb.prefetcht0(ptr(l3_addr)); L3_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "RAM_L") { 
cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(ram_addr, 64)); RAM_INCREMENT(); @@ -288,24 +288,24 @@ int AVXPayload::compilePayload(std::vector> con cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); RAM_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "RAM_LS") { cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l3_addr, 64)); cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); RAM_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "RAM_P") { cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32)); cb.prefetcht2(ptr(ram_addr)); RAM_INCREMENT(); - this->_instructions++; + this->Instructions++; } else { workerLog::error() << "Instruction group " << item << " not found in " << this->name() << "."; return EXIT_FAILURE; } if (shift_regs > 1) { - this->_instructions++; + this->Instructions++; if (left) { cb.psrlw(Mm(shift_start + (shift_dst - shift_start + 3) % shift_regs), Mm(shift_dst)); } else { @@ -348,7 +348,7 @@ int AVXPayload::compilePayload(std::vector> con cb.add(ram_addr, Imm(l3_size)); cb.bind(NoRamReset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } if (this->getL2SequenceCount(sequence) > 0) { // reset L2-Cache counter @@ -361,7 +361,7 @@ int AVXPayload::compilePayload(std::vector> con cb.add(l2_addr, Imm(l1_size)); cb.bind(NoL2Reset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } if (this->getL3SequenceCount(sequence) > 0) { // reset L3-Cache counter @@ -374,7 +374,7 @@ int AVXPayload::compilePayload(std::vector> con cb.add(l3_addr, Imm(l2_size)); cb.bind(NoL3Reset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } cb.inc(iter_reg); // increment iteration counter cb.mov(l1_addr, pointer_reg); @@ -414,7 +414,7 @@ int 
AVXPayload::compilePayload(std::vector> con // String sb; // cb.dump(sb); - Error err = this->rt.add(&this->loadFunction, &code); + Error err = this->Rt.add(&this->LoadFunction, &code); if (err) { workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; @@ -441,12 +441,12 @@ int AVXPayload::compilePayload(std::vector> con std::list AVXPayload::getAvailableInstructions() const { std::list instructions; - transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions), + transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions), [](const auto& item) { return item.first; }); return instructions; } -void AVXPayload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) { +void AVXPayload::init(uint64_t* memoryAddr, uint64_t bufferSize) { X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10, 1.654738925401e-15); } diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp index 1e5ffa85..9df404e2 100644 --- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp @@ -19,6 +19,7 @@ * Contact: daniel.hackenberg@tu-dresden.de *****************************************************************************/ +#include #include #include @@ -43,25 +44,25 @@ int FMA4Payload::compilePayload(std::vector> co unsigned bytes = 0; for (const auto& item : sequence) { - auto it = this->instructionFlops.find(item); + auto it = this->InstructionFlops.find(item); - if (it == this->instructionFlops.end()) { + if (it == this->InstructionFlops.end()) { workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; return EXIT_FAILURE; } flops += it->second; - it = this->instructionMemory.find(item); + it = this->InstructionMemory.find(item); - if (it != 
this->instructionMemory.end()) { + if (it != this->InstructionMemory.end()) { bytes += it->second; } } - this->_flops = repetitions * flops; - this->_bytes = repetitions * bytes; - this->_instructions = repetitions * sequence.size() * 4 + 6; + this->Flops = repetitions * flops; + this->Bytes = repetitions * bytes; + this->Instructions = repetitions * sequence.size() * 4 + 6; // calculate the buffer sizes auto l1i_cache_size = instructionCacheSize / thread; @@ -79,10 +80,10 @@ int FMA4Payload::compilePayload(std::vector> co auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); CodeHolder code; - code.init(this->rt.environment()); + code.init(this->Rt.environment()); - if (nullptr != this->loadFunction) { - this->rt.release(&this->loadFunction); + if (nullptr != this->LoadFunction) { + this->Rt.release(&this->LoadFunction); } Builder cb(&code); @@ -111,9 +112,8 @@ int FMA4Payload::compilePayload(std::vector> co auto ram_reg = xmm15; FuncDetail func; - func.init(FuncSignatureT( - CallConvId::kCDecl), - this->rt.environment()); + func.init(FuncSignatureT(CallConvId::kCDecl), + this->Rt.environment()); FuncFrame frame; frame.init(func); @@ -325,7 +325,7 @@ int FMA4Payload::compilePayload(std::vector> co cb.add(ram_addr, Imm(l3_size)); cb.bind(NoRamReset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } cb.inc(temp_reg); // increment iteration counter if (this->getL2SequenceCount(sequence) > 0) { @@ -339,7 +339,7 @@ int FMA4Payload::compilePayload(std::vector> co cb.add(l2_addr, Imm(l1_size)); cb.bind(NoL2Reset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } cb.movq(iter_reg, temp_reg); // store iteration counter if (this->getL3SequenceCount(sequence) > 0) { @@ -353,7 +353,7 @@ int FMA4Payload::compilePayload(std::vector> co cb.add(l3_addr, Imm(l2_size)); cb.bind(NoL3Reset); // adds always two instruction - this->_instructions += 2; + this->Instructions 
+= 2; } cb.mov(l1_addr, pointer_reg); @@ -392,7 +392,7 @@ int FMA4Payload::compilePayload(std::vector> co // String sb; // cb.dump(sb); - Error err = this->rt.add(&this->loadFunction, &code); + Error err = this->Rt.add(&this->LoadFunction, &code); if (err) { workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; @@ -419,12 +419,12 @@ int FMA4Payload::compilePayload(std::vector> co std::list FMA4Payload::getAvailableInstructions() const { std::list instructions; - transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions), + transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions), [](const auto& item) { return item.first; }); return instructions; } -void FMA4Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) { +void FMA4Payload::init(uint64_t* memoryAddr, uint64_t bufferSize) { X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4); } diff --git a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp index 3a432bfb..ba6534a9 100644 --- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp +++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp @@ -43,25 +43,25 @@ int FMAPayload::compilePayload(std::vector> con unsigned bytes = 0; for (const auto& item : sequence) { - auto it = this->instructionFlops.find(item); + auto it = this->InstructionFlops.find(item); - if (it == this->instructionFlops.end()) { + if (it == this->InstructionFlops.end()) { workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; return EXIT_FAILURE; } flops += it->second; - it = this->instructionMemory.find(item); + it = this->InstructionMemory.find(item); - if (it != this->instructionMemory.end()) { + if (it != this->InstructionMemory.end()) { bytes += it->second; } } - this->_flops = 
repetitions * flops; - this->_bytes = repetitions * bytes; - this->_instructions = repetitions * sequence.size() * 4 + 6; + this->Flops = repetitions * flops; + this->Bytes = repetitions * bytes; + this->Instructions = repetitions * sequence.size() * 4 + 6; // calculate the buffer sizes auto l1i_cache_size = instructionCacheSize / thread; @@ -79,10 +79,10 @@ int FMAPayload::compilePayload(std::vector> con auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); CodeHolder code; - code.init(this->rt.environment()); + code.init(this->Rt.environment()); - if (nullptr != this->loadFunction) { - this->rt.release(&this->loadFunction); + if (nullptr != this->LoadFunction) { + this->Rt.release(&this->LoadFunction); } Builder cb(&code); @@ -111,9 +111,8 @@ int FMAPayload::compilePayload(std::vector> con auto ram_reg = ymm15; FuncDetail func; - func.init(FuncSignatureT( - CallConvId::kCDecl), - this->rt.environment()); + func.init(FuncSignatureT(CallConvId::kCDecl), + this->Rt.environment()); FuncFrame frame; frame.init(func); @@ -353,7 +352,7 @@ int FMAPayload::compilePayload(std::vector> con cb.add(ram_addr, Imm(l3_size)); cb.bind(NoRamReset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } cb.inc(temp_reg); // increment iteration counter if (this->getL2SequenceCount(sequence) > 0) { @@ -367,7 +366,7 @@ int FMAPayload::compilePayload(std::vector> con cb.add(l2_addr, Imm(l1_size)); cb.bind(NoL2Reset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } cb.movq(iter_reg, temp_reg); // store iteration counter if (this->getL3SequenceCount(sequence) > 0) { @@ -381,7 +380,7 @@ int FMAPayload::compilePayload(std::vector> con cb.add(l3_addr, Imm(l2_size)); cb.bind(NoL3Reset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } cb.mov(l1_addr, pointer_reg); @@ -420,7 +419,7 @@ int FMAPayload::compilePayload(std::vector> con // String sb; 
// cb.dump(sb); - Error err = this->rt.add(&this->loadFunction, &code); + Error err = this->Rt.add(&this->LoadFunction, &code); if (err) { workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; @@ -447,12 +446,12 @@ int FMAPayload::compilePayload(std::vector> con std::list FMAPayload::getAvailableInstructions() const { std::list instructions; - transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions), + transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions), [](const auto& item) { return item.first; }); return instructions; } -void FMAPayload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) { +void FMAPayload::init(uint64_t* memoryAddr, uint64_t bufferSize) { X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4); } diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp index d3d0147f..60a98ef1 100644 --- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp @@ -43,25 +43,25 @@ int SSE2Payload::compilePayload(std::vector> co unsigned bytes = 0; for (const auto& item : sequence) { - auto it = this->instructionFlops.find(item); + auto it = this->InstructionFlops.find(item); - if (it == this->instructionFlops.end()) { + if (it == this->InstructionFlops.end()) { workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; return EXIT_FAILURE; } flops += it->second; - it = this->instructionMemory.find(item); + it = this->InstructionMemory.find(item); - if (it != this->instructionMemory.end()) { + if (it != this->InstructionMemory.end()) { bytes += it->second; } } - this->_flops = repetitions * flops; - this->_bytes = repetitions * bytes; - this->_instructions = repetitions * sequence.size() * 2 + 4; + 
this->Flops = repetitions * flops; + this->Bytes = repetitions * bytes; + this->Instructions = repetitions * sequence.size() * 2 + 4; // calculate the buffer sizes auto l1i_cache_size = instructionCacheSize / thread; @@ -79,10 +79,10 @@ int SSE2Payload::compilePayload(std::vector> co auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); CodeHolder code; - code.init(this->rt.environment()); + code.init(this->Rt.environment()); - if (nullptr != this->loadFunction) { - this->rt.release(&this->loadFunction); + if (nullptr != this->LoadFunction) { + this->Rt.release(&this->LoadFunction); } Builder cb(&code); @@ -107,9 +107,7 @@ int SSE2Payload::compilePayload(std::vector> co auto trans_regs = 2; FuncDetail func; - func.init(FuncSignatureT( - CallConvId::kCDecl), - this->rt.environment()); + func.init(FuncSignatureT(CallConvId::kCDecl), this->Rt.environment()); FuncFrame frame; frame.init(func); @@ -241,12 +239,12 @@ int SSE2Payload::compilePayload(std::vector> co cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.movapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest)); L1_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "L1_LS") { cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32)); cb.movapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest)); L1_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "L2_L") { cb.addpd(Xmm(add_dest), xmmword_ptr(l2_addr, 64)); L2_INCREMENT(); @@ -254,12 +252,12 @@ int SSE2Payload::compilePayload(std::vector> co cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.movapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest)); L2_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "L2_LS") { cb.addpd(Xmm(add_dest), xmmword_ptr(l2_addr, 64)); cb.movapd(xmmword_ptr(l2_addr, 96), Xmm(add_dest)); L2_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if 
(item == "L3_L") { cb.addpd(Xmm(add_dest), xmmword_ptr(l3_addr, 64)); L3_INCREMENT(); @@ -267,17 +265,17 @@ int SSE2Payload::compilePayload(std::vector> co cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.movapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); L3_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "L3_LS") { cb.addpd(Xmm(add_dest), xmmword_ptr(l3_addr, 64)); cb.movapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); L3_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "L3_P") { cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32)); cb.prefetcht0(ptr(l3_addr)); L3_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "RAM_L") { cb.addpd(Xmm(add_dest), xmmword_ptr(ram_addr, 64)); RAM_INCREMENT(); @@ -285,24 +283,24 @@ int SSE2Payload::compilePayload(std::vector> co cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); cb.movapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); RAM_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "RAM_LS") { cb.addpd(Xmm(add_dest), xmmword_ptr(l3_addr, 64)); cb.movapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); RAM_INCREMENT(); - this->_instructions++; + this->Instructions++; } else if (item == "RAM_P") { cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32)); cb.prefetcht2(ptr(ram_addr)); RAM_INCREMENT(); - this->_instructions++; + this->Instructions++; } else { workerLog::error() << "Instruction group " << item << " not found in " << this->name() << "."; return EXIT_FAILURE; } if (mov_regs > 0) { - this->_instructions++; + this->Instructions++; cb.movq(Mm(mov_start + (movq_dst - mov_start + mov_regs - 1) % mov_regs), Mm(movq_dst)); } @@ -340,7 +338,7 @@ int SSE2Payload::compilePayload(std::vector> co cb.add(ram_addr, Imm(l3_size)); cb.bind(NoRamReset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } if 
(this->getL2SequenceCount(sequence) > 0) { // reset L2-Cache counter @@ -353,7 +351,7 @@ int SSE2Payload::compilePayload(std::vector> co cb.add(l2_addr, Imm(l1_size)); cb.bind(NoL2Reset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } if (this->getL3SequenceCount(sequence) > 0) { // reset L3-Cache counter @@ -366,7 +364,7 @@ int SSE2Payload::compilePayload(std::vector> co cb.add(l3_addr, Imm(l2_size)); cb.bind(NoL3Reset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } cb.inc(iter_reg); // increment iteration counter cb.mov(l1_addr, pointer_reg); @@ -406,7 +404,7 @@ int SSE2Payload::compilePayload(std::vector> co // String sb; // cb.dump(sb); - Error err = this->rt.add(&this->loadFunction, &code); + Error err = this->Rt.add(&this->LoadFunction, &code); if (err) { workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; @@ -433,12 +431,12 @@ int SSE2Payload::compilePayload(std::vector> co std::list SSE2Payload::getAvailableInstructions() const { std::list instructions; - transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions), + transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions), [](const auto& item) { return item.first; }); return instructions; } -void SSE2Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) { +void SSE2Payload::init(uint64_t* memoryAddr, uint64_t bufferSize) { X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10, 1.654738925401e-15); } diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp index 8d85dc2d..73175bd5 100644 --- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp @@ -32,7 +32,7 @@ using namespace 
firestarter::environment::x86::payload; -void X86Payload::lowLoadFunction(volatile unsigned long long* addrHigh, unsigned long long period) { +void X86Payload::lowLoadFunction(volatile uint64_t* addrHigh, uint64_t period) { int nap; #ifdef _MSC_VER std::array cpuid; @@ -69,53 +69,52 @@ void X86Payload::lowLoadFunction(volatile unsigned long long* addrHigh, unsigned } } -void X86Payload::init(unsigned long long* memoryAddr, unsigned long long bufferSize, double firstValue, - double lastValue) { - unsigned long long i = 0; +void X86Payload::init(uint64_t* memoryAddr, uint64_t bufferSize, double firstValue, double lastValue) { + uint64_t i = 0; for (; i < INIT_BLOCKSIZE; i++) *((double*)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * firstValue; for (; i <= bufferSize - INIT_BLOCKSIZE; i += INIT_BLOCKSIZE) - std::memcpy(memoryAddr + i, memoryAddr + i - INIT_BLOCKSIZE, sizeof(unsigned long long) * INIT_BLOCKSIZE); + std::memcpy(memoryAddr + i, memoryAddr + i - INIT_BLOCKSIZE, sizeof(uint64_t) * INIT_BLOCKSIZE); for (; i < bufferSize; i++) *((double*)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * lastValue; } -unsigned long long X86Payload::highLoadFunction(unsigned long long* addrMem, volatile unsigned long long* addrHigh, - unsigned long long iterations) { - return this->loadFunction(addrMem, addrHigh, iterations); +uint64_t X86Payload::highLoadFunction(uint64_t* addrMem, volatile uint64_t* addrHigh, uint64_t iterations) { + return this->LoadFunction(addrMem, addrHigh, iterations); } // add MM regs to dirty regs // zmm31 is used for backup if VectorReg is of type asmjit::x86::Zmm -template -void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder& cb, IterReg iter_reg, asmjit::x86::Gpq addrHigh_reg, +template +void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder& Cb, IterRegT IterReg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg, asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2) { // we don't want anything to break... 
so we use asserts for everything that // could break it - static_assert(std::is_base_of::value, "VectorReg must be of asmjit::asmjit::x86::Vec"); - static_assert(std::is_same::value || std::is_same::value || - std::is_same::value, + static_assert(std::is_base_of::value, "VectorReg must be of asmjit::asmjit::x86::Vec"); + static_assert(std::is_same::value || + std::is_same::value || + std::is_same::value, "VectorReg ist not of any supported type"); - static_assert(std::is_same::value || std::is_same::value, + static_assert(std::is_same::value || std::is_same::value, "IterReg is not of any supported type"); - if constexpr (std::is_same::value) { - assert((iter_reg == asmjit::x86::mm0, "iter_reg must be mm0")); + if constexpr (std::is_same::value) { + assert((IterReg == asmjit::x86::mm0, "iter_reg must be mm0")); } - assert((iter_reg != temp_reg, "iter_reg must be != temp_reg")); + assert((IterReg != temp_reg, "iter_reg must be != temp_reg")); assert((temp_reg != temp_reg2, "temp_reg must be != temp_reg2")); assert((temp_reg != addrHigh_reg, "temp_reg must be != addrHigh_reg")); assert((temp_reg != pointer_reg, "temp_reg must be != pointer_reg")); - assert((iter_reg != asmjit::x86::r8, "iter_reg must be != r8")); - assert((iter_reg != asmjit::x86::r9, "iter_reg must be != r9")); - assert((iter_reg != asmjit::x86::rax, "iter_reg must be != rax")); - assert((iter_reg != asmjit::x86::rbx, "iter_reg must be != rbx")); - assert((iter_reg != asmjit::x86::rcx, "iter_reg must be != rcx")); - assert((iter_reg != asmjit::x86::rdx, "iter_reg must be != rdx")); + assert((IterReg != asmjit::x86::r8, "iter_reg must be != r8")); + assert((IterReg != asmjit::x86::r9, "iter_reg must be != r9")); + assert((IterReg != asmjit::x86::rax, "iter_reg must be != rax")); + assert((IterReg != asmjit::x86::rbx, "iter_reg must be != rbx")); + assert((IterReg != asmjit::x86::rcx, "iter_reg must be != rcx")); + assert((IterReg != asmjit::x86::rdx, "iter_reg must be != rdx")); assert((temp_reg != 
asmjit::x86::r8, "temp_reg must be != r8")); assert((temp_reg != asmjit::x86::r9, "temp_reg must be != r9")); @@ -138,172 +137,172 @@ void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder& cb, IterReg iter_r assert((addrHigh_reg != asmjit::x86::rcx, "addrHigh_reg must be != rcx")); assert((addrHigh_reg != asmjit::x86::rdx, "addrHigh_reg must be != rdx")); - auto SkipErrorDetection = cb.newLabel(); + auto SkipErrorDetection = Cb.newLabel(); - if constexpr (std::is_same::value) { - cb.movq(temp_reg, iter_reg); + if constexpr (std::is_same::value) { + Cb.movq(temp_reg, IterReg); } else { - cb.mov(temp_reg, iter_reg); + Cb.mov(temp_reg, IterReg); } // round about 50-100 Hz // more or less, but this isn't really that relevant - cb.and_(temp_reg, asmjit::Imm(0x3fff)); - cb.test(temp_reg, temp_reg); - cb.jnz(SkipErrorDetection); + Cb.and_(temp_reg, asmjit::Imm(0x3fff)); + Cb.test(temp_reg, temp_reg); + Cb.jnz(SkipErrorDetection); - cb.mov(temp_reg, asmjit::Imm(0xffffffff)); + Cb.mov(temp_reg, asmjit::Imm(0xffffffff)); int registerCount = (int)this->registerCount(); // Create a backup of VectorReg(0) - if constexpr (std::is_same::value) { - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.push(temp_reg2); - cb.crc32(temp_reg, temp_reg2); - cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.push(temp_reg2); - cb.crc32(temp_reg, temp_reg2); - - } else if constexpr (std::is_same::value && - std::is_same::value) { - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.movq(asmjit::x86::Mm(7), temp_reg2); - cb.crc32(temp_reg, temp_reg2); - cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.movq(asmjit::x86::Mm(6), temp_reg2); - cb.crc32(temp_reg, temp_reg2); - - cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1)); - - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.movq(asmjit::x86::Mm(5), temp_reg2); - cb.crc32(temp_reg, temp_reg2); - cb.movhlps(asmjit::x86::xmm0, 
asmjit::x86::xmm0); - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.movq(asmjit::x86::Mm(4), temp_reg2); - cb.crc32(temp_reg, temp_reg2); - } else if constexpr (std::is_same::value && - std::is_same::value) { + if constexpr (std::is_same::value) { + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.push(temp_reg2); + Cb.crc32(temp_reg, temp_reg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.push(temp_reg2); + Cb.crc32(temp_reg, temp_reg2); + + } else if constexpr (std::is_same::value && + std::is_same::value) { + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.movq(asmjit::x86::Mm(7), temp_reg2); + Cb.crc32(temp_reg, temp_reg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.movq(asmjit::x86::Mm(6), temp_reg2); + Cb.crc32(temp_reg, temp_reg2); + + Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1)); + + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.movq(asmjit::x86::Mm(5), temp_reg2); + Cb.crc32(temp_reg, temp_reg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.movq(asmjit::x86::Mm(4), temp_reg2); + Cb.crc32(temp_reg, temp_reg2); + } else if constexpr (std::is_same::value && + std::is_same::value) { // We use vector registers zmm31 for our backup - cb.vmovapd(asmjit::x86::zmm31, asmjit::x86::zmm0); + Cb.vmovapd(asmjit::x86::zmm31, asmjit::x86::zmm0); registerCount--; } // Calculate the hash of the remaining VectorReg // use VectorReg(0) as a temporary place to unpack values for (int i = 1; i < registerCount; i++) { - if constexpr (std::is_same::value) { - cb.vmovapd(asmjit::x86::xmm0, asmjit::x86::Xmm(i)); - - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.crc32(temp_reg, temp_reg2); - cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.crc32(temp_reg, temp_reg2); - } else if constexpr (std::is_same::value) { - cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(i)); - - 
cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.crc32(temp_reg, temp_reg2); - cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.crc32(temp_reg, temp_reg2); - - cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1)); - - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.crc32(temp_reg, temp_reg2); - cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.crc32(temp_reg, temp_reg2); - } else if constexpr (std::is_same::value) { - cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(i)); - - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.crc32(temp_reg, temp_reg2); - cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.crc32(temp_reg, temp_reg2); - - cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1)); - - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.crc32(temp_reg, temp_reg2); - cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.crc32(temp_reg, temp_reg2); - - cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(i), asmjit::Imm(2)); - - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.crc32(temp_reg, temp_reg2); - cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.crc32(temp_reg, temp_reg2); - - cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(i), asmjit::Imm(3)); - - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.crc32(temp_reg, temp_reg2); - cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - cb.movq(temp_reg2, asmjit::x86::xmm0); - cb.crc32(temp_reg, temp_reg2); + if constexpr (std::is_same::value) { + Cb.vmovapd(asmjit::x86::xmm0, asmjit::x86::Xmm(i)); + + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.crc32(temp_reg, temp_reg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.crc32(temp_reg, temp_reg2); + } else if constexpr (std::is_same::value) { + Cb.vmovapd(asmjit::x86::ymm0, 
asmjit::x86::Ymm(i)); + + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.crc32(temp_reg, temp_reg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.crc32(temp_reg, temp_reg2); + + Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1)); + + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.crc32(temp_reg, temp_reg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.crc32(temp_reg, temp_reg2); + } else if constexpr (std::is_same::value) { + Cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(i)); + + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.crc32(temp_reg, temp_reg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.crc32(temp_reg, temp_reg2); + + Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1)); + + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.crc32(temp_reg, temp_reg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.crc32(temp_reg, temp_reg2); + + Cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(i), asmjit::Imm(2)); + + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.crc32(temp_reg, temp_reg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.crc32(temp_reg, temp_reg2); + + Cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(i), asmjit::Imm(3)); + + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.crc32(temp_reg, temp_reg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(temp_reg2, asmjit::x86::xmm0); + Cb.crc32(temp_reg, temp_reg2); } } // Restore VectorReg(0) from backup - if constexpr (std::is_same::value) { - cb.pop(temp_reg2); - cb.movq(asmjit::x86::xmm0, temp_reg2); - cb.movlhps(asmjit::x86::xmm0, asmjit::x86::xmm0); - cb.pop(temp_reg2); - cb.pinsrw(asmjit::x86::xmm0, temp_reg2.r32(), asmjit::Imm(0)); - cb.shr(temp_reg2, asmjit::Imm(32)); - cb.movd(temp_reg2.r32(), 
asmjit::x86::Mm(7)); - cb.pinsrw(asmjit::x86::xmm0, temp_reg2.r32(), asmjit::Imm(1)); - } else if constexpr (std::is_same::value && - std::is_same::value) { - cb.movq(temp_reg2, asmjit::x86::Mm(5)); - cb.movq(asmjit::x86::xmm0, temp_reg2); - cb.movq(temp_reg2, asmjit::x86::Mm(4)); - cb.pinsrq(asmjit::x86::xmm0, temp_reg2, asmjit::Imm(1)); - - cb.vinsertf128(asmjit::x86::ymm0, asmjit::x86::ymm0, asmjit::x86::xmm0, asmjit::Imm(1)); - - cb.movq(temp_reg2, asmjit::x86::Mm(7)); - cb.movq(asmjit::x86::xmm0, temp_reg2); - cb.movq(temp_reg2, asmjit::x86::Mm(6)); - cb.pinsrq(asmjit::x86::xmm0, temp_reg2, asmjit::Imm(1)); - } else if constexpr (std::is_same::value && - std::is_same::value) { + if constexpr (std::is_same::value) { + Cb.pop(temp_reg2); + Cb.movq(asmjit::x86::xmm0, temp_reg2); + Cb.movlhps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.pop(temp_reg2); + Cb.pinsrw(asmjit::x86::xmm0, temp_reg2.r32(), asmjit::Imm(0)); + Cb.shr(temp_reg2, asmjit::Imm(32)); + Cb.movd(temp_reg2.r32(), asmjit::x86::Mm(7)); + Cb.pinsrw(asmjit::x86::xmm0, temp_reg2.r32(), asmjit::Imm(1)); + } else if constexpr (std::is_same::value && + std::is_same::value) { + Cb.movq(temp_reg2, asmjit::x86::Mm(5)); + Cb.movq(asmjit::x86::xmm0, temp_reg2); + Cb.movq(temp_reg2, asmjit::x86::Mm(4)); + Cb.pinsrq(asmjit::x86::xmm0, temp_reg2, asmjit::Imm(1)); + + Cb.vinsertf128(asmjit::x86::ymm0, asmjit::x86::ymm0, asmjit::x86::xmm0, asmjit::Imm(1)); + + Cb.movq(temp_reg2, asmjit::x86::Mm(7)); + Cb.movq(asmjit::x86::xmm0, temp_reg2); + Cb.movq(temp_reg2, asmjit::x86::Mm(6)); + Cb.pinsrq(asmjit::x86::xmm0, temp_reg2, asmjit::Imm(1)); + } else if constexpr (std::is_same::value && + std::is_same::value) { // We use vector registers zmm31 for our backup - cb.vmovapd(asmjit::x86::zmm0, asmjit::x86::zmm31); + Cb.vmovapd(asmjit::x86::zmm0, asmjit::x86::zmm31); } // before starting the communication, backup r8, r9, rax, rbx, rcx and rdx - if constexpr (std::is_same::value) { - cb.movq(asmjit::x86::Mm(7), 
asmjit::x86::rax); - cb.movq(asmjit::x86::Mm(6), asmjit::x86::rbx); - cb.movq(asmjit::x86::Mm(5), asmjit::x86::rcx); - cb.movq(asmjit::x86::Mm(4), asmjit::x86::rdx); - cb.movq(asmjit::x86::Mm(3), asmjit::x86::r8); - cb.movq(asmjit::x86::Mm(2), asmjit::x86::r9); + if constexpr (std::is_same::value) { + Cb.movq(asmjit::x86::Mm(7), asmjit::x86::rax); + Cb.movq(asmjit::x86::Mm(6), asmjit::x86::rbx); + Cb.movq(asmjit::x86::Mm(5), asmjit::x86::rcx); + Cb.movq(asmjit::x86::Mm(4), asmjit::x86::rdx); + Cb.movq(asmjit::x86::Mm(3), asmjit::x86::r8); + Cb.movq(asmjit::x86::Mm(2), asmjit::x86::r9); } else { - cb.push(asmjit::x86::rax); - cb.push(asmjit::x86::rbx); - cb.push(asmjit::x86::rcx); - cb.push(asmjit::x86::rdx); - cb.push(asmjit::x86::r8); - cb.push(asmjit::x86::r9); + Cb.push(asmjit::x86::rax); + Cb.push(asmjit::x86::rbx); + Cb.push(asmjit::x86::rcx); + Cb.push(asmjit::x86::rdx); + Cb.push(asmjit::x86::r8); + Cb.push(asmjit::x86::r9); } // do the actual communication // temp_reg contains our hash // save the pointer_reg. it might be any of r8, r9, rax, rbx, rcx or rdx - cb.mov(temp_reg2, pointer_reg); + Cb.mov(temp_reg2, pointer_reg); // Don't touch me! // This sychronization and communication works even if the threads run at @@ -311,144 +310,144 @@ void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder& cb, IterReg iter_r // by a few hours of headache for two people. 
auto communication = [&](auto offset) { // communication - cb.mov(asmjit::x86::r8, asmjit::x86::ptr_64(temp_reg2, offset)); + Cb.mov(asmjit::x86::r8, asmjit::x86::ptr_64(temp_reg2, offset)); // temp data - cb.mov(asmjit::x86::r9, temp_reg2); - cb.add(asmjit::x86::r9, asmjit::Imm(offset + 8)); + Cb.mov(asmjit::x86::r9, temp_reg2); + Cb.add(asmjit::x86::r9, asmjit::Imm(offset + 8)); - cb.mov(asmjit::x86::rdx, asmjit::x86::ptr_64(asmjit::x86::r9, 0)); - cb.mov(asmjit::x86::rax, asmjit::x86::ptr_64(asmjit::x86::r9, 8)); + Cb.mov(asmjit::x86::rdx, asmjit::x86::ptr_64(asmjit::x86::r9, 0)); + Cb.mov(asmjit::x86::rax, asmjit::x86::ptr_64(asmjit::x86::r9, 8)); - auto L0 = cb.newLabel(); - cb.bind(L0); + auto L0 = Cb.newLabel(); + Cb.bind(L0); - cb.lock(); - cb.cmpxchg16b(asmjit::x86::ptr(asmjit::x86::r8)); + Cb.lock(); + Cb.cmpxchg16b(asmjit::x86::ptr(asmjit::x86::r8)); - auto L1 = cb.newLabel(); - cb.jnz(L1); + auto L1 = Cb.newLabel(); + Cb.jnz(L1); - cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx); - cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx); - cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0)); - cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0)); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0)); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0)); - cb.mov(asmjit::x86::rax, asmjit::Imm(2)); + Cb.mov(asmjit::x86::rax, asmjit::Imm(2)); - auto L6 = cb.newLabel(); - cb.jmp(L6); + auto L6 = Cb.newLabel(); + Cb.jmp(L6); - cb.bind(L1); + Cb.bind(L1); - cb.cmp(asmjit::x86::rcx, asmjit::x86::rdx); + Cb.cmp(asmjit::x86::rcx, asmjit::x86::rdx); - auto L2 = cb.newLabel(); - cb.jle(L2); + auto L2 = Cb.newLabel(); + Cb.jle(L2); - cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx); - cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), 
asmjit::x86::rbx); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx); - cb.jmp(L0); + Cb.jmp(L0); - cb.bind(L2); + Cb.bind(L2); - auto L3 = cb.newLabel(); + auto L3 = Cb.newLabel(); - cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0)); - cb.jne(L3); - cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0)); - cb.jne(L3); + Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0)); + Cb.jne(L3); + Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0)); + Cb.jne(L3); - cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::x86::rdx); - cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::x86::rax); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::x86::rdx); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::x86::rax); - cb.bind(L3); + Cb.bind(L3); - cb.cmp(asmjit::x86::rcx, asmjit::x86::ptr_64(asmjit::x86::r9, 16)); - cb.mov(asmjit::x86::rax, asmjit::Imm(4)); - cb.jne(L6); + Cb.cmp(asmjit::x86::rcx, asmjit::x86::ptr_64(asmjit::x86::r9, 16)); + Cb.mov(asmjit::x86::rax, asmjit::Imm(4)); + Cb.jne(L6); - cb.cmp(asmjit::x86::rbx, asmjit::x86::ptr_64(asmjit::x86::r9, 24)); - auto L4 = cb.newLabel(); - cb.jne(L4); + Cb.cmp(asmjit::x86::rbx, asmjit::x86::ptr_64(asmjit::x86::r9, 24)); + auto L4 = Cb.newLabel(); + Cb.jne(L4); - cb.mov(asmjit::x86::rax, asmjit::Imm(0)); + Cb.mov(asmjit::x86::rax, asmjit::Imm(0)); - auto L5 = cb.newLabel(); - cb.jmp(L5); + auto L5 = Cb.newLabel(); + Cb.jmp(L5); - cb.bind(L4); + Cb.bind(L4); - cb.mov(asmjit::x86::rax, asmjit::Imm(1)); + Cb.mov(asmjit::x86::rax, asmjit::Imm(1)); - cb.bind(L5); + Cb.bind(L5); - cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0)); - cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0)); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0)); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0)); - cb.bind(L6); + 
Cb.bind(L6); // if check failed - cb.cmp(asmjit::x86::rax, asmjit::Imm(1)); - auto L7 = cb.newLabel(); - cb.jne(L7); + Cb.cmp(asmjit::x86::rax, asmjit::Imm(1)); + auto L7 = Cb.newLabel(); + Cb.jne(L7); // write the error flag - cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 32), asmjit::Imm(1)); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 32), asmjit::Imm(1)); // stop the execution after some time - cb.mov(asmjit::x86::ptr_64(addrHigh_reg), asmjit::Imm(LOAD_STOP)); - cb.mfence(); + Cb.mov(asmjit::x86::ptr_64(addrHigh_reg), asmjit::Imm(LOAD_STOP)); + Cb.mfence(); - cb.bind(L7); + Cb.bind(L7); - auto L9 = cb.newLabel(); - cb.jmp(L9); + auto L9 = Cb.newLabel(); + Cb.jmp(L9); }; // left communication // move hash - cb.mov(asmjit::x86::rbx, temp_reg); + Cb.mov(asmjit::x86::rbx, temp_reg); // move iterations counter - if constexpr (std::is_same::value) { - cb.movq(asmjit::x86::rcx, iter_reg); + if constexpr (std::is_same::value) { + Cb.movq(asmjit::x86::rcx, IterReg); } else { - cb.mov(asmjit::x86::rcx, iter_reg); + Cb.mov(asmjit::x86::rcx, IterReg); } communication(-128); // right communication // move hash - cb.mov(asmjit::x86::rbx, temp_reg); + Cb.mov(asmjit::x86::rbx, temp_reg); // move iterations counter - if constexpr (std::is_same::value) { - cb.movq(asmjit::x86::rcx, iter_reg); + if constexpr (std::is_same::value) { + Cb.movq(asmjit::x86::rcx, IterReg); } else { - cb.mov(asmjit::x86::rcx, iter_reg); + Cb.mov(asmjit::x86::rcx, IterReg); } communication(-64); // restore r8, r9, rax, rbx, rcx and rdx - if constexpr (std::is_same::value) { - cb.movq(asmjit::x86::rax, asmjit::x86::Mm(7)); - cb.movq(asmjit::x86::rbx, asmjit::x86::Mm(6)); - cb.movq(asmjit::x86::rcx, asmjit::x86::Mm(5)); - cb.movq(asmjit::x86::rdx, asmjit::x86::Mm(4)); - cb.movq(asmjit::x86::r8, asmjit::x86::Mm(3)); - cb.movq(asmjit::x86::r9, asmjit::x86::Mm(2)); + if constexpr (std::is_same::value) { + Cb.movq(asmjit::x86::rax, asmjit::x86::Mm(7)); + Cb.movq(asmjit::x86::rbx, asmjit::x86::Mm(6)); + 
Cb.movq(asmjit::x86::rcx, asmjit::x86::Mm(5)); + Cb.movq(asmjit::x86::rdx, asmjit::x86::Mm(4)); + Cb.movq(asmjit::x86::r8, asmjit::x86::Mm(3)); + Cb.movq(asmjit::x86::r9, asmjit::x86::Mm(2)); } else { - cb.pop(asmjit::x86::r9); - cb.pop(asmjit::x86::r8); - cb.pop(asmjit::x86::rdx); - cb.pop(asmjit::x86::rcx); - cb.pop(asmjit::x86::rbx); - cb.pop(asmjit::x86::rax); + Cb.pop(asmjit::x86::r9); + Cb.pop(asmjit::x86::r8); + Cb.pop(asmjit::x86::rdx); + Cb.pop(asmjit::x86::rcx); + Cb.pop(asmjit::x86::rbx); + Cb.pop(asmjit::x86::rax); } - cb.bind(SkipErrorDetection); + Cb.bind(SkipErrorDetection); } template void X86Payload::emitErrorDetectionCode( diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp index b933dcd1..ac7550e1 100644 --- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp +++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp @@ -43,25 +43,25 @@ int ZENFMAPayload::compilePayload(std::vector> unsigned bytes = 0; for (const auto& item : sequence) { - auto it = this->instructionFlops.find(item); + auto it = this->InstructionFlops.find(item); - if (it == this->instructionFlops.end()) { + if (it == this->InstructionFlops.end()) { workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; return EXIT_FAILURE; } flops += it->second; - it = this->instructionMemory.find(item); + it = this->InstructionMemory.find(item); - if (it != this->instructionMemory.end()) { + if (it != this->InstructionMemory.end()) { bytes += it->second; } } - this->_flops = repetitions * flops; - this->_bytes = repetitions * bytes; - this->_instructions = repetitions * sequence.size() * 4 + 6; + this->Flops = repetitions * flops; + this->Bytes = repetitions * bytes; + this->Instructions = repetitions * sequence.size() * 4 + 6; // calculate the buffer sizes auto l1i_cache_size = instructionCacheSize / thread; @@ -79,10 +79,10 @@ int 
ZENFMAPayload::compilePayload(std::vector> auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); CodeHolder code; - code.init(this->rt.environment()); + code.init(this->Rt.environment()); - if (nullptr != this->loadFunction) { - this->rt.release(&this->loadFunction); + if (nullptr != this->LoadFunction) { + this->Rt.release(&this->LoadFunction); } Builder cb(&code); @@ -108,9 +108,8 @@ int ZENFMAPayload::compilePayload(std::vector> auto ram_reg = ymm15; FuncDetail func; - func.init(FuncSignatureT( - CallConvId::kCDecl), - this->rt.environment()); + func.init(FuncSignatureT(CallConvId::kCDecl), + this->Rt.environment()); FuncFrame frame; frame.init(func); @@ -304,7 +303,7 @@ int ZENFMAPayload::compilePayload(std::vector> cb.add(ram_addr, Imm(l3_size)); cb.bind(NoRamReset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } cb.inc(temp_reg); // increment iteration counter if (this->getL2SequenceCount(sequence) > 0) { @@ -318,7 +317,7 @@ int ZENFMAPayload::compilePayload(std::vector> cb.add(l2_addr, Imm(l1_size)); cb.bind(NoL2Reset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } cb.movq(iter_reg, temp_reg); // store iteration counter if (this->getL3SequenceCount(sequence) > 0) { @@ -332,7 +331,7 @@ int ZENFMAPayload::compilePayload(std::vector> cb.add(l3_addr, Imm(l2_size)); cb.bind(NoL3Reset); // adds always two instruction - this->_instructions += 2; + this->Instructions += 2; } cb.mov(l1_addr, pointer_reg); @@ -371,7 +370,7 @@ int ZENFMAPayload::compilePayload(std::vector> // String sb; // cb.dump(sb); - Error err = this->rt.add(&this->loadFunction, &code); + Error err = this->Rt.add(&this->LoadFunction, &code); if (err) { workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; @@ -398,12 +397,12 @@ int ZENFMAPayload::compilePayload(std::vector> std::list 
ZENFMAPayload::getAvailableInstructions() const { std::list instructions; - transform(this->instructionFlops.begin(), this->instructionFlops.end(), back_inserter(instructions), + transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions), [](const auto& item) { return item.first; }); return instructions; } -void ZENFMAPayload::init(unsigned long long* memoryAddr, unsigned long long bufferSize) { +void ZENFMAPayload::init(uint64_t* memoryAddr, uint64_t bufferSize) { X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4); } diff --git a/src/firestarter/Environment/X86/X86CPUTopology.cpp b/src/firestarter/Environment/X86/X86CPUTopology.cpp index 6e7eb288..dae61165 100644 --- a/src/firestarter/Environment/X86/X86CPUTopology.cpp +++ b/src/firestarter/Environment/X86/X86CPUTopology.cpp @@ -35,29 +35,29 @@ using namespace firestarter::environment::x86; X86CPUTopology::X86CPUTopology() : CPUTopology("x86_64") - , cpuInfo(asmjit::CpuInfo::host()) - , _vendor(this->cpuInfo.vendor()) { + , CpuInfo(asmjit::CpuInfo::host()) + , Vendor(this->CpuInfo.vendor()) { std::stringstream ss; ss << "Family " << this->familyId() << ", Model " << this->modelId() << ", Stepping " << this->stepping(); - this->_model = ss.str(); + this->Model = ss.str(); for (int i = 0; i <= (int)asmjit::CpuFeatures::X86::Id::kMaxValue; i++) { - if (!this->cpuInfo.hasFeature(i)) { + if (!this->CpuInfo.hasFeature(i)) { continue; } asmjit::String sb; - auto error = asmjit::Formatter::formatFeature(sb, this->cpuInfo.arch(), i); + auto error = asmjit::Formatter::formatFeature(sb, this->CpuInfo.arch(), i); if (error != asmjit::ErrorCode::kErrorOk) { log::warn() << "Formatting cpu features got asmjit error: " << error; } - this->featureList.push_back(std::string(sb.data())); + this->FeatureList.push_back(std::string(sb.data())); } - unsigned long long a = 0, b = 0, c = 0, d = 0; + uint64_t a = 0, b = 0, c = 0, d = 0; // check if we have rdtsc 
this->cpuid(&a, &b, &c, &d); @@ -65,9 +65,9 @@ X86CPUTopology::X86CPUTopology() a = 1; this->cpuid(&a, &b, &c, &d); if ((int)d & (1 << 4)) { - this->_hasRdtsc = true; + this->HasRdtsc = true; } else { - this->_hasRdtsc = false; + this->HasRdtsc = false; } } @@ -75,7 +75,7 @@ X86CPUTopology::X86CPUTopology() if (this->hasRdtsc()) { a = 0, b = 0, c = 0, d = 0; - this->_hasInvariantRdtsc = true; + this->HasInvariantRdtsc = true; /* TSCs are usable if CPU supports only one frequency in C0 (no speedstep/Cool'n'Quite) @@ -88,7 +88,7 @@ X86CPUTopology::X86CPUTopology() this->cpuid(&a, &b, &c, &d); /* no Frequency control */ if ((!(d & (1 << 22))) && (!(c & (1 << 7)))) { - this->_hasInvariantRdtsc = true; + this->HasInvariantRdtsc = true; } else { a = 0x80000000; this->cpuid(&a, &b, &c, &d); @@ -97,7 +97,7 @@ X86CPUTopology::X86CPUTopology() this->cpuid(&a, &b, &c, &d); /* invariant TSC */ if (d & (1 << 8)) { - this->_hasInvariantRdtsc = true; + this->HasInvariantRdtsc = true; } } } @@ -113,17 +113,17 @@ X86CPUTopology::X86CPUTopology() /* no Frequency control */ if ((!(d & (1 << 7))) && (!(d & (1 << 1)))) { - this->_hasInvariantRdtsc = true; + this->HasInvariantRdtsc = true; } /* invariant TSC */ if (d & (1 << 8)) { - this->_hasInvariantRdtsc = true; + this->HasInvariantRdtsc = true; } } /* assuming no frequency control if cpuid does not provide the extended function to test for it */ else { - this->_hasInvariantRdtsc = true; + this->HasInvariantRdtsc = true; } } } @@ -133,14 +133,14 @@ X86CPUTopology::X86CPUTopology() // only constant TSCs will be used (i.e. 
power management indepent TSCs) // save frequency in highest P-State or use generic fallback if no invarient TSC // is available -unsigned long long X86CPUTopology::clockrate() const { +uint64_t X86CPUTopology::clockrate() const { typedef std::chrono::high_resolution_clock Clock; typedef std::chrono::microseconds ticks; - unsigned long long start1_tsc, start2_tsc, end1_tsc, end2_tsc; - unsigned long long time_diff; - unsigned long long clock_lower_bound, clock_upper_bound, clock; - unsigned long long clockrate = 0; + uint64_t start1_tsc, start2_tsc, end1_tsc, end2_tsc; + uint64_t time_diff; + uint64_t clock_lower_bound, clock_upper_bound, clock; + uint64_t clockrate = 0; int i, num_measurements = 0, min_measurements; Clock::time_point start_time, end_time; @@ -207,11 +207,11 @@ unsigned long long X86CPUTopology::clockrate() const { return clockrate; } -unsigned long long X86CPUTopology::timestamp() const { +uint64_t X86CPUTopology::timestamp() const { #ifndef _MSC_VER - unsigned long long reg_a, reg_d; + uint64_t reg_a, reg_d; #else - unsigned long long i; + uint64_t i; #endif if (!this->hasRdtsc()) { @@ -227,11 +227,9 @@ unsigned long long X86CPUTopology::timestamp() const { #endif } -void X86CPUTopology::cpuid(unsigned long long* a, unsigned long long* b, unsigned long long* c, - unsigned long long* d) const { +void X86CPUTopology::cpuid(uint64_t* a, uint64_t* b, uint64_t* c, uint64_t* d) const { #ifndef _MSC_VER - unsigned long long reg_a, reg_b, reg_c, reg_d; - + uint64_t reg_a, reg_b, reg_c, reg_d; __asm__ __volatile__("cpuid;" : "=a"(reg_a), "=b"(reg_b), "=c"(reg_c), "=d"(reg_d) : "a"(*a), "b"(*b), "c"(*c), "d"(*d)); diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp index b923fbf4..508b01c6 100644 --- a/src/firestarter/Environment/X86/X86Environment.cpp +++ b/src/firestarter/Environment/X86/X86Environment.cpp @@ -29,14 +29,14 @@ using namespace firestarter::environment::x86; void 
X86Environment::evaluateFunctions() { - for (auto ctor : this->platformConfigsCtor) { + for (auto ctor : this->PlatformConfigsCtor) { // add asmjit for model and family detection - this->platformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(), + this->PlatformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(), this->topology().modelId(), this->topology().numThreadsPerCore())); } - for (auto ctor : this->fallbackPlatformConfigsCtor) { - this->fallbackPlatformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(), + for (auto ctor : this->FallbackPlatformConfigsCtor) { + this->FallbackPlatformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(), this->topology().modelId(), this->topology().numThreadsPerCore())); } } @@ -46,7 +46,7 @@ int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePay std::string defaultPayloadName(""); // if functionId is 0 get the default or fallback - for (auto config : this->platformConfigs) { + for (auto config : this->PlatformConfigs) { for (auto const& [thread, functionName] : config->getThreadMap()) { // the selected function if (id == functionId) { @@ -58,14 +58,14 @@ int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePay } } // found function - this->_selectedConfig = new ::firestarter::environment::platform::RuntimeConfig( + this->SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig( *config, thread, this->topology().instructionCacheSize()); return EXIT_SUCCESS; } // default function if (0 == functionId && config->isDefault()) { if (thread == this->topology().numThreadsPerCore()) { - this->_selectedConfig = new ::firestarter::environment::platform::RuntimeConfig( + this->SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig( *config, thread, this->topology().instructionCacheSize()); return EXIT_SUCCESS; } else { 
@@ -91,7 +91,7 @@ int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePay // loop over available implementation and check if they are marked as // fallback - for (auto config : this->fallbackPlatformConfigs) { + for (auto config : this->FallbackPlatformConfigs) { if (config->isAvailable()) { auto selectedThread = 0; auto selectedFunctionName = std::string(""); @@ -105,7 +105,7 @@ int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePay selectedThread = config->getThreadMap().begin()->first; selectedFunctionName = config->getThreadMap().begin()->second; } - this->_selectedConfig = new ::firestarter::environment::platform::RuntimeConfig( + this->SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig( *config, selectedThread, this->topology().instructionCacheSize()); log::warn() << "Using function " << selectedFunctionName << " as fallback.\n" << "You can use the parameter --function to try other " @@ -200,7 +200,7 @@ void X86Environment::printFunctionSummary() { unsigned id = 1; - for (auto const& config : this->platformConfigs) { + for (auto const& config : this->PlatformConfigs) { for (auto const& [thread, functionName] : config->getThreadMap()) { const char* available = config->isAvailable() ? 
"yes" : "no"; const char* fmt = " %4u | %-30s | %-24s | %s"; diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp index 7dd511f5..0df2c6c3 100644 --- a/src/firestarter/Firestarter.cpp +++ b/src/firestarter/Firestarter.cpp @@ -40,81 +40,81 @@ extern "C" { using namespace firestarter; -Firestarter::Firestarter(const int argc, const char** argv, std::chrono::seconds const& timeout, unsigned loadPercent, - std::chrono::microseconds const& period, unsigned requestedNumThreads, - std::string const& cpuBind, bool printFunctionSummary, unsigned functionId, - bool listInstructionGroups, std::string const& instructionGroups, unsigned lineCount, - bool allowUnavailablePayload, bool dumpRegisters, - std::chrono::seconds const& dumpRegistersTimeDelta, std::string const& dumpRegistersOutpath, - bool errorDetection, int gpus, unsigned gpuMatrixSize, bool gpuUseFloat, bool gpuUseDouble, - bool listMetrics, bool measurement, std::chrono::milliseconds const& startDelta, - std::chrono::milliseconds const& stopDelta, - std::chrono::milliseconds const& measurementInterval, - std::vector const& metricPaths, std::vector const& stdinMetrics, - bool optimize, std::chrono::seconds const& preheat, std::string const& optimizationAlgorithm, - std::vector const& optimizationMetrics, - std::chrono::seconds const& evaluationDuration, unsigned individuals, - std::string const& optimizeOutfile, unsigned generations, double nsga2_cr, double nsga2_m) - : _argc(argc) - , _argv(argv) - , _timeout(timeout) - , _loadPercent(loadPercent) - , _period(period) - , _dumpRegisters(dumpRegisters) - , _dumpRegistersTimeDelta(dumpRegistersTimeDelta) - , _dumpRegistersOutpath(dumpRegistersOutpath) - , _errorDetection(errorDetection) - , _gpus(gpus) - , _gpuMatrixSize(gpuMatrixSize) - , _gpuUseFloat(gpuUseFloat) - , _gpuUseDouble(gpuUseDouble) - , _startDelta(startDelta) - , _stopDelta(stopDelta) - , _measurement(measurement) - , _optimize(optimize) - , _preheat(preheat) - , 
_optimizationAlgorithm(optimizationAlgorithm) - , _optimizationMetrics(optimizationMetrics) - , _evaluationDuration(evaluationDuration) - , _individuals(individuals) - , _optimizeOutfile(optimizeOutfile) - , _generations(generations) - , _nsga2_cr(nsga2_cr) - , _nsga2_m(nsga2_m) { +Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds const& Timeout, unsigned LoadPercent, + std::chrono::microseconds const& Period, unsigned RequestedNumThreads, + std::string const& CpuBind, bool PrintFunctionSummary, unsigned FunctionId, + bool ListInstructionGroups, std::string const& InstructionGroups, unsigned LineCount, + bool AllowUnavailablePayload, bool DumpRegisters, + std::chrono::seconds const& DumpRegistersTimeDelta, std::string const& DumpRegistersOutpath, + bool ErrorDetection, int Gpus, unsigned GpuMatrixSize, bool GpuUseFloat, bool GpuUseDouble, + bool ListMetrics, bool Measurement, std::chrono::milliseconds const& StartDelta, + std::chrono::milliseconds const& StopDelta, + std::chrono::milliseconds const& MeasurementInterval, + std::vector const& MetricPaths, std::vector const& StdinMetrics, + bool Optimize, std::chrono::seconds const& Preheat, std::string const& OptimizationAlgorithm, + std::vector const& OptimizationMetrics, + std::chrono::seconds const& EvaluationDuration, unsigned Individuals, + std::string const& OptimizeOutfile, unsigned Generations, double Nsga2Cr, double Nsga2M) + : Argc(Argc) + , Argv(Argv) + , Timeout(Timeout) + , LoadPercent(LoadPercent) + , Period(Period) + , DumpRegisters(DumpRegisters) + , DumpRegistersTimeDelta(DumpRegistersTimeDelta) + , DumpRegistersOutpath(DumpRegistersOutpath) + , ErrorDetection(ErrorDetection) + , Gpus(Gpus) + , GpuMatrixSize(GpuMatrixSize) + , GpuUseFloat(GpuUseFloat) + , GpuUseDouble(GpuUseDouble) + , StartDelta(StartDelta) + , StopDelta(StopDelta) + , Measurement(Measurement) + , Optimize(Optimize) + , Preheat(Preheat) + , OptimizationAlgorithm(OptimizationAlgorithm) + , 
OptimizationMetrics(OptimizationMetrics) + , EvaluationDuration(EvaluationDuration) + , Individuals(Individuals) + , OptimizeOutfile(OptimizeOutfile) + , Generations(Generations) + , Nsga2Cr(Nsga2Cr) + , Nsga2M(Nsga2M) { int returnCode; - _load = (_period * _loadPercent) / 100; - if (_loadPercent == 100 || _load == std::chrono::microseconds::zero()) { - _period = std::chrono::microseconds::zero(); + Load = (Period * LoadPercent) / 100; + if (LoadPercent == 100 || Load == std::chrono::microseconds::zero()) { + this->Period = std::chrono::microseconds::zero(); } #if defined(linux) || defined(__linux__) #else - (void)listMetrics; - (void)measurementInterval; - (void)metricPaths; - (void)stdinMetrics; + (void)ListMetrics; + (void)MeasurementInterval; + (void)MetricPaths; + (void)StdinMetrics; #endif #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) - this->_environment = new environment::x86::X86Environment(); + this->Environment = new environment::x86::X86Environment(); #endif - if (EXIT_SUCCESS != (returnCode = this->environment().evaluateCpuAffinity(requestedNumThreads, cpuBind))) { + if (EXIT_SUCCESS != (returnCode = this->environment().evaluateCpuAffinity(RequestedNumThreads, CpuBind))) { std::exit(returnCode); } #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) // Error detection uses crc32 instruction added by the SSE4.2 extension to x86 - if (_errorDetection) { - if (!_environment->topology().featuresAsmjit().has(asmjit::CpuFeatures::X86::kSSE4_2)) { + if (ErrorDetection) { + if (!Environment->topology().featuresAsmjit().has(asmjit::CpuFeatures::X86::kSSE4_2)) { throw std::invalid_argument("Option --error-detection requires the crc32 " "instruction added with SSE_4_2.\n"); } } #endif - if (_errorDetection && this->environment().requestedNumThreads() < 2) { + if (ErrorDetection && this->environment().requestedNumThreads() < 2) { throw std::invalid_argument("Option --error-detection must run with 
2 or more threads. Number of " "threads is " + std::to_string(this->environment().requestedNumThreads()) + "\n"); @@ -122,43 +122,43 @@ Firestarter::Firestarter(const int argc, const char** argv, std::chrono::seconds this->environment().evaluateFunctions(); - if (printFunctionSummary) { + if (PrintFunctionSummary) { this->environment().printFunctionSummary(); std::exit(EXIT_SUCCESS); } - if (EXIT_SUCCESS != (returnCode = this->environment().selectFunction(functionId, allowUnavailablePayload))) { + if (EXIT_SUCCESS != (returnCode = this->environment().selectFunction(FunctionId, AllowUnavailablePayload))) { std::exit(returnCode); } - if (listInstructionGroups) { + if (ListInstructionGroups) { this->environment().printAvailableInstructionGroups(); std::exit(EXIT_SUCCESS); } - if (!instructionGroups.empty()) { - if (EXIT_SUCCESS != (returnCode = this->environment().selectInstructionGroups(instructionGroups))) { + if (!InstructionGroups.empty()) { + if (EXIT_SUCCESS != (returnCode = this->environment().selectInstructionGroups(InstructionGroups))) { std::exit(returnCode); } } - if (lineCount != 0) { - this->environment().setLineCount(lineCount); + if (LineCount != 0) { + this->environment().setLineCount(LineCount); } #if defined(linux) || defined(__linux__) - if (_measurement || listMetrics || _optimize) { - _measurementWorker = std::make_shared( - measurementInterval, this->environment().requestedNumThreads(), metricPaths, stdinMetrics); + if (Measurement || ListMetrics || Optimize) { + MeasurementWorker = std::make_shared( + MeasurementInterval, this->environment().requestedNumThreads(), MetricPaths, StdinMetrics); - if (listMetrics) { - log::info() << _measurementWorker->availableMetrics(); + if (ListMetrics) { + log::info() << MeasurementWorker->availableMetrics(); std::exit(EXIT_SUCCESS); } // init all metrics - auto all = _measurementWorker->metricNames(); - auto initialized = _measurementWorker->initMetrics(all); + auto all = MeasurementWorker->metricNames(); + 
auto initialized = MeasurementWorker->initMetrics(all); if (initialized.size() == 0) { log::error() << "No metrics initialized"; @@ -166,7 +166,7 @@ Firestarter::Firestarter(const int argc, const char** argv, std::chrono::seconds } // check if selected metrics are initialized - for (auto const& optimizationMetric : optimizationMetrics) { + for (auto const& optimizationMetric : OptimizationMetrics) { auto nameEqual = [optimizationMetric](auto const& name) { auto invertedName = "-" + name; return name.compare(optimizationMetric) == 0 || invertedName.compare(optimizationMetric) == 0; @@ -184,71 +184,71 @@ Firestarter::Firestarter(const int argc, const char** argv, std::chrono::seconds } } - if (_optimize) { + if (Optimize) { auto applySettings = std::bind( [this](std::vector> const& setting) { using Clock = std::chrono::high_resolution_clock; auto start = Clock::now(); - for (auto& thread : this->loadThreads) { + for (auto& thread : this->LoadThreads) { auto td = thread.second; td->config().setPayloadSettings(setting); } - for (auto const& thread : this->loadThreads) { + for (auto const& thread : this->LoadThreads) { auto td = thread.second; - td->mutex.lock(); + td->Mutex.lock(); } - for (auto const& thread : this->loadThreads) { + for (auto const& thread : this->LoadThreads) { auto td = thread.second; - td->comm = THREAD_SWITCH; - td->mutex.unlock(); + td->Comm = THREAD_SWITCH; + td->Mutex.unlock(); } - this->loadVar = LOAD_SWITCH; + this->LoadVar = LOAD_SWITCH; - for (auto const& thread : this->loadThreads) { + for (auto const& thread : this->LoadThreads) { auto td = thread.second; bool ack; do { - td->mutex.lock(); - ack = td->ack; - td->mutex.unlock(); + td->Mutex.lock(); + ack = td->Ack; + td->Mutex.unlock(); } while (!ack); - td->mutex.lock(); - td->ack = false; - td->mutex.unlock(); + td->Mutex.lock(); + td->Ack = false; + td->Mutex.unlock(); } - this->loadVar = LOAD_HIGH; + this->LoadVar = LOAD_HIGH; this->signalWork(); - unsigned long long startTimestamp = 
0xffffffffffffffff; - unsigned long long stopTimestamp = 0; + uint64_t startTimestamp = 0xffffffffffffffff; + uint64_t stopTimestamp = 0; - for (auto const& thread : this->loadThreads) { + for (auto const& thread : this->LoadThreads) { auto td = thread.second; - if (startTimestamp > td->lastStartTsc) { - startTimestamp = td->lastStartTsc; + if (startTimestamp > td->LastStartTsc) { + startTimestamp = td->LastStartTsc; } - if (stopTimestamp < td->lastStopTsc) { - stopTimestamp = td->lastStopTsc; + if (stopTimestamp < td->LastStopTsc) { + stopTimestamp = td->LastStopTsc; } } - for (auto const& thread : this->loadThreads) { + for (auto const& thread : this->LoadThreads) { auto td = thread.second; - ipc_estimate_metric_insert((double)td->lastIterations * - (double)this->loadThreads.front().second->config().payload().instructions() / - (double)(stopTimestamp - startTimestamp)); + ipcEstimateMetricInsert((double)td->LastIterations * + (double)this->LoadThreads.front().second->config().payload().instructions() / + (double)(stopTimestamp - startTimestamp)); } auto end = Clock::now(); @@ -259,18 +259,18 @@ Firestarter::Firestarter(const int argc, const char** argv, std::chrono::seconds std::placeholders::_1); auto prob = std::make_shared( - std::move(applySettings), _measurementWorker, _optimizationMetrics, _evaluationDuration, _startDelta, - _stopDelta, this->environment().selectedConfig().payloadItems()); + std::move(applySettings), MeasurementWorker, OptimizationMetrics, EvaluationDuration, StartDelta, StopDelta, + this->environment().selectedConfig().payloadItems()); - _population = firestarter::optimizer::Population(std::move(prob)); + Population = firestarter::optimizer::Population(std::move(prob)); - if (_optimizationAlgorithm == "NSGA2") { - _algorithm = std::make_unique(_generations, _nsga2_cr, _nsga2_m); + if (OptimizationAlgorithm == "NSGA2") { + Algorithm = std::make_unique(Generations, Nsga2Cr, Nsga2M); } else { - throw std::invalid_argument("Algorithm " + 
_optimizationAlgorithm + " unknown."); + throw std::invalid_argument("Algorithm " + OptimizationAlgorithm + " unknown."); } - _algorithm->checkPopulation(static_cast(_population), _individuals); + Algorithm->checkPopulation(static_cast(Population), Individuals); } #endif @@ -280,7 +280,7 @@ Firestarter::Firestarter(const int argc, const char** argv, std::chrono::seconds // setup thread with either high or low load configured at the start // low loads has to know the length of the period - if (EXIT_SUCCESS != (returnCode = this->initLoadWorkers((_loadPercent == 0), _period.count()))) { + if (EXIT_SUCCESS != (returnCode = this->initLoadWorkers((LoadPercent == 0), Period.count()))) { std::exit(returnCode); } @@ -301,7 +301,7 @@ Firestarter::~Firestarter() { _oneapi.reset(); #endif - delete _environment; + delete Environment; } void Firestarter::mainThread() { @@ -317,42 +317,42 @@ void Firestarter::mainThread() { #if defined(linux) || defined(__linux__) // if measurement is enabled, start it here - if (_measurement) { - _measurementWorker->startMeasurement(); + if (Measurement) { + MeasurementWorker->startMeasurement(); } #endif this->signalWork(); #ifdef FIRESTARTER_DEBUG_FEATURES - if (_dumpRegisters) { + if (DumpRegisters) { int returnCode; - if (EXIT_SUCCESS != (returnCode = this->initDumpRegisterWorker(_dumpRegistersTimeDelta, _dumpRegistersOutpath))) { + if (EXIT_SUCCESS != (returnCode = this->initDumpRegisterWorker(DumpRegistersTimeDelta, DumpRegistersOutpath))) { std::exit(returnCode); } } #endif // worker thread for load control - this->watchdogWorker(_period, _load, _timeout); + this->watchdogWorker(Period, Load, Timeout); #if defined(linux) || defined(__linux__) // check if optimization is selected - if (_optimize) { + if (Optimize) { auto startTime = optimizer::History::getTime(); - Firestarter::_optimizer = std::make_unique( - std::move(_algorithm), _population, _optimizationAlgorithm, _individuals, _preheat); + Firestarter::Optimizer = 
std::make_unique(std::move(Algorithm), Population, + OptimizationAlgorithm, Individuals, Preheat); // wait here until optimizer thread terminates - Firestarter::_optimizer->join(); + Firestarter::Optimizer->join(); auto payloadItems = this->environment().selectedConfig().payloadItems(); - firestarter::optimizer::History::save(_optimizeOutfile, startTime, payloadItems, _argc, _argv); + firestarter::optimizer::History::save(OptimizeOutfile, startTime, payloadItems, Argc, Argv); // print the best 20 according to each metric - firestarter::optimizer::History::printBest(_optimizationMetrics, payloadItems); + firestarter::optimizer::History::printBest(OptimizationMetrics, payloadItems); // stop all the load threads std::raise(SIGTERM); @@ -362,35 +362,35 @@ void Firestarter::mainThread() { // wait for watchdog to timeout or until user terminates this->joinLoadWorkers(); #ifdef FIRESTARTER_DEBUG_FEATURES - if (_dumpRegisters) { + if (DumpRegisters) { this->joinDumpRegisterWorker(); } #endif - if (!_optimize) { + if (!Optimize) { this->printPerformanceReport(); } #if defined(linux) || defined(__linux__) // if measurment is enabled, stop it here - if (_measurement) { + if (Measurement) { // TODO: clear this up log::info() << "metric,num_timepoints,duration_ms,average,stddev"; - for (auto const& [name, sum] : _measurementWorker->getValues(_startDelta, _stopDelta)) { - log::info() << std::quoted(name) << "," << sum.num_timepoints << "," << sum.duration.count() << "," << sum.average - << "," << sum.stddev; + for (auto const& [name, sum] : MeasurementWorker->getValues(StartDelta, StopDelta)) { + log::info() << std::quoted(name) << "," << sum.NumTimepoints << "," << sum.Duration.count() << "," << sum.Average + << "," << sum.Stddev; } } #endif - if (_errorDetection) { + if (ErrorDetection) { this->printThreadErrorReport(); } } -void Firestarter::setLoad(unsigned long long value) { +void Firestarter::setLoad(uint64_t value) { // signal load change to workers - Firestarter::loadVar 
= value; + Firestarter::LoadVar = value; #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) #ifndef _MSC_VER __asm__ __volatile__("mfence;"); @@ -412,15 +412,15 @@ void Firestarter::sigtermHandler(int signum) { // used in case of 0 < load < 100 // or interrupt sleep for timeout { - std::lock_guard lk(Firestarter::_watchdogTerminateMutex); - Firestarter::_watchdog_terminate = true; + std::lock_guard lk(Firestarter::WatchdogTerminateMutex); + Firestarter::WatchdogTerminate = true; } - Firestarter::_watchdogTerminateAlert.notify_all(); + Firestarter::WatchdogTerminateAlert.notify_all(); #if defined(linux) || defined(__linux__) // if we have optimization running stop it - if (Firestarter::_optimizer) { - Firestarter::_optimizer->kill(); + if (Firestarter::Optimizer) { + Firestarter::Optimizer->kill(); } #endif } diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp index 53323187..ed925cf1 100644 --- a/src/firestarter/LoadWorker.cpp +++ b/src/firestarter/LoadWorker.cpp @@ -45,7 +45,7 @@ using namespace firestarter; auto aligned_free_deleter = [](void* p) { ALIGNED_FREE(p); }; -int Firestarter::initLoadWorkers(bool lowLoad, unsigned long long period) { +int Firestarter::initLoadWorkers(bool lowLoad, uint64_t period) { int returnCode; if (EXIT_SUCCESS != (returnCode = this->environment().setCpuAffinity(0))) { @@ -54,40 +54,39 @@ int Firestarter::initLoadWorkers(bool lowLoad, unsigned long long period) { // setup load variable to execute low or high load once the threads switch to // work. - this->loadVar = lowLoad ? LOAD_LOW : LOAD_HIGH; + this->LoadVar = lowLoad ? 
LOAD_LOW : LOAD_HIGH; auto numThreads = this->environment().requestedNumThreads(); // create a std::vector> of requestenNumThreads() // communication pointers and add these to the threaddata - if (_errorDetection) { - for (unsigned long long i = 0; i < numThreads; i++) { - auto commPtr = reinterpret_cast(ALIGNED_MALLOC(2 * sizeof(unsigned long long), 64)); + if (ErrorDetection) { + for (uint64_t i = 0; i < numThreads; i++) { + auto commPtr = reinterpret_cast(ALIGNED_MALLOC(2 * sizeof(uint64_t), 64)); assert(commPtr); - this->errorCommunication.push_back(std::shared_ptr(commPtr, aligned_free_deleter)); + this->ErrorCommunication.push_back(std::shared_ptr(commPtr, aligned_free_deleter)); log::debug() << "Threads " << (i + numThreads - 1) % numThreads << " and " << i << " commPtr = 0x" - << std::setfill('0') << std::setw(sizeof(unsigned long long) * 2) << std::hex - << (unsigned long long)commPtr; + << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex << (uint64_t)commPtr; } } - for (unsigned long long i = 0; i < numThreads; i++) { - auto td = std::make_shared(i, this->environment(), &this->loadVar, period, _dumpRegisters, - _errorDetection); + for (uint64_t i = 0; i < numThreads; i++) { + auto td = + std::make_shared(i, this->environment(), &this->LoadVar, period, DumpRegisters, ErrorDetection); - if (_errorDetection) { + if (ErrorDetection) { // distribute pointers for error deteciton. (set threads in a ring) // give this thread the left pointer i and right pointer (i+1) % // requestedNumThreads(). 
- td->setErrorCommunication(this->errorCommunication[i], this->errorCommunication[(i + 1) % numThreads]); + td->setErrorCommunication(this->ErrorCommunication[i], this->ErrorCommunication[(i + 1) % numThreads]); } auto dataCacheSizeIt = td->config().platformConfig().dataCacheBufferSize().begin(); auto ramBufferSize = td->config().platformConfig().ramBufferSize(); - td->buffersizeMem = + td->BuffersizeMem = (*dataCacheSizeIt + *std::next(dataCacheSizeIt, 1) + *std::next(dataCacheSizeIt, 2) + ramBufferSize) / - td->config().thread() / sizeof(unsigned long long); + td->config().thread() / sizeof(uint64_t); // create the thread std::thread t(Firestarter::loadThreadWorker, td); @@ -99,7 +98,7 @@ int Firestarter::initLoadWorkers(bool lowLoad, unsigned long long period) { firestarter::logging::FirstWorkerThreadFilter::setFirstThread(t.get_id()); } - this->loadThreads.push_back(std::make_pair(std::move(t), td)); + this->LoadThreads.push_back(std::make_pair(std::move(t), td)); } this->signalLoadWorkers(THREAD_INIT); @@ -111,54 +110,54 @@ void Firestarter::signalLoadWorkers(int comm) { bool ack; // start the work - for (auto const& thread : this->loadThreads) { + for (auto const& thread : this->LoadThreads) { auto td = thread.second; - td->mutex.lock(); + td->Mutex.lock(); } - for (auto const& thread : this->loadThreads) { + for (auto const& thread : this->LoadThreads) { auto td = thread.second; - td->comm = comm; - td->mutex.unlock(); + td->Comm = comm; + td->Mutex.unlock(); } - for (auto const& thread : this->loadThreads) { + for (auto const& thread : this->LoadThreads) { auto td = thread.second; do { - td->mutex.lock(); - ack = td->ack; - td->mutex.unlock(); + td->Mutex.lock(); + ack = td->Ack; + td->Mutex.unlock(); } while (!ack); - td->mutex.lock(); - td->ack = false; - td->mutex.unlock(); + td->Mutex.lock(); + td->Ack = false; + td->Mutex.unlock(); } } void Firestarter::joinLoadWorkers() { // wait for threads after watchdog has requested termination - for (auto& thread 
: this->loadThreads) { + for (auto& thread : this->LoadThreads) { thread.first.join(); } } void Firestarter::printThreadErrorReport() { - if (_errorDetection) { - auto maxSize = this->loadThreads.size(); + if (ErrorDetection) { + auto maxSize = this->LoadThreads.size(); std::vector errors(maxSize, false); for (decltype(maxSize) i = 0; i < maxSize; i++) { - auto errorDetectionStruct = this->loadThreads[i].second->errorDetectionStruct(); + auto errorDetectionStruct = this->LoadThreads[i].second->errorDetectionStruct(); - if (errorDetectionStruct->errorLeft) { + if (errorDetectionStruct->ErrorLeft) { errors[(i + maxSize - 1) % maxSize] = true; } - if (errorDetectionStruct->errorRight) { + if (errorDetectionStruct->ErrorRight) { errors[i] = true; } } @@ -174,44 +173,44 @@ void Firestarter::printThreadErrorReport() { void Firestarter::printPerformanceReport() { // performance report - unsigned long long startTimestamp = 0xffffffffffffffff; - unsigned long long stopTimestamp = 0; + uint64_t startTimestamp = 0xffffffffffffffff; + uint64_t stopTimestamp = 0; - unsigned long long iterations = 0; + uint64_t iterations = 0; log::debug() << "\nperformance report:\n"; - for (auto const& thread : this->loadThreads) { + for (auto const& thread : this->LoadThreads) { auto td = thread.second; - log::debug() << "Thread " << td->id() << ": " << td->iterations - << " iterations, tsc_delta: " << td->stopTsc - td->startTsc; + log::debug() << "Thread " << td->id() << ": " << td->Iterations + << " iterations, tsc_delta: " << td->StopTsc - td->StartTsc; - if (startTimestamp > td->startTsc) { - startTimestamp = td->startTsc; + if (startTimestamp > td->StartTsc) { + startTimestamp = td->StartTsc; } - if (stopTimestamp < td->stopTsc) { - stopTimestamp = td->stopTsc; + if (stopTimestamp < td->StopTsc) { + stopTimestamp = td->StopTsc; } - iterations += td->iterations; + iterations += td->Iterations; } double runtime = (double)(stopTimestamp - startTimestamp) / 
(double)this->environment().topology().clockrate(); double gFlops = - (double)this->loadThreads.front().second->config().payload().flops() * 0.000000001 * (double)iterations / runtime; + (double)this->LoadThreads.front().second->config().payload().flops() * 0.000000001 * (double)iterations / runtime; double bandwidth = - (double)this->loadThreads.front().second->config().payload().bytes() * 0.000000001 * (double)iterations / runtime; + (double)this->LoadThreads.front().second->config().payload().bytes() * 0.000000001 * (double)iterations / runtime; // insert values for ipc-estimate metric // if we are on linux #if defined(linux) || defined(__linux__) - if (_measurement) { - for (auto const& thread : this->loadThreads) { + if (Measurement) { + for (auto const& thread : this->LoadThreads) { auto td = thread.second; - ipc_estimate_metric_insert((double)td->iterations * - (double)this->loadThreads.front().second->config().payload().instructions() / - (double)(stopTimestamp - startTimestamp)); + ipcEstimateMetricInsert((double)td->Iterations * + (double)this->LoadThreads.front().second->config().payload().instructions() / + (double)(stopTimestamp - startTimestamp)); } } #endif @@ -256,16 +255,16 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { #endif for (;;) { - td->mutex.lock(); - int comm = td->comm; - td->mutex.unlock(); + td->Mutex.lock(); + int comm = td->Comm; + td->Mutex.unlock(); if (comm != old) { old = comm; - td->mutex.lock(); - td->ack = true; - td->mutex.unlock(); + td->Mutex.lock(); + td->Ack = true; + td->Mutex.unlock(); } else { std::this_thread::sleep_for(std::chrono::microseconds(1)); continue; @@ -280,47 +279,47 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { // compile payload td->config().payload().compilePayload(td->config().payloadSettings(), td->config().instructionCacheSize(), td->config().dataCacheBufferSize(), td->config().ramBufferSize(), - td->config().thread(), td->config().lines(), td->dumpRegisters, - 
td->errorDetection); + td->config().thread(), td->config().lines(), td->DumpRegisters, + td->ErrorDetection); // allocate memory // if we should dump some registers, we use the first part of the memory // for them. - td->addrMem = reinterpret_cast( - ALIGNED_MALLOC((td->buffersizeMem + td->addrOffset) * sizeof(unsigned long long), 64)) + - td->addrOffset; + td->AddrMem = + reinterpret_cast(ALIGNED_MALLOC((td->BuffersizeMem + td->AddrOffset) * sizeof(uint64_t), 64)) + + td->AddrOffset; // exit application on error - if (td->addrMem - td->addrOffset == nullptr) { + if (td->AddrMem - td->AddrOffset == nullptr) { workerLog::error() << "Could not allocate memory for CPU load thread " << td->id() << "\n"; exit(ENOMEM); } - if (td->dumpRegisters) { - reinterpret_cast(td->addrMem - td->addrOffset)->dumpVar = DumpVariable::Wait; + if (td->DumpRegisters) { + reinterpret_cast(td->AddrMem - td->AddrOffset)->DumpVar = DumpVariable::Wait; } - if (td->errorDetection) { - auto errorDetectionStruct = reinterpret_cast(td->addrMem - td->addrOffset); + if (td->ErrorDetection) { + auto errorDetectionStruct = reinterpret_cast(td->AddrMem - td->AddrOffset); std::memset(errorDetectionStruct, 0, sizeof(ErrorDetectionStruct)); // distribute left and right communication pointers - errorDetectionStruct->communicationLeft = td->communicationLeft.get(); - errorDetectionStruct->communicationRight = td->communicationRight.get(); + errorDetectionStruct->CommunicationLeft = td->CommunicationLeft.get(); + errorDetectionStruct->CommunicationRight = td->CommunicationRight.get(); // do first touch memset 0 for the communication pointers - std::memset((void*)errorDetectionStruct->communicationLeft, 0, sizeof(unsigned long long) * 2); + std::memset((void*)errorDetectionStruct->CommunicationLeft, 0, sizeof(uint64_t) * 2); } // call init function - td->config().payload().init(td->addrMem, td->buffersizeMem); + td->config().payload().init(td->AddrMem, td->BuffersizeMem); break; // perform stress test case 
THREAD_WORK: // record threads start timestamp - td->startTsc = td->environment().topology().timestamp(); + td->StartTsc = td->environment().topology().timestamp(); // will be terminated by watchdog for (;;) { @@ -331,7 +330,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { #ifdef ENABLE_SCOREP SCOREP_USER_REGION_BY_NAME_BEGIN("HIGH", SCOREP_USER_REGION_TYPE_COMMON); #endif - td->iterations = td->config().payload().highLoadFunction(td->addrMem, td->addrHigh, td->iterations); + td->Iterations = td->config().payload().highLoadFunction(td->AddrMem, td->AddrHigh, td->Iterations); // call low load function #ifdef ENABLE_VTRACING @@ -342,7 +341,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { SCOREP_USER_REGION_BY_NAME_END("HIGH"); SCOREP_USER_REGION_BY_NAME_BEGIN("LOW", SCOREP_USER_REGION_TYPE_COMMON); #endif - td->config().payload().lowLoadFunction(td->addrHigh, td->period); + td->config().payload().lowLoadFunction(td->AddrHigh, td->Period); #ifdef ENABLE_VTRACING VT_USER_END("LOW_LOAD_FUNC"); #endif @@ -351,14 +350,14 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { #endif // terminate if master signals end of run and record stop timestamp - if (*td->addrHigh == LOAD_STOP) { - td->stopTsc = td->environment().topology().timestamp(); + if (*td->AddrHigh == LOAD_STOP) { + td->StopTsc = td->environment().topology().timestamp(); return; } - if (*td->addrHigh == LOAD_SWITCH) { - td->stopTsc = td->environment().topology().timestamp(); + if (*td->AddrHigh == LOAD_SWITCH) { + td->StopTsc = td->environment().topology().timestamp(); break; } @@ -368,17 +367,17 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { // compile payload td->config().payload().compilePayload(td->config().payloadSettings(), td->config().instructionCacheSize(), td->config().dataCacheBufferSize(), td->config().ramBufferSize(), - td->config().thread(), td->config().lines(), td->dumpRegisters, - td->errorDetection); + td->config().thread(), td->config().lines(), 
td->DumpRegisters, + td->ErrorDetection); // call init function - td->config().payload().init(td->addrMem, td->buffersizeMem); + td->config().payload().init(td->AddrMem, td->BuffersizeMem); // save old iteration count - td->lastIterations = td->iterations; - td->lastStartTsc = td->startTsc; - td->lastStopTsc = td->stopTsc; - td->iterations = 0; + td->LastIterations = td->Iterations; + td->LastStartTsc = td->StartTsc; + td->LastStopTsc = td->StopTsc; + td->Iterations = 0; break; case THREAD_WAIT: break; diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp index efd7a4bc..36405051 100644 --- a/src/firestarter/Measurement/MeasurementWorker.cpp +++ b/src/firestarter/Measurement/MeasurementWorker.cpp @@ -36,11 +36,11 @@ void insertCallback(void* cls, const char* metricName, int64_t timeSinceEpoch, d using namespace firestarter::measurement; -MeasurementWorker::MeasurementWorker(std::chrono::milliseconds updateInterval, unsigned long long numThreads, +MeasurementWorker::MeasurementWorker(std::chrono::milliseconds updateInterval, uint64_t numThreads, std::vector const& metricDylibs, std::vector const& stdinMetrics) - : updateInterval(updateInterval) - , numThreads(numThreads) { + : UpdateInterval(updateInterval) + , NumThreads(numThreads) { #ifndef FIRESTARTER_LINK_STATIC // open dylibs and find metric symbol. @@ -92,18 +92,18 @@ MeasurementWorker::MeasurementWorker(std::chrono::milliseconds updateInterval, u continue; } - this->_stdinMetrics.push_back(name); + this->StdinMetrics.push_back(name); } std::stringstream ss; unsigned maxLength = 0; std::map available; - for (auto const& metric : this->metrics) { - std::string name(metric->name); + for (auto const& metric : this->Metrics) { + std::string name(metric->Name); maxLength = maxLength < name.size() ? 
name.size() : maxLength; - int returnCode = metric->init(); - metric->fini(); + int returnCode = metric->Init(); + metric->Fini(); available[name] = returnCode == EXIT_SUCCESS ? true : false; } @@ -115,36 +115,36 @@ MeasurementWorker::MeasurementWorker(std::chrono::milliseconds updateInterval, u ss << (value ? "yes" : "no") << "\n"; } - this->availableMetricsString = ss.str(); + this->AvailableMetricsString = ss.str(); - pthread_create(&this->workerThread, NULL, + pthread_create(&this->WorkerThread, NULL, reinterpret_cast(MeasurementWorker::dataAcquisitionWorker), this); // create a worker for getting metric values from stdin - if (this->_stdinMetrics.size() > 0) { - pthread_create(&this->stdinThread, NULL, + if (this->StdinMetrics.size() > 0) { + pthread_create(&this->StdinThread, NULL, reinterpret_cast(MeasurementWorker::stdinDataAcquisitionWorker), this); } } MeasurementWorker::~MeasurementWorker() { - pthread_cancel(this->workerThread); + pthread_cancel(this->WorkerThread); - pthread_join(this->workerThread, NULL); + pthread_join(this->WorkerThread, NULL); - if (this->_stdinMetrics.size() > 0) { - pthread_cancel(this->stdinThread); + if (this->StdinMetrics.size() > 0) { + pthread_cancel(this->StdinThread); - pthread_join(this->stdinThread, NULL); + pthread_join(this->StdinThread, NULL); } - for (auto const& [key, value] : this->values) { + for (auto const& [key, value] : this->Values) { auto metric = this->findMetricByName(key); if (metric == nullptr) { continue; } - metric->fini(); + metric->Fini(); } #ifndef FIRESTARTER_LINK_STATIC @@ -156,104 +156,104 @@ MeasurementWorker::~MeasurementWorker() { std::vector MeasurementWorker::metricNames() { std::vector metrics; - std::transform(this->metrics.begin(), this->metrics.end(), std::back_inserter(metrics), - [](auto& metric) -> std::string { return std::string(metric->name); }); - for (auto const& name : this->_stdinMetrics) { + std::transform(this->Metrics.begin(), this->Metrics.end(), 
std::back_inserter(metrics), + [](auto& metric) -> std::string { return std::string(metric->Name); }); + for (auto const& name : this->StdinMetrics) { metrics.push_back(name); } return metrics; } -const metric_interface_t* MeasurementWorker::findMetricByName(std::string metricName) { - auto name_equal = [metricName](auto& metricInterface) { return metricName.compare(metricInterface->name) == 0; }; - auto metric = std::find_if(this->metrics.begin(), this->metrics.end(), name_equal); +auto MeasurementWorker::findMetricByName(std::string MetricName) -> const MetricInterface* { + auto NameEqual = [MetricName](auto& MetricInterface) { return MetricName.compare(MetricInterface->Name) == 0; }; + auto Metric = std::find_if(this->Metrics.begin(), this->Metrics.end(), NameEqual); // metric not found - if (metric == this->metrics.end()) { + if (Metric == this->Metrics.end()) { return nullptr; } // metric found - return const_cast(*metric); + return const_cast(*Metric); } // this must be called by the main thread. 
// if not done so things like perf_event_attr.inherit might not work as expected -std::vector MeasurementWorker::initMetrics(std::vector const& metricNames) { - this->values_mutex.lock(); +auto MeasurementWorker::initMetrics(std::vector const& MetricNames) -> std::vector { + this->ValuesMutex.lock(); std::vector initialized = {}; // try to find each metric and initialize it - for (auto const& metricName : metricNames) { + for (auto const& metricName : MetricNames) { // init values map with empty vector auto name_equal = [metricName](auto const& pair) { return metricName.compare(pair.first) == 0; }; - auto pair = std::find_if(this->values.begin(), this->values.end(), name_equal); - if (pair != this->values.end()) { + auto pair = std::find_if(this->Values.begin(), this->Values.end(), name_equal); + if (pair != this->Values.end()) { pair->second.clear(); } else { auto metric = this->findMetricByName(metricName); if (metric != nullptr) { - int returnValue = metric->init(); + int returnValue = metric->Init(); if (returnValue != EXIT_SUCCESS) { - log::error() << "Metric " << metric->name << ": " << metric->get_error(); + log::error() << "Metric " << metric->Name << ": " << metric->GetError(); continue; } } - this->values[metricName] = std::vector(); + this->Values[metricName] = std::vector(); if (metric != nullptr) { - if (metric->type.insert_callback) { - metric->register_insert_callback(::insertCallback, this); + if (metric->Type.InsertCallback) { + metric->RegisterInsertCallback(::insertCallback, this); } } initialized.push_back(metricName); } } - this->values_mutex.unlock(); + this->ValuesMutex.unlock(); return initialized; } void MeasurementWorker::insertCallback(const char* metricName, int64_t timeSinceEpoch, double value) { - this->values_mutex.lock(); + this->ValuesMutex.lock(); using Duration = std::chrono::duration; auto time = std::chrono::time_point(Duration(timeSinceEpoch)); auto name_equal = [metricName](auto const& pair) { return 
std::string(metricName).compare(pair.first) == 0; }; - auto pair = std::find_if(this->values.begin(), this->values.end(), name_equal); + auto pair = std::find_if(this->Values.begin(), this->Values.end(), name_equal); - if (pair != this->values.end()) { + if (pair != this->Values.end()) { pair->second.push_back(TimeValue(time, value)); } - this->values_mutex.unlock(); + this->ValuesMutex.unlock(); } -void MeasurementWorker::startMeasurement() { this->startTime = std::chrono::high_resolution_clock::now(); } +void MeasurementWorker::startMeasurement() { this->StartTime = std::chrono::high_resolution_clock::now(); } std::map MeasurementWorker::getValues(std::chrono::milliseconds startDelta, std::chrono::milliseconds stopDelta) { std::map measurment = {}; - this->values_mutex.lock(); + this->ValuesMutex.lock(); - for (auto& [key, values] : this->values) { - auto startTime = this->startTime; + for (auto& [key, values] : this->Values) { + auto startTime = this->StartTime; auto endTime = std::chrono::high_resolution_clock::now(); auto metric = this->findMetricByName(key); - metric_type_t type; + MetricType type; std::memset(&type, 0, sizeof(type)); if (metric == nullptr) { - type.absolute = 1; + type.Absolute = 1; startTime += startDelta; endTime -= stopDelta; } else { - std::memcpy(&type, &metric->type, sizeof(type)); + std::memcpy(&type, &metric->Type, sizeof(type)); - if (metric->type.ignore_start_stop_delta == 0) { + if (metric->Type.IgnoreStartStopDelta == 0) { startTime += startDelta; endTime -= stopDelta; } @@ -261,16 +261,16 @@ std::map MeasurementWorker::getValues(std::chrono::millise decltype(values) croppedValues(values.size()); - auto findAll = [startTime, endTime](auto const& tv) { return startTime <= tv.time && tv.time <= endTime; }; + auto findAll = [startTime, endTime](auto const& tv) { return startTime <= tv.Time && tv.Time <= endTime; }; auto it = std::copy_if(values.begin(), values.end(), croppedValues.begin(), findAll); 
croppedValues.resize(std::distance(croppedValues.begin(), it)); - Summary sum = Summary::calculate(croppedValues.begin(), croppedValues.end(), type, this->numThreads); + Summary sum = Summary::calculate(croppedValues.begin(), croppedValues.end(), type, this->NumThreads); measurment[key] = sum; } - this->values_mutex.unlock(); + this->ValuesMutex.unlock(); return measurment; } @@ -299,36 +299,36 @@ int* MeasurementWorker::dataAcquisitionWorker(void* measurementWorker) { std::priority_queue, decltype(callbackTupleComparator)> callbackQueue( callbackTupleComparator); - _this->values_mutex.lock(); + _this->ValuesMutex.lock(); - for (auto const& [key, value] : _this->values) { + for (auto const& [key, value] : _this->Values) { auto metric_interface = _this->findMetricByName(key); if (metric_interface == nullptr) { continue; } - auto callbackTime = std::chrono::microseconds(metric_interface->callback_time); + auto callbackTime = std::chrono::microseconds(metric_interface->CallbackTime); if (callbackTime.count() == 0) { continue; } auto currentTime = clock::now(); - callbackQueue.push(std::make_tuple(metric_interface->callback, callbackTime, currentTime)); + callbackQueue.push(std::make_tuple(metric_interface->Callback, callbackTime, currentTime)); } - _this->values_mutex.unlock(); + _this->ValuesMutex.unlock(); - auto nextFetch = clock::now() + _this->updateInterval; + auto nextFetch = clock::now() + _this->UpdateInterval; for (;;) { auto now = clock::now(); if (nextFetch <= now) { - _this->values_mutex.lock(); + _this->ValuesMutex.lock(); - for (auto& [metricName, values] : _this->values) { + for (auto& [metricName, values] : _this->Values) { auto metric_interface = _this->findMetricByName(metricName); if (metric_interface == nullptr) { @@ -337,17 +337,17 @@ int* MeasurementWorker::dataAcquisitionWorker(void* measurementWorker) { double value; - if (!metric_interface->type.insert_callback && metric_interface->get_reading != nullptr) { - if (EXIT_SUCCESS == 
metric_interface->get_reading(&value)) { + if (!metric_interface->Type.InsertCallback && metric_interface->GetReading != nullptr) { + if (EXIT_SUCCESS == metric_interface->GetReading(&value)) { auto tv = TimeValue(std::chrono::high_resolution_clock::now(), value); values.push_back(tv); } } } - _this->values_mutex.unlock(); + _this->ValuesMutex.unlock(); - nextFetch = now + _this->updateInterval; + nextFetch = now + _this->UpdateInterval; } auto nextWake = nextFetch; diff --git a/src/firestarter/Measurement/Metric/IPCEstimate.cpp b/src/firestarter/Measurement/Metric/IPCEstimate.cpp index 145f02ae..9e18a6be 100644 --- a/src/firestarter/Measurement/Metric/IPCEstimate.cpp +++ b/src/firestarter/Measurement/Metric/IPCEstimate.cpp @@ -57,7 +57,7 @@ static int32_t register_insert_callback(void (*c)(void*, const char*, int64_t, d return EXIT_SUCCESS; } -void ipc_estimate_metric_insert(double value) { +void ipcEstimateMetricInsert(double Value) { if (callback == nullptr || callback_arg == nullptr) { return; } @@ -66,23 +66,23 @@ void ipc_estimate_metric_insert(double value) { std::chrono::duration_cast(std::chrono::high_resolution_clock::now().time_since_epoch()) .count(); - callback(callback_arg, "ipc-estimate", t, value); + callback(callback_arg, "ipc-estimate", t, Value); } -metric_interface_t ipc_estimate_metric = { - .name = "ipc-estimate", - .type = {.absolute = 1, - .accumalative = 0, - .divide_by_thread_count = 0, - .insert_callback = 1, - .ignore_start_stop_delta = 1, - .__reserved = 0}, - .unit = "IPC", - .callback_time = 0, - .callback = nullptr, - .init = init, - .fini = fini, - .get_reading = nullptr, - .get_error = get_error, - .register_insert_callback = register_insert_callback, +MetricInterface IpcEstimateMetric = { + .Name = "ipc-estimate", + .Type = {.Absolute = 1, + .Accumalative = 0, + .DivideByThreadCount = 0, + .InsertCallback = 1, + .IgnoreStartStopDelta = 1, + .Reserved = 0}, + .Unit = "IPC", + .CallbackTime = 0, + .Callback = nullptr, + .Init = 
init, + .Fini = fini, + .GetReading = nullptr, + .GetError = get_error, + .RegisterInsertCallback = register_insert_callback, }; diff --git a/src/firestarter/Measurement/Metric/Perf.cpp b/src/firestarter/Measurement/Metric/Perf.cpp index a7266db2..0d7a0225 100644 --- a/src/firestarter/Measurement/Metric/Perf.cpp +++ b/src/firestarter/Measurement/Metric/Perf.cpp @@ -230,38 +230,38 @@ static const char* get_error(void) { } } -metric_interface_t perf_ipc_metric = { - .name = "perf-ipc", - .type = {.absolute = 1, - .accumalative = 0, - .divide_by_thread_count = 0, - .insert_callback = 0, - .ignore_start_stop_delta = 0, - .__reserved = 0}, - .unit = "IPC", - .callback_time = 0, - .callback = nullptr, - .init = init, - .fini = fini, - .get_reading = get_reading_ipc, - .get_error = get_error, - .register_insert_callback = nullptr, +MetricInterface PerfIpcMetric = { + .Name = "perf-ipc", + .Type = {.Absolute = 1, + .Accumalative = 0, + .DivideByThreadCount = 0, + .InsertCallback = 0, + .IgnoreStartStopDelta = 0, + .Reserved = 0}, + .Unit = "IPC", + .CallbackTime = 0, + .Callback = nullptr, + .Init = init, + .Fini = fini, + .GetReading = get_reading_ipc, + .GetError = get_error, + .RegisterInsertCallback = nullptr, }; -metric_interface_t perf_freq_metric = { - .name = "perf-freq", - .type = {.absolute = 0, - .accumalative = 1, - .divide_by_thread_count = 1, - .insert_callback = 0, - .ignore_start_stop_delta = 0, - .__reserved = 0}, - .unit = "GHz", - .callback_time = 0, - .callback = nullptr, - .init = init, - .fini = fini, - .get_reading = get_reading_freq, - .get_error = get_error, - .register_insert_callback = nullptr, +MetricInterface PerfFreqMetric = { + .Name = "perf-freq", + .Type = {.Absolute = 0, + .Accumalative = 1, + .DivideByThreadCount = 1, + .InsertCallback = 0, + .IgnoreStartStopDelta = 0, + .Reserved = 0}, + .Unit = "GHz", + .CallbackTime = 0, + .Callback = nullptr, + .Init = init, + .Fini = fini, + .GetReading = get_reading_freq, + .GetError = get_error, + 
.RegisterInsertCallback = nullptr, }; diff --git a/src/firestarter/Measurement/Metric/RAPL.cpp b/src/firestarter/Measurement/Metric/RAPL.cpp index e6d28f1d..c73ef004 100644 --- a/src/firestarter/Measurement/Metric/RAPL.cpp +++ b/src/firestarter/Measurement/Metric/RAPL.cpp @@ -137,27 +137,27 @@ static int32_t init(void) { break; } - unsigned long long reading; - unsigned long long max; + uint64_t reading; + uint64_t max; std::string buffer; int read; std::getline(energyReadingStream, buffer); - read = std::sscanf(buffer.c_str(), "%llu", &reading); + read = std::sscanf(buffer.c_str(), "%lu", &reading); if (read == 0) { std::stringstream ss; - ss << "Contents in file " << energyUjPath.str() << " do not conform to mask (unsigned long long)"; + ss << "Contents in file " << energyUjPath.str() << " do not conform to mask (uint64_t)"; errorString = ss.str(); break; } std::getline(maxEnergyReadingStream, buffer); - read = std::sscanf(buffer.c_str(), "%llu", &max); + read = std::sscanf(buffer.c_str(), "%lu", &max); if (read == 0) { std::stringstream ss; - ss << "Contents in file " << maxEnergyUjRangePath.str() << " do not conform to mask (unsigned long long)"; + ss << "Contents in file " << maxEnergyUjRangePath.str() << " do not conform to mask (uint64_t)"; errorString = ss.str(); break; } @@ -220,23 +220,23 @@ static const char* get_error(void) { // this function will be called periodically to make sure we do not miss an // overflow of the counter -static void callback(void) { get_reading(nullptr); } +static void callback() { get_reading(nullptr); } } -metric_interface_t rapl_metric = { - .name = "sysfs-powercap-rapl", - .type = {.absolute = 0, - .accumalative = 1, - .divide_by_thread_count = 0, - .insert_callback = 0, - .ignore_start_stop_delta = 0, - .__reserved = 0}, - .unit = "J", - .callback_time = 30000000, - .callback = callback, - .init = init, - .fini = fini, - .get_reading = get_reading, - .get_error = get_error, - .register_insert_callback = nullptr, 
+MetricInterface RaplMetric = { + .Name = "sysfs-powercap-rapl", + .Type = {.Absolute = 0, + .Accumalative = 1, + .DivideByThreadCount = 0, + .InsertCallback = 0, + .IgnoreStartStopDelta = 0, + .Reserved = 0}, + .Unit = "J", + .CallbackTime = 30000000, + .Callback = callback, + .Init = init, + .Fini = fini, + .GetReading = get_reading, + .GetError = get_error, + .RegisterInsertCallback = nullptr, }; diff --git a/src/firestarter/Measurement/Summary.cpp b/src/firestarter/Measurement/Summary.cpp index 2d1bd8f4..730775be 100644 --- a/src/firestarter/Measurement/Summary.cpp +++ b/src/firestarter/Measurement/Summary.cpp @@ -28,73 +28,73 @@ using namespace firestarter::measurement; // this functions borows a lot of code from // https://github.com/metricq/metricq-cpp/blob/master/tools/metricq-summary/src/summary.cpp -Summary Summary::calculate(std::vector::iterator begin, std::vector::iterator end, - metric_type_t metricType, unsigned long long numThreads) { - std::vector values = {}; +auto Summary::calculate(std::vector::iterator Begin, std::vector::iterator End, + MetricType MetricType, uint64_t NumThreads) -> Summary { + std::vector Values = {}; // TODO: i would really like to make this code a bit more readable, but i // could not find a way yet. 
- if (metricType.accumalative) { + if (MetricType.Accumalative) { TimeValue prev; - if (begin != end) { - prev = *begin++; - for (auto it = begin; it != end; ++it) { + if (Begin != End) { + prev = *Begin++; + for (auto it = Begin; it != End; ++it) { auto time_diff = - 1e-6 * (double)std::chrono::duration_cast(it->time - prev.time).count(); - auto value_diff = it->value - prev.value; + 1e-6 * (double)std::chrono::duration_cast(it->Time - prev.Time).count(); + auto value_diff = it->Value - prev.Value; double value = value_diff / time_diff; - if (metricType.divide_by_thread_count) { - value /= numThreads; + if (MetricType.DivideByThreadCount) { + value /= NumThreads; } - values.push_back(TimeValue(prev.time, value)); + Values.emplace_back(prev.Time, value); prev = *it; } } - } else if (metricType.absolute) { - for (auto it = begin; it != end; ++it) { - double value = it->value; + } else if (MetricType.Absolute) { + for (auto it = Begin; it != End; ++it) { + double value = it->Value; - if (metricType.divide_by_thread_count) { - value /= numThreads; + if (MetricType.DivideByThreadCount) { + value /= NumThreads; } - values.push_back(TimeValue(it->time, value)); + Values.emplace_back(it->Time, value); } } else { assert(false); } - begin = values.begin(); - end = values.end(); + Begin = Values.begin(); + End = Values.end(); - Summary summary{}; + Summary SummaryVal{}; - summary.num_timepoints = std::distance(begin, end); + SummaryVal.NumTimepoints = std::distance(Begin, End); - if (summary.num_timepoints > 0) { + if (SummaryVal.NumTimepoints > 0) { - auto last = begin; - std::advance(last, summary.num_timepoints - 1); - summary.duration = std::chrono::duration_cast(last->time - begin->time); + auto last = Begin; + std::advance(last, SummaryVal.NumTimepoints - 1); + SummaryVal.Duration = std::chrono::duration_cast(last->Time - Begin->Time); - auto sum_over_nths = [&begin, end, summary](auto fn) { + auto sum_over_nths = [&Begin, End, SummaryVal](auto fn) { double acc = 0.0; 
- for (auto it = begin; it != end; ++it) { - acc += fn(it->value); + for (auto it = Begin; it != End; ++it) { + acc += fn(it->Value); } - return acc / summary.num_timepoints; + return acc / SummaryVal.NumTimepoints; }; - summary.average = sum_over_nths([](double v) { return v; }); - summary.stddev = std::sqrt(sum_over_nths([&summary](double v) { - double centered = v - summary.average; + SummaryVal.Average = sum_over_nths([](double v) { return v; }); + SummaryVal.Stddev = std::sqrt(sum_over_nths([&SummaryVal](double v) { + double centered = v - SummaryVal.Average; return centered * centered; })); } - return summary; + return SummaryVal; } diff --git a/src/firestarter/OneAPI/OneAPI.cpp b/src/firestarter/OneAPI/OneAPI.cpp index c31ae6cd..6ebb2da3 100644 --- a/src/firestarter/OneAPI/OneAPI.cpp +++ b/src/firestarter/OneAPI/OneAPI.cpp @@ -116,7 +116,7 @@ static int round_up(int num_to_round, int multiple) { // GPU index. Used to pin this thread to the GPU. template static void create_load(std::condition_variable& waitForInitCv, std::mutex& waitForInitCvMutex, int device_index, - std::atomic& initCount, volatile unsigned long long* loadVar, int matrixSize) { + std::atomic& initCount, volatile uint64_t* loadVar, int matrixSize) { static_assert(std::is_same::value || std::is_same::value, "create_load: Template argument T must be either float or double"); @@ -236,7 +236,7 @@ static void create_load(std::condition_variable& waitForInitCv, std::mutex& wait } } -OneAPI::OneAPI(volatile unsigned long long* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) { +OneAPI::OneAPI(volatile uint64_t* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) { std::thread t(OneAPI::initGpus, std::ref(_waitForInitCv), loadVar, useFloat, useDouble, matrixSize, gpus); _initThread = std::move(t); @@ -245,7 +245,7 @@ OneAPI::OneAPI(volatile unsigned long long* loadVar, bool useFloat, bool useDoub _waitForInitCv.wait(lk); } -void 
OneAPI::initGpus(std::condition_variable& cv, volatile unsigned long long* loadVar, bool useFloat, bool useDouble, +void OneAPI::initGpus(std::condition_variable& cv, volatile uint64_t* loadVar, bool useFloat, bool useDouble, unsigned matrixSize, int gpus) { std::condition_variable waitForInitCv; std::mutex waitForInitCvMutex; diff --git a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp index 972c0d0a..8b9a7b02 100644 --- a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp +++ b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp @@ -30,47 +30,46 @@ using namespace firestarter::optimizer::algorithm; -NSGA2::NSGA2(unsigned gen, double cr, double m) - : Algorithm() - , _gen(gen) - , _cr(cr) - , _m(m) { - if (cr >= 1. || cr < 0.) { +NSGA2::NSGA2(unsigned Gen, double Cr, double M) + : Gen(Gen) + , Cr(Cr) + , M(M) { + if (Cr >= 1. || Cr < 0.) { throw std::invalid_argument("The crossover probability must be in the " "[0,1[ range, while a value of " + - std::to_string(cr) + " was detected"); + std::to_string(Cr) + " was detected"); } - if (m < 0. || m > 1.) { + if (M < 0. || M > 1.) 
{ throw std::invalid_argument("The mutation probability must be in the [0,1] " "range, while a value of " + - std::to_string(m) + " was detected"); + std::to_string(M) + " was detected"); } } -void NSGA2::checkPopulation(firestarter::optimizer::Population const& pop, std::size_t populationSize) { - const auto& prob = pop.problem(); +void NSGA2::checkPopulation(firestarter::optimizer::Population const& Pop, std::size_t PopulationSize) { + const auto& Prob = Pop.problem(); - if (!prob.isMO()) { + if (!Prob.isMO()) { throw std::invalid_argument("NSGA2 is a multiobjective algorithms, while number of objectives is " + - std::to_string(prob.getNobjs())); + std::to_string(Prob.getNobjs())); } - if (populationSize < 5u || (populationSize % 4 != 0u)) { + if (PopulationSize < 5u || (PopulationSize % 4 != 0u)) { throw std::invalid_argument("for NSGA-II at least 5 individuals in the " "population are needed and the " "population size must be a multiple of 4. " "Detected input population size is: " + - std::to_string(populationSize)); + std::to_string(PopulationSize)); } } -firestarter::optimizer::Population NSGA2::evolve(firestarter::optimizer::Population& pop) { - const auto& prob = pop.problem(); +auto NSGA2::evolve(firestarter::optimizer::Population& Pop) -> firestarter::optimizer::Population { + const auto& prob = Pop.problem(); const auto bounds = prob.getBounds(); - auto NP = pop.size(); + auto NP = Pop.size(); auto fevals0 = prob.getFevals(); - this->checkPopulation(const_cast(pop), NP); + this->checkPopulation(const_cast(Pop), NP); std::random_device rd; std::mt19937 rng(rd()); @@ -92,10 +91,10 @@ firestarter::optimizer::Population NSGA2::evolve(firestarter::optimizer::Populat firestarter::log::info() << ss.str(); } - for (decltype(_gen) gen = 1u; gen <= _gen; ++gen) { + for (decltype(Gen) gen = 1u; gen <= Gen; ++gen) { { // Print the logs - std::vector idealPoint = util::ideal(pop.f()); + std::vector idealPoint = util::ideal(Pop.f()); std::stringstream ss; ss << 
std::setw(7) << gen << std::setw(15) << prob.getFevals() - fevals0; @@ -107,7 +106,7 @@ firestarter::optimizer::Population NSGA2::evolve(firestarter::optimizer::Populat } // At each generation we make a copy of the population into popnew - firestarter::optimizer::Population popnew(pop); + firestarter::optimizer::Population popnew(Pop); // We create some pseudo-random permutation of the poulation indexes std::random_shuffle(shuffle1.begin(), shuffle1.end()); @@ -115,7 +114,7 @@ firestarter::optimizer::Population NSGA2::evolve(firestarter::optimizer::Populat // We compute crowding distance and non dominated rank for the current // population - auto fnds_res = util::fast_non_dominated_sorting(pop.f()); + auto fnds_res = util::fastNonDominatedSorting(Pop.f()); auto ndf = std::get<0>(fnds_res); // non dominated fronts [[0,3,2],[1,5,6],[4],...] std::vector pop_cd(NP); // crowding distances of the whole population auto ndr = std::get<3>(fnds_res); // non domination rank [0,1,0,0,2,1,1, ... ] @@ -129,9 +128,9 @@ firestarter::optimizer::Population NSGA2::evolve(firestarter::optimizer::Populat } else { std::vector> front; for (auto idx : front_idxs) { - front.push_back(pop.f()[idx]); + front.push_back(Pop.f()[idx]); } - auto cd = util::crowding_distance(front); + auto cd = util::crowdingDistance(front); for (decltype(cd.size()) i = 0u; i < cd.size(); ++i) { pop_cd[front_idxs[i]] = cd[i]; } @@ -142,33 +141,33 @@ firestarter::optimizer::Population NSGA2::evolve(firestarter::optimizer::Populat // of parents that will each create 2 new offspring for (decltype(NP) i = 0u; i < NP; i += 4) { // We create two offsprings using the shuffled list 1 - parent1_idx = util::mo_tournament_selection(shuffle1[i], shuffle1[i + 1], ndr, pop_cd, rng); - parent2_idx = util::mo_tournament_selection(shuffle1[i + 2], shuffle1[i + 3], ndr, pop_cd, rng); - children = util::sbx_crossover(pop.x()[parent1_idx], pop.x()[parent2_idx], _cr, rng); - util::polynomial_mutation(children.first, bounds, _m, rng); 
- util::polynomial_mutation(children.second, bounds, _m, rng); + parent1_idx = util::moTournamentSelection(shuffle1[i], shuffle1[i + 1], ndr, pop_cd, rng); + parent2_idx = util::moTournamentSelection(shuffle1[i + 2], shuffle1[i + 3], ndr, pop_cd, rng); + children = util::sbxCrossover(Pop.x()[parent1_idx], Pop.x()[parent2_idx], Cr, rng); + util::polynomialMutation(children.first, bounds, M, rng); + util::polynomialMutation(children.second, bounds, M, rng); popnew.append(children.first); popnew.append(children.second); // We repeat with the shuffled list 2 - parent1_idx = util::mo_tournament_selection(shuffle2[i], shuffle2[i + 1], ndr, pop_cd, rng); - parent2_idx = util::mo_tournament_selection(shuffle2[i + 2], shuffle2[i + 3], ndr, pop_cd, rng); - children = util::sbx_crossover(pop.x()[parent1_idx], pop.x()[parent2_idx], _cr, rng); - util::polynomial_mutation(children.first, bounds, _m, rng); - util::polynomial_mutation(children.second, bounds, _m, rng); + parent1_idx = util::moTournamentSelection(shuffle2[i], shuffle2[i + 1], ndr, pop_cd, rng); + parent2_idx = util::moTournamentSelection(shuffle2[i + 2], shuffle2[i + 3], ndr, pop_cd, rng); + children = util::sbxCrossover(Pop.x()[parent1_idx], Pop.x()[parent2_idx], Cr, rng); + util::polynomialMutation(children.first, bounds, M, rng); + util::polynomialMutation(children.second, bounds, M, rng); popnew.append(children.first); popnew.append(children.second); } // popnew now contains 2NP individuals // This method returns the sorted N best individuals in the population // according to the crowded comparison operator - best_idx = util::select_best_N_mo(popnew.f(), NP); + best_idx = util::selectBestNMo(popnew.f(), NP); // We insert into the population for (decltype(NP) i = 0; i < NP; ++i) { - pop.insert(i, popnew.x()[best_idx[i]], popnew.f()[best_idx[i]]); + Pop.insert(i, popnew.x()[best_idx[i]], popnew.f()[best_idx[i]]); } } - return pop; + return Pop; } diff --git a/src/firestarter/Optimizer/OptimizerWorker.cpp 
b/src/firestarter/Optimizer/OptimizerWorker.cpp index 7cb98cce..610b8cbd 100644 --- a/src/firestarter/Optimizer/OptimizerWorker.cpp +++ b/src/firestarter/Optimizer/OptimizerWorker.cpp @@ -29,22 +29,22 @@ OptimizerWorker::OptimizerWorker(std::unique_ptrworkerThread, NULL, reinterpret_cast(OptimizerWorker::optimizerThread), this); + : Algorithm(std::move(algorithm)) + , Population(population) + , OptimizationAlgorithm(optimizationAlgorithm) + , Individuals(individuals) + , Preheat(preheat) { + pthread_create(&this->WorkerThread, NULL, reinterpret_cast(OptimizerWorker::optimizerThread), this); } void OptimizerWorker::kill() { // we ignore ESRCH errno if thread already exited - pthread_cancel(this->workerThread); + pthread_cancel(this->WorkerThread); } void OptimizerWorker::join() { // we ignore ESRCH errno if thread already exited - pthread_join(this->workerThread, NULL); + pthread_join(this->WorkerThread, NULL); } void* OptimizerWorker::optimizerThread(void* optimizerWorker) { @@ -57,14 +57,14 @@ void* OptimizerWorker::optimizerThread(void* optimizerWorker) { #endif // heat the cpu before attempting to optimize - std::this_thread::sleep_for(_this->_preheat); + std::this_thread::sleep_for(_this->Preheat); // For NSGA2 we start with a initial population - if (_this->_optimizationAlgorithm == "NSGA2") { - _this->_population.generateInitialPopulation(_this->_individuals); + if (_this->OptimizationAlgorithm == "NSGA2") { + _this->Population.generateInitialPopulation(_this->Individuals); } - _this->_algorithm->evolve(_this->_population); + _this->Algorithm->evolve(_this->Population); return NULL; } diff --git a/src/firestarter/Optimizer/Population.cpp b/src/firestarter/Optimizer/Population.cpp index d7915bd7..35c5ef04 100644 --- a/src/firestarter/Optimizer/Population.cpp +++ b/src/firestarter/Optimizer/Population.cpp @@ -52,7 +52,7 @@ void Population::generateInitialPopulation(std::size_t populationSize) { } } -std::size_t Population::size() const { return _x.size(); } 
+std::size_t Population::size() const { return X.size(); } void Population::append(Individual const& ind) { assert(this->problem().getDims() == ind.size()); @@ -64,10 +64,10 @@ void Population::append(Individual const& ind) { if (optional_metric.has_value()) { metrics = optional_metric.value(); } else { - metrics = this->_problem->metrics(ind); + metrics = this->ProblemPtr->metrics(ind); } - auto fitness = this->_problem->fitness(metrics); + auto fitness = this->ProblemPtr->fitness(metrics); this->append(ind, fitness); @@ -87,16 +87,16 @@ void Population::append(Individual const& ind, std::vector const& fit) { assert(this->problem().getNobjs() == fit.size()); assert(this->problem().getDims() == ind.size()); - this->_x.push_back(ind); - this->_f.push_back(fit); + this->X.push_back(ind); + this->F.push_back(fit); } void Population::insert(std::size_t idx, Individual const& ind, std::vector const& fit) { // assert that population is big enough - assert(_x.size() > idx); + assert(X.size() > idx); - _x[idx] = ind; - _f[idx] = fit; + X[idx] = ind; + F[idx] = fit; } Individual Population::getRandomIndividual() { @@ -111,7 +111,7 @@ Individual Population::getRandomIndividual() { auto const lb = std::get<0>(bounds[i]); auto const ub = std::get<1>(bounds[i]); - out[i] = std::uniform_int_distribution(lb, ub)(this->gen); + out[i] = std::uniform_int_distribution(lb, ub)(this->Gen); firestarter::log::trace() << " - " << i << ": [" << lb << "," << ub << "]: " << out[i]; } @@ -127,11 +127,11 @@ std::optional Population::bestIndividual() const { } // assert that we have individuals - assert(this->_x.size() > 0); + assert(this->X.size() > 0); - auto best = std::max_element(this->_x.begin(), this->_x.end(), [](auto a, auto b) { return a < b; }); + auto best = std::max_element(this->X.begin(), this->X.end(), [](auto a, auto b) { return a < b; }); - assert(best != this->_x.end()); + assert(best != this->X.end()); return *best; } diff --git 
a/src/firestarter/Optimizer/Util/MultiObjective.cpp b/src/firestarter/Optimizer/Util/MultiObjective.cpp index df24effa..78092234 100644 --- a/src/firestarter/Optimizer/Util/MultiObjective.cpp +++ b/src/firestarter/Optimizer/Util/MultiObjective.cpp @@ -32,7 +32,7 @@ namespace firestarter::optimizer::util { // Less than compares floating point types placing nans after inf or before -inf // It is a useful function when calling e.g. std::sort to guarantee a weak // strict ordering and avoid an undefined behaviour -bool less_than_f(double a, double b) { +bool lessThanF(double a, double b) { if (!std::isnan(a)) { if (!std::isnan(b)) return a < b; // a < b @@ -49,7 +49,7 @@ bool less_than_f(double a, double b) { // Greater than compares floating point types placing nans after inf or before // -inf It is a useful function when calling e.g. std::sort to guarantee a weak // strict ordering and avoid an undefined behaviour -bool greater_than_f(double a, double b) { +bool greaterThanF(double a, double b) { if (!std::isnan(a)) { if (!std::isnan(b)) return a > b; // a > b @@ -81,7 +81,7 @@ bool greater_than_f(double a, double b) { * @throws std::invalid_argument if the dimensions of the two objectives are * different */ -bool pareto_dominance(const std::vector& obj1, const std::vector& obj2) { +bool paretoDominance(const std::vector& obj1, const std::vector& obj2) { if (obj1.size() != obj2.size()) { throw std::invalid_argument( "Different number of objectives found in input fitnesses: " + std::to_string(obj1.size()) + " and " + @@ -89,9 +89,9 @@ bool pareto_dominance(const std::vector& obj1, const std::vector } bool found_strictly_dominating_dimension = false; for (decltype(obj1.size()) i = 0u; i < obj1.size(); ++i) { - if (greater_than_f(obj2[i], obj1[i])) { + if (greaterThanF(obj2[i], obj1[i])) { return false; - } else if (less_than_f(obj2[i], obj1[i])) { + } else if (lessThanF(obj2[i], obj1[i])) { found_strictly_dominating_dimension = true; } } @@ -130,7 +130,7 @@ bool 
pareto_dominance(const std::vector& obj1, const std::vector */ std::tuple>, std::vector>, std::vector, std::vector> -fast_non_dominated_sorting(const std::vector>& points) { +fastNonDominatedSorting(const std::vector>& points) { auto N = points.size(); // We make sure to have two points at least (one could also be allowed) if (N < 2u) { @@ -148,10 +148,10 @@ fast_non_dominated_sorting(const std::vector>& points) { dom_list[i].clear(); dom_count[i] = 0u; for (decltype(N) j = 0u; j < i; ++j) { - if (pareto_dominance(points[i], points[j])) { + if (paretoDominance(points[i], points[j])) { dom_list[i].push_back(j); ++dom_count[j]; - } else if (pareto_dominance(points[j], points[i])) { + } else if (paretoDominance(points[j], points[i])) { dom_list[j].push_back(i); ++dom_count[i]; } @@ -212,7 +212,7 @@ fast_non_dominated_sorting(const std::vector>& points) { * @throws std::invalid_argument If points in \p non_dom_front do not all have * the same dimensionality */ -std::vector crowding_distance(const std::vector>& non_dom_front) { +std::vector crowdingDistance(const std::vector>& non_dom_front) { auto N = non_dom_front.size(); // We make sure to have two points at least if (N < 2u) { @@ -239,7 +239,7 @@ std::vector crowding_distance(const std::vector>& no std::vector retval(N, 0.); for (decltype(M) i = 0u; i < M; ++i) { std::sort(indexes.begin(), indexes.end(), [i, &non_dom_front](std::size_t idx1, std::size_t idx2) { - return less_than_f(non_dom_front[idx1][i], non_dom_front[idx2][i]); + return lessThanF(non_dom_front[idx1][i], non_dom_front[idx2][i]); }); retval[indexes[0]] = std::numeric_limits::infinity(); retval[indexes[N - 1u]] = std::numeric_limits::infinity(); @@ -254,9 +254,9 @@ std::vector crowding_distance(const std::vector>& no // Multi-objective tournament selection. Requires all sizes to be consistent. // Does not check if input is well formed. 
std::vector::size_type -mo_tournament_selection(std::vector::size_type idx1, std::vector::size_type idx2, - const std::vector::size_type>& non_domination_rank, - const std::vector& crowding_d, std::mt19937& mt) { +moTournamentSelection(std::vector::size_type idx1, std::vector::size_type idx2, + const std::vector::size_type>& non_domination_rank, + const std::vector& crowding_d, std::mt19937& mt) { if (non_domination_rank[idx1] < non_domination_rank[idx2]) return idx1; if (non_domination_rank[idx1] > non_domination_rank[idx2]) @@ -275,8 +275,8 @@ mo_tournament_selection(std::vector::size_type idx1, std::vector // bound reads. nix is the integer dimension (integer alleles assumed at the end // of the chromosome) std::pair -sbx_crossover(const firestarter::optimizer::Individual& parent1, const firestarter::optimizer::Individual& parent2, - const double p_cr, std::mt19937& mt) { +sbxCrossover(const firestarter::optimizer::Individual& parent1, const firestarter::optimizer::Individual& parent2, + const double p_cr, std::mt19937& mt) { // Decision vector dimensions auto nix = parent1.size(); firestarter::optimizer::Individual::size_type site1, site2; @@ -309,9 +309,8 @@ sbx_crossover(const firestarter::optimizer::Individual& parent1, const firestart // Performs polynomial mutation. Requires all sizes to be consistent. Does not // check if input is well formed. 
p_m is the mutation probability -void polynomial_mutation(firestarter::optimizer::Individual& child, - const std::vector>& bounds, const double p_m, - std::mt19937& mt) { +void polynomialMutation(firestarter::optimizer::Individual& child, + const std::vector>& bounds, const double p_m, std::mt19937& mt) { // Decision vector dimensions auto nix = child.size(); // Random distributions @@ -362,7 +361,7 @@ void polynomial_mutation(firestarter::optimizer::Individual& child, * @throws unspecified all exceptions thrown by * pagmo::fast_non_dominated_sorting and pagmo::crowding_distance */ -std::vector select_best_N_mo(const std::vector>& input_f, std::size_t N) { +std::vector selectBestNMo(const std::vector>& input_f, std::size_t N) { if (N == 0u) { // corner case return {}; } @@ -380,7 +379,7 @@ std::vector select_best_N_mo(const std::vector> std::vector retval; std::vector::size_type front_id(0u); // Run fast-non-dominated sorting - auto tuple = fast_non_dominated_sorting(input_f); + auto tuple = fastNonDominatedSorting(input_f); // Insert all non dominated fronts if not more than N for (const auto& front : std::get<0>(tuple)) { if (retval.size() + front.size() <= N) { @@ -401,13 +400,13 @@ std::vector select_best_N_mo(const std::vector> for (decltype(front.size()) i = 0u; i < front.size(); ++i) { non_dom_fits[i] = input_f[front[i]]; } - std::vector cds(crowding_distance(non_dom_fits)); + std::vector cds(crowdingDistance(non_dom_fits)); // We now have front and crowding distance, we sort the front w.r.t. 
the // crowding std::vector idxs(front.size()); std::iota(idxs.begin(), idxs.end(), std::size_t(0u)); std::sort(idxs.begin(), idxs.end(), [&cds](std::size_t idx1, std::size_t idx2) { - return greater_than_f(cds[idx1], cds[idx2]); + return greaterThanF(cds[idx1], cds[idx2]); }); // Descending order1 auto remaining = N - retval.size(); for (decltype(remaining) i = 0u; i < remaining; ++i) { @@ -453,7 +452,7 @@ std::vector ideal(const std::vector>& points) { for (decltype(M) i = 0u; i < M; ++i) { retval[i] = (*std::min_element(points.begin(), points.end(), [i](const std::vector& f1, const std::vector& f2) { - return util::greater_than_f(f1[i], f2[i]); + return util::greaterThanF(f1[i], f2[i]); }))[i]; } return retval; diff --git a/src/firestarter/WatchdogWorker.cpp b/src/firestarter/WatchdogWorker.cpp index f5091fa2..8d8218eb 100644 --- a/src/firestarter/WatchdogWorker.cpp +++ b/src/firestarter/WatchdogWorker.cpp @@ -80,11 +80,11 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::m SCOREP_USER_REGION_BY_NAME_BEGIN("WD_HIGH", SCOREP_USER_REGION_TYPE_COMMON); #endif { - std::unique_lock lk(this->_watchdogTerminateMutex); + std::unique_lock lk(this->WatchdogTerminateMutex); // abort waiting if we get the interrupt signal - this->_watchdogTerminateAlert.wait_for(lk, load_nsec, [this]() { return this->_watchdog_terminate; }); + this->WatchdogTerminateAlert.wait_for(lk, load_nsec, [this]() { return this->WatchdogTerminate; }); // terminate on interrupt - if (this->_watchdog_terminate) { + if (this->WatchdogTerminate) { return EXIT_SUCCESS; } } @@ -109,11 +109,11 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::m SCOREP_USER_REGION_BY_NAME_BEGIN("WD_LOW", SCOREP_USER_REGION_TYPE_COMMON); #endif { - std::unique_lock lk(this->_watchdogTerminateMutex); + std::unique_lock lk(this->WatchdogTerminateMutex); // abort waiting if we get the interrupt signal - this->_watchdogTerminateAlert.wait_for(lk, idle_nsec, [this]() 
{ return this->_watchdog_terminate; }); + this->WatchdogTerminateAlert.wait_for(lk, idle_nsec, [this]() { return this->WatchdogTerminate; }); // terminate on interrupt - if (this->_watchdog_terminate) { + if (this->WatchdogTerminate) { return EXIT_SUCCESS; } } @@ -129,8 +129,8 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::m // exit when termination signal is received or timeout is reached { - std::lock_guard lk(this->_watchdogTerminateMutex); - if (this->_watchdog_terminate || (timeout > sec::zero() && (time > timeout))) { + std::lock_guard lk(this->WatchdogTerminateMutex); + if (this->WatchdogTerminate || (timeout > sec::zero() && (time > timeout))) { this->setLoad(LOAD_STOP); return EXIT_SUCCESS; @@ -143,9 +143,9 @@ int Firestarter::watchdogWorker(std::chrono::microseconds period, std::chrono::m // else return and wait for sigterm handler to request threads to stop. if (timeout > sec::zero()) { { - std::unique_lock lk(Firestarter::_watchdogTerminateMutex); + std::unique_lock lk(Firestarter::WatchdogTerminateMutex); // abort waiting if we get the interrupt signal - Firestarter::_watchdogTerminateAlert.wait_for(lk, timeout, []() { return Firestarter::_watchdog_terminate; }); + Firestarter::WatchdogTerminateAlert.wait_for(lk, timeout, []() { return Firestarter::WatchdogTerminate; }); } this->setLoad(LOAD_STOP); From 37a8518fd294a6bdc36e5eb4e5ccfaf8df639148 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Fri, 27 Sep 2024 16:17:02 +0200 Subject: [PATCH 011/167] update .clang-tidy --- .clang-tidy | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index c3a4c174..f61bbcf4 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -6,6 +6,13 @@ # -boost-use-ranges: crash of clangd https://github.com/llvm/llvm-project/issues/109037 +# -readability-identifier-length length of at least 3 does not make sense for some variables + +# -cppcoreguidelines-avoid-magic-numbers +# 
-readability-magic-numbers currently we have too many numbers in this code + +# -bugprone-easily-swappable-parameters we are not using strong typedefs + Checks: > -*, boost-*, @@ -22,7 +29,11 @@ Checks: > readability-*, -bugprone-narrowing-conversions, -cppcoreguidelines-special-member-functions, - -boost-use-ranges + -boost-use-ranges, + -readability-identifier-length, + -cppcoreguidelines-avoid-magic-numbers, + -readability-magic-numbers, + -bugprone-easily-swappable-parameters # Turn all the warnings from the checks above into errors. WarningsAsErrors: "*" From 90beb594232ebbc7460db3b8b193317a1d31a977 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Fri, 27 Sep 2024 16:18:23 +0200 Subject: [PATCH 012/167] add an empty file as a workaround for clangd issue --- src/CMakeLists.txt | 2 ++ .../X86/Platform/X86PlatformConfig.cpp | 25 +++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6136bb35..7d405dd9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -4,6 +4,8 @@ SET(FIRESTARTER_FILES firestarter/LoadWorker.cpp firestarter/WatchdogWorker.cpp firestarter/DumpRegisterWorker.cpp + + firestarter/Environment/X86/Platform/X86PlatformConfig.cpp firestarter/Environment/Environment.cpp firestarter/Environment/CPUTopology.cpp diff --git a/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp b/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp new file mode 100644 index 00000000..0cc5abef --- /dev/null +++ b/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp @@ -0,0 +1,25 @@ +/****************************************************************************** + * FIRESTARTER - A Processor Stress Test Utility + * Copyright (C) 2024 TU Dresden, Center for Information Services and High + * Performance Computing + * + * This program is free software: you can redistribute it and/or modify + * it under
the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Contact: daniel.hackenberg@tu-dresden.de + *****************************************************************************/ + +// This file exists to get an entry in the compile commands database. Clangd will interpolate the include directories +// for header files based on the source file with the best matching score. This file should be the best score for the +// included header. Therefore we should not see any errors in this file for missing includes. For more information
use unique_ptr for the payload instead of raw pointers --- .../firestarter/DumpRegisterWorkerData.hpp | 6 +- .../firestarter/Environment/CPUTopology.hpp | 60 +-- .../firestarter/Environment/Environment.hpp | 17 +- .../Environment/Payload/Payload.hpp | 3 +- .../Environment/Platform/PlatformConfig.hpp | 11 +- .../Environment/Platform/RuntimeConfig.hpp | 5 +- .../Environment/X86/Payload/AVX512Payload.hpp | 6 +- .../Environment/X86/Payload/AVXPayload.hpp | 6 +- .../Environment/X86/Payload/FMA4Payload.hpp | 6 +- .../Environment/X86/Payload/FMAPayload.hpp | 6 +- .../Environment/X86/Payload/SSE2Payload.hpp | 7 +- .../Environment/X86/Payload/X86Payload.hpp | 10 +- .../Environment/X86/Payload/ZENFMAPayload.hpp | 7 +- .../X86/Platform/BulldozerConfig.hpp | 12 +- .../X86/Platform/HaswellConfig.hpp | 10 +- .../X86/Platform/HaswellEPConfig.hpp | 12 +- .../X86/Platform/KnightsLandingConfig.hpp | 12 +- .../Environment/X86/Platform/NaplesConfig.hpp | 12 +- .../X86/Platform/NehalemConfig.hpp | 12 +- .../X86/Platform/NehalemEPConfig.hpp | 12 +- .../Environment/X86/Platform/RomeConfig.hpp | 12 +- .../X86/Platform/SandyBridgeConfig.hpp | 12 +- .../X86/Platform/SandyBridgeEPConfig.hpp | 12 +- .../X86/Platform/SkylakeConfig.hpp | 12 +- .../X86/Platform/SkylakeSPConfig.hpp | 12 +- .../X86/Platform/X86PlatformConfig.hpp | 45 +- .../Environment/X86/X86CPUTopology.hpp | 9 +- .../Environment/X86/X86Environment.hpp | 58 ++- include/firestarter/Firestarter.hpp | 20 +- include/firestarter/Json/Summary.hpp | 25 +- include/firestarter/LoadWorkerData.hpp | 8 +- .../Logging/FirstWorkerThreadFilter.hpp | 5 +- include/firestarter/Logging/Log.hpp | 42 +- .../Measurement/MeasurementWorker.hpp | 13 +- .../Measurement/Metric/IPCEstimate.h | 2 +- include/firestarter/Measurement/Metric/Perf.h | 2 +- include/firestarter/Measurement/Metric/RAPL.h | 2 +- include/firestarter/Measurement/Summary.hpp | 7 +- include/firestarter/Measurement/TimeValue.hpp | 2 +- include/firestarter/Optimizer/Algorithm.hpp | 2 +- 
.../firestarter/Optimizer/Algorithm/NSGA2.hpp | 2 +- include/firestarter/Optimizer/History.hpp | 158 +++---- .../firestarter/Optimizer/OptimizerWorker.hpp | 4 +- include/firestarter/Optimizer/Population.hpp | 5 +- include/firestarter/Optimizer/Problem.hpp | 4 +- .../Optimizer/Problem/CLIArgumentProblem.hpp | 6 +- .../Optimizer/Util/MultiObjective.hpp | 2 +- src/firestarter/Environment/CPUTopology.cpp | 401 +++++++++--------- .../Environment/X86/Payload/AVX512Payload.cpp | 9 +- .../Environment/X86/Payload/X86Payload.cpp | 1 + .../Environment/X86/X86CPUTopology.cpp | 228 +++++----- .../Environment/X86/X86Environment.cpp | 86 ++-- src/firestarter/LoadWorker.cpp | 4 +- src/firestarter/Main.cpp | 8 +- .../Measurement/MeasurementWorker.cpp | 2 + src/firestarter/Optimizer/Algorithm/NSGA2.cpp | 2 + src/firestarter/Optimizer/Population.cpp | 2 +- 57 files changed, 723 insertions(+), 735 deletions(-) diff --git a/include/firestarter/DumpRegisterWorkerData.hpp b/include/firestarter/DumpRegisterWorkerData.hpp index a05863d5..cf5e22ba 100644 --- a/include/firestarter/DumpRegisterWorkerData.hpp +++ b/include/firestarter/DumpRegisterWorkerData.hpp @@ -21,9 +21,9 @@ #pragma once +#include "LoadWorkerData.hpp" +#include "Logging/Log.hpp" #include -#include -#include #include #ifdef FIRESTARTER_DEBUG_FEATURES @@ -32,6 +32,8 @@ namespace firestarter { class DumpRegisterWorkerData { public: + DumpRegisterWorkerData() = delete; + DumpRegisterWorkerData(std::shared_ptr LoadWorkerDataPtr, std::chrono::seconds DumpTimeDelta, const std::string& DumpFilePath) : LoadWorkerDataPtr(std::move(LoadWorkerDataPtr)) diff --git a/include/firestarter/Environment/CPUTopology.hpp b/include/firestarter/Environment/CPUTopology.hpp index ba10df3c..c58933db 100644 --- a/include/firestarter/Environment/CPUTopology.hpp +++ b/include/firestarter/Environment/CPUTopology.hpp @@ -34,50 +34,52 @@ namespace firestarter::environment { class CPUTopology { public: - CPUTopology(std::string architecture); + explicit 
CPUTopology(std::string Architecture); virtual ~CPUTopology(); - unsigned numThreads() const { return _numThreadsPerCore * _numCoresTotal; } - unsigned maxNumThreads() const; - unsigned numThreadsPerCore() const { return _numThreadsPerCore; } - unsigned numCoresTotal() const { return _numCoresTotal; } - unsigned numPackages() const { return _numPackages; } + [[nodiscard]] auto numThreads() const -> unsigned { return NumThreadsPerCore * NumCoresTotal; } + [[nodiscard]] auto maxNumThreads() const -> unsigned; + [[nodiscard]] auto numThreadsPerCore() const -> unsigned { return NumThreadsPerCore; } + [[nodiscard]] auto numCoresTotal() const -> unsigned { return NumCoresTotal; } + [[nodiscard]] auto numPackages() const -> unsigned { return NumPackages; } - std::string const& architecture() const { return _architecture; } - virtual std::string const& vendor() const { return _vendor; } - virtual std::string const& processorName() const { return _processorName; } - virtual std::string const& model() const = 0; + [[nodiscard]] auto architecture() const -> std::string const& { return Architecture; } + [[nodiscard]] virtual auto vendor() const -> std::string const& { return Vendor; } + [[nodiscard]] virtual auto processorName() const -> std::string const& { return ProcessorName; } + [[nodiscard]] virtual auto model() const -> std::string const& { return Model; } // get the size of the L1i-cache in bytes - unsigned instructionCacheSize() const { return _instructionCacheSize; } + [[nodiscard]] auto instructionCacheSize() const -> unsigned { return InstructionCacheSize; } // return the cpu clockrate in Hz - virtual uint64_t clockrate() const { return _clockrate; } + [[nodiscard]] virtual auto clockrate() const -> uint64_t { return Clockrate; } // return the cpu features - virtual std::list const& features() const = 0; + [[nodiscard]] virtual auto features() const -> std::list const& = 0; // get a timestamp - virtual uint64_t timestamp() const = 0; + [[nodiscard]] virtual auto 
timestamp() const -> uint64_t = 0; - int getPkgIdFromPU(unsigned pu) const; - int getCoreIdFromPU(unsigned pu) const; + [[nodiscard]] auto getPkgIdFromPU(unsigned Pu) const -> int; + [[nodiscard]] auto getCoreIdFromPU(unsigned Pu) const -> int; protected: - std::string scalingGovernor() const; - std::ostream& print(std::ostream& stream) const; + [[nodiscard]] static auto scalingGovernor() -> std::string; + [[nodiscard]] auto print(std::ostream& Stream) const -> std::ostream&; + + std::string Vendor; + std::string Model; private: - static std::stringstream getFileAsStream(std::string const& filePath); - - unsigned _numThreadsPerCore; - unsigned _numCoresTotal; - unsigned _numPackages; - std::string _architecture; - std::string _vendor = ""; - std::string _processorName = ""; - unsigned _instructionCacheSize = 0; - uint64_t _clockrate = 0; - hwloc_topology_t topology; + [[nodiscard]] static auto getFileAsStream(std::string const& FilePath) -> std::stringstream; + + unsigned NumThreadsPerCore; + unsigned NumCoresTotal; + unsigned NumPackages; + std::string Architecture; + std::string ProcessorName; + unsigned InstructionCacheSize = 0; + uint64_t Clockrate = 0; + hwloc_topology_t Topology; }; } // namespace firestarter::environment diff --git a/include/firestarter/Environment/Environment.hpp b/include/firestarter/Environment/Environment.hpp index 5f204ccc..57e60094 100644 --- a/include/firestarter/Environment/Environment.hpp +++ b/include/firestarter/Environment/Environment.hpp @@ -21,11 +21,10 @@ #pragma once +#include "CPUTopology.hpp" +#include "Platform/RuntimeConfig.hpp" #include #include -#include -#include -#include #include namespace firestarter::environment { @@ -33,13 +32,9 @@ namespace firestarter::environment { class Environment { public: Environment() = delete; - explicit Environment(CPUTopology* Topology) - : Topology(Topology) {} - virtual ~Environment() { - delete this->Topology; - - delete SelectedConfig; - } + explicit Environment(std::unique_ptr&& 
Topology) + : Topology(std::move(Topology)) {} + virtual ~Environment() { delete SelectedConfig; } auto evaluateCpuAffinity(unsigned RequestedNumThreads, std::string CpuBind) -> int; auto setCpuAffinity(unsigned Thread) -> int; @@ -77,7 +72,7 @@ class Environment { protected: platform::RuntimeConfig* SelectedConfig = nullptr; - CPUTopology* Topology = nullptr; + std::unique_ptr Topology; private: uint64_t RequestedNumThreads = 0; diff --git a/include/firestarter/Environment/Payload/Payload.hpp b/include/firestarter/Environment/Payload/Payload.hpp index 3871400f..9c37bdfc 100644 --- a/include/firestarter/Environment/Payload/Payload.hpp +++ b/include/firestarter/Environment/Payload/Payload.hpp @@ -22,6 +22,7 @@ #pragma once #include +#include #include #include #include @@ -101,7 +102,7 @@ class Payload { [[nodiscard]] virtual auto highLoadFunction(uint64_t* AddrMem, volatile uint64_t* AddrHigh, uint64_t Iterations) -> uint64_t = 0; - [[nodiscard]] virtual auto clone() const -> Payload* = 0; + [[nodiscard]] virtual auto clone() const -> std::unique_ptr = 0; }; } // namespace firestarter::environment::payload diff --git a/include/firestarter/Environment/Platform/PlatformConfig.hpp b/include/firestarter/Environment/Platform/PlatformConfig.hpp index 5588cb8a..954b6682 100644 --- a/include/firestarter/Environment/Platform/PlatformConfig.hpp +++ b/include/firestarter/Environment/Platform/PlatformConfig.hpp @@ -21,8 +21,7 @@ #pragma once -#include -#include +#include "../Payload/Payload.hpp" #include #include #include @@ -35,7 +34,7 @@ class PlatformConfig { private: std::string Name; std::list Threads; - payload::Payload* Payload; + std::unique_ptr Payload; protected: unsigned InstructionCacheSize; @@ -48,15 +47,15 @@ class PlatformConfig { PlatformConfig(std::string Name, std::list Threads, unsigned InstructionCacheSize, std::initializer_list DataCacheBufferSize, unsigned RamBufferSize, unsigned Lines, - payload::Payload* Payload) + std::unique_ptr&& Payload) : 
Name(std::move(Name)) , Threads(std::move(Threads)) - , Payload(Payload) + , Payload(std::move(Payload)) , InstructionCacheSize(InstructionCacheSize) , DataCacheBufferSize(DataCacheBufferSize) , RamBufferSize(RamBufferSize) , Lines(Lines) {} - virtual ~PlatformConfig() { delete Payload; } + virtual ~PlatformConfig() = default; [[nodiscard]] auto name() const -> const std::string& { return Name; } [[nodiscard]] auto instructionCacheSize() const -> unsigned { return InstructionCacheSize; } diff --git a/include/firestarter/Environment/Platform/RuntimeConfig.hpp b/include/firestarter/Environment/Platform/RuntimeConfig.hpp index 3d1d1786..17770e97 100644 --- a/include/firestarter/Environment/Platform/RuntimeConfig.hpp +++ b/include/firestarter/Environment/Platform/RuntimeConfig.hpp @@ -21,8 +21,9 @@ #pragma once +#include "../../Logging/Log.hpp" +#include "../Platform/PlatformConfig.hpp" #include -#include namespace firestarter::environment::platform { @@ -62,7 +63,7 @@ class RuntimeConfig { , RamBufferSize(Other.ramBufferSize()) , Lines(Other.lines()) {} - ~RuntimeConfig() { Payload.reset(); } + ~RuntimeConfig() = default; [[nodiscard]] auto platformConfig() const -> PlatformConfig const& { return PlatformConfigValue; } [[nodiscard]] auto payload() const -> payload::Payload& { diff --git a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp index 317b8196..9478353e 100644 --- a/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/AVX512Payload.hpp @@ -21,7 +21,7 @@ #pragma once -#include +#include "X86Payload.hpp" namespace firestarter::environment::x86::payload { class AVX512Payload final : public X86Payload { @@ -37,8 +37,8 @@ class AVX512Payload final : public X86Payload { [[nodiscard]] auto getAvailableInstructions() const -> std::list override; void init(uint64_t* MemoryAddr, uint64_t BufferSize) override; - [[nodiscard]] auto 
clone() const -> firestarter::environment::payload::Payload* override { - return new AVX512Payload(this->supportedFeatures()); + [[nodiscard]] auto clone() const -> std::unique_ptr override { + return std::make_unique(this->supportedFeatures()); }; private: diff --git a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp index 6516c0de..ca6cac6d 100644 --- a/include/firestarter/Environment/X86/Payload/AVXPayload.hpp +++ b/include/firestarter/Environment/X86/Payload/AVXPayload.hpp @@ -21,7 +21,7 @@ #pragma once -#include +#include "X86Payload.hpp" namespace firestarter::environment::x86::payload { class AVXPayload final : public X86Payload { @@ -37,8 +37,8 @@ class AVXPayload final : public X86Payload { [[nodiscard]] auto getAvailableInstructions() const -> std::list override; void init(uint64_t* MemoryAddr, uint64_t BufferSize) override; - [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override { - return new AVXPayload(this->supportedFeatures()); + [[nodiscard]] auto clone() const -> std::unique_ptr override { + return std::make_unique(this->supportedFeatures()); }; private: diff --git a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp index bb623e68..ccc43d00 100644 --- a/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/FMA4Payload.hpp @@ -21,7 +21,7 @@ #pragma once -#include +#include "X86Payload.hpp" namespace firestarter::environment::x86::payload { @@ -39,8 +39,8 @@ class FMA4Payload final : public X86Payload { [[nodiscard]] auto getAvailableInstructions() const -> std::list override; void init(uint64_t* MemoryAddr, uint64_t BufferSize) override; - [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override { - return new FMA4Payload(this->supportedFeatures()); + [[nodiscard]] auto clone() const -> 
std::unique_ptr override { + return std::make_unique(this->supportedFeatures()); }; private: diff --git a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp index b610a838..a8443b97 100644 --- a/include/firestarter/Environment/X86/Payload/FMAPayload.hpp +++ b/include/firestarter/Environment/X86/Payload/FMAPayload.hpp @@ -21,7 +21,7 @@ #pragma once -#include +#include "X86Payload.hpp" namespace firestarter::environment::x86::payload { class FMAPayload final : public X86Payload { @@ -37,8 +37,8 @@ class FMAPayload final : public X86Payload { [[nodiscard]] auto getAvailableInstructions() const -> std::list override; void init(uint64_t* MemoryAddr, uint64_t BufferSize) override; - [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override { - return new FMAPayload(this->supportedFeatures()); + [[nodiscard]] auto clone() const -> std::unique_ptr override { + return std::make_unique(this->supportedFeatures()); }; private: diff --git a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp index 538837b4..5e363432 100644 --- a/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/SSE2Payload.hpp @@ -21,8 +21,7 @@ #pragma once -#include -#include +#include "X86Payload.hpp" namespace firestarter::environment::x86::payload { class SSE2Payload final : public X86Payload { @@ -38,8 +37,8 @@ class SSE2Payload final : public X86Payload { [[nodiscard]] auto getAvailableInstructions() const -> std::list override; void init(uint64_t* MemoryAddr, uint64_t BufferSize) override; - [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override { - return new SSE2Payload(this->supportedFeatures()); + [[nodiscard]] auto clone() const -> std::unique_ptr override { + return std::make_unique(this->supportedFeatures()); }; private: diff --git 
a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp index 9e947143..2e38b855 100644 --- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp @@ -21,13 +21,13 @@ #pragma once +#include "../../../Constants.hpp" // IWYU pragma: keep +#include "../../../DumpRegisterStruct.hpp" // IWYU pragma: keep +#include "../../../Logging/Log.hpp" // IWYU pragma: keep +#include "../../Payload/Payload.hpp" #include - #include -#include -#include -#include -#include +#include // IWYU pragma: keep #include #define INIT_BLOCKSIZE 1024 diff --git a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp index 425dd600..35746efc 100644 --- a/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp +++ b/include/firestarter/Environment/X86/Payload/ZENFMAPayload.hpp @@ -21,8 +21,7 @@ #pragma once -#include -#include +#include "X86Payload.hpp" namespace firestarter::environment::x86::payload { class ZENFMAPayload final : public X86Payload { @@ -39,8 +38,8 @@ class ZENFMAPayload final : public X86Payload { [[nodiscard]] auto getAvailableInstructions() const -> std::list override; void init(uint64_t* MemoryAddr, uint64_t BufferSize) override; - [[nodiscard]] auto clone() const -> firestarter::environment::payload::Payload* override { - return new ZENFMAPayload(this->supportedFeatures()); + [[nodiscard]] auto clone() const -> std::unique_ptr override { + return std::make_unique(this->supportedFeatures()); }; private: diff --git a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp index 4cc4b811..fc7e5337 100644 --- a/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/BulldozerConfig.hpp @@ -21,17 +21,17 @@ #pragma once -#include 
-#include +#include "../Payload/FMA4Payload.hpp" +#include "X86PlatformConfig.hpp" namespace firestarter::environment::x86::platform { class BulldozerConfig final : public X86PlatformConfig { public: - BulldozerConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("BLD_OPTERON", 21, {1, 2, 3}, {1}, 0, {16384, 1048576, 786432}, 104857600, 1536, family, - model, threads, new payload::FMA4Payload(supportedFeatures)) {} + BulldozerConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads) + : X86PlatformConfig("BLD_OPTERON", 21, {1, 2, 3}, {1}, 0, {16384, 1048576, 786432}, 104857600, 1536, Family, + Model, Threads, std::make_unique(SupportedFeatures)) {} - std::vector> getDefaultPayloadSettings() const override { + [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector> override { return std::vector>( {{"RAM_L", 1}, {"L3_L", 1}, {"L2_LS", 5}, {"L1_L", 90}, {"REG", 45}}); } diff --git a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp index 5b30d6a0..00bf3199 100644 --- a/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/HaswellConfig.hpp @@ -21,17 +21,17 @@ #pragma once -#include -#include +#include "../Payload/FMAPayload.hpp" +#include "X86PlatformConfig.hpp" namespace firestarter::environment::x86::platform { class HaswellConfig final : public X86PlatformConfig { public: - HaswellConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) + HaswellConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads) : X86PlatformConfig("HSW_COREI", 6, {60, 61, 69, 70, 71}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, - family, model, threads, new payload::FMAPayload(supportedFeatures)) {} + Family, Model, Threads, 
std::make_unique(SupportedFeatures)) {} - std::vector> getDefaultPayloadSettings() const override { + [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector> override { return std::vector>( {{"RAM_L", 2}, {"L3_LS", 3}, {"L2_LS", 9}, {"L1_LS", 90}, {"REG", 40}}); } diff --git a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp index 106dd0e3..d5df3341 100644 --- a/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/HaswellEPConfig.hpp @@ -21,17 +21,17 @@ #pragma once -#include -#include +#include "../Payload/FMAPayload.hpp" +#include "X86PlatformConfig.hpp" namespace firestarter::environment::x86::platform { class HaswellEPConfig final : public X86PlatformConfig { public: - HaswellEPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("HSW_XEONEP", 6, {63, 79}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, family, - model, threads, new payload::FMAPayload(supportedFeatures)) {} + HaswellEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads) + : X86PlatformConfig("HSW_XEONEP", 6, {63, 79}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, Family, + Model, Threads, std::make_unique(SupportedFeatures)) {} - std::vector> getDefaultPayloadSettings() const override { + [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector> override { return std::vector>( {{"RAM_L", 8}, {"L3_LS", 1}, {"L2_LS", 29}, {"L1_LS", 100}, {"REG", 100}}); } diff --git a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp index 709ef934..cbc7d976 100644 --- a/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/KnightsLandingConfig.hpp @@ 
-21,17 +21,17 @@ #pragma once -#include -#include +#include "../Payload/AVX512Payload.hpp" +#include "X86PlatformConfig.hpp" namespace firestarter::environment::x86::platform { class KnightsLandingConfig final : public X86PlatformConfig { public: - KnightsLandingConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("KNL_XEONPHI", 6, {87}, {4}, 0, {32768, 524288, 236279125}, 26214400, 1536, family, model, - threads, new payload::AVX512Payload(supportedFeatures)) {} + KnightsLandingConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads) + : X86PlatformConfig("KNL_XEONPHI", 6, {87}, {4}, 0, {32768, 524288, 236279125}, 26214400, 1536, Family, Model, + Threads, std::make_unique(SupportedFeatures)) {} - std::vector> getDefaultPayloadSettings() const override { + [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector> override { return std::vector>({{"RAM_P", 3}, {"L2_S", 8}, {"L1_L", 40}, {"REG", 10}}); } }; diff --git a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp index 5ad0a065..56f5bdc6 100644 --- a/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/NaplesConfig.hpp @@ -21,17 +21,17 @@ #pragma once -#include -#include +#include "../Payload/ZENFMAPayload.hpp" +#include "X86PlatformConfig.hpp" namespace firestarter::environment::x86::platform { class NaplesConfig final : public X86PlatformConfig { public: - NaplesConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24}, {1, 2}, 0, {65536, 524288, 2097152}, 104857600, 1536, family, - model, threads, new payload::ZENFMAPayload(supportedFeatures)) {} + NaplesConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned 
Threads) + : X86PlatformConfig("ZEN_EPYC", 23, {1, 8, 17, 24}, {1, 2}, 0, {65536, 524288, 2097152}, 104857600, 1536, Family, + Model, Threads, std::make_unique(SupportedFeatures)) {} - std::vector> getDefaultPayloadSettings() const override { + [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector> override { return std::vector>( {{"RAM_L", 3}, {"L3_L", 14}, {"L2_L", 75}, {"L1_LS", 81}, {"REG", 100}}); } diff --git a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp index 3f0748de..320d29f4 100644 --- a/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/NehalemConfig.hpp @@ -21,17 +21,17 @@ #pragma once -#include -#include +#include "../Payload/SSE2Payload.hpp" +#include "X86PlatformConfig.hpp" namespace firestarter::environment::x86::platform { class NehalemConfig final : public X86PlatformConfig { public: - NehalemConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, family, - model, threads, new payload::SSE2Payload(supportedFeatures)) {} + NehalemConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads) + : X86PlatformConfig("NHM_COREI", 6, {30, 37, 23}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, Family, + Model, Threads, std::make_unique(SupportedFeatures)) {} - std::vector> getDefaultPayloadSettings() const override { + [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector> override { return std::vector>({{"RAM_P", 1}, {"L1_LS", 70}, {"REG", 2}}); } }; diff --git a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp index a738fb7f..c9d032da 100644 --- 
a/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/NehalemEPConfig.hpp @@ -21,17 +21,17 @@ #pragma once -#include -#include +#include "../Payload/SSE2Payload.hpp" +#include "X86PlatformConfig.hpp" namespace firestarter::environment::x86::platform { class NehalemEPConfig final : public X86PlatformConfig { public: - NehalemEPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("NHM_XEONEP", 6, {26, 44}, {1, 2}, 0, {32768, 262144, 2097152}, 104857600, 1536, family, - model, threads, new payload::SSE2Payload(supportedFeatures)) {} + NehalemEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads) + : X86PlatformConfig("NHM_XEONEP", 6, {26, 44}, {1, 2}, 0, {32768, 262144, 2097152}, 104857600, 1536, Family, + Model, Threads, std::make_unique(SupportedFeatures)) {} - std::vector> getDefaultPayloadSettings() const override { + [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector> override { return std::vector>({{"RAM_P", 1}, {"L1_LS", 60}, {"REG", 2}}); } }; diff --git a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp index 230d91ba..c8c1ea73 100644 --- a/include/firestarter/Environment/X86/Platform/RomeConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/RomeConfig.hpp @@ -21,17 +21,17 @@ #pragma once -#include -#include +#include "../Payload/FMAPayload.hpp" +#include "X86PlatformConfig.hpp" namespace firestarter::environment::x86::platform { class RomeConfig final : public X86PlatformConfig { public: - RomeConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("ZEN_2_EPYC", 23, {49}, {1, 2}, 0, {32768, 524288, 2097152}, 104857600, 1536, family, model, - threads, new payload::FMAPayload(supportedFeatures)) {} + 
RomeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads) + : X86PlatformConfig("ZEN_2_EPYC", 23, {49}, {1, 2}, 0, {32768, 524288, 2097152}, 104857600, 1536, Family, Model, + Threads, std::make_unique(SupportedFeatures)) {} - std::vector> getDefaultPayloadSettings() const override { + [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector> override { return std::vector>( {{"RAM_L", 10}, {"L3_L", 25}, {"L2_L", 91}, {"L1_2LS_256", 72}, {"L1_LS_256", 82}, {"REG", 75}}); } diff --git a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp index a58e193a..5ceef9ff 100644 --- a/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/SandyBridgeConfig.hpp @@ -21,17 +21,17 @@ #pragma once -#include -#include +#include "../Payload/AVXPayload.hpp" +#include "X86PlatformConfig.hpp" namespace firestarter::environment::x86::platform { class SandyBridgeConfig final : public X86PlatformConfig { public: - SandyBridgeConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("SNB_COREI", 6, {42, 58}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, family, model, - threads, new payload::AVXPayload(supportedFeatures)) {} + SandyBridgeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads) + : X86PlatformConfig("SNB_COREI", 6, {42, 58}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, Family, Model, + Threads, std::make_unique(SupportedFeatures)) {} - std::vector> getDefaultPayloadSettings() const override { + [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector> override { return std::vector>( {{"RAM_L", 2}, {"L3_LS", 4}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 45}}); } diff --git 
a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp index 3f4f6303..8449bee1 100644 --- a/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/SandyBridgeEPConfig.hpp @@ -22,17 +22,17 @@ #ifndef INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SANDYBRIDGEEPCONFIG_H #define INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SANDYBRIDGEEPCONFIG_H -#include -#include +#include "../Payload/AVXPayload.hpp" +#include "X86PlatformConfig.hpp" namespace firestarter::environment::x86::platform { class SandyBridgeEPConfig final : public X86PlatformConfig { public: - SandyBridgeEPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("SNB_XEONEP", 6, {45, 62}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, family, - model, threads, new payload::AVXPayload(supportedFeatures)) {} + SandyBridgeEPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads) + : X86PlatformConfig("SNB_XEONEP", 6, {45, 62}, {1, 2}, 0, {32768, 262144, 2621440}, 104857600, 1536, Family, + Model, Threads, std::make_unique(SupportedFeatures)) {} - std::vector> getDefaultPayloadSettings() const override { + [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector> override { return std::vector>( {{"RAM_L", 3}, {"L3_LS", 2}, {"L2_LS", 10}, {"L1_LS", 90}, {"REG", 30}}); } diff --git a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp index c533c3a5..1437ce98 100644 --- a/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/SkylakeConfig.hpp @@ -22,17 +22,17 @@ #ifndef INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SKYLAKECONFIG_H #define INCLUDE_FIRESTARTER_ENVIRONMENT_X86_PLATFORM_SKYLAKECONFIG_H 
-#include -#include +#include "../Payload/FMAPayload.hpp" +#include "X86PlatformConfig.hpp" namespace firestarter::environment::x86::platform { class SkylakeConfig final : public X86PlatformConfig { public: - SkylakeConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("SKL_COREI", 6, {78, 94}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, family, model, - threads, new payload::FMAPayload(supportedFeatures)) {} + SkylakeConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads) + : X86PlatformConfig("SKL_COREI", 6, {78, 94}, {1, 2}, 0, {32768, 262144, 1572864}, 104857600, 1536, Family, Model, + Threads, std::make_unique(SupportedFeatures)) {} - std::vector> getDefaultPayloadSettings() const override { + [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector> override { return std::vector>( {{"RAM_L", 3}, {"L3_LS_256", 5}, {"L2_LS_256", 18}, {"L1_2LS_256", 78}, {"REG", 40}}); } diff --git a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp index 8243d9d6..8a91b6de 100644 --- a/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/SkylakeSPConfig.hpp @@ -21,17 +21,17 @@ #pragma once -#include -#include +#include "../Payload/AVX512Payload.hpp" +#include "X86PlatformConfig.hpp" namespace firestarter::environment::x86::platform { class SkylakeSPConfig final : public X86PlatformConfig { public: - SkylakeSPConfig(asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, unsigned threads) - : X86PlatformConfig("SKL_XEONEP", 6, {85}, {1, 2}, 0, {32768, 1048576, 1441792}, 1048576000, 1536, family, model, - threads, new payload::AVX512Payload(supportedFeatures)) {} + SkylakeSPConfig(asmjit::CpuFeatures const& SupportedFeatures, unsigned Family, unsigned Model, unsigned Threads) 
+ : X86PlatformConfig("SKL_XEONEP", 6, {85}, {1, 2}, 0, {32768, 1048576, 1441792}, 1048576000, 1536, Family, Model, + Threads, std::make_unique(SupportedFeatures)) {} - std::vector> getDefaultPayloadSettings() const override { + [[nodiscard]] auto getDefaultPayloadSettings() const -> std::vector> override { return std::vector>({{"RAM_S", 3}, {"RAM_P", 1}, {"L3_S", 1}, diff --git a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp index 648346d8..3d33756c 100644 --- a/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp +++ b/include/firestarter/Environment/X86/Platform/X86PlatformConfig.hpp @@ -21,34 +21,39 @@ #pragma once -#include -#include +#include "../../Platform/PlatformConfig.hpp" +#include "../Payload/X86Payload.hpp" +#include +#include +#include +#include // IWYU pragma: keep namespace firestarter::environment::x86::platform { class X86PlatformConfig : public environment::platform::PlatformConfig { private: - unsigned _family; - std::list _models; - unsigned _currentFamily; - unsigned _currentModel; - unsigned _currentThreads; + unsigned Family; + std::list Models; + unsigned CurrentFamily; + unsigned CurrentModel; + unsigned CurrentThreads; public: - X86PlatformConfig(std::string name, unsigned family, std::initializer_list models, - std::initializer_list threads, unsigned instructionCacheSize, - std::initializer_list dataCacheBufferSize, unsigned ramBuffersize, unsigned lines, - unsigned currentFamily, unsigned currentModel, unsigned currentThreads, - payload::X86Payload* payload) - : PlatformConfig(name, threads, instructionCacheSize, dataCacheBufferSize, ramBuffersize, lines, payload) - , _family(family) - , _models(models) - , _currentFamily(currentFamily) - , _currentModel(currentModel) - , _currentThreads(currentThreads) {} + X86PlatformConfig(std::string Name, unsigned Family, std::initializer_list Models, + std::initializer_list Threads, unsigned 
InstructionCacheSize, + std::initializer_list DataCacheBufferSize, unsigned RamBuffersize, unsigned Lines, + unsigned CurrentFamily, unsigned CurrentModel, unsigned CurrentThreads, + std::unique_ptr&& Payload) + : PlatformConfig(std::move(Name), Threads, InstructionCacheSize, DataCacheBufferSize, RamBuffersize, Lines, + std::move(Payload)) + , Family(Family) + , Models(Models) + , CurrentFamily(CurrentFamily) + , CurrentModel(CurrentModel) + , CurrentThreads(CurrentThreads) {} - bool isDefault() const override { - return _family == _currentFamily && (std::find(_models.begin(), _models.end(), _currentModel) != _models.end()) && + [[nodiscard]] auto isDefault() const -> bool override { + return Family == CurrentFamily && (std::find(Models.begin(), Models.end(), CurrentModel) != Models.end()) && isAvailable(); } }; diff --git a/include/firestarter/Environment/X86/X86CPUTopology.hpp b/include/firestarter/Environment/X86/X86CPUTopology.hpp index d9ca6393..44875e03 100644 --- a/include/firestarter/Environment/X86/X86CPUTopology.hpp +++ b/include/firestarter/Environment/X86/X86CPUTopology.hpp @@ -23,7 +23,7 @@ #include -#include +#include "../CPUTopology.hpp" namespace firestarter::environment::x86 { @@ -36,9 +36,6 @@ class X86CPUTopology final : public CPUTopology { [[nodiscard]] auto features() const -> std::list const& override { return this->FeatureList; } [[nodiscard]] auto featuresAsmjit() const -> const asmjit::CpuFeatures& { return this->CpuInfo.features(); } - [[nodiscard]] auto vendor() const -> std::string const& override { return this->Vendor; } - [[nodiscard]] auto model() const -> std::string const& override { return this->Model; } - [[nodiscard]] auto clockrate() const -> uint64_t override; [[nodiscard]] auto timestamp() const -> uint64_t override; @@ -50,15 +47,13 @@ class X86CPUTopology final : public CPUTopology { private: [[nodiscard]] auto hasRdtsc() const -> bool { return this->HasRdtsc; } [[nodiscard]] auto hasInvariantRdtsc() const -> bool { return 
this->HasInvariantRdtsc; } - void cpuid(uint64_t* A, uint64_t* B, uint64_t* C, uint64_t* D) const; + static void cpuid(uint64_t* Rax, uint64_t* Rbx, uint64_t* Rcx, uint64_t* Rdx); asmjit::CpuInfo CpuInfo; std::list FeatureList; bool HasRdtsc; bool HasInvariantRdtsc; - std::string Vendor; - std::string Model; }; inline auto operator<<(std::ostream& Stream, X86CPUTopology const& CpuTopology) -> std::ostream& { diff --git a/include/firestarter/Environment/X86/X86Environment.hpp b/include/firestarter/Environment/X86/X86Environment.hpp index 7873c9c4..3e358d90 100644 --- a/include/firestarter/Environment/X86/X86Environment.hpp +++ b/include/firestarter/Environment/X86/X86Environment.hpp @@ -23,27 +23,26 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "../Environment.hpp" +#include "Platform/BulldozerConfig.hpp" +#include "Platform/HaswellConfig.hpp" +#include "Platform/HaswellEPConfig.hpp" +#include "Platform/KnightsLandingConfig.hpp" +#include "Platform/NaplesConfig.hpp" +#include "Platform/NehalemConfig.hpp" +#include "Platform/NehalemEPConfig.hpp" +#include "Platform/RomeConfig.hpp" +#include "Platform/SandyBridgeConfig.hpp" +#include "Platform/SandyBridgeEPConfig.hpp" +#include "Platform/SkylakeConfig.hpp" +#include "Platform/SkylakeSPConfig.hpp" +#include "Platform/X86PlatformConfig.hpp" +#include "X86CPUTopology.hpp" #define REGISTER(NAME) \ [](asmjit::CpuFeatures const& supportedFeatures, unsigned family, unsigned model, \ - unsigned threads) -> platform::X86PlatformConfig* { \ - return new platform::NAME(supportedFeatures, family, model, threads); \ + unsigned threads) -> std::unique_ptr { \ + return std::make_unique(supportedFeatures, family, model, threads); \ } namespace firestarter::environment::x86 { @@ -51,18 +50,9 @@ namespace firestarter::environment::x86 { class X86Environment final : public Environment { public: 
X86Environment() - : Environment(new X86CPUTopology()) {} + : Environment(std::make_unique()) {} - ~X86Environment() { - for (auto const& Config : PlatformConfigs) { - delete Config; - } - for (auto const& Config : FallbackPlatformConfigs) { - delete Config; - } - } - - auto topology() -> X86CPUTopology const& { return *reinterpret_cast(this->Topology); } + auto topology() -> X86CPUTopology const& { return *dynamic_cast(Topology.get()); } void evaluateFunctions() override; auto selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) -> int override; @@ -76,16 +66,18 @@ class X86Environment final : public Environment { // The available function IDs are generated by iterating through this list // of PlatformConfig. Add new PlatformConfig at the bottom to maintain // stable IDs. - const std::list> + const std::list(asmjit::CpuFeatures const&, unsigned, + unsigned, unsigned)>> PlatformConfigsCtor = {REGISTER(KnightsLandingConfig), REGISTER(SkylakeConfig), REGISTER(SkylakeSPConfig), REGISTER(HaswellConfig), REGISTER(HaswellEPConfig), REGISTER(SandyBridgeConfig), REGISTER(SandyBridgeEPConfig), REGISTER(NehalemConfig), REGISTER(NehalemEPConfig), REGISTER(BulldozerConfig), REGISTER(NaplesConfig), REGISTER(RomeConfig)}; - std::list PlatformConfigs; + std::list> PlatformConfigs; // List of fallback PlatformConfig. Add one for each x86 extension. 
- const std::list> + const std::list(asmjit::CpuFeatures const&, unsigned, + unsigned, unsigned)>> FallbackPlatformConfigsCtor = { REGISTER(SkylakeSPConfig), // AVX512 REGISTER(BulldozerConfig), // FMA4 @@ -94,7 +86,7 @@ class X86Environment final : public Environment { REGISTER(NehalemConfig) // SSE2 }; - std::list FallbackPlatformConfigs; + std::list> FallbackPlatformConfigs; #undef REGISTER }; diff --git a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp index 8009c1c9..0e4c7ef5 100644 --- a/include/firestarter/Firestarter.hpp +++ b/include/firestarter/Firestarter.hpp @@ -22,27 +22,27 @@ #pragma once #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP) -#include +#include "Cuda/Cuda.hpp" #endif #ifdef FIRESTARTER_BUILD_ONEAPI -#include +#include "OneAPI/OneAPI.hpp" #endif -#include +#include "Constants.hpp" #if defined(linux) || defined(__linux__) -#include -#include -#include -#include +#include "Measurement/MeasurementWorker.hpp" +#include "Optimizer/Algorithm.hpp" +#include "Optimizer/OptimizerWorker.hpp" +#include "Optimizer/Population.hpp" #endif -#include -#include +#include "DumpRegisterWorkerData.hpp" +#include "LoadWorkerData.hpp" #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -#include +#include "Environment/X86/X86Environment.hpp" #endif #include diff --git a/include/firestarter/Json/Summary.hpp b/include/firestarter/Json/Summary.hpp index a2e8e03a..8fe781e9 100644 --- a/include/firestarter/Json/Summary.hpp +++ b/include/firestarter/Json/Summary.hpp @@ -21,23 +21,26 @@ #pragma once -#include +#include "../Measurement/Summary.hpp" namespace nlohmann { template <> struct adl_serializer { - static firestarter::measurement::Summary from_json(const json& j) { - return {j["num_timepoints"].get(), - std::chrono::milliseconds(j["duration"].get()), j["average"].get(), - j["stddev"].get()}; + // functions for nlohmann json do not follow LLVM code style + // 
NOLINTBEGIN(readability-identifier-naming) + static auto from_json(const json& J) -> firestarter::measurement::Summary { + return {J["num_timepoints"].get(), + std::chrono::milliseconds(J["duration"].get()), J["average"].get(), + J["stddev"].get()}; } - static void to_json(json& j, firestarter::measurement::Summary s) { - j = json::object(); + static void to_json(json& J, firestarter::measurement::Summary S) { + J = json::object(); - j["num_timepoints"] = s.NumTimepoints; - j["duration"] = s.Duration.count(); - j["average"] = s.Average; - j["stddev"] = s.Stddev; + J["num_timepoints"] = S.NumTimepoints; + J["duration"] = S.Duration.count(); + J["average"] = S.Average; + J["stddev"] = S.Stddev; } + // NOLINTEND(readability-identifier-naming) }; } // namespace nlohmann diff --git a/include/firestarter/LoadWorkerData.hpp b/include/firestarter/LoadWorkerData.hpp index eb7e0c3c..6aa5b40f 100644 --- a/include/firestarter/LoadWorkerData.hpp +++ b/include/firestarter/LoadWorkerData.hpp @@ -21,11 +21,11 @@ #pragma once +#include "Constants.hpp" +#include "DumpRegisterStruct.hpp" +#include "Environment/Environment.hpp" +#include "ErrorDetectionStruct.hpp" #include -#include -#include -#include -#include #include #include #include diff --git a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp index 3a0e68fc..2a1a51f1 100644 --- a/include/firestarter/Logging/FirstWorkerThreadFilter.hpp +++ b/include/firestarter/Logging/FirstWorkerThreadFilter.hpp @@ -21,7 +21,6 @@ #pragma once -#include #include #include @@ -33,8 +32,8 @@ template class FirstWorkerThreadFilter { static void setFirstThread(std::thread::id NewFirstThread) { FirstThread = NewFirstThread; } - auto filter(Record& r) const -> bool { - return r.std_thread_id() == FirstThread || r.severity() >= nitro::log::severity_level::error; + auto filter(Record& R) const -> bool { + return R.std_thread_id() == FirstThread || R.severity() >= 
nitro::log::severity_level::error; } private: diff --git a/include/firestarter/Logging/Log.hpp b/include/firestarter/Logging/Log.hpp index 74cc3e1a..a2874ebb 100644 --- a/include/firestarter/Logging/Log.hpp +++ b/include/firestarter/Logging/Log.hpp @@ -21,9 +21,7 @@ #pragma once -#include -#include -#include +#include "FirstWorkerThreadFilter.hpp" #include #include #include @@ -42,61 +40,61 @@ namespace logging { class StdOut { public: - void sink(nitro::log::severity_level severity, const std::string& formatted_record) { - switch (severity) { + static void sink(nitro::log::severity_level Severity, const std::string& FormattedRecord) { + switch (Severity) { case nitro::log::severity_level::warn: case nitro::log::severity_level::error: case nitro::log::severity_level::fatal: - std::cerr << formatted_record << std::endl << std::flush; + std::cerr << FormattedRecord << '\n' << std::flush; break; default: - std::cout << formatted_record << std::endl << std::flush; + std::cout << FormattedRecord << '\n' << std::flush; break; } } }; -using record = nitro::log::record; -template class formater { +template class Formater { public: - std::string format(Record& r) { - std::stringstream s; + auto format(Record& R) -> std::string { + std::stringstream S; - switch (r.severity()) { + switch (R.severity()) { case nitro::log::severity_level::warn: - s << "Warning: "; + S << "Warning: "; break; case nitro::log::severity_level::error: - s << "Error: "; + S << "Error: "; break; case nitro::log::severity_level::fatal: - s << "Fatal: "; + S << "Fatal: "; break; case nitro::log::severity_level::trace: - s << "Debug: "; + S << "Debug: "; break; default: break; } - s << r.message(); + S << R.message(); - return s.str(); + return S.str(); } }; -template using filter = nitro::log::filter::severity_filter; +template using Filter = nitro::log::filter::severity_filter; template -using workerFilter = nitro::log::filter::and_filter, FirstWorkerThreadFilter>; +using WorkerFilter = 
nitro::log::filter::and_filter, FirstWorkerThreadFilter>; } // namespace logging -using log = nitro::log::logger; +using log = nitro::log::logger; using workerLog = - nitro::log::logger; + nitro::log::logger; } // namespace firestarter diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp index 2045bd43..e9e18b76 100644 --- a/include/firestarter/Measurement/MeasurementWorker.hpp +++ b/include/firestarter/Measurement/MeasurementWorker.hpp @@ -21,14 +21,13 @@ #pragma once +#include "Metric/IPCEstimate.h" +#include "Metric/Perf.h" +#include "Metric/RAPL.h" +#include "MetricInterface.h" +#include "Summary.hpp" +#include "TimeValue.hpp" #include -#include -#include -#include -#include -#include -#include -#include #include #include diff --git a/include/firestarter/Measurement/Metric/IPCEstimate.h b/include/firestarter/Measurement/Metric/IPCEstimate.h index 360c1d91..63dcb26b 100644 --- a/include/firestarter/Measurement/Metric/IPCEstimate.h +++ b/include/firestarter/Measurement/Metric/IPCEstimate.h @@ -21,7 +21,7 @@ #pragma once -#include +#include "../MetricInterface.h" extern "C" { diff --git a/include/firestarter/Measurement/Metric/Perf.h b/include/firestarter/Measurement/Metric/Perf.h index 39a070f6..2702cd94 100644 --- a/include/firestarter/Measurement/Metric/Perf.h +++ b/include/firestarter/Measurement/Metric/Perf.h @@ -21,7 +21,7 @@ #pragma once -#include +#include "../MetricInterface.h" extern "C" { diff --git a/include/firestarter/Measurement/Metric/RAPL.h b/include/firestarter/Measurement/Metric/RAPL.h index 726ff61a..017373a1 100644 --- a/include/firestarter/Measurement/Metric/RAPL.h +++ b/include/firestarter/Measurement/Metric/RAPL.h @@ -21,7 +21,7 @@ #pragma once -#include +#include "../MetricInterface.h" extern "C" { diff --git a/include/firestarter/Measurement/Summary.hpp b/include/firestarter/Measurement/Summary.hpp index 09c91016..019a73eb 100644 --- 
a/include/firestarter/Measurement/Summary.hpp +++ b/include/firestarter/Measurement/Summary.hpp @@ -21,15 +21,12 @@ #pragma once +#include "MetricInterface.h" +#include "TimeValue.hpp" #include -#include #include #include -extern "C" { -#include -} - namespace firestarter::measurement { struct Summary { diff --git a/include/firestarter/Measurement/TimeValue.hpp b/include/firestarter/Measurement/TimeValue.hpp index cc168ad2..10b31e8b 100644 --- a/include/firestarter/Measurement/TimeValue.hpp +++ b/include/firestarter/Measurement/TimeValue.hpp @@ -33,7 +33,7 @@ struct TimeValue { , Value(Value){}; std::chrono::high_resolution_clock::time_point Time; - double Value; + double Value{}; }; } // namespace firestarter::measurement diff --git a/include/firestarter/Optimizer/Algorithm.hpp b/include/firestarter/Optimizer/Algorithm.hpp index 4cdae1ec..8bae8bd3 100644 --- a/include/firestarter/Optimizer/Algorithm.hpp +++ b/include/firestarter/Optimizer/Algorithm.hpp @@ -21,7 +21,7 @@ #pragma once -#include +#include "Population.hpp" namespace firestarter::optimizer { diff --git a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp index e02e7e14..70c2aac0 100644 --- a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp +++ b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp @@ -21,7 +21,7 @@ #pragma once -#include +#include "../Algorithm.hpp" namespace firestarter::optimizer::algorithm { diff --git a/include/firestarter/Optimizer/History.hpp b/include/firestarter/Optimizer/History.hpp index 332b49c5..8c573d72 100644 --- a/include/firestarter/Optimizer/History.hpp +++ b/include/firestarter/Optimizer/History.hpp @@ -21,14 +21,14 @@ #pragma once +#include "../Json/Summary.hpp" // IWYU pragma: keep +#include "../Logging/Log.hpp" +#include "../Measurement/Summary.hpp" +#include "Individual.hpp" #include #include #include #include -#include -#include -#include -#include #include #include #include @@ -88,129 +88,129 @@ struct History { 
// print the best 20 individuals for each metric in a format // where the user can give it to --run-instruction-groups directly - std::map columnWidth; + std::map ColumnWidth; - for (auto const& metric : OptimizationMetrics) { - columnWidth[metric] = (std::max)(metric.size(), MinColumnWidth); - firestarter::log::trace() << metric << ": " << columnWidth[metric]; + for (auto const& Metric : OptimizationMetrics) { + ColumnWidth[Metric] = (std::max)(Metric.size(), MinColumnWidth); + firestarter::log::trace() << Metric << ": " << ColumnWidth[Metric]; } - for (auto const& metric : OptimizationMetrics) { + for (auto const& Metric : OptimizationMetrics) { using SummaryMap = std::map; - auto compareIndividual = [&metric](SummaryMap const& mapA, SummaryMap const& mapB) { - auto summaryA = mapA.find(metric); - auto summaryB = mapB.find(metric); - - if (summaryA == mapA.end() || summaryB == mapB.end()) { - summaryA = mapA.find(metric.substr(1)); - summaryB = mapB.find(metric.substr(1)); - assert(summaryA != mapA.end()); - assert(summaryB != mapB.end()); - return summaryA->second.Average < summaryB->second.Average; + auto CompareIndividual = [&Metric](SummaryMap const& MapA, SummaryMap const& MapB) { + auto SummaryA = MapA.find(Metric); + auto SummaryB = MapB.find(Metric); + + if (SummaryA == MapA.end() || SummaryB == MapB.end()) { + SummaryA = MapA.find(Metric.substr(1)); + SummaryB = MapB.find(Metric.substr(1)); + assert(SummaryA != MapA.end()); + assert(SummaryB != MapB.end()); + return SummaryA->second.Average < SummaryB->second.Average; } - assert(summaryA != mapA.end()); - assert(summaryB != mapB.end()); - return summaryA->second.Average > summaryB->second.Average; + assert(SummaryA != MapA.end()); + assert(SummaryB != MapB.end()); + return SummaryA->second.Average > SummaryB->second.Average; }; - auto perm = sortPermutation(F, compareIndividual); + auto Perm = sortPermutation(F, CompareIndividual); - auto formatIndividual = [&PayloadItems](std::vector const& individual) 
{ - std::string result = ""; - assert(PayloadItems.size() == individual.size()); + auto FormatIndividual = [&PayloadItems](std::vector const& Individual) { + std::string Result; + assert(PayloadItems.size() == Individual.size()); - for (std::size_t i = 0; i < individual.size(); ++i) { + for (std::size_t I = 0; I < Individual.size(); ++I) { // skip zero values - if (individual[i] == 0) { + if (Individual[I] == 0) { continue; } - if (result.size() != 0) { - result += ","; + if (Result.size() != 0) { + Result += ","; } - result += PayloadItems[i] + ":" + std::to_string(individual[i]); + Result += PayloadItems[I] + ":" + std::to_string(Individual[I]); } - return result; + return Result; }; - auto begin = perm.begin(); - auto end = perm.end(); + auto Begin = Perm.begin(); + auto End = Perm.end(); // stop printing at a max of MaxElementPrintCount - if (std::distance(begin, end) > MaxElementPrintCount) { - end = perm.begin(); - std::advance(end, MaxElementPrintCount); + if (std::distance(Begin, End) > MaxElementPrintCount) { + End = Perm.begin(); + std::advance(End, MaxElementPrintCount); } // print each of the best elements - std::size_t max = 0; - for (auto it = begin; it != end; ++it) { - max = (std::max)(max, formatIndividual(X[*it]).size()); + std::size_t Max = 0; + for (auto It = Begin; It != End; ++It) { + Max = (std::max)(Max, FormatIndividual(X[*It]).size()); } - std::stringstream firstLine; - std::stringstream secondLine; - std::string ind = "INDIVIDUAL"; + std::stringstream FirstLine; + std::stringstream SecondLine; + std::string Ind = "INDIVIDUAL"; - firstLine << " " << ind; - padding(firstLine, max, ind.size(), ' '); + FirstLine << " " << Ind; + padding(FirstLine, Max, Ind.size(), ' '); - secondLine << " "; - padding(secondLine, (std::max)(max, ind.size()), 0, '-'); + SecondLine << " "; + padding(SecondLine, (std::max)(Max, Ind.size()), 0, '-'); - for (auto const& metric : OptimizationMetrics) { - auto width = columnWidth[metric]; + for (auto const& Metric : 
OptimizationMetrics) { + auto Width = ColumnWidth[Metric]; - firstLine << " | "; - secondLine << "---"; + FirstLine << " | "; + SecondLine << "---"; - firstLine << metric; - padding(firstLine, width, metric.size(), ' '); - padding(secondLine, width, 0, '-'); + FirstLine << Metric; + padding(FirstLine, Width, Metric.size(), ' '); + padding(SecondLine, Width, 0, '-'); } - std::stringstream ss; + std::stringstream Ss; - ss << "\n Best individuals sorted by metric " << metric << " " - << ((metric[0] == '-') ? "ascending" : "descending") << ":\n" - << firstLine.str() << "\n" - << secondLine.str() << "\n"; + Ss << "\n Best individuals sorted by metric " << Metric << " " + << ((Metric[0] == '-') ? "ascending" : "descending") << ":\n" + << FirstLine.str() << "\n" + << SecondLine.str() << "\n"; // print INDIVIDUAL | metric 1 | metric 2 | ... | metric N - for (auto it = begin; it != end; ++it) { - auto const fitness = F[*it]; - auto const ind = formatIndividual(X[*it]); + for (auto It = Begin; It != End; ++It) { + auto const& Fitness = F[*It]; + auto const Ind = FormatIndividual(X[*It]); - ss << " " << ind; - padding(ss, max, ind.size(), ' '); + Ss << " " << Ind; + padding(Ss, Max, Ind.size(), ' '); - for (auto const& metric : OptimizationMetrics) { - auto width = columnWidth[metric]; - std::string value; + for (auto const& Metric : OptimizationMetrics) { + auto Width = ColumnWidth[Metric]; + std::string Value; - auto fitnessOfMetric = fitness.find(metric); - auto invertedMetric = metric.substr(1); - auto fitnessOfInvertedMetric = fitness.find(invertedMetric); + auto FitnessOfMetric = Fitness.find(Metric); + auto InvertedMetric = Metric.substr(1); + auto FitnessOfInvertedMetric = Fitness.find(InvertedMetric); - if (fitnessOfMetric != fitness.end()) { - value = std::to_string(fitnessOfMetric->second.Average); - } else if (fitnessOfInvertedMetric != fitness.end()) { - value = std::to_string(fitnessOfInvertedMetric->second.Average); + if (FitnessOfMetric != Fitness.end()) { + 
Value = std::to_string(FitnessOfMetric->second.Average); + } else if (FitnessOfInvertedMetric != Fitness.end()) { + Value = std::to_string(FitnessOfInvertedMetric->second.Average); } else { assert(false); } - ss << " | " << value; - padding(ss, width, value.size(), ' '); + Ss << " | " << Value; + padding(Ss, Width, Value.size(), ' '); } - ss << "\n"; + Ss << "\n"; } - ss << "\n"; + Ss << "\n"; - firestarter::log::info() << ss.str(); + firestarter::log::info() << Ss.str(); } firestarter::log::info() << "To run FIRESTARTER with the best individual of a given metric " diff --git a/include/firestarter/Optimizer/OptimizerWorker.hpp b/include/firestarter/Optimizer/OptimizerWorker.hpp index e98c25b9..ba106595 100644 --- a/include/firestarter/Optimizer/OptimizerWorker.hpp +++ b/include/firestarter/Optimizer/OptimizerWorker.hpp @@ -19,9 +19,9 @@ * Contact: daniel.hackenberg@tu-dresden.de *****************************************************************************/ +#include "Algorithm.hpp" +#include "Population.hpp" #include -#include -#include #include extern "C" { diff --git a/include/firestarter/Optimizer/Population.hpp b/include/firestarter/Optimizer/Population.hpp index 757a2e46..2d904467 100644 --- a/include/firestarter/Optimizer/Population.hpp +++ b/include/firestarter/Optimizer/Population.hpp @@ -22,10 +22,9 @@ #ifndef FIRESTARTER_OPTIMIZER_POPULATION_HPP #define FIRESTARTER_OPTIMIZER_POPULATION_HPP +#include "Individual.hpp" +#include "Problem.hpp" #include -#include -#include -#include #include #include #include diff --git a/include/firestarter/Optimizer/Problem.hpp b/include/firestarter/Optimizer/Problem.hpp index df31ec98..ae0d285d 100644 --- a/include/firestarter/Optimizer/Problem.hpp +++ b/include/firestarter/Optimizer/Problem.hpp @@ -21,9 +21,9 @@ #pragma once +#include "../Measurement/Summary.hpp" +#include "Individual.hpp" #include -#include -#include #include #include #include diff --git a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp 
b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp index 74346a74..9d3c4ed7 100644 --- a/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp +++ b/include/firestarter/Optimizer/Problem/CLIArgumentProblem.hpp @@ -21,11 +21,9 @@ #pragma once +#include "../../Measurement/MeasurementWorker.hpp" +#include "../Problem.hpp" #include -#include -#include -#include -#include #include #include #include diff --git a/include/firestarter/Optimizer/Util/MultiObjective.hpp b/include/firestarter/Optimizer/Util/MultiObjective.hpp index fab62be8..1b3a1873 100644 --- a/include/firestarter/Optimizer/Util/MultiObjective.hpp +++ b/include/firestarter/Optimizer/Util/MultiObjective.hpp @@ -21,7 +21,7 @@ #pragma once -#include +#include "../Individual.hpp" #include #include #include diff --git a/src/firestarter/Environment/CPUTopology.cpp b/src/firestarter/Environment/CPUTopology.cpp index b3e9a862..62c9224c 100644 --- a/src/firestarter/Environment/CPUTopology.cpp +++ b/src/firestarter/Environment/CPUTopology.cpp @@ -27,34 +27,30 @@ #include #include -extern "C" { -#include -} - namespace firestarter::environment { -auto CPUTopology::print(std::ostream& stream) const -> std::ostream& { - stream << " system summary:\n" - << " number of processors: " << this->numPackages() << "\n" - << " number of cores (total)): " << this->numCoresTotal() << "\n" +auto CPUTopology::print(std::ostream& Stream) const -> std::ostream& { + Stream << " system summary:\n" + << " number of processors: " << numPackages() << "\n" + << " number of cores (total)): " << numCoresTotal() << "\n" << " (this includes only cores in the cgroup)" << "\n" - << " number of threads per core: " << this->numThreadsPerCore() << "\n" - << " total number of threads: " << this->numThreads() << "\n\n"; + << " number of threads per core: " << numThreadsPerCore() << "\n" + << " total number of threads: " << numThreads() << "\n\n"; - std::stringstream ss; + std::stringstream Ss; - for (auto const& Entry : 
this->features()) { - ss << Entry << " "; + for (auto const& Entry : features()) { + Ss << Entry << " "; } - stream << " processor characteristics:\n" - << " architecture: " << this->architecture() << "\n" - << " vendor: " << this->vendor() << "\n" - << " processor-name: " << this->processorName() << "\n" - << " model: " << this->model() << "\n" - << " frequency: " << this->clockrate() / 1000000 << " MHz\n" - << " supported features: " << ss.str() << "\n" + Stream << " processor characteristics:\n" + << " architecture: " << architecture() << "\n" + << " vendor: " << vendor() << "\n" + << " processor-name: " << processorName() << "\n" + << " model: " << model() << "\n" + << " frequency: " << clockrate() / 1000000 << " MHz\n" + << " supported features: " << Ss.str() << "\n" << " Caches:"; std::vector Caches = { @@ -65,76 +61,76 @@ auto CPUTopology::print(std::ostream& stream) const -> std::ostream& { std::vector CacheStrings = {}; for (hwloc_obj_type_t const& Cache : Caches) { - std::stringstream ss; + std::stringstream Ss; - auto Width = hwloc_get_nbobjs_by_type(this->topology, Cache); + auto Width = hwloc_get_nbobjs_by_type(Topology, Cache); if (Width >= 1) { - ss << "\n - "; + Ss << "\n - "; - auto* CacheObj = hwloc_get_obj_by_type(this->topology, Cache, 0); + auto* CacheObj = hwloc_get_obj_by_type(Topology, Cache, 0); std::array String{}; hwloc_obj_type_snprintf(String.begin(), sizeof(String), CacheObj, 0); switch (CacheObj->attr->cache.type) { case HWLOC_OBJ_CACHE_DATA: - ss << "Level " << CacheObj->attr->cache.depth << " Data"; + Ss << "Level " << CacheObj->attr->cache.depth << " Data"; break; case HWLOC_OBJ_CACHE_INSTRUCTION: - ss << "Level " << CacheObj->attr->cache.depth << " Instruction"; + Ss << "Level " << CacheObj->attr->cache.depth << " Instruction"; break; case HWLOC_OBJ_CACHE_UNIFIED: default: - ss << "Unified Level " << CacheObj->attr->cache.depth; + Ss << "Unified Level " << CacheObj->attr->cache.depth; break; } - ss << " Cache, " << 
CacheObj->attr->cache.size / 1024 << " KiB, " << CacheObj->attr->cache.linesize + Ss << " Cache, " << CacheObj->attr->cache.size / 1024 << " KiB, " << CacheObj->attr->cache.linesize << " B Cacheline, "; switch (CacheObj->attr->cache.associativity) { case -1: - ss << "full"; + Ss << "full"; break; case 0: - ss << "unknown"; + Ss << "unknown"; break; default: - ss << CacheObj->attr->cache.associativity << "-way set"; + Ss << CacheObj->attr->cache.associativity << "-way set"; break; } - ss << " associative, "; + Ss << " associative, "; - auto Shared = this->numThreads() / Width; + auto Shared = numThreads() / Width; if (Shared > 1) { - ss << "shared among " << Shared << " threads."; + Ss << "shared among " << Shared << " threads."; } else { - ss << "per thread."; + Ss << "per thread."; } - stream << ss.str(); + Stream << Ss.str(); } } - return stream; + return Stream; } -CPUTopology::CPUTopology(std::string architecture) - : _architecture(std::move(architecture)) { +CPUTopology::CPUTopology(std::string Architecture) + : Architecture(std::move(Architecture)) { - hwloc_topology_init(&this->topology); + hwloc_topology_init(&Topology); // do not filter icaches - hwloc_topology_set_cache_types_filter(this->topology, HWLOC_TYPE_FILTER_KEEP_ALL); + hwloc_topology_set_cache_types_filter(Topology, HWLOC_TYPE_FILTER_KEEP_ALL); - hwloc_topology_load(this->topology); + hwloc_topology_load(Topology); // check for hybrid processor - int nr_cpukinds = hwloc_cpukinds_get_nr(this->topology, 0); + int NrCpukinds = hwloc_cpukinds_get_nr(Topology, 0); - switch (nr_cpukinds) { + switch (NrCpukinds) { case -1: log::warn() << "Hybrid core check failed"; break; @@ -142,203 +138,206 @@ CPUTopology::CPUTopology(std::string architecture) log::warn() << "Hybrid core check read no information"; break; default: - log::trace() << "Number of CPU kinds:" << nr_cpukinds; + log::trace() << "Number of CPU kinds:" << NrCpukinds; } - if (nr_cpukinds > 1) { + if (NrCpukinds > 1) { log::warn() << 
"FIRESTARTER detected a hybrid CPU set-up"; } // get number of packages - int depth = hwloc_get_type_depth(this->topology, HWLOC_OBJ_PACKAGE); + int Depth = hwloc_get_type_depth(Topology, HWLOC_OBJ_PACKAGE); - if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) { - this->_numPackages = 1; + if (Depth == HWLOC_TYPE_DEPTH_UNKNOWN) { + NumPackages = 1; log::warn() << "Could not get number of packages"; } else { - this->_numPackages = hwloc_get_nbobjs_by_depth(this->topology, depth); + NumPackages = hwloc_get_nbobjs_by_depth(Topology, Depth); } - log::trace() << "Number of Packages:" << this->_numPackages; + log::trace() << "Number of Packages:" << NumPackages; // get number of cores per package - depth = hwloc_get_type_depth(this->topology, HWLOC_OBJ_CORE); + Depth = hwloc_get_type_depth(Topology, HWLOC_OBJ_CORE); - if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) { - this->_numCoresTotal = 1; + if (Depth == HWLOC_TYPE_DEPTH_UNKNOWN) { + NumCoresTotal = 1; log::warn() << "Could not get number of cores"; } else { - this->_numCoresTotal = hwloc_get_nbobjs_by_depth(this->topology, depth); - if (this->_numCoresTotal == 0) { + NumCoresTotal = hwloc_get_nbobjs_by_depth(Topology, Depth); + if (NumCoresTotal == 0) { log::warn() << "Could not get number of cores"; - this->_numCoresTotal = 1; + NumCoresTotal = 1; } } - log::trace() << "Number of Cores:" << this->_numCoresTotal; + log::trace() << "Number of Cores:" << NumCoresTotal; // get number of threads per core - depth = hwloc_get_type_depth(this->topology, HWLOC_OBJ_PU); + Depth = hwloc_get_type_depth(Topology, HWLOC_OBJ_PU); - if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) { - this->_numThreadsPerCore = 1; + if (Depth == HWLOC_TYPE_DEPTH_UNKNOWN) { + NumThreadsPerCore = 1; log::warn() << "Could not get number of threads"; } else { - this->_numThreadsPerCore = hwloc_get_nbobjs_by_depth(this->topology, depth) / this->_numCoresTotal; - if (this->_numThreadsPerCore == 0) { + NumThreadsPerCore = hwloc_get_nbobjs_by_depth(Topology, Depth) / NumCoresTotal; + 
if (NumThreadsPerCore == 0) { log::warn() << "Could not get number of threads per core"; - this->_numThreadsPerCore = 1; + NumThreadsPerCore = 1; } } // get vendor, processor name and clockrate for linux #if defined(linux) || defined(__linux__) - auto procCpuinfo = this->getFileAsStream("/proc/cpuinfo"); - std::string line; - std::string clockrate = "0"; - - while (std::getline(procCpuinfo, line, '\n')) { - const std::regex vendorIdRe("^vendor_id.*:\\s*(.*)\\s*$"); - const std::regex modelNameRe("^model name.*:\\s*(.*)\\s*$"); - const std::regex cpuMHzRe("^cpu MHz.*:\\s*(.*)\\s*$"); - std::smatch vendorIdM; - std::smatch modelNameM; - std::smatch cpuMHzM; - - if (std::regex_match(line, vendorIdM, vendorIdRe)) { - this->_vendor = vendorIdM[1].str(); - } + { + auto ProcCpuinfo = getFileAsStream("/proc/cpuinfo"); + std::string Line; + std::string ClockrateStr = "0"; + + while (std::getline(ProcCpuinfo, Line, '\n')) { + const std::regex VendorIdRe("^vendor_id.*:\\s*(.*)\\s*$"); + const std::regex ModelNameRe("^model name.*:\\s*(.*)\\s*$"); + const std::regex CpuMHzRe("^cpu MHz.*:\\s*(.*)\\s*$"); + std::smatch VendorIdMatch; + std::smatch ModelNameMatch; + std::smatch CpuMHzMatch; + + if (std::regex_match(Line, VendorIdMatch, VendorIdRe)) { + Vendor = VendorIdMatch[1].str(); + } - if (std::regex_match(line, modelNameM, modelNameRe)) { - this->_processorName = modelNameM[1].str(); - } + if (std::regex_match(Line, ModelNameMatch, ModelNameRe)) { + ProcessorName = ModelNameMatch[1].str(); + } - if (std::regex_match(line, cpuMHzM, cpuMHzRe)) { - clockrate = cpuMHzM[1].str(); + if (std::regex_match(Line, CpuMHzMatch, CpuMHzRe)) { + ClockrateStr = CpuMHzMatch[1].str(); + } } - } - if (this->_vendor == "") { - log::warn() << "Could determine vendor from /proc/cpuinfo"; - } + if (Vendor.empty()) { + log::warn() << "Could determine vendor from /proc/cpuinfo"; + } - if (this->_processorName == "") { - log::warn() << "Could determine processor-name from /proc/cpuinfo"; - } + if 
(ProcessorName.empty()) { + log::warn() << "Could determine processor-name from /proc/cpuinfo"; + } - if (clockrate == "0") { - firestarter::log::warn() << "Can't determine clockrate from /proc/cpuinfo"; - } else { - firestarter::log::trace() << "Clockrate from /proc/cpuinfo is " << clockrate; - this->_clockrate = 1e6 * std::stoi(clockrate); - } + if (ClockrateStr == "0") { + firestarter::log::warn() << "Can't determine clockrate from /proc/cpuinfo"; + } else { + firestarter::log::trace() << "Clockrate from /proc/cpuinfo is " << ClockrateStr; + Clockrate = 1e6 * std::stoi(ClockrateStr); + } - auto governor = this->scalingGovernor(); - if (!governor.empty()) { + auto Governor = scalingGovernor(); + if (!Governor.empty()) { - auto scalingCurFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq").str(); - auto cpuinfoCurFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq").str(); - auto scalingMaxFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq").str(); - auto cpuinfoMaxFreq = this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq").str(); + auto ScalingCurFreq = getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq").str(); + auto CpuinfoCurFreq = getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_cur_freq").str(); + auto ScalingMaxFreq = getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq").str(); + auto CpuinfoMaxFreq = getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq").str(); - if (governor.compare("performance") || governor.compare("powersave")) { - if (scalingCurFreq.empty()) { - if (!cpuinfoCurFreq.empty()) { - clockrate = cpuinfoCurFreq; + if (Governor.compare("performance") || Governor.compare("powersave")) { + if (ScalingCurFreq.empty()) { + if (!CpuinfoCurFreq.empty()) { + ClockrateStr = CpuinfoCurFreq; + } + } else { + ClockrateStr = ScalingCurFreq; } } else { - clockrate 
= scalingCurFreq; - } - } else { - if (scalingMaxFreq.empty()) { - if (!cpuinfoMaxFreq.empty()) { - clockrate = cpuinfoMaxFreq; + if (ScalingMaxFreq.empty()) { + if (!CpuinfoMaxFreq.empty()) { + ClockrateStr = CpuinfoMaxFreq; + } + } else { + ClockrateStr = ScalingMaxFreq; } - } else { - clockrate = scalingMaxFreq; } - } - this->_clockrate = 1e3 * std::stoi(clockrate); + Clockrate = 1e3 * std::stoi(ClockrateStr); + } } #endif // try to detect processor name for macos #ifdef __APPLE__ - // use sysctl to detect the name - std::array buffer; - auto cmd = "sysctl -n machdep.cpu.brand_string"; - std::unique_ptr pipe(popen(cmd, "r"), pclose); - if (!pipe) { - log::warn() << "Could not determine processor-name"; - } - if (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { - auto str = std::string(buffer.data()); - str.erase(std::remove(str.begin(), str.end(), '\n'), str.end()); - this->_processorName = str; + { + // use sysctl to detect the name + std::array Buffer{}; + const auto* Cmd = "sysctl -n machdep.cpu.brand_string"; + std::unique_ptr Pipe(popen(Cmd, "r"), pclose); + if (!Pipe) { + log::warn() << "Could not determine processor-name"; + } + if (fgets(Buffer.data(), Buffer.size(), Pipe.get()) != nullptr) { + auto Str = std::string(Buffer.data()); + Str.erase(std::remove(Str.begin(), Str.end(), '\n'), Str.end()); + ProcessorName = Str; + } } #endif // try to detect processor name for windows #ifdef _WIN32 - // use wmic - std::array buffer; - auto cmd = "wmic cpu get name"; - std::unique_ptr pipe(_popen(cmd, "r"), _pclose); - if (!pipe) { - log::warn() << "Could not determine processor-name"; - } - auto line = 0; - while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { - if (line != 1) { - line++; - continue; + { + // use wmic + std::array Buffer{}; + const auto* Cmd = "wmic cpu get name"; + std::unique_ptr Pipe(_popen(Cmd, "r"), _pclose); + if (!Pipe) { + log::warn() << "Could not determine processor-name"; } + auto Line = 0; + while 
(fgets(Buffer.data(), Buffer.size(), Pipe.get()) != nullptr) { + if (Line != 1) { + Line++; + continue; + } - auto str = std::string(buffer.data()); - str.erase(std::remove(str.begin(), str.end(), '\n'), str.end()); - this->_processorName = str; + auto Str = std::string(Buffer.data()); + Str.erase(std::remove(Str.begin(), Str.end(), '\n'), Str.end()); + ProcessorName = Str; + } } #endif // get L1i-Cache size - int width = hwloc_get_nbobjs_by_type(this->topology, HWLOC_OBJ_L1ICACHE); + int Width = hwloc_get_nbobjs_by_type(Topology, HWLOC_OBJ_L1ICACHE); - if (width >= 1) { - hwloc_obj_t cacheObj = hwloc_get_obj_by_type(this->topology, HWLOC_OBJ_L1ICACHE, 0); - this->_instructionCacheSize = cacheObj->attr->cache.size; + if (Width >= 1) { + hwloc_obj_t CacheObj = hwloc_get_obj_by_type(Topology, HWLOC_OBJ_L1ICACHE, 0); + InstructionCacheSize = CacheObj->attr->cache.size; } } -CPUTopology::~CPUTopology() { hwloc_topology_destroy(this->topology); } +CPUTopology::~CPUTopology() { hwloc_topology_destroy(Topology); } -std::stringstream CPUTopology::getFileAsStream(std::string const& filePath) { - std::ifstream file(filePath); - std::stringstream ss; +auto CPUTopology::getFileAsStream(std::string const& FilePath) -> std::stringstream { + std::ifstream File(FilePath); + std::stringstream Ss; - if (!file.is_open()) { - log::trace() << "Could not open " << filePath; + if (!File.is_open()) { + log::trace() << "Could not open " << FilePath; } else { - ss << file.rdbuf(); - file.close(); + Ss << File.rdbuf(); + File.close(); } - return ss; + return Ss; } -std::string CPUTopology::scalingGovernor() const { - return this->getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor").str(); +auto CPUTopology::scalingGovernor() -> std::string { + return getFileAsStream("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor").str(); } -int CPUTopology::getCoreIdFromPU(unsigned pu) const { - int width; - hwloc_obj_t obj; - - width = hwloc_get_nbobjs_by_type(this->topology, 
HWLOC_OBJ_PU); +auto CPUTopology::getCoreIdFromPU(unsigned Pu) const -> int { + auto Width = hwloc_get_nbobjs_by_type(Topology, HWLOC_OBJ_PU); - if (width >= 1) { - for (int i = 0; i < width; i++) { - obj = hwloc_get_obj_by_type(this->topology, HWLOC_OBJ_PU, i); - if (obj->os_index == pu) { - for (; obj; obj = obj->parent) { - if (obj->type == HWLOC_OBJ_CORE) { - return obj->logical_index; + if (Width >= 1) { + for (int I = 0; I < Width; I++) { + auto* Obj = hwloc_get_obj_by_type(Topology, HWLOC_OBJ_PU, I); + if (Obj->os_index == Pu) { + for (; Obj; Obj = Obj->parent) { + if (Obj->type == HWLOC_OBJ_CORE) { + return Obj->logical_index; } } } @@ -348,19 +347,16 @@ int CPUTopology::getCoreIdFromPU(unsigned pu) const { return -1; } -int CPUTopology::getPkgIdFromPU(unsigned pu) const { - int width; - hwloc_obj_t obj; +auto CPUTopology::getPkgIdFromPU(unsigned Pu) const -> int { + auto Width = hwloc_get_nbobjs_by_type(Topology, HWLOC_OBJ_PU); - width = hwloc_get_nbobjs_by_type(this->topology, HWLOC_OBJ_PU); - - if (width >= 1) { - for (int i = 0; i < width; i++) { - obj = hwloc_get_obj_by_type(this->topology, HWLOC_OBJ_PU, i); - if (obj->os_index == pu) { - for (; obj; obj = obj->parent) { - if (obj->type == HWLOC_OBJ_PACKAGE) { - return obj->logical_index; + if (Width >= 1) { + for (int I = 0; I < Width; I++) { + auto* Obj = hwloc_get_obj_by_type(Topology, HWLOC_OBJ_PU, I); + if (Obj->os_index == Pu) { + for (; Obj; Obj = Obj->parent) { + if (Obj->type == HWLOC_OBJ_PACKAGE) { + return Obj->logical_index; } } } @@ -370,46 +366,45 @@ int CPUTopology::getPkgIdFromPU(unsigned pu) const { return -1; } -unsigned CPUTopology::maxNumThreads() const { - unsigned max = 0; +auto CPUTopology::maxNumThreads() const -> unsigned { + unsigned Max = 0; // There might be more then one kind of cores - int nr_cpukinds = hwloc_cpukinds_get_nr(this->topology, 0); + int NrCpukinds = hwloc_cpukinds_get_nr(Topology, 0); // fallback in case this did not work ... 
can happen on some platforms // already printed a warning earlier - if (nr_cpukinds < 1) { - hwloc_obj_t obj; - int width = hwloc_get_nbobjs_by_type(this->topology, HWLOC_OBJ_PU); - unsigned max = 0; - - for (int i = 0; i < width; i++) { - obj = hwloc_get_obj_by_type(this->topology, HWLOC_OBJ_PU, i); - max = max < obj->os_index ? obj->os_index : max; + if (NrCpukinds < 1) { + auto Width = hwloc_get_nbobjs_by_type(Topology, HWLOC_OBJ_PU); + unsigned Max = 0; + + for (int I = 0; I < Width; I++) { + auto* Obj = hwloc_get_obj_by_type(Topology, HWLOC_OBJ_PU, I); + Max = std::max(Max, Obj->os_index); } - return max + 1; + return Max + 1; } // Allocate bitmap to get CPUs later - hwloc_bitmap_t bitmap = hwloc_bitmap_alloc(); - if (bitmap == NULL) { + hwloc_bitmap_t Bitmap = hwloc_bitmap_alloc(); + if (Bitmap == nullptr) { log::error() << "Could not allocate memory for CPU bitmap"; return 1; } // Find CPUs per kind - for (int kind_index = 0; kind_index < nr_cpukinds; kind_index++) { - int result = hwloc_cpukinds_get_info(this->topology, kind_index, bitmap, NULL, NULL, NULL, 0); - if (result) { - log::warn() << "Could not get information for CPU kind " << kind_index; + for (int KindIndex = 0; KindIndex < NrCpukinds; KindIndex++) { + int Result = hwloc_cpukinds_get_info(Topology, KindIndex, Bitmap, nullptr, nullptr, nullptr, 0); + if (Result) { + log::warn() << "Could not get information for CPU kind " << KindIndex; } - max += hwloc_bitmap_weight(bitmap); + Max += hwloc_bitmap_weight(Bitmap); } - hwloc_bitmap_free(bitmap); + hwloc_bitmap_free(Bitmap); - return max; + return Max; } }; // namespace firestarter::environment \ No newline at end of file diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp index 8e29715f..2325ed04 100644 --- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp @@ -103,7 +103,7 @@ auto 
AVX512Payload::compilePayload(std::vector> auto shift_reg32 = std::vector({edi, esi, edx}); auto nr_shift_regs = 3; auto mul_regs = 3; - auto add_regs = 24; + auto add_regs = 22; auto alt_dst_regs = 5; auto ram_reg = zmm30; @@ -123,7 +123,7 @@ auto AVX512Payload::compilePayload(std::vector> } // make all other used registers dirty except RAX frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg, - offset_reg, addrHigh_reg, iter_reg, ram_addr); + temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr); for (const auto& reg : shift_reg) { frame.addDirtyRegs(reg); } @@ -190,7 +190,6 @@ auto AVX512Payload::compilePayload(std::vector> bool left = false; auto add_dest = add_start + 1; auto mov_dst = trans_start; - auto mov_src = mov_dst + 1; unsigned l1_offset = 0; #define L1_INCREMENT() \ @@ -292,10 +291,6 @@ auto AVX512Payload::compilePayload(std::vector> if (mov_dst > trans_end) { mov_dst = trans_start; } - mov_src++; - if (mov_src > trans_end) { - mov_src = trans_start; - } shift_pos++; if (shift_pos == nr_shift_regs) { shift_pos = 0; diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp index 73175bd5..93458d25 100644 --- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp @@ -19,6 +19,7 @@ * Contact: daniel.hackenberg@tu-dresden.de *****************************************************************************/ +#include #include #include #include diff --git a/src/firestarter/Environment/X86/X86CPUTopology.cpp b/src/firestarter/Environment/X86/X86CPUTopology.cpp index dae61165..283e7f61 100644 --- a/src/firestarter/Environment/X86/X86CPUTopology.cpp +++ b/src/firestarter/Environment/X86/X86CPUTopology.cpp @@ -31,99 +31,101 @@ #pragma intrinsic(__rdtsc) #endif -using namespace firestarter::environment::x86; +namespace firestarter::environment::x86 { 
X86CPUTopology::X86CPUTopology() : CPUTopology("x86_64") - , CpuInfo(asmjit::CpuInfo::host()) - , Vendor(this->CpuInfo.vendor()) { + , CpuInfo(asmjit::CpuInfo::host()) { - std::stringstream ss; - ss << "Family " << this->familyId() << ", Model " << this->modelId() << ", Stepping " << this->stepping(); - this->Model = ss.str(); + Vendor = CpuInfo.vendor(); - for (int i = 0; i <= (int)asmjit::CpuFeatures::X86::Id::kMaxValue; i++) { - if (!this->CpuInfo.hasFeature(i)) { + { + std::stringstream Ss; + Ss << "Family " << familyId() << ", Model " << modelId() << ", Stepping " << stepping(); + Model = Ss.str(); + } + + for (auto FeatureId = 0; FeatureId <= asmjit::CpuFeatures::X86::Id::kMaxValue; FeatureId++) { + if (!CpuInfo.hasFeature(FeatureId)) { continue; } - asmjit::String sb; + asmjit::String Sb; - auto error = asmjit::Formatter::formatFeature(sb, this->CpuInfo.arch(), i); - if (error != asmjit::ErrorCode::kErrorOk) { - log::warn() << "Formatting cpu features got asmjit error: " << error; + auto Error = asmjit::Formatter::formatFeature(Sb, CpuInfo.arch(), FeatureId); + if (Error != asmjit::ErrorCode::kErrorOk) { + log::warn() << "Formatting cpu features got asmjit error: " << Error; } - this->FeatureList.push_back(std::string(sb.data())); + FeatureList.emplace_back(Sb.data()); } - uint64_t a = 0, b = 0, c = 0, d = 0; + uint64_t Rax = 0; + uint64_t Rbx = 0; + uint64_t Rcx = 0; + uint64_t Rdx = 0; // check if we have rdtsc - this->cpuid(&a, &b, &c, &d); - if (a >= 1) { - a = 1; - this->cpuid(&a, &b, &c, &d); - if ((int)d & (1 << 4)) { - this->HasRdtsc = true; - } else { - this->HasRdtsc = false; - } + cpuid(&Rax, &Rbx, &Rcx, &Rdx); + if (Rax >= 1) { + Rax = 1; + cpuid(&Rax, &Rbx, &Rcx, &Rdx); + HasRdtsc = (Rdx & (1 << 4)) != 0; } // check if we have invarant rdtsc - if (this->hasRdtsc()) { - a = 0, b = 0, c = 0, d = 0; + if (hasRdtsc()) { + Rax = 0, Rbx = 0, Rcx = 0, Rdx = 0; - this->HasInvariantRdtsc = true; + HasInvariantRdtsc = true; /* TSCs are usable if CPU 
supports only one frequency in C0 (no speedstep/Cool'n'Quite) or if multiple frequencies are available and the constant/invariant TSC feature flag is set */ - if (0 == this->vendor().compare("INTEL")) { + if ("INTEL" == vendor()) { /*check if Powermanagement and invariant TSC are supported*/ - a = 1; - this->cpuid(&a, &b, &c, &d); + Rax = 1; + cpuid(&Rax, &Rbx, &Rcx, &Rdx); /* no Frequency control */ - if ((!(d & (1 << 22))) && (!(c & (1 << 7)))) { - this->HasInvariantRdtsc = true; + if ((!(Rdx & (1 << 22))) && (!(Rcx & (1 << 7)))) { + HasInvariantRdtsc = true; } else { - a = 0x80000000; - this->cpuid(&a, &b, &c, &d); - if (a >= 0x80000007) { - a = 0x80000007; - this->cpuid(&a, &b, &c, &d); + Rax = 0x80000000; + cpuid(&Rax, &Rbx, &Rcx, &Rdx); + if (Rax >= 0x80000007) { + Rax = 0x80000007; + cpuid(&Rax, &Rbx, &Rcx, &Rdx); /* invariant TSC */ - if (d & (1 << 8)) { - this->HasInvariantRdtsc = true; + if (Rdx & (1 << 8)) { + HasInvariantRdtsc = true; } } } } - if (0 == this->vendor().compare("AMD")) { + if ("AMD" == vendor()) { /*check if Powermanagement and invariant TSC are supported*/ - a = 0x80000000; - this->cpuid(&a, &b, &c, &d); - if (a >= 0x80000007) { - a = 0x80000007; - this->cpuid(&a, &b, &c, &d); + Rax = 0x80000000; + cpuid(&Rax, &Rbx, &Rcx, &Rdx); + if (Rax >= 0x80000007) { + Rax = 0x80000007; + cpuid(&Rax, &Rbx, &Rcx, &Rdx); /* no Frequency control */ - if ((!(d & (1 << 7))) && (!(d & (1 << 1)))) { - this->HasInvariantRdtsc = true; + if ((!(Rdx & (1 << 7))) && (!(Rdx & (1 << 1)))) { + HasInvariantRdtsc = true; } /* invariant TSC */ - if (d & (1 << 8)) { - this->HasInvariantRdtsc = true; + if (Rdx & (1 << 8)) { + HasInvariantRdtsc = true; } } /* assuming no frequency control if cpuid does not provide the extended function to test for it */ else { - this->HasInvariantRdtsc = true; + HasInvariantRdtsc = true; } } } @@ -133,118 +135,122 @@ X86CPUTopology::X86CPUTopology() // only constant TSCs will be used (i.e. 
power management indepent TSCs) // save frequency in highest P-State or use generic fallback if no invarient TSC // is available -uint64_t X86CPUTopology::clockrate() const { - typedef std::chrono::high_resolution_clock Clock; - typedef std::chrono::microseconds ticks; +auto X86CPUTopology::clockrate() const -> uint64_t { + using ClockT = std::chrono::high_resolution_clock; + using TicksT = std::chrono::microseconds; - uint64_t start1_tsc, start2_tsc, end1_tsc, end2_tsc; - uint64_t time_diff; - uint64_t clock_lower_bound, clock_upper_bound, clock; - uint64_t clockrate = 0; - int i, num_measurements = 0, min_measurements; + uint64_t TimeDiff = 0; + uint64_t Clockrate = 0; + int NumMeasurements = 0; + int MinMeasurements = 0; - Clock::time_point start_time, end_time; + ClockT::time_point StartTime; + ClockT::time_point EndTime; #if not(defined(__APPLE__) || defined(_WIN32)) - auto governor = this->scalingGovernor(); + auto governor = scalingGovernor(); if (governor.empty()) { return CPUTopology::clockrate(); } /* non invariant TSCs can be used if CPUs run at fixed frequency */ - if (!this->hasInvariantRdtsc() && governor.compare("performance") && governor.compare("powersave")) { + if (!hasInvariantRdtsc() && governor.compare("performance") && governor.compare("powersave")) { return CPUTopology::clockrate(); } - min_measurements = 5; + MinMeasurements = 5; #else min_measurements = 20; #endif - i = 3; + int I = 3; do { + uint64_t End1Tsc = 0; + uint64_t End2Tsc = 0; + // start timestamp - start1_tsc = this->timestamp(); - start_time = Clock::now(); - start2_tsc = this->timestamp(); + uint64_t Start1Tsc = timestamp(); + StartTime = ClockT::now(); + uint64_t Start2Tsc = timestamp(); // waiting do { - end1_tsc = this->timestamp(); - } while (end1_tsc < start2_tsc + 1000000 * i); /* busy waiting */ + End1Tsc = timestamp(); + } while (End1Tsc < Start2Tsc + 1000000 * I); /* busy waiting */ // end timestamp do { - end1_tsc = this->timestamp(); - end_time = Clock::now(); - 
end2_tsc = this->timestamp(); + End1Tsc = timestamp(); + EndTime = ClockT::now(); + End2Tsc = timestamp(); - time_diff = std::chrono::duration_cast(end_time - start_time).count(); - } while (0 == time_diff); + TimeDiff = std::chrono::duration_cast(EndTime - StartTime).count(); + } while (0 == TimeDiff); - clock_lower_bound = (((end1_tsc - start2_tsc) * 1000000) / (time_diff)); - clock_upper_bound = (((end2_tsc - start1_tsc) * 1000000) / (time_diff)); + uint64_t ClockLowerBound = (((End1Tsc - Start2Tsc) * 1000000) / (TimeDiff)); + uint64_t ClockUpperBound = (((End2Tsc - Start1Tsc) * 1000000) / (TimeDiff)); // if both values differ significantly, the measurement could have been // interrupted between 2 rdtsc's - if (((double)clock_lower_bound > (((double)clock_upper_bound) * 0.999)) && ((time_diff) > 2000)) { - num_measurements++; - clock = (clock_lower_bound + clock_upper_bound) / 2; - if (clockrate == 0) - clockrate = clock; + if ((static_cast(ClockLowerBound) > ((static_cast(ClockUpperBound)) * 0.999)) && + ((TimeDiff) > 2000)) { + NumMeasurements++; + uint64_t Clock = (ClockLowerBound + ClockUpperBound) / 2; + bool ClockrateUpdateCondition = Clockrate == 0 || #ifndef _WIN32 - else if (clock < clockrate) - clockrate = clock; + Clock < Clockrate; #else - else if (clock > clockrate) - clockrate = clock; + Clock > Clockrate; #endif + if (ClockrateUpdateCondition) { + Clockrate = Clock; + } } - i += 2; - } while (((time_diff) < 10000) || (num_measurements < min_measurements)); + I += 2; + } while (((TimeDiff) < 10000) || (NumMeasurements < MinMeasurements)); - return clockrate; + return Clockrate; } -uint64_t X86CPUTopology::timestamp() const { -#ifndef _MSC_VER - uint64_t reg_a, reg_d; -#else - uint64_t i; -#endif - - if (!this->hasRdtsc()) { +auto X86CPUTopology::timestamp() const -> uint64_t { + if (!hasRdtsc()) { return 0; } #ifndef _MSC_VER - __asm__ __volatile__("rdtsc;" : "=a"(reg_a), "=d"(reg_d)); - return (reg_d << 32) | (reg_a & 0xffffffffULL); + uint64_t 
Rax = 0; + uint64_t Rdx = 0; + __asm__ __volatile__("rdtsc;" : "=a"(Rax), "=d"(Rdx)); + return (Rdx << 32) | (Rax & 0xffffffffULL); #else - i = __rdtsc(); - return i; + return __rdtsc(); #endif } -void X86CPUTopology::cpuid(uint64_t* a, uint64_t* b, uint64_t* c, uint64_t* d) const { +void X86CPUTopology::cpuid(uint64_t* Rax, uint64_t* Rbx, uint64_t* Rcx, uint64_t* Rdx) { #ifndef _MSC_VER - uint64_t reg_a, reg_b, reg_c, reg_d; + uint64_t RaxOut = 0; + uint64_t RbxOut = 0; + uint64_t RcxOut = 0; + uint64_t RdxOut = 0; __asm__ __volatile__("cpuid;" - : "=a"(reg_a), "=b"(reg_b), "=c"(reg_c), "=d"(reg_d) - : "a"(*a), "b"(*b), "c"(*c), "d"(*d)); - *a = reg_a; - *b = reg_b; - *c = reg_c; - *d = reg_d; + : "=a"(RaxOut), "=b"(RbxOut), "=c"(RcxOut), "=d"(RdxOut) + : "a"(*Rax), "b"(*Rbx), "c"(*Rcx), "d"(*Rdx)); + *Rax = RaxOut; + *Rbx = RbxOut; + *Rcx = RcxOut; + *Rdx = RdxOut; #else std::array cpuid; - __cpuidex(cpuid.data(), *a, *c); + __cpuidex(cpuid.data(), *Rax, *Rcx); - *a = cpuid[0]; - *b = cpuid[1]; - *c = cpuid[2]; - *d = cpuid[3]; + *Rax = cpuid[0]; + *Rbx = cpuid[1]; + *Rcx = cpuid[2]; + *Rdx = cpuid[3]; #endif } + +} // namespace firestarter::environment::x86 \ No newline at end of file diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp index 508b01c6..2c2dabb0 100644 --- a/src/firestarter/Environment/X86/X86Environment.cpp +++ b/src/firestarter/Environment/X86/X86Environment.cpp @@ -26,50 +26,50 @@ #include #include -using namespace firestarter::environment::x86; +namespace firestarter::environment::x86 { void X86Environment::evaluateFunctions() { - for (auto ctor : this->PlatformConfigsCtor) { + for (const auto& Ctor : PlatformConfigsCtor) { // add asmjit for model and family detection - this->PlatformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(), - this->topology().modelId(), this->topology().numThreadsPerCore())); + PlatformConfigs.emplace_back( + 
Ctor(topology().featuresAsmjit(), topology().familyId(), topology().modelId(), topology().numThreadsPerCore())); } - for (auto ctor : this->FallbackPlatformConfigsCtor) { - this->FallbackPlatformConfigs.push_back(ctor(this->topology().featuresAsmjit(), this->topology().familyId(), - this->topology().modelId(), this->topology().numThreadsPerCore())); + for (const auto& Ctor : FallbackPlatformConfigsCtor) { + FallbackPlatformConfigs.emplace_back( + Ctor(topology().featuresAsmjit(), topology().familyId(), topology().modelId(), topology().numThreadsPerCore())); } } -int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePayload) { +auto X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) -> int { unsigned id = 1; std::string defaultPayloadName(""); // if functionId is 0 get the default or fallback - for (auto config : this->PlatformConfigs) { - for (auto const& [thread, functionName] : config->getThreadMap()) { + for (const auto& Config : PlatformConfigs) { + for (auto const& [thread, functionName] : Config->getThreadMap()) { // the selected function - if (id == functionId) { - if (!config->isAvailable()) { - log::error() << "Function " << functionId << " (\"" << functionName << "\") requires " - << config->payload().name() << ", which is not supported by the processor."; - if (!allowUnavailablePayload) { + if (id == FunctionId) { + if (!Config->isAvailable()) { + log::error() << "Function " << FunctionId << " (\"" << functionName << "\") requires " + << Config->payload().name() << ", which is not supported by the processor."; + if (!AllowUnavailablePayload) { return EXIT_FAILURE; } } // found function - this->SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig( - *config, thread, this->topology().instructionCacheSize()); + SelectedConfig = + new ::firestarter::environment::platform::RuntimeConfig(*Config, thread, topology().instructionCacheSize()); return EXIT_SUCCESS; } // default function - if 
(0 == functionId && config->isDefault()) { - if (thread == this->topology().numThreadsPerCore()) { - this->SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig( - *config, thread, this->topology().instructionCacheSize()); + if (0 == FunctionId && Config->isDefault()) { + if (thread == topology().numThreadsPerCore()) { + SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(*Config, thread, + topology().instructionCacheSize()); return EXIT_SUCCESS; } else { - defaultPayloadName = config->payload().name(); + defaultPayloadName = Config->payload().name(); } } id++; @@ -78,35 +78,35 @@ int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePay // no default found // use fallback - if (0 == functionId) { + if (0 == FunctionId) { if (!defaultPayloadName.empty()) { // default payload available, but number of threads per core is not // supported - log::warn() << "No " << defaultPayloadName << " code path for " << this->topology().numThreadsPerCore() + log::warn() << "No " << defaultPayloadName << " code path for " << topology().numThreadsPerCore() << " threads per core!"; } - log::warn() << this->topology().vendor() << " " << this->topology().model() + log::warn() << topology().vendor() << " " << topology().model() << " is not supported by this version of FIRESTARTER!\n" << "Check project website for updates."; // loop over available implementation and check if they are marked as // fallback - for (auto config : this->FallbackPlatformConfigs) { - if (config->isAvailable()) { + for (const auto& Config : FallbackPlatformConfigs) { + if (Config->isAvailable()) { auto selectedThread = 0; auto selectedFunctionName = std::string(""); - for (auto const& [thread, functionName] : config->getThreadMap()) { - if (thread == this->topology().numThreadsPerCore()) { + for (auto const& [thread, functionName] : Config->getThreadMap()) { + if (thread == topology().numThreadsPerCore()) { selectedThread = thread; 
selectedFunctionName = functionName; } } if (selectedThread == 0) { - selectedThread = config->getThreadMap().begin()->first; - selectedFunctionName = config->getThreadMap().begin()->second; + selectedThread = Config->getThreadMap().begin()->first; + selectedFunctionName = Config->getThreadMap().begin()->second; } - this->SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig( - *config, selectedThread, this->topology().instructionCacheSize()); + SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(*Config, selectedThread, + topology().instructionCacheSize()); log::warn() << "Using function " << selectedFunctionName << " as fallback.\n" << "You can use the parameter --function to try other " "functions."; @@ -120,14 +120,14 @@ int X86Environment::selectFunction(unsigned functionId, bool allowUnavailablePay return EXIT_FAILURE; } - log::error() << "unknown function id: " << functionId << ", see --avail for available ids"; + log::error() << "unknown function id: " << FunctionId << ", see --avail for available ids"; return EXIT_FAILURE; } int X86Environment::selectInstructionGroups(std::string groups) { const std::string delimiter = ","; const std::regex re("^(\\w+):(\\d+)$"); - const auto availableInstructionGroups = this->selectedConfig().platformConfig().payload().getAvailableInstructions(); + const auto availableInstructionGroups = selectedConfig().platformConfig().payload().getAvailableInstructions(); std::stringstream ss(groups); std::vector> payloadSettings = {}; @@ -161,7 +161,7 @@ int X86Environment::selectInstructionGroups(std::string groups) { } } - this->selectedConfig().setPayloadSettings(payloadSettings); + selectedConfig().setPayloadSettings(payloadSettings); log::info() << " Running custom instruction group: " << groups; @@ -171,7 +171,7 @@ int X86Environment::selectInstructionGroups(std::string groups) { void X86Environment::printAvailableInstructionGroups() { std::stringstream ss; - for (auto const& item : 
this->selectedConfig().platformConfig().payload().getAvailableInstructions()) { + for (auto const& item : selectedConfig().platformConfig().payload().getAvailableInstructions()) { ss << item << ","; } @@ -180,14 +180,14 @@ void X86Environment::printAvailableInstructionGroups() { s.pop_back(); } - log::info() << " available instruction-groups for payload " - << this->selectedConfig().platformConfig().payload().name() << ":\n" + log::info() << " available instruction-groups for payload " << selectedConfig().platformConfig().payload().name() + << ":\n" << " " << s; } -void X86Environment::setLineCount(unsigned lineCount) { this->selectedConfig().setLineCount(lineCount); } +void X86Environment::setLineCount(unsigned lineCount) { selectedConfig().setLineCount(lineCount); } -void X86Environment::printSelectedCodePathSummary() { this->selectedConfig().printCodePathSummary(); } +void X86Environment::printSelectedCodePathSummary() { selectedConfig().printCodePathSummary(); } void X86Environment::printFunctionSummary() { log::info() << " available load-functions:\n" @@ -200,7 +200,7 @@ void X86Environment::printFunctionSummary() { unsigned id = 1; - for (auto const& config : this->PlatformConfigs) { + for (auto const& config : PlatformConfigs) { for (auto const& [thread, functionName] : config->getThreadMap()) { const char* available = config->isAvailable() ? 
"yes" : "no"; const char* fmt = " %4u | %-30s | %-24s | %s"; @@ -214,3 +214,5 @@ void X86Environment::printFunctionSummary() { } } } + +} // namespace firestarter::environment::x86 \ No newline at end of file diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp index ed925cf1..c5a998c5 100644 --- a/src/firestarter/LoadWorker.cpp +++ b/src/firestarter/LoadWorker.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #if defined(linux) || defined(__linux__) extern "C" { @@ -95,7 +96,7 @@ int Firestarter::initLoadWorkers(bool lowLoad, uint64_t period) { if (i == 0) { // only show error for all worker threads except first. - firestarter::logging::FirstWorkerThreadFilter::setFirstThread(t.get_id()); + firestarter::logging::FirstWorkerThreadFilter::setFirstThread(t.get_id()); } this->LoadThreads.push_back(std::make_pair(std::move(t), td)); @@ -383,6 +384,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { break; case THREAD_STOP: default: + firestarter::log::debug() << "ERR" << '\n'; return; } } diff --git a/src/firestarter/Main.cpp b/src/firestarter/Main.cpp index 62bcc426..51b53177 100644 --- a/src/firestarter/Main.cpp +++ b/src/firestarter/Main.cpp @@ -263,13 +263,13 @@ Config::Config(int argc, const char** argv) { auto options = parser.parse(argc, argv); if (options.count("quiet")) { - firestarter::logging::filter::set_severity(nitro::log::severity_level::warn); + firestarter::logging::Filter::set_severity(nitro::log::severity_level::warn); } else if (options.count("report")) { - firestarter::logging::filter::set_severity(nitro::log::severity_level::debug); + firestarter::logging::Filter::set_severity(nitro::log::severity_level::debug); } else if (options.count("debug")) { - firestarter::logging::filter::set_severity(nitro::log::severity_level::trace); + firestarter::logging::Filter::set_severity(nitro::log::severity_level::trace); } else { - firestarter::logging::filter::set_severity(nitro::log::severity_level::info); + 
firestarter::logging::Filter::set_severity(nitro::log::severity_level::info); } if (options.count("version")) { diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp index 36405051..0c880bbb 100644 --- a/src/firestarter/Measurement/MeasurementWorker.cpp +++ b/src/firestarter/Measurement/MeasurementWorker.cpp @@ -19,8 +19,10 @@ * Contact: daniel.hackenberg@tu-dresden.de *****************************************************************************/ +#include #include +#include #include #include diff --git a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp index 8b9a7b02..e6a703bb 100644 --- a/src/firestarter/Optimizer/Algorithm/NSGA2.cpp +++ b/src/firestarter/Optimizer/Algorithm/NSGA2.cpp @@ -21,11 +21,13 @@ // This file borrows a lot of code from https://github.com/esa/pagmo2 +#include #include #include #include #include +#include #include using namespace firestarter::optimizer::algorithm; diff --git a/src/firestarter/Optimizer/Population.cpp b/src/firestarter/Optimizer/Population.cpp index 35c5ef04..e136fda6 100644 --- a/src/firestarter/Optimizer/Population.cpp +++ b/src/firestarter/Optimizer/Population.cpp @@ -20,11 +20,11 @@ *****************************************************************************/ #include +#include #include #include #include -#include using namespace firestarter::optimizer; From c33291f000ee4774cba315864b09fb1442ad7b87 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Fri, 27 Sep 2024 16:38:56 +0200 Subject: [PATCH 014/167] clang-tidy workflow: add .clang-tidy file location --- .github/workflows/clang-tidy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml index 562b5079..6cfe98ec 100644 --- a/.github/workflows/clang-tidy.yml +++ b/.github/workflows/clang-tidy.yml @@ -13,6 +13,7 @@ jobs: id: review with: split_workflow: true + config_file: '${{ github.workspace 
}}/.clang-tidy' - uses: ZedThree/clang-tidy-review/upload@v0.14.0 id: upload-review From a3b01b16692fe5ecb12bf2d895cc61030bb34757 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Fri, 27 Sep 2024 16:42:44 +0200 Subject: [PATCH 015/167] clang-tidy workflow: update .clang-tidy file location --- .github/workflows/clang-tidy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml index 6cfe98ec..bc5520eb 100644 --- a/.github/workflows/clang-tidy.yml +++ b/.github/workflows/clang-tidy.yml @@ -13,7 +13,7 @@ jobs: id: review with: split_workflow: true - config_file: '${{ github.workspace }}/.clang-tidy' + config_file: '.clang-tidy' - uses: ZedThree/clang-tidy-review/upload@v0.14.0 id: upload-review From d53a1077679acd8a0b2ea5810034c6f70527424f Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Fri, 27 Sep 2024 18:11:23 +0200 Subject: [PATCH 016/167] clang-tidy workflow: add comment and rum build before clang-tidy --- .github/workflows/clang-tidy.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml index bc5520eb..325023ea 100644 --- a/.github/workflows/clang-tidy.yml +++ b/.github/workflows/clang-tidy.yml @@ -9,11 +9,15 @@ jobs: steps: - uses: actions/checkout@v4 + # Ideally we would want to run the clang-tidy for every kind of build. + # This would make shure that we will check all platform dependent code parts. + # Here we only test the standard linux build. - uses: ZedThree/clang-tidy-review@v0.14.0 id: review with: split_workflow: true config_file: '.clang-tidy' + cmake_command: 'cmake . 
&& make -j2' - uses: ZedThree/clang-tidy-review/upload@v0.14.0 id: upload-review From fafd720812071d2ab94111e8620dae6381457515 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Fri, 27 Sep 2024 18:16:49 +0200 Subject: [PATCH 017/167] clang-tidy workflow: update fetch depth --- .github/workflows/clang-tidy.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml index 325023ea..06bd903f 100644 --- a/.github/workflows/clang-tidy.yml +++ b/.github/workflows/clang-tidy.yml @@ -8,6 +8,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + fetch-depth: '0' # Ideally we would want to run the clang-tidy for every kind of build. # This would make shure that we will check all platform dependent code parts. From 6e4c880454baa73586273b82a6b407030b72b6e3 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Fri, 27 Sep 2024 18:36:54 +0200 Subject: [PATCH 018/167] clang-tidy workflow: clone with submodules --- .github/workflows/clang-tidy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml index 06bd903f..a697042a 100644 --- a/.github/workflows/clang-tidy.yml +++ b/.github/workflows/clang-tidy.yml @@ -9,7 +9,7 @@ jobs: steps: - uses: actions/checkout@v4 with: - fetch-depth: '0' + submodules: 'true' # Ideally we would want to run the clang-tidy for every kind of build. # This would make shure that we will check all platform dependent code parts. 
From 770b405f52056a6dbc17b53d35db15be7afdf965 Mon Sep 17 00:00:00 2001 From: Markus Schmidl Date: Wed, 2 Oct 2024 19:55:57 +0200 Subject: [PATCH 019/167] first pass of clang-tidy for the source files --- .../Environment/X86/Payload/X86Payload.hpp | 368 +++++++++- include/firestarter/Firestarter.hpp | 23 +- .../Measurement/MeasurementWorker.hpp | 4 +- .../Measurement/Metric/IPCEstimate.h | 9 +- include/firestarter/Measurement/Metric/Perf.h | 11 +- include/firestarter/Measurement/Metric/RAPL.h | 9 +- .../firestarter/Optimizer/Algorithm/NSGA2.hpp | 6 +- .../firestarter/Optimizer/OptimizerWorker.hpp | 4 +- src/firestarter/DumpRegisterWorker.cpp | 133 ++-- .../Environment/X86/Payload/AVX512Payload.cpp | 549 ++++++++------- .../Environment/X86/Payload/AVXPayload.cpp | 607 ++++++++-------- .../Environment/X86/Payload/FMA4Payload.cpp | 583 ++++++++-------- .../Environment/X86/Payload/FMAPayload.cpp | 645 +++++++++--------- .../Environment/X86/Payload/SSE2Payload.cpp | 593 ++++++++-------- .../Environment/X86/Payload/X86Payload.cpp | 420 +----------- .../Environment/X86/Payload/ZENFMAPayload.cpp | 502 +++++++------- .../X86/Platform/X86PlatformConfig.cpp | 2 +- .../Environment/X86/X86CPUTopology.cpp | 6 +- .../Environment/X86/X86Environment.cpp | 119 ++-- src/firestarter/Firestarter.cpp | 215 +++--- src/firestarter/LoadWorker.cpp | 294 ++++---- src/firestarter/Main.cpp | 297 ++++---- .../Measurement/MeasurementWorker.cpp | 329 +++++---- .../Measurement/Metric/IPCEstimate.cpp | 40 +- src/firestarter/Measurement/Metric/Perf.cpp | 211 +++--- src/firestarter/Measurement/Metric/RAPL.cpp | 200 +++--- src/firestarter/Measurement/Summary.cpp | 56 +- src/firestarter/Optimizer/Algorithm/NSGA2.cpp | 131 ++-- src/firestarter/Optimizer/OptimizerWorker.cpp | 44 +- src/firestarter/Optimizer/Population.cpp | 112 +-- .../Optimizer/Util/MultiObjective.cpp | 357 +++++----- src/firestarter/WatchdogWorker.cpp | 64 +- 32 files changed, 3461 insertions(+), 3482 deletions(-) diff --git 
a/include/firestarter/Environment/X86/Payload/X86Payload.hpp b/include/firestarter/Environment/X86/Payload/X86Payload.hpp index 2e38b855..33839135 100644 --- a/include/firestarter/Environment/X86/Payload/X86Payload.hpp +++ b/include/firestarter/Environment/X86/Payload/X86Payload.hpp @@ -26,8 +26,10 @@ #include "../../../Logging/Log.hpp" // IWYU pragma: keep #include "../../Payload/Payload.hpp" #include +#include #include #include // IWYU pragma: keep +#include #include #define INIT_BLOCKSIZE 1024 @@ -49,9 +51,369 @@ class X86Payload : public environment::payload::Payload { [[nodiscard]] auto supportedFeatures() const -> asmjit::CpuFeatures const& { return this->SupportedFeatures; } - template - void emitErrorDetectionCode(asmjit::x86::Builder& Cb, IterRegT IterReg, asmjit::x86::Gpq AddrHighReg, - asmjit::x86::Gpq PointerReg, asmjit::x86::Gpq TempReg, asmjit::x86::Gpq TempReg2); + // add MM regs to dirty regs + // zmm31 is used for backup if VectorReg is of type asmjit::x86::Zmm + template + void emitErrorDetectionCode(asmjit::x86::Builder& Cb, MaybeConstIterRegT& IterReg, + const asmjit::x86::Gpq& AddrHighReg, const asmjit::x86::Gpq& PointerReg, + const asmjit::x86::Gpq& TempReg, const asmjit::x86::Gpq& TempReg2) { + using IterRegT = std::remove_const_t; + using VectorRegT = std::remove_const_t; + + // we don't want anything to break... 
so we use asserts for everything that + // could break it + static_assert(std::is_base_of_v, "VectorReg must be of asmjit::asmjit::x86::Vec"); + static_assert(std::is_same_v || std::is_same_v || + std::is_same_v, + "VectorReg ist not of any supported type"); + static_assert(std::is_same_v || std::is_same_v, + "IterReg is not of any supported type"); + + if constexpr (std::is_same_v) { + assert((IterReg == asmjit::x86::mm0, "iter_reg must be mm0")); + } + + assert((IterReg != TempReg, "iter_reg must be != temp_reg")); + assert((TempReg != TempReg2, "temp_reg must be != temp_reg2")); + assert((TempReg != AddrHighReg, "temp_reg must be != addrHigh_reg")); + assert((TempReg != PointerReg, "temp_reg must be != pointer_reg")); + + assert((IterReg != asmjit::x86::r8, "iter_reg must be != r8")); + assert((IterReg != asmjit::x86::r9, "iter_reg must be != r9")); + assert((IterReg != asmjit::x86::rax, "iter_reg must be != rax")); + assert((IterReg != asmjit::x86::rbx, "iter_reg must be != rbx")); + assert((IterReg != asmjit::x86::rcx, "iter_reg must be != rcx")); + assert((IterReg != asmjit::x86::rdx, "iter_reg must be != rdx")); + + assert((TempReg != asmjit::x86::r8, "temp_reg must be != r8")); + assert((TempReg != asmjit::x86::r9, "temp_reg must be != r9")); + assert((TempReg != asmjit::x86::rax, "temp_reg must be != rax")); + assert((TempReg != asmjit::x86::rbx, "temp_reg must be != rbx")); + assert((TempReg != asmjit::x86::rcx, "temp_reg must be != rcx")); + assert((TempReg != asmjit::x86::rdx, "temp_reg must be != rdx")); + + assert((TempReg2 != asmjit::x86::r8, "temp_reg2 must be != r8")); + assert((TempReg2 != asmjit::x86::r9, "temp_reg2 must be != r9")); + assert((TempReg2 != asmjit::x86::rax, "temp_reg2 must be != rax")); + assert((TempReg2 != asmjit::x86::rbx, "temp_reg2 must be != rbx")); + assert((TempReg2 != asmjit::x86::rcx, "temp_reg2 must be != rcx")); + assert((TempReg2 != asmjit::x86::rdx, "temp_reg2 must be != rdx")); + + assert((AddrHighReg != 
asmjit::x86::r8, "addrHigh_reg must be != r8")); + assert((AddrHighReg != asmjit::x86::r9, "addrHigh_reg must be != r9")); + assert((AddrHighReg != asmjit::x86::rax, "addrHigh_reg must be != rax")); + assert((AddrHighReg != asmjit::x86::rbx, "addrHigh_reg must be != rbx")); + assert((AddrHighReg != asmjit::x86::rcx, "addrHigh_reg must be != rcx")); + assert((AddrHighReg != asmjit::x86::rdx, "addrHigh_reg must be != rdx")); + + auto SkipErrorDetection = Cb.newLabel(); + + if constexpr (std::is_same::value) { + Cb.movq(TempReg, IterReg); + } else { + Cb.mov(TempReg, IterReg); + } + // round about 50-100 Hz + // more or less, but this isn't really that relevant + Cb.and_(TempReg, asmjit::Imm(0x3fff)); + Cb.test(TempReg, TempReg); + Cb.jnz(SkipErrorDetection); + + Cb.mov(TempReg, asmjit::Imm(0xffffffff)); + + auto RegisterCount = registerCount(); + + // Create a backup of VectorReg(0) + if constexpr (std::is_same_v) { + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.push(TempReg2); + Cb.crc32(TempReg, TempReg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.push(TempReg2); + Cb.crc32(TempReg, TempReg2); + + } else if constexpr (std::is_same_v && std::is_same_v) { + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.movq(asmjit::x86::Mm(7), TempReg2); + Cb.crc32(TempReg, TempReg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.movq(asmjit::x86::Mm(6), TempReg2); + Cb.crc32(TempReg, TempReg2); + + Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1)); + + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.movq(asmjit::x86::Mm(5), TempReg2); + Cb.crc32(TempReg, TempReg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.movq(asmjit::x86::Mm(4), TempReg2); + Cb.crc32(TempReg, TempReg2); + } else if constexpr (std::is_same_v && std::is_same_v) { + // We use vector registers zmm31 for our backup + Cb.vmovapd(asmjit::x86::zmm31, 
asmjit::x86::zmm0); + RegisterCount--; + } + + // Calculate the hash of the remaining VectorReg + // use VectorReg(0) as a temporary place to unpack values + for (unsigned I = 1; I < RegisterCount; I++) { + if constexpr (std::is_same_v) { + Cb.vmovapd(asmjit::x86::xmm0, asmjit::x86::Xmm(I)); + + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.crc32(TempReg, TempReg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.crc32(TempReg, TempReg2); + } else if constexpr (std::is_same_v) { + Cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(I)); + + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.crc32(TempReg, TempReg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.crc32(TempReg, TempReg2); + + Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1)); + + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.crc32(TempReg, TempReg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.crc32(TempReg, TempReg2); + } else if constexpr (std::is_same_v) { + Cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(I)); + + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.crc32(TempReg, TempReg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.crc32(TempReg, TempReg2); + + Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1)); + + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.crc32(TempReg, TempReg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.crc32(TempReg, TempReg2); + + Cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(I), asmjit::Imm(2)); + + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.crc32(TempReg, TempReg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.crc32(TempReg, TempReg2); + + Cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(I), asmjit::Imm(3)); + + Cb.movq(TempReg2, 
asmjit::x86::xmm0); + Cb.crc32(TempReg, TempReg2); + Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.movq(TempReg2, asmjit::x86::xmm0); + Cb.crc32(TempReg, TempReg2); + } + } + + // Restore VectorReg(0) from backup + if constexpr (std::is_same_v) { + Cb.pop(TempReg2); + Cb.movq(asmjit::x86::xmm0, TempReg2); + Cb.movlhps(asmjit::x86::xmm0, asmjit::x86::xmm0); + Cb.pop(TempReg2); + Cb.pinsrw(asmjit::x86::xmm0, TempReg2.r32(), asmjit::Imm(0)); + Cb.shr(TempReg2, asmjit::Imm(32)); + Cb.movd(TempReg2.r32(), asmjit::x86::Mm(7)); + Cb.pinsrw(asmjit::x86::xmm0, TempReg2.r32(), asmjit::Imm(1)); + } else if constexpr (std::is_same_v && std::is_same_v) { + Cb.movq(TempReg2, asmjit::x86::Mm(5)); + Cb.movq(asmjit::x86::xmm0, TempReg2); + Cb.movq(TempReg2, asmjit::x86::Mm(4)); + Cb.pinsrq(asmjit::x86::xmm0, TempReg2, asmjit::Imm(1)); + + Cb.vinsertf128(asmjit::x86::ymm0, asmjit::x86::ymm0, asmjit::x86::xmm0, asmjit::Imm(1)); + + Cb.movq(TempReg2, asmjit::x86::Mm(7)); + Cb.movq(asmjit::x86::xmm0, TempReg2); + Cb.movq(TempReg2, asmjit::x86::Mm(6)); + Cb.pinsrq(asmjit::x86::xmm0, TempReg2, asmjit::Imm(1)); + } else if constexpr (std::is_same_v && std::is_same_v) { + // We use vector registers zmm31 for our backup + Cb.vmovapd(asmjit::x86::zmm0, asmjit::x86::zmm31); + } + + // before starting the communication, backup r8, r9, rax, rbx, rcx and rdx + if constexpr (std::is_same_v) { + Cb.movq(asmjit::x86::Mm(7), asmjit::x86::rax); + Cb.movq(asmjit::x86::Mm(6), asmjit::x86::rbx); + Cb.movq(asmjit::x86::Mm(5), asmjit::x86::rcx); + Cb.movq(asmjit::x86::Mm(4), asmjit::x86::rdx); + Cb.movq(asmjit::x86::Mm(3), asmjit::x86::r8); + Cb.movq(asmjit::x86::Mm(2), asmjit::x86::r9); + } else { + Cb.push(asmjit::x86::rax); + Cb.push(asmjit::x86::rbx); + Cb.push(asmjit::x86::rcx); + Cb.push(asmjit::x86::rdx); + Cb.push(asmjit::x86::r8); + Cb.push(asmjit::x86::r9); + } + + // do the actual communication + // temp_reg contains our hash + + // save the pointer_reg. 
it might be any of r8, r9, rax, rbx, rcx or rdx + Cb.mov(TempReg2, PointerReg); + + // Don't touch me! + // This sychronization and communication works even if the threads run at + // different (changing) speed, with just one "lock cmpxchg16b" Brought to you + // by a few hours of headache for two people. + auto Communication = [&](auto Offset) { + // communication + Cb.mov(asmjit::x86::r8, asmjit::x86::ptr_64(TempReg2, Offset)); + + // temp data + Cb.mov(asmjit::x86::r9, TempReg2); + Cb.add(asmjit::x86::r9, asmjit::Imm(Offset + 8)); + + Cb.mov(asmjit::x86::rdx, asmjit::x86::ptr_64(asmjit::x86::r9, 0)); + Cb.mov(asmjit::x86::rax, asmjit::x86::ptr_64(asmjit::x86::r9, 8)); + + auto L0 = Cb.newLabel(); + Cb.bind(L0); + + Cb.lock(); + Cb.cmpxchg16b(asmjit::x86::ptr(asmjit::x86::r8)); + + auto L1 = Cb.newLabel(); + Cb.jnz(L1); + + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0)); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0)); + + Cb.mov(asmjit::x86::rax, asmjit::Imm(2)); + + auto L6 = Cb.newLabel(); + Cb.jmp(L6); + + Cb.bind(L1); + + Cb.cmp(asmjit::x86::rcx, asmjit::x86::rdx); + + auto L2 = Cb.newLabel(); + Cb.jle(L2); + + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx); + + Cb.jmp(L0); + + Cb.bind(L2); + + auto L3 = Cb.newLabel(); + + Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0)); + Cb.jne(L3); + Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0)); + Cb.jne(L3); + + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::x86::rdx); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::x86::rax); + + Cb.bind(L3); + + Cb.cmp(asmjit::x86::rcx, asmjit::x86::ptr_64(asmjit::x86::r9, 16)); + Cb.mov(asmjit::x86::rax, asmjit::Imm(4)); + Cb.jne(L6); + + Cb.cmp(asmjit::x86::rbx, 
asmjit::x86::ptr_64(asmjit::x86::r9, 24)); + auto L4 = Cb.newLabel(); + Cb.jne(L4); + + Cb.mov(asmjit::x86::rax, asmjit::Imm(0)); + + auto L5 = Cb.newLabel(); + Cb.jmp(L5); + + Cb.bind(L4); + + Cb.mov(asmjit::x86::rax, asmjit::Imm(1)); + + Cb.bind(L5); + + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0)); + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0)); + + Cb.bind(L6); + + // if check failed + Cb.cmp(asmjit::x86::rax, asmjit::Imm(1)); + auto L7 = Cb.newLabel(); + Cb.jne(L7); + + // write the error flag + Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 32), asmjit::Imm(1)); + + // stop the execution after some time + Cb.mov(asmjit::x86::ptr_64(AddrHighReg), asmjit::Imm(LOAD_STOP)); + Cb.mfence(); + + Cb.bind(L7); + + auto L9 = Cb.newLabel(); + Cb.jmp(L9); + }; + + // left communication + // move hash + Cb.mov(asmjit::x86::rbx, TempReg); + // move iterations counter + if constexpr (std::is_same_v) { + Cb.movq(asmjit::x86::rcx, IterReg); + } else { + Cb.mov(asmjit::x86::rcx, IterReg); + } + + Communication(-128); + + // right communication + // move hash + Cb.mov(asmjit::x86::rbx, TempReg); + // move iterations counter + if constexpr (std::is_same_v) { + Cb.movq(asmjit::x86::rcx, IterReg); + } else { + Cb.mov(asmjit::x86::rcx, IterReg); + } + + Communication(-64); + + // restore r8, r9, rax, rbx, rcx and rdx + if constexpr (std::is_same_v) { + Cb.movq(asmjit::x86::rax, asmjit::x86::Mm(7)); + Cb.movq(asmjit::x86::rbx, asmjit::x86::Mm(6)); + Cb.movq(asmjit::x86::rcx, asmjit::x86::Mm(5)); + Cb.movq(asmjit::x86::rdx, asmjit::x86::Mm(4)); + Cb.movq(asmjit::x86::r8, asmjit::x86::Mm(3)); + Cb.movq(asmjit::x86::r9, asmjit::x86::Mm(2)); + } else { + Cb.pop(asmjit::x86::r9); + Cb.pop(asmjit::x86::r8); + Cb.pop(asmjit::x86::rdx); + Cb.pop(asmjit::x86::rcx); + Cb.pop(asmjit::x86::rbx); + Cb.pop(asmjit::x86::rax); + } + + Cb.bind(SkipErrorDetection); + } public: X86Payload(asmjit::CpuFeatures const& SupportedFeatures, diff --git 
a/include/firestarter/Firestarter.hpp b/include/firestarter/Firestarter.hpp index 0e4c7ef5..6e9ad166 100644 --- a/include/firestarter/Firestarter.hpp +++ b/include/firestarter/Firestarter.hpp @@ -66,15 +66,14 @@ class Firestarter { std::chrono::microseconds const& Period, unsigned RequestedNumThreads, std::string const& CpuBind, bool PrintFunctionSummary, unsigned FunctionId, bool ListInstructionGroups, std::string const& InstructionGroups, unsigned LineCount, bool AllowUnavailablePayload, - bool DumpRegisters, std::chrono::seconds const& DumpRegistersTimeDelta, - std::string const& DumpRegistersOutpath, bool ErrorDetection, int Gpus, unsigned GpuMatrixSize, - bool GpuUseFloat, bool GpuUseDouble, bool ListMetrics, bool Measurement, - std::chrono::milliseconds const& StartDelta, std::chrono::milliseconds const& StopDelta, - std::chrono::milliseconds const& MeasurementInterval, std::vector const& MetricPaths, - std::vector const& StdinMetrics, bool Optimize, std::chrono::seconds const& Preheat, - std::string const& OptimizationAlgorithm, std::vector const& OptimizationMetrics, - std::chrono::seconds const& EvaluationDuration, unsigned Individuals, std::string const& OptimizeOutfile, - unsigned Generations, double Nsga2Cr, double Nsga2M); + bool DumpRegisters, std::chrono::seconds const& DumpRegistersTimeDelta, std::string DumpRegistersOutpath, + bool ErrorDetection, int Gpus, unsigned GpuMatrixSize, bool GpuUseFloat, bool GpuUseDouble, + bool ListMetrics, bool Measurement, std::chrono::milliseconds const& StartDelta, + std::chrono::milliseconds const& StopDelta, std::chrono::milliseconds const& MeasurementInterval, + std::vector const& MetricPaths, std::vector const& StdinMetrics, bool Optimize, + std::chrono::seconds const& Preheat, std::string const& OptimizationAlgorithm, + std::vector const& OptimizationMetrics, std::chrono::seconds const& EvaluationDuration, + unsigned Individuals, std::string OptimizeOutfile, unsigned Generations, double Nsga2Cr, double 
Nsga2M); ~Firestarter(); @@ -141,12 +140,12 @@ class Firestarter { void signalWork() { signalLoadWorkers(THREAD_WORK); }; // WatchdogWorker.cpp - auto watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load, std::chrono::seconds Timeout) - -> int; + static auto watchdogWorker(std::chrono::microseconds Period, std::chrono::microseconds Load, + std::chrono::seconds Timeout) -> int; #ifdef FIRESTARTER_DEBUG_FEATURES // DumpRegisterWorker.cpp - auto initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, std::string DumpFilePath) -> int; + auto initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, const std::string& DumpFilePath) -> int; void joinDumpRegisterWorker(); #endif diff --git a/include/firestarter/Measurement/MeasurementWorker.hpp b/include/firestarter/Measurement/MeasurementWorker.hpp index e9e18b76..0205bc03 100644 --- a/include/firestarter/Measurement/MeasurementWorker.hpp +++ b/include/firestarter/Measurement/MeasurementWorker.hpp @@ -44,7 +44,7 @@ class MeasurementWorker { pthread_t WorkerThread; pthread_t StdinThread; - std::vector Metrics = {&RaplMetric, &PerfIpcMetric, &PerfFreqMetric, &IpcEstimateMetric}; + std::vector Metrics = {&RaplMetric, &PerfIpcMetric, &PerfFreqMetric, &IpcEstimateMetric}; std::mutex ValuesMutex; std::map> Values; @@ -65,7 +65,7 @@ class MeasurementWorker { std::string AvailableMetricsString; #ifndef FIRESTARTER_LINK_STATIC - std::vector _metricDylibs = {}; + std::vector MetricDylibs; #endif std::vector StdinMetrics; diff --git a/include/firestarter/Measurement/Metric/IPCEstimate.h b/include/firestarter/Measurement/Metric/IPCEstimate.h index 63dcb26b..f5362f93 100644 --- a/include/firestarter/Measurement/Metric/IPCEstimate.h +++ b/include/firestarter/Measurement/Metric/IPCEstimate.h @@ -23,9 +23,14 @@ #include "../MetricInterface.h" +#ifdef __cplusplus extern "C" { +#endif -extern MetricInterface IpcEstimateMetric; +extern const MetricInterface IpcEstimateMetric; extern void 
ipcEstimateMetricInsert(double Value); -}; \ No newline at end of file + +#ifdef __cplusplus +}; +#endif \ No newline at end of file diff --git a/include/firestarter/Measurement/Metric/Perf.h b/include/firestarter/Measurement/Metric/Perf.h index 2702cd94..480fb808 100644 --- a/include/firestarter/Measurement/Metric/Perf.h +++ b/include/firestarter/Measurement/Metric/Perf.h @@ -23,9 +23,14 @@ #include "../MetricInterface.h" +#ifdef __cplusplus extern "C" { +#endif -extern MetricInterface PerfIpcMetric; +extern const MetricInterface PerfIpcMetric; -extern MetricInterface PerfFreqMetric; -}; \ No newline at end of file +extern const MetricInterface PerfFreqMetric; + +#ifdef __cplusplus +}; +#endif \ No newline at end of file diff --git a/include/firestarter/Measurement/Metric/RAPL.h b/include/firestarter/Measurement/Metric/RAPL.h index 017373a1..5076affe 100644 --- a/include/firestarter/Measurement/Metric/RAPL.h +++ b/include/firestarter/Measurement/Metric/RAPL.h @@ -23,7 +23,12 @@ #include "../MetricInterface.h" +#ifdef __cplusplus extern "C" { +#endif -extern MetricInterface RaplMetric; -}; \ No newline at end of file +extern const MetricInterface RaplMetric; + +#ifdef __cplusplus +}; +#endif \ No newline at end of file diff --git a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp index 70c2aac0..acaa441f 100644 --- a/include/firestarter/Optimizer/Algorithm/NSGA2.hpp +++ b/include/firestarter/Optimizer/Algorithm/NSGA2.hpp @@ -35,9 +35,9 @@ class NSGA2 : public Algorithm { auto evolve(firestarter::optimizer::Population& Pop) -> firestarter::optimizer::Population override; private: - unsigned Gen; - double Cr; - double M; + const unsigned Gen; + const double Cr; + const double M; }; } // namespace firestarter::optimizer::algorithm diff --git a/include/firestarter/Optimizer/OptimizerWorker.hpp b/include/firestarter/Optimizer/OptimizerWorker.hpp index ba106595..f6c3a37f 100644 --- 
a/include/firestarter/Optimizer/OptimizerWorker.hpp +++ b/include/firestarter/Optimizer/OptimizerWorker.hpp @@ -33,7 +33,7 @@ namespace firestarter::optimizer { class OptimizerWorker { public: OptimizerWorker(std::unique_ptr&& Algorithm, - firestarter::optimizer::Population& Population, std::string const& OptimizationAlgorithm, + firestarter::optimizer::Population& Population, std::string OptimizationAlgorithm, unsigned Individuals, std::chrono::seconds const& Preheat); ~OptimizerWorker() = default; @@ -51,7 +51,7 @@ class OptimizerWorker { unsigned Individuals; std::chrono::seconds Preheat; - pthread_t WorkerThread; + pthread_t WorkerThread{}; }; } // namespace firestarter::optimizer diff --git a/src/firestarter/DumpRegisterWorker.cpp b/src/firestarter/DumpRegisterWorker.cpp index 06d7e417..5dee113e 100644 --- a/src/firestarter/DumpRegisterWorker.cpp +++ b/src/firestarter/DumpRegisterWorker.cpp @@ -28,21 +28,19 @@ #include #include -using namespace firestarter; - namespace { -static unsigned hammingDistance(uint64_t x, uint64_t y) { - unsigned dist = 0; +auto hammingDistance(uint64_t X, uint64_t Y) -> unsigned { + unsigned Dist = 0; - for (uint64_t val = x ^ y; val > 0; val >>= 1) { - dist += val & 1; + for (uint64_t Val = X ^ Y; Val > 0; Val >>= 1) { + Dist += Val & 1; } - return dist; + return Dist; } -static std::string registerNameBySize(unsigned registerSize) { - switch (registerSize) { +auto registerNameBySize(unsigned RegisterSize) -> std::string { + switch (RegisterSize) { case 2: return "xmm"; case 4: @@ -55,128 +53,133 @@ static std::string registerNameBySize(unsigned registerSize) { } } // namespace -int Firestarter::initDumpRegisterWorker(std::chrono::seconds dumpTimeDelta, std::string dumpFilePath) { +namespace firestarter { + +auto Firestarter::initDumpRegisterWorker(std::chrono::seconds DumpTimeDelta, const std::string& DumpFilePath) -> int { - auto data = std::make_unique(this->LoadThreads.begin()->second, dumpTimeDelta, dumpFilePath); + auto Data 
= std::make_unique(this->LoadThreads.begin()->second, DumpTimeDelta, DumpFilePath); - this->DumpRegisterWorkerThread = std::thread(Firestarter::dumpRegisterWorker, std::move(data)); + this->DumpRegisterWorkerThread = std::thread(Firestarter::dumpRegisterWorker, std::move(Data)); return EXIT_SUCCESS; } void Firestarter::joinDumpRegisterWorker() { this->DumpRegisterWorkerThread.join(); } -void Firestarter::dumpRegisterWorker(std::unique_ptr data) { +void Firestarter::dumpRegisterWorker(std::unique_ptr Data) { pthread_setname_np(pthread_self(), "DumpRegWorker"); - int registerCount = data->LoadWorkerDataPtr->config().payload().registerCount(); - int registerSize = data->LoadWorkerDataPtr->config().payload().registerSize(); - std::string registerPrefix = registerNameBySize(registerSize); - auto offset = sizeof(DumpRegisterStruct) / sizeof(uint64_t); + auto RegisterCount = Data->LoadWorkerDataPtr->config().payload().registerCount(); + auto RegisterSize = Data->LoadWorkerDataPtr->config().payload().registerSize(); + std::string RegisterPrefix = registerNameBySize(RegisterSize); + auto Offset = sizeof(DumpRegisterStruct) / sizeof(uint64_t); - auto dumpRegisterStruct = reinterpret_cast(data->LoadWorkerDataPtr->AddrMem - offset); + auto* DumpRegisterStruct = reinterpret_cast(Data->LoadWorkerDataPtr->AddrMem - Offset); - auto dumpVar = reinterpret_cast(&dumpRegisterStruct->DumpVar); + auto* DumpVar = reinterpret_cast(&DumpRegisterStruct->DumpVar); // memory of simd variables is before the padding - volatile uint64_t* dumpMemAddr = dumpRegisterStruct->Padding - registerCount * registerSize; + auto* DumpMemAddr = static_cast(DumpRegisterStruct->Padding) - + (static_cast(RegisterCount * RegisterSize)); - // TODO: maybe use aligned_malloc to make memcpy more efficient and don't + // TODO(marenz): maybe use aligned_malloc to make memcpy more efficient and don't // interrupt the workload as much? 
- uint64_t* last = reinterpret_cast(malloc(sizeof(uint64_t) * offset)); - uint64_t* current = reinterpret_cast(malloc(sizeof(uint64_t) * offset)); + auto* Last = reinterpret_cast(malloc(sizeof(uint64_t) * Offset)); + auto* Current = reinterpret_cast(malloc(sizeof(uint64_t) * Offset)); - if (last == nullptr || current == nullptr) { + if (Last == nullptr || Current == nullptr) { log::error() << "Malloc failed in Firestarter::dumpRegisterWorker"; exit(ENOMEM); } - std::stringstream dumpFilePath; - dumpFilePath << data->DumpFilePath; + std::stringstream DumpFilePath; + DumpFilePath << Data->DumpFilePath; #if defined(__MINGW32__) || defined(__MINGW64__) dumpFilePath << "\\"; #else - dumpFilePath << "/"; + DumpFilePath << "/"; #endif - dumpFilePath << "hamming_distance.csv"; - auto dumpFile = std::ofstream(dumpFilePath.str()); + DumpFilePath << "hamming_distance.csv"; + auto DumpFile = std::ofstream(DumpFilePath.str()); // dump the header to the csv file - dumpFile << "total_hamming_distance,"; - for (int i = 0; i < registerCount; i++) { - for (int j = 0; j < registerSize; j++) { - dumpFile << registerPrefix << i << "[" << j << "]"; + DumpFile << "total_hamming_distance,"; + for (auto I = 0U; I < RegisterCount; I++) { + for (auto J = 0U; J < RegisterSize; J++) { + DumpFile << RegisterPrefix << I << "[" << J << "]"; - if (j != registerSize - 1) { - dumpFile << ","; + if (J != RegisterSize - 1) { + DumpFile << ","; } } - if (i != registerCount - 1) { - dumpFile << ","; + if (I != RegisterCount - 1) { + DumpFile << ","; } } - dumpFile << std::endl << std::flush; + DumpFile << '\n' << std::flush; // do not output the hamming distance for the first run - bool skipFirst = true; + bool SkipFirst = true; // continue until stop and dump the registers every data->dumpTimeDelta // seconds - for (; *data->LoadWorkerDataPtr->AddrHigh != LOAD_STOP;) { + for (; *Data->LoadWorkerDataPtr->AddrHigh != LOAD_STOP;) { // signal the thread to dump its largest SIMD registers - *dumpVar = 
DumpVariable::Start; + *DumpVar = DumpVariable::Start; __asm__ __volatile__("mfence;"); - while (*dumpVar == DumpVariable::Start) { + while (*DumpVar == DumpVariable::Start) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); } // copy the register content to minimize the interruption of the load worker - std::memcpy(current, (void*)dumpMemAddr, sizeof(uint64_t) * offset); + std::memcpy(Current, (void*)DumpMemAddr, sizeof(uint64_t) * Offset); // skip the first output, as we first have to get some valid values for last - if (!skipFirst) { + if (!SkipFirst) { // calculate the total hamming distance - int totalHammingDistance = 0; - for (int i = 0; i < registerCount * registerSize; i++) { - totalHammingDistance += hammingDistance(current[i], last[i]); + auto TotalHammingDistance = 0U; + for (auto I = 0U; I < RegisterCount * RegisterSize; I++) { + TotalHammingDistance += hammingDistance(Current[I], Last[I]); } - dumpFile << totalHammingDistance << ","; + DumpFile << TotalHammingDistance << ","; // dump the hamming distance of each double (last, current) pair - for (int i = registerCount - 1; i >= 0; i--) { + for (int I = RegisterCount - 1; I >= 0; I--) { // auto registerNum = registerCount - 1 - i; - for (auto j = 0; j < registerSize; j++) { - auto index = registerSize * i + j; - auto hd = static_cast(hammingDistance(current[index], last[index])); + for (auto J = 0U; J < RegisterSize; J++) { + auto Index = (RegisterSize * I) + J; + auto Hd = static_cast(hammingDistance(Current[Index], Last[Index])); - dumpFile << hd; - if (j != registerSize - 1) { - dumpFile << ","; + DumpFile << Hd; + if (J != RegisterSize - 1) { + DumpFile << ","; } } - if (i != 0) { - dumpFile << ","; + if (I != 0) { + DumpFile << ","; } } - dumpFile << std::endl << std::flush; + DumpFile << '\n' << std::flush; } else { - skipFirst = false; + SkipFirst = false; } - std::memcpy(last, current, sizeof(uint64_t) * offset); + std::memcpy(Last, Current, sizeof(uint64_t) * Offset); - 
std::this_thread::sleep_for(std::chrono::seconds(data->DumpTimeDelta)); + std::this_thread::sleep_for(std::chrono::seconds(Data->DumpTimeDelta)); } - dumpFile.close(); + DumpFile.close(); - free(last); - free(current); + free(Last); + free(Current); } -#endif +} // namespace firestarter + +#endif \ No newline at end of file diff --git a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp index 2325ed04..3f866f70 100644 --- a/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/AVX512Payload.cpp @@ -21,382 +21,379 @@ #include -using namespace firestarter::environment::x86::payload; -using namespace asmjit; -using namespace asmjit::x86; +namespace firestarter::environment::x86::payload { auto AVX512Payload::compilePayload(std::vector> const& Proportion, unsigned InstructionCacheSize, std::list const& DataCacheBufferSize, unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters, bool ErrorDetection) -> int { + using namespace asmjit; + using namespace asmjit::x86; // Compute the sequence of instruction groups and the number of its repetions // to reach the desired size - auto sequence = this->generateSequence(Proportion); - auto repetitions = this->getNumberOfSequenceRepetitions(sequence, NumberOfLines / Thread); + auto Sequence = generateSequence(Proportion); + auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread); // compute count of flops and memory access for performance report - unsigned flops = 0; - unsigned bytes = 0; + Flops = 0; + Bytes = 0; - for (const auto& item : sequence) { - auto it = this->InstructionFlops.find(item); + for (const auto& Item : Sequence) { + auto It = InstructionFlops.find(Item); - if (it == this->InstructionFlops.end()) { - workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; + if (It == InstructionFlops.end()) { + 
workerLog::error() << "Instruction group " << Item << " undefined in " << name() << "."; return EXIT_FAILURE; } - flops += it->second; + Flops += It->second; - it = this->InstructionMemory.find(item); + It = InstructionMemory.find(Item); - if (it != this->InstructionMemory.end()) { - bytes += it->second; + if (It != InstructionMemory.end()) { + Bytes += It->second; } } - this->Flops = repetitions * flops; - this->Bytes = repetitions * bytes; - this->Instructions = repetitions * sequence.size() * 4 + 6; + Flops *= Repetitions; + Bytes *= Repetitions; + Instructions = Repetitions * Sequence.size() * 4 + 6; // calculate the buffer sizes - auto l1i_cache_size = InstructionCacheSize / Thread; - auto dataCacheBufferSizeIterator = DataCacheBufferSize.begin(); - auto l1_size = *dataCacheBufferSizeIterator / Thread; - std::advance(dataCacheBufferSizeIterator, 1); - auto l2_size = *dataCacheBufferSizeIterator / Thread; - std::advance(dataCacheBufferSizeIterator, 1); - auto l3_size = *dataCacheBufferSizeIterator / Thread; - auto ram_size = RamBufferSize / Thread; + const auto L1iCacheSize = InstructionCacheSize / Thread; + auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin(); + const auto L1Size = *DataCacheBufferSizeIterator / Thread; + std::advance(DataCacheBufferSizeIterator, 1); + const auto L2Size = *DataCacheBufferSizeIterator / Thread; + std::advance(DataCacheBufferSizeIterator, 1); + const auto L3Size = *DataCacheBufferSizeIterator / Thread; + const auto RamSize = RamBufferSize / Thread; // calculate the reset counters for the buffers - auto l2_loop_count = getL2LoopCount(sequence, NumberOfLines, l2_size * Thread, Thread); - auto l3_loop_count = getL3LoopCount(sequence, NumberOfLines, l3_size * Thread, Thread); - auto ram_loop_count = getRAMLoopCount(sequence, NumberOfLines, ram_size * Thread, Thread); + const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread); + const auto L3LoopCount = getL3LoopCount(Sequence, 
NumberOfLines, L3Size * Thread, Thread); + const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread); - CodeHolder code; - code.init(this->Rt.environment()); + CodeHolder Code; + Code.init(Rt.environment()); - if (nullptr != this->LoadFunction) { - this->Rt.release(&this->LoadFunction); + if (nullptr != LoadFunction) { + Rt.release(&LoadFunction); } - Builder cb(&code); - cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | + Builder Cb(&Code); + Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | asmjit::DiagnosticOptions::kValidateIntermediate); - auto pointer_reg = rax; - auto l1_addr = rbx; - auto l2_addr = rcx; - auto l3_addr = r8; - auto ram_addr = r9; - auto l2_count_reg = r10; - auto l3_count_reg = r11; - auto ram_count_reg = r12; - auto temp_reg = r13; - auto temp_reg2 = rbp; - auto offset_reg = r14; - auto addrHigh_reg = r15; - auto iter_reg = mm0; - auto shift_reg = std::vector({rdi, rsi, rdx}); - auto shift_reg32 = std::vector({edi, esi, edx}); - auto nr_shift_regs = 3; - auto mul_regs = 3; - auto add_regs = 22; - auto alt_dst_regs = 5; - auto ram_reg = zmm30; - - FuncDetail func; - func.init(FuncSignatureT(CallConvId::kCDecl), - this->Rt.environment()); - - FuncFrame frame; - frame.init(func); + const auto PointerReg = rax; + const auto L1Addr = rbx; + const auto L2Addr = rcx; + const auto L3Addr = r8; + const auto RamAddr = r9; + const auto L2CountReg = r10; + const auto L3CountReg = r11; + const auto RamCountReg = r12; + const auto TempReg = r13; + const auto TempReg2 = rbp; + const auto OffsetReg = r14; + const auto AddrHighReg = r15; + const auto IterReg = mm0; + const auto ShiftReg = std::vector({rdi, rsi, rdx}); + const auto ShiftReg32 = std::vector({edi, esi, edx}); + const auto NrShiftRegs = 3; + const auto MulRegs = 3; + const auto AddRegs = 22; + const auto AltDstRegs = 5; + const auto RamReg = zmm30; + + FuncDetail Func; + Func.init(FuncSignatureT(CallConvId::kCDecl), 
Rt.environment()); + + FuncFrame Frame; + Frame.init(Func); // make zmm registers dirty - for (int i = 0; i < 32; i++) { - frame.addDirtyRegs(Zmm(i)); + for (int I = 0; I < 32; I++) { + Frame.addDirtyRegs(Zmm(I)); } - for (int i = 0; i < 8; i++) { - frame.addDirtyRegs(Mm(i)); + for (int I = 0; I < 8; I++) { + Frame.addDirtyRegs(Mm(I)); } // make all other used registers dirty except RAX - frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg, - temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr); - for (const auto& reg : shift_reg) { - frame.addDirtyRegs(reg); + Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg, + AddrHighReg, IterReg, RamAddr); + for (const auto& Reg : ShiftReg) { + Frame.addDirtyRegs(Reg); } - FuncArgsAssignment args(&func); + FuncArgsAssignment Args(&Func); // FIXME: asmjit assigment to mm0 does not seem to be supported - args.assignAll(pointer_reg, addrHigh_reg, temp_reg); - args.updateFuncFrame(frame); - frame.finalize(); + Args.assignAll(PointerReg, AddrHighReg, TempReg); + Args.updateFuncFrame(Frame); + Frame.finalize(); - cb.emitProlog(frame); - cb.emitArgsAssignment(frame, args); + Cb.emitProlog(Frame); + Cb.emitArgsAssignment(Frame, Args); // FIXME: movq from temp_reg to iter_reg - cb.movq(iter_reg, temp_reg); + Cb.movq(IterReg, TempReg); // stop right away if low load is selected - auto FunctionExit = cb.newLabel(); + auto FunctionExit = Cb.newLabel(); - cb.mov(temp_reg, ptr_64(addrHigh_reg)); - cb.test(temp_reg, temp_reg); - cb.jz(FunctionExit); + Cb.mov(TempReg, ptr_64(AddrHighReg)); + Cb.test(TempReg, TempReg); + Cb.jz(FunctionExit); - cb.mov(offset_reg, + Cb.mov(OffsetReg, Imm(64)); // increment after each cache/memory access // Initialize registers for shift operations - for (auto const& reg : shift_reg32) { - cb.mov(reg, Imm(0xAAAAAAAA)); + for (auto const& Reg : ShiftReg32) { + Cb.mov(Reg, 
Imm(0xAAAAAAAA)); } // Initialize AVX512-Registers for FMA Operations - cb.vmovapd(zmm0, zmmword_ptr(pointer_reg)); - cb.vmovapd(zmm1, zmmword_ptr(pointer_reg, 64)); - cb.vmovapd(zmm2, zmmword_ptr(pointer_reg, 128)); - auto add_start = mul_regs; - auto add_end = mul_regs + add_regs - 1; - auto trans_start = add_regs + mul_regs; - auto trans_end = add_regs + mul_regs + alt_dst_regs - 1; - for (int i = add_start; i <= trans_end; i++) { - cb.vmovapd(Zmm(i), zmmword_ptr(pointer_reg, 256 + i * 64)); + Cb.vmovapd(zmm0, zmmword_ptr(PointerReg)); + Cb.vmovapd(zmm1, zmmword_ptr(PointerReg, 64)); + Cb.vmovapd(zmm2, zmmword_ptr(PointerReg, 128)); + auto AddStart = MulRegs; + auto AddEnd = MulRegs + AddRegs - 1; + auto TransStart = AddRegs + MulRegs; + auto TransEnd = AddRegs + MulRegs + AltDstRegs - 1; + for (int I = AddStart; I <= TransEnd; I++) { + Cb.vmovapd(Zmm(I), zmmword_ptr(PointerReg, 256 + (I * 64))); } - cb.mov(l1_addr, pointer_reg); // address for L1-buffer - cb.mov(l2_addr, pointer_reg); - cb.add(l2_addr, Imm(l1_size)); // address for L2-buffer - cb.mov(l3_addr, pointer_reg); - cb.add(l3_addr, Imm(l2_size)); // address for L3-buffer - cb.mov(ram_addr, pointer_reg); - cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer - cb.mov(l2_count_reg, Imm(l2_loop_count)); - workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop (" - << l2_size / 1024 << ") KiB"; - cb.mov(l3_count_reg, Imm(l3_loop_count)); - workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop (" - << l3_size / 1024 << ") KiB"; - cb.mov(ram_count_reg, Imm(ram_loop_count)); - workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop (" - << ram_size / 1024 << ") KiB"; - - cb.align(AlignMode::kCode, 64); - - auto Loop = cb.newLabel(); - cb.bind(Loop); - - auto shift_pos = 0; - bool left = false; - auto add_dest = add_start + 1; - auto mov_dst = 
trans_start; - unsigned l1_offset = 0; - -#define L1_INCREMENT() \ - l1_offset += 64; \ - if (l1_offset < l1_size * 0.5) { \ - cb.add(l1_addr, offset_reg); \ - } else { \ - l1_offset = 0; \ - cb.mov(l1_addr, pointer_reg); \ - } - -#define L2_INCREMENT() cb.add(l2_addr, offset_reg) - -#define L3_INCREMENT() cb.add(l3_addr, offset_reg) - -#define RAM_INCREMENT() cb.add(ram_addr, offset_reg) - - for (unsigned count = 0; count < repetitions; count++) { - for (const auto& item : sequence) { - if (item == "REG") { - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2); - cb.vfmadd231pd(Zmm(mov_dst), zmm2, zmm1); - cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], temp_reg); - mov_dst++; - } else if (item == "L1_L") { - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2); - cb.vfmadd231pd(Zmm(add_dest), zmm1, zmmword_ptr(l1_addr, 64)); - L1_INCREMENT(); - } else if (item == "L1_BROADCAST") { - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2); - cb.vbroadcastsd(Zmm(add_dest), ptr_64(l1_addr, 64)); - L1_INCREMENT(); - } else if (item == "L1_S") { - cb.vmovapd(zmmword_ptr(l1_addr, 64), Zmm(add_dest)); - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2); - L1_INCREMENT(); - } else if (item == "L1_LS") { - cb.vmovapd(zmmword_ptr(l1_addr, 64), Zmm(add_dest)); - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmmword_ptr(l1_addr, 128)); - L1_INCREMENT(); - } else if (item == "L2_L") { - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2); - cb.vfmadd231pd(Zmm(add_dest), zmm1, zmmword_ptr(l2_addr, 64)); - L2_INCREMENT(); - } else if (item == "L2_S") { - cb.vmovapd(zmmword_ptr(l2_addr, 64), Zmm(add_dest)); - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2); - L2_INCREMENT(); - } else if (item == "L2_LS") { - cb.vmovapd(zmmword_ptr(l2_addr, 64), Zmm(add_dest)); - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmmword_ptr(l2_addr, 128)); - L2_INCREMENT(); - } else if (item == "L3_L") { - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2); - cb.vfmadd231pd(Zmm(add_dest), zmm1, zmmword_ptr(l3_addr, 64)); - L3_INCREMENT(); - } else if (item == "L3_S") 
{ - cb.vmovapd(zmmword_ptr(l3_addr, 64), Zmm(add_dest)); - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2); - L3_INCREMENT(); - } else if (item == "L3_LS") { - cb.vmovapd(zmmword_ptr(l3_addr, 64), Zmm(add_dest)); - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmmword_ptr(l3_addr, 128)); - L3_INCREMENT(); - } else if (item == "L3_P") { - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmmword_ptr(l1_addr, 64)); - cb.prefetcht2(ptr(l3_addr)); - L3_INCREMENT(); - } else if (item == "RAM_L") { - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2); - cb.vfmadd231pd(ram_reg, zmm1, zmmword_ptr(ram_addr, 64)); - RAM_INCREMENT(); - } else if (item == "RAM_S") { - cb.vmovapd(zmmword_ptr(ram_addr, 64), Zmm(add_dest)); - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmm2); - RAM_INCREMENT(); - } else if (item == "RAM_LS") { - cb.vmovapd(zmmword_ptr(ram_addr, 64), Zmm(add_dest)); - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmmword_ptr(ram_addr, 128)); - RAM_INCREMENT(); - } else if (item == "RAM_P") { - cb.vfmadd231pd(Zmm(add_dest), zmm0, zmmword_ptr(l1_addr, 64)); - cb.prefetcht2(ptr(ram_addr)); - RAM_INCREMENT(); + Cb.mov(L1Addr, PointerReg); // address for L1-buffer + Cb.mov(L2Addr, PointerReg); + Cb.add(L2Addr, Imm(L1Size)); // address for L2-buffer + Cb.mov(L3Addr, PointerReg); + Cb.add(L3Addr, Imm(L2Size)); // address for L3-buffer + Cb.mov(RamAddr, PointerReg); + Cb.add(RamAddr, Imm(L3Size)); // address for RAM-buffer + Cb.mov(L2CountReg, Imm(L2LoopCount)); + workerLog::trace() << "reset counter for L2-buffer with " << L2LoopCount << " cache line accesses per loop (" + << L2Size / 1024 << ") KiB"; + Cb.mov(L3CountReg, Imm(L3LoopCount)); + workerLog::trace() << "reset counter for L3-buffer with " << L3LoopCount << " cache line accesses per loop (" + << L3Size / 1024 << ") KiB"; + Cb.mov(RamCountReg, Imm(RamLoopCount)); + workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop (" + << RamSize / 1024 << ") KiB"; + + Cb.align(AlignMode::kCode, 64); + + auto Loop = 
Cb.newLabel(); + Cb.bind(Loop); + + auto ShiftPos = 0; + bool Left = false; + auto AddDest = AddStart + 1; + auto MovDst = TransStart; + unsigned L1Offset = 0; + + const auto L1Increment = [&Cb, &L1Offset, &L1Size, &L1Addr, &OffsetReg, &PointerReg]() { + L1Offset += 64; + if (L1Offset < L1Size * 0.5) { + Cb.add(L1Addr, OffsetReg); + } else { + L1Offset = 0; + Cb.mov(L1Addr, PointerReg); + } + }; + const auto L2Increment = [&Cb, &L2Addr, &OffsetReg]() { Cb.add(L2Addr, OffsetReg); }; + const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); }; + const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); }; + + for (unsigned Count = 0; Count < Repetitions; Count++) { + for (const auto& Item : Sequence) { + if (Item == "REG") { + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2); + Cb.vfmadd231pd(Zmm(MovDst), zmm2, zmm1); + Cb.xor_(ShiftReg[(ShiftPos + NrShiftRegs - 1) % NrShiftRegs], TempReg); + MovDst++; + } else if (Item == "L1_L") { + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2); + Cb.vfmadd231pd(Zmm(AddDest), zmm1, zmmword_ptr(L1Addr, 64)); + L1Increment(); + } else if (Item == "L1_BROADCAST") { + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2); + Cb.vbroadcastsd(Zmm(AddDest), ptr_64(L1Addr, 64)); + L1Increment(); + } else if (Item == "L1_S") { + Cb.vmovapd(zmmword_ptr(L1Addr, 64), Zmm(AddDest)); + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2); + L1Increment(); + } else if (Item == "L1_LS") { + Cb.vmovapd(zmmword_ptr(L1Addr, 64), Zmm(AddDest)); + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmmword_ptr(L1Addr, 128)); + L1Increment(); + } else if (Item == "L2_L") { + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2); + Cb.vfmadd231pd(Zmm(AddDest), zmm1, zmmword_ptr(L2Addr, 64)); + L2Increment(); + } else if (Item == "L2_S") { + Cb.vmovapd(zmmword_ptr(L2Addr, 64), Zmm(AddDest)); + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2); + L2Increment(); + } else if (Item == "L2_LS") { + Cb.vmovapd(zmmword_ptr(L2Addr, 64), Zmm(AddDest)); + Cb.vfmadd231pd(Zmm(AddDest), 
zmm0, zmmword_ptr(L2Addr, 128)); + L2Increment(); + } else if (Item == "L3_L") { + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2); + Cb.vfmadd231pd(Zmm(AddDest), zmm1, zmmword_ptr(L3Addr, 64)); + L3Increment(); + } else if (Item == "L3_S") { + Cb.vmovapd(zmmword_ptr(L3Addr, 64), Zmm(AddDest)); + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2); + L3Increment(); + } else if (Item == "L3_LS") { + Cb.vmovapd(zmmword_ptr(L3Addr, 64), Zmm(AddDest)); + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmmword_ptr(L3Addr, 128)); + L3Increment(); + } else if (Item == "L3_P") { + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmmword_ptr(L1Addr, 64)); + Cb.prefetcht2(ptr(L3Addr)); + L3Increment(); + } else if (Item == "RAM_L") { + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2); + Cb.vfmadd231pd(RamReg, zmm1, zmmword_ptr(RamAddr, 64)); + RamIncrement(); + } else if (Item == "RAM_S") { + Cb.vmovapd(zmmword_ptr(RamAddr, 64), Zmm(AddDest)); + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmm2); + RamIncrement(); + } else if (Item == "RAM_LS") { + Cb.vmovapd(zmmword_ptr(RamAddr, 64), Zmm(AddDest)); + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmmword_ptr(RamAddr, 128)); + RamIncrement(); + } else if (Item == "RAM_P") { + Cb.vfmadd231pd(Zmm(AddDest), zmm0, zmmword_ptr(L1Addr, 64)); + Cb.prefetcht2(ptr(RamAddr)); + RamIncrement(); } else { - workerLog::error() << "Instruction group " << item << " not found in " << this->name() << "."; + workerLog::error() << "Instruction group " << Item << " not found in " << name() << "."; return EXIT_FAILURE; } - if (left) { - cb.shr(shift_reg32[shift_pos], Imm(1)); + if (Left) { + Cb.shr(ShiftReg32[ShiftPos], Imm(1)); } else { - cb.shl(shift_reg32[shift_pos], Imm(1)); + Cb.shl(ShiftReg32[ShiftPos], Imm(1)); } - add_dest++; - if (add_dest > add_end) { - add_dest = add_start; + AddDest++; + if (AddDest > AddEnd) { + AddDest = AddStart; } - if (mov_dst > trans_end) { - mov_dst = trans_start; + if (MovDst > TransEnd) { + MovDst = TransStart; } - shift_pos++; - if (shift_pos == nr_shift_regs) { - shift_pos = 0; - 
left = !left; + ShiftPos++; + if (ShiftPos == NrShiftRegs) { + ShiftPos = 0; + Left = !Left; } } } - cb.movq(temp_reg, iter_reg); // restore iteration counter - if (getRAMSequenceCount(sequence) > 0) { + Cb.movq(TempReg, IterReg); // restore iteration counter + if (getRAMSequenceCount(Sequence) > 0) { // reset RAM counter - auto NoRamReset = cb.newLabel(); - - cb.sub(ram_count_reg, Imm(1)); - cb.jnz(NoRamReset); - cb.mov(ram_count_reg, Imm(ram_loop_count)); - cb.mov(ram_addr, pointer_reg); - cb.add(ram_addr, Imm(l3_size)); - cb.bind(NoRamReset); + auto NoRamReset = Cb.newLabel(); + + Cb.sub(RamCountReg, Imm(1)); + Cb.jnz(NoRamReset); + Cb.mov(RamCountReg, Imm(RamLoopCount)); + Cb.mov(RamAddr, PointerReg); + Cb.add(RamAddr, Imm(L3Size)); + Cb.bind(NoRamReset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - cb.inc(temp_reg); // increment iteration counter - if (getL2SequenceCount(sequence) > 0) { + Cb.inc(TempReg); // increment iteration counter + if (getL2SequenceCount(Sequence) > 0) { // reset L2-Cache counter - auto NoL2Reset = cb.newLabel(); - - cb.sub(l2_count_reg, Imm(1)); - cb.jnz(NoL2Reset); - cb.mov(l2_count_reg, Imm(l2_loop_count)); - cb.mov(l2_addr, pointer_reg); - cb.add(l2_addr, Imm(l1_size)); - cb.bind(NoL2Reset); + auto NoL2Reset = Cb.newLabel(); + + Cb.sub(L2CountReg, Imm(1)); + Cb.jnz(NoL2Reset); + Cb.mov(L2CountReg, Imm(L2LoopCount)); + Cb.mov(L2Addr, PointerReg); + Cb.add(L2Addr, Imm(L1Size)); + Cb.bind(NoL2Reset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - cb.movq(iter_reg, temp_reg); // store iteration counter - if (getL3SequenceCount(sequence) > 0) { + Cb.movq(IterReg, TempReg); // store iteration counter + if (getL3SequenceCount(Sequence) > 0) { // reset L3-Cache counter - auto NoL3Reset = cb.newLabel(); - - cb.sub(l3_count_reg, Imm(1)); - cb.jnz(NoL3Reset); - cb.mov(l3_count_reg, Imm(l3_loop_count)); - cb.mov(l3_addr, pointer_reg); - cb.add(l3_addr, Imm(l2_size)); - 
cb.bind(NoL3Reset); + auto NoL3Reset = Cb.newLabel(); + + Cb.sub(L3CountReg, Imm(1)); + Cb.jnz(NoL3Reset); + Cb.mov(L3CountReg, Imm(L3LoopCount)); + Cb.mov(L3Addr, PointerReg); + Cb.add(L3Addr, Imm(L2Size)); + Cb.bind(NoL3Reset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - cb.mov(l1_addr, pointer_reg); + Cb.mov(L1Addr, PointerReg); if (DumpRegisters) { - auto SkipRegistersDump = cb.newLabel(); + auto SkipRegistersDump = Cb.newLabel(); - cb.test(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait)); - cb.jnz(SkipRegistersDump); + Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait)); + Cb.jnz(SkipRegistersDump); // dump all the ymm register - for (int i = 0; i < (int)this->registerCount(); i++) { - cb.vmovapd(zmmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Zmm(i)); + for (unsigned I = 0; I < registerCount(); I++) { + Cb.vmovapd(zmmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Zmm(I)); } // set read flag - cb.mov(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait)); + Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait)); - cb.bind(SkipRegistersDump); + Cb.bind(SkipRegistersDump); } if (ErrorDetection) { - this->emitErrorDetectionCode(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); + emitErrorDetectionCode(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2); } - cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH)); - cb.jnz(Loop); + Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH)); + Cb.jnz(Loop); - cb.bind(FunctionExit); + Cb.bind(FunctionExit); - cb.movq(rax, iter_reg); + Cb.movq(rax, IterReg); - cb.emitEpilog(frame); + Cb.emitEpilog(Frame); - cb.finalize(); + Cb.finalize(); // String sb; // cb.dump(sb); - Error err = this->Rt.add(&this->LoadFunction, &code); - if (err) { + Error Err = Rt.add(&LoadFunction, &Code); + if (Err) { workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; 
return EXIT_FAILURE; } // skip if we could not determine cache size - if (l1i_cache_size != 0) { - auto loopSize = code.labelOffset(FunctionExit) - code.labelOffset(Loop); - auto instructionCachePercentage = 100 * loopSize / l1i_cache_size; + if (L1iCacheSize != 0) { + auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop); + auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize; - if (loopSize > l1i_cache_size) { + if (LoopSize > L1iCacheSize) { workerLog::warn() << "Work-loop is bigger than the L1i-Cache."; } - workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage + workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage << "%) from the L1i-Cache for the work-loop."; - workerLog::trace() << "Sequence size: " << sequence.size(); - workerLog::trace() << "Repetition count: " << repetitions; + workerLog::trace() << "Sequence size: " << Sequence.size(); + workerLog::trace() << "Repetition count: " << Repetitions; } return EXIT_SUCCESS; @@ -405,8 +402,8 @@ auto AVX512Payload::compilePayload(std::vector> auto AVX512Payload::getAvailableInstructions() const -> std::list { std::list Instructions; - transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(Instructions), - [](const auto& item) { return item.first; }); + transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions), + [](const auto& Item) { return Item.first; }); return Instructions; } @@ -414,3 +411,5 @@ auto AVX512Payload::getAvailableInstructions() const -> std::list { void AVX512Payload::init(uint64_t* MemoryAddr, uint64_t BufferSize) { X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4); } + +} // namespace firestarter::environment::x86::payload \ No newline at end of file diff --git a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp 
b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp index f3905ff0..8ce30a9f 100644 --- a/src/firestarter/Environment/X86/Payload/AVXPayload.cpp +++ b/src/firestarter/Environment/X86/Payload/AVXPayload.cpp @@ -19,434 +19,429 @@ * Contact: daniel.hackenberg@tu-dresden.de *****************************************************************************/ -#include #include -#include -#include -#include +namespace firestarter::environment::x86::payload { -using namespace firestarter::environment::x86::payload; -using namespace asmjit; -using namespace asmjit::x86; +auto AVXPayload::compilePayload(std::vector> const& Proportion, + unsigned InstructionCacheSize, std::list const& DataCacheBufferSize, + unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters, + bool ErrorDetection) -> int { + using namespace asmjit; + using namespace asmjit::x86; -int AVXPayload::compilePayload(std::vector> const& proportion, - unsigned instructionCacheSize, std::list const& dataCacheBufferSize, - unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) { // Compute the sequence of instruction groups and the number of its repetions // to reach the desired size - auto sequence = this->generateSequence(proportion); - auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); + auto Sequence = generateSequence(Proportion); + auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread); // compute count of flops and memory access for performance report - unsigned flops = 0; - unsigned bytes = 0; + Flops = 0; + Bytes = 0; - for (const auto& item : sequence) { - auto it = this->InstructionFlops.find(item); + for (const auto& Item : Sequence) { + auto It = InstructionFlops.find(Item); - if (it == this->InstructionFlops.end()) { - workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; + if (It == InstructionFlops.end()) { + 
workerLog::error() << "Instruction group " << Item << " undefined in " << name() << "."; return EXIT_FAILURE; } - flops += it->second; + Flops += It->second; - it = this->InstructionMemory.find(item); + It = InstructionMemory.find(Item); - if (it != this->InstructionMemory.end()) { - bytes += it->second; + if (It != InstructionMemory.end()) { + Bytes += It->second; } } - this->Flops = repetitions * flops; - this->Bytes = repetitions * bytes; - this->Instructions = repetitions * sequence.size() * 2 + 4; + Flops *= Repetitions; + Bytes *= Repetitions; + Instructions = Repetitions * Sequence.size() * 2 + 4; // calculate the buffer sizes - auto l1i_cache_size = instructionCacheSize / thread; - auto dataCacheBufferSizeIterator = dataCacheBufferSize.begin(); - auto l1_size = *dataCacheBufferSizeIterator / thread; - std::advance(dataCacheBufferSizeIterator, 1); - auto l2_size = *dataCacheBufferSizeIterator / thread; - std::advance(dataCacheBufferSizeIterator, 1); - auto l3_size = *dataCacheBufferSizeIterator / thread; - auto ram_size = ramBufferSize / thread; + const auto L1iCacheSize = InstructionCacheSize / Thread; + auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin(); + const auto L1Size = *DataCacheBufferSizeIterator / Thread; + std::advance(DataCacheBufferSizeIterator, 1); + const auto L2Size = *DataCacheBufferSizeIterator / Thread; + std::advance(DataCacheBufferSizeIterator, 1); + const auto L3Size = *DataCacheBufferSizeIterator / Thread; + const auto RamSize = RamBufferSize / Thread; // calculate the reset counters for the buffers - auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); - auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); - auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); + const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread); + const auto L3LoopCount = getL3LoopCount(Sequence, 
NumberOfLines, L3Size * Thread, Thread); + const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread); - CodeHolder code; - code.init(this->Rt.environment()); + CodeHolder Code; + Code.init(Rt.environment()); - if (nullptr != this->LoadFunction) { - this->Rt.release(&this->LoadFunction); + if (nullptr != LoadFunction) { + Rt.release(&LoadFunction); } - Builder cb(&code); - cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | + Builder Cb(&Code); + Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | asmjit::DiagnosticOptions::kValidateIntermediate); - auto pointer_reg = rax; - auto l1_addr = rbx; - auto l2_addr = rcx; - auto l3_addr = rdx; - auto ram_addr = rdi; - auto l2_count_reg = r8; - auto l3_count_reg = r9; - auto ram_count_reg = r10; - auto temp_reg = r11; - auto temp_reg2 = rbp; - auto offset_reg = r12; - auto addrHigh_reg = r13; - auto iter_reg = r14; - auto shift_regs = 6; - auto add_regs = 10; - auto trans_regs = 6; - - FuncDetail func; - func.init(FuncSignatureT(CallConvId::kCDecl), - this->Rt.environment()); - - FuncFrame frame; - frame.init(func); + const auto PointerReg = rax; + const auto L1Addr = rbx; + const auto L2Addr = rcx; + const auto L3Addr = rdx; + const auto RamAddr = rdi; + const auto L2CountReg = r8; + const auto L3CountReg = r9; + const auto RamCountReg = r10; + const auto TempReg = r11; + const auto TempReg2 = rbp; + const auto OffsetReg = r12; + const auto AddrHighReg = r13; + const auto IterReg = r14; + const auto ShiftRegs = 6; + const auto AddRegs = 10; + const auto TransRegs = 6; + + FuncDetail Func; + Func.init(FuncSignatureT(CallConvId::kCDecl), Rt.environment()); + + FuncFrame Frame; + Frame.init(Func); // make xmm registers dirty - for (int i = 0; i < 16; i++) { - frame.addDirtyRegs(Ymm(i)); + for (int I = 0; I < 16; I++) { + Frame.addDirtyRegs(Ymm(I)); } // make mmx registers dirty - for (int i = 0; i < 8; i++) { - frame.addDirtyRegs(Mm(i)); + for (int I 
= 0; I < 8; I++) { + Frame.addDirtyRegs(Mm(I)); } // make all other used registers dirty except RAX - frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg, - temp_reg2, offset_reg, addrHigh_reg, iter_reg); + Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg, + AddrHighReg, IterReg); - FuncArgsAssignment args(&func); - args.assignAll(pointer_reg, addrHigh_reg, iter_reg); - args.updateFuncFrame(frame); - frame.finalize(); + FuncArgsAssignment Args(&Func); + Args.assignAll(PointerReg, AddrHighReg, IterReg); + Args.updateFuncFrame(Frame); + Frame.finalize(); - cb.emitProlog(frame); - cb.emitArgsAssignment(frame, args); + Cb.emitProlog(Frame); + Cb.emitArgsAssignment(Frame, Args); // stop right away if low load is selected - auto FunctionExit = cb.newLabel(); + auto FunctionExit = Cb.newLabel(); - cb.mov(temp_reg, ptr_64(addrHigh_reg)); - cb.test(temp_reg, temp_reg); - cb.jz(FunctionExit); + Cb.mov(TempReg, ptr_64(AddrHighReg)); + Cb.test(TempReg, TempReg); + Cb.jz(FunctionExit); - cb.mov(offset_reg, + Cb.mov(OffsetReg, Imm(64)); // increment after each cache/memory access // Initialize AVX-Registers for Addition - auto add_start = 0; - auto add_end = add_regs - 1; - auto trans_start = add_regs; - auto trans_end = add_regs + trans_regs - 1; - if (add_regs > 0) { - for (int i = add_start; i <= add_end; i++) { - cb.vmovapd(Ymm(i), ymmword_ptr(pointer_reg, 32 * i)); + auto AddStart = 0; + auto AddEnd = AddRegs - 1; + auto TransStart = AddRegs; + auto TransEnd = AddRegs + TransRegs - 1; + if (AddRegs > 0) { + for (int I = AddStart; I <= AddEnd; I++) { + Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 32 * I)); } } // Initialize MMX-Registers for shift operations - auto shift_start = 0; - auto shift_end = shift_regs - 1; - if (shift_regs > 1) { - cb.mov(temp_reg, Imm(0x5555555555555555)); - cb.movq(Mm(shift_start), temp_reg); - for (int i = shift_start + 1; i 
<= shift_end; i++) { - cb.movq(Mm(i), Mm(shift_start)); + auto ShiftStart = 0; + auto ShiftEnd = ShiftRegs - 1; + if (ShiftRegs > 1) { + Cb.mov(TempReg, Imm(0x5555555555555555)); + Cb.movq(Mm(ShiftStart), TempReg); + for (int I = ShiftStart + 1; I <= ShiftEnd; I++) { + Cb.movq(Mm(I), Mm(ShiftStart)); } } // Initialize AVX-Registers for Transfer-Operations - if (trans_regs > 0) { - if (trans_start % 2 == 0) { - cb.mov(temp_reg, Imm(0x0F0F0F0F0F0F0F0F)); + if (TransRegs > 0) { + if (TransStart % 2 == 0) { + Cb.mov(TempReg, Imm(0x0F0F0F0F0F0F0F0F)); } else { - cb.mov(temp_reg, Imm(0xF0F0F0F0F0F0F0F0)); + Cb.mov(TempReg, Imm(0xF0F0F0F0F0F0F0F0)); } - cb.pinsrq(Xmm(trans_start), temp_reg, Imm(0)); - cb.pinsrq(Xmm(trans_start), temp_reg, Imm(1)); - cb.vinsertf128(Ymm(trans_start), Ymm(trans_start), Xmm(trans_start), Imm(1)); - for (int i = trans_start + 1; i <= trans_end; i++) { - if (i % 2 == 0) { - cb.shr(temp_reg, Imm(4)); + Cb.pinsrq(Xmm(TransStart), TempReg, Imm(0)); + Cb.pinsrq(Xmm(TransStart), TempReg, Imm(1)); + Cb.vinsertf128(Ymm(TransStart), Ymm(TransStart), Xmm(TransStart), Imm(1)); + for (int I = TransStart + 1; I <= TransEnd; I++) { + if (I % 2 == 0) { + Cb.shr(TempReg, Imm(4)); } else { - cb.shl(temp_reg, Imm(4)); + Cb.shl(TempReg, Imm(4)); } - cb.pinsrq(Xmm(i), temp_reg, Imm(0)); - cb.pinsrq(Xmm(i), temp_reg, Imm(1)); - cb.vinsertf128(Ymm(i), Ymm(i), Xmm(i), Imm(1)); + Cb.pinsrq(Xmm(I), TempReg, Imm(0)); + Cb.pinsrq(Xmm(I), TempReg, Imm(1)); + Cb.vinsertf128(Ymm(I), Ymm(I), Xmm(I), Imm(1)); } } - cb.mov(l1_addr, pointer_reg); // address for L1-buffer - cb.mov(l2_addr, pointer_reg); - cb.add(l2_addr, Imm(l1_size)); // address for L2-buffer - cb.mov(l3_addr, pointer_reg); - cb.add(l3_addr, Imm(l2_size)); // address for L3-buffer - cb.mov(ram_addr, pointer_reg); - cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer - cb.mov(l2_count_reg, Imm(l2_loop_count)); - workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line 
accesses per loop (" - << l2_size / 1024 << ") KiB"; - cb.mov(l3_count_reg, Imm(l3_loop_count)); - workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop (" - << l3_size / 1024 << ") KiB"; - cb.mov(ram_count_reg, Imm(ram_loop_count)); - workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop (" - << ram_size / 1024 << ") KiB"; - - cb.align(AlignMode::kCode, 64); - - auto Loop = cb.newLabel(); - cb.bind(Loop); - - auto left = false; - auto shift_dst = shift_start; - auto add_dest = add_start + 1; - auto mov_dst = trans_start; - auto mov_src = mov_dst + 1; - unsigned l1_offset = 0; - -#define L1_INCREMENT() \ - l1_offset += 64; \ - if (l1_offset < l1_size * 0.5) { \ - cb.add(l1_addr, offset_reg); \ - } else { \ - l1_offset = 0; \ - cb.mov(l1_addr, pointer_reg); \ - } - -#define L2_INCREMENT() cb.add(l2_addr, offset_reg); - -#define L3_INCREMENT() cb.add(l3_addr, offset_reg) - -#define RAM_INCREMENT() cb.add(ram_addr, offset_reg) - - for (unsigned count = 0; count < repetitions; count++) { - for (const auto& item : sequence) { - if (item == "REG") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - cb.vmovdqa(Ymm(mov_dst), Ymm(mov_src)); - } else if (item == "L1_L") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32)); - L1_INCREMENT(); - } else if (item == "L1_S") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); - cb.vmovapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest)); - L1_INCREMENT(); - this->Instructions++; - } else if (item == "L1_LS") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32)); - cb.vmovapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest)); - L1_INCREMENT(); - this->Instructions++; - } else if (item == "L2_L") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l2_addr, 64)); - L2_INCREMENT(); - } 
else if (item == "L2_S") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); - cb.vmovapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest)); - L2_INCREMENT(); - this->Instructions++; - } else if (item == "L2_LS") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l2_addr, 64)); - cb.vmovapd(xmmword_ptr(l2_addr, 96), Xmm(add_dest)); - L2_INCREMENT(); - this->Instructions++; - } else if (item == "L3_L") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l3_addr, 64)); - L3_INCREMENT(); - } else if (item == "L3_S") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); - cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); - L3_INCREMENT(); - this->Instructions++; - } else if (item == "L3_LS") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l3_addr, 64)); - cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); - L3_INCREMENT(); - this->Instructions++; - } else if (item == "L3_P") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32)); - cb.prefetcht0(ptr(l3_addr)); - L3_INCREMENT(); - this->Instructions++; - } else if (item == "RAM_L") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(ram_addr, 64)); - RAM_INCREMENT(); - } else if (item == "RAM_S") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), Ymm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); - cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); - RAM_INCREMENT(); - this->Instructions++; - } else if (item == "RAM_LS") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l3_addr, 64)); - cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); - RAM_INCREMENT(); - this->Instructions++; - } else if (item == "RAM_P") { - cb.vaddpd(Ymm(add_dest), Ymm(add_dest), ymmword_ptr(l1_addr, 32)); - cb.prefetcht2(ptr(ram_addr)); - RAM_INCREMENT(); - this->Instructions++; + Cb.mov(L1Addr, PointerReg); // address for L1-buffer + Cb.mov(L2Addr, PointerReg); + Cb.add(L2Addr, 
Imm(L1Size)); // address for L2-buffer + Cb.mov(L3Addr, PointerReg); + Cb.add(L3Addr, Imm(L2Size)); // address for L3-buffer + Cb.mov(RamAddr, PointerReg); + Cb.add(RamAddr, Imm(L3Size)); // address for RAM-buffer + Cb.mov(L2CountReg, Imm(L2LoopCount)); + workerLog::trace() << "reset counter for L2-buffer with " << L2LoopCount << " cache line accesses per loop (" + << L2Size / 1024 << ") KiB"; + Cb.mov(L3CountReg, Imm(L3LoopCount)); + workerLog::trace() << "reset counter for L3-buffer with " << L3LoopCount << " cache line accesses per loop (" + << L3Size / 1024 << ") KiB"; + Cb.mov(RamCountReg, Imm(RamLoopCount)); + workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop (" + << RamSize / 1024 << ") KiB"; + + Cb.align(AlignMode::kCode, 64); + + auto Loop = Cb.newLabel(); + Cb.bind(Loop); + + auto Left = false; + auto ShiftDest = ShiftStart; + auto AddDest = AddStart + 1; + auto MovDest = TransStart; + auto MovSrc = MovDest + 1; + unsigned L1Offset = 0; + + const auto L1Increment = [&Cb, &L1Offset, &L1Size, &L1Addr, &OffsetReg, &PointerReg]() { + L1Offset += 64; + if (L1Offset < L1Size * 0.5) { + Cb.add(L1Addr, OffsetReg); + } else { + L1Offset = 0; + Cb.mov(L1Addr, PointerReg); + } + }; + const auto L2Increment = [&Cb, &L2Addr, &OffsetReg]() { Cb.add(L2Addr, OffsetReg); }; + const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); }; + const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); }; + + for (unsigned Count = 0; Count < Repetitions; Count++) { + for (const auto& Item : Sequence) { + if (Item == "REG") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs))); + Cb.vmovdqa(Ymm(MovDest), Ymm(MovSrc)); + } else if (Item == "L1_L") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L1Addr, 32)); + L1Increment(); + } else if (Item == "L1_S") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + 
((AddDest - AddStart + AddRegs - 1) % AddRegs))); + Cb.vmovapd(xmmword_ptr(L1Addr, 32), Xmm(AddDest)); + L1Increment(); + Instructions++; + } else if (Item == "L1_LS") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L1Addr, 32)); + Cb.vmovapd(xmmword_ptr(L1Addr, 64), Xmm(AddDest)); + L1Increment(); + Instructions++; + } else if (Item == "L2_L") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L2Addr, 64)); + L2Increment(); + } else if (Item == "L2_S") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs))); + Cb.vmovapd(xmmword_ptr(L2Addr, 64), Xmm(AddDest)); + L2Increment(); + Instructions++; + } else if (Item == "L2_LS") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L2Addr, 64)); + Cb.vmovapd(xmmword_ptr(L2Addr, 96), Xmm(AddDest)); + L2Increment(); + Instructions++; + } else if (Item == "L3_L") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L3Addr, 64)); + L3Increment(); + } else if (Item == "L3_S") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs))); + Cb.vmovapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest)); + L3Increment(); + Instructions++; + } else if (Item == "L3_LS") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L3Addr, 64)); + Cb.vmovapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest)); + L3Increment(); + Instructions++; + } else if (Item == "L3_P") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L1Addr, 32)); + Cb.prefetcht0(ptr(L3Addr)); + L3Increment(); + Instructions++; + } else if (Item == "RAM_L") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(RamAddr, 64)); + RamIncrement(); + } else if (Item == "RAM_S") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), Ymm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs))); + Cb.vmovapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest)); + RamIncrement(); + Instructions++; + } else if (Item == "RAM_LS") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L3Addr, 64)); + 
Cb.vmovapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest)); + RamIncrement(); + Instructions++; + } else if (Item == "RAM_P") { + Cb.vaddpd(Ymm(AddDest), Ymm(AddDest), ymmword_ptr(L1Addr, 32)); + Cb.prefetcht2(ptr(RamAddr)); + RamIncrement(); + Instructions++; } else { - workerLog::error() << "Instruction group " << item << " not found in " << this->name() << "."; + workerLog::error() << "Instruction group " << Item << " not found in " << name() << "."; return EXIT_FAILURE; } - if (shift_regs > 1) { - this->Instructions++; - if (left) { - cb.psrlw(Mm(shift_start + (shift_dst - shift_start + 3) % shift_regs), Mm(shift_dst)); + if (ShiftRegs > 1) { + Instructions++; + if (Left) { + Cb.psrlw(Mm(ShiftStart + ((ShiftDest - ShiftStart + 3) % ShiftRegs)), Mm(ShiftDest)); } else { - cb.psllw(Mm(shift_start + (shift_dst - shift_start + 3) % shift_regs), Mm(shift_dst)); + Cb.psllw(Mm(ShiftStart + ((ShiftDest - ShiftStart + 3) % ShiftRegs)), Mm(ShiftDest)); } } - add_dest++; - if (add_dest > add_end) { + AddDest++; + if (AddDest > AddEnd) { // DO NOT REMOVE the + 1. It serves for the good of ymm0. If it was to // be overriden, the values in the other registers would rise up to inf. 
- add_dest = add_start + 1; + AddDest = AddStart + 1; } - mov_dst++; - if (mov_dst > trans_end) { - mov_dst = trans_start; + MovDest++; + if (MovDest > TransEnd) { + MovDest = TransStart; } - mov_src++; - if (mov_src > trans_end) { - mov_src = trans_start; + MovSrc++; + if (MovSrc > TransEnd) { + MovSrc = TransStart; } - if (shift_regs > 1) { - shift_dst++; - if (shift_dst > shift_end) { - shift_dst = shift_start; - left = !left; + if (ShiftRegs > 1) { + ShiftDest++; + if (ShiftDest > ShiftEnd) { + ShiftDest = ShiftStart; + Left = !Left; } } } } - if (this->getRAMSequenceCount(sequence) > 0) { + if (getRAMSequenceCount(Sequence) > 0) { // reset RAM counter - auto NoRamReset = cb.newLabel(); - - cb.sub(ram_count_reg, Imm(1)); - cb.jnz(NoRamReset); - cb.mov(ram_count_reg, Imm(ram_loop_count)); - cb.mov(ram_addr, pointer_reg); - cb.add(ram_addr, Imm(l3_size)); - cb.bind(NoRamReset); + auto NoRamReset = Cb.newLabel(); + + Cb.sub(RamCountReg, Imm(1)); + Cb.jnz(NoRamReset); + Cb.mov(RamCountReg, Imm(RamLoopCount)); + Cb.mov(RamAddr, PointerReg); + Cb.add(RamAddr, Imm(L3Size)); + Cb.bind(NoRamReset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - if (this->getL2SequenceCount(sequence) > 0) { + if (getL2SequenceCount(Sequence) > 0) { // reset L2-Cache counter - auto NoL2Reset = cb.newLabel(); - - cb.sub(l2_count_reg, Imm(1)); - cb.jnz(NoL2Reset); - cb.mov(l2_count_reg, Imm(l2_loop_count)); - cb.mov(l2_addr, pointer_reg); - cb.add(l2_addr, Imm(l1_size)); - cb.bind(NoL2Reset); + auto NoL2Reset = Cb.newLabel(); + + Cb.sub(L2CountReg, Imm(1)); + Cb.jnz(NoL2Reset); + Cb.mov(L2CountReg, Imm(L2LoopCount)); + Cb.mov(L2Addr, PointerReg); + Cb.add(L2Addr, Imm(L1Size)); + Cb.bind(NoL2Reset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - if (this->getL3SequenceCount(sequence) > 0) { + if (getL3SequenceCount(Sequence) > 0) { // reset L3-Cache counter - auto NoL3Reset = cb.newLabel(); - - cb.sub(l3_count_reg, 
Imm(1)); - cb.jnz(NoL3Reset); - cb.mov(l3_count_reg, Imm(l3_loop_count)); - cb.mov(l3_addr, pointer_reg); - cb.add(l3_addr, Imm(l2_size)); - cb.bind(NoL3Reset); + auto NoL3Reset = Cb.newLabel(); + + Cb.sub(L3CountReg, Imm(1)); + Cb.jnz(NoL3Reset); + Cb.mov(L3CountReg, Imm(L3LoopCount)); + Cb.mov(L3Addr, PointerReg); + Cb.add(L3Addr, Imm(L2Size)); + Cb.bind(NoL3Reset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - cb.inc(iter_reg); // increment iteration counter - cb.mov(l1_addr, pointer_reg); + Cb.inc(IterReg); // increment iteration counter + Cb.mov(L1Addr, PointerReg); - if (dumpRegisters) { - auto SkipRegistersDump = cb.newLabel(); + if (DumpRegisters) { + auto SkipRegistersDump = Cb.newLabel(); - cb.test(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait)); - cb.jnz(SkipRegistersDump); + Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait)); + Cb.jnz(SkipRegistersDump); // dump all the ymm register - for (int i = 0; i < (int)this->registerCount(); i++) { - cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i)); + for (unsigned I = 0; I < registerCount(); I++) { + Cb.vmovapd(ymmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Ymm(I)); } // set read flag - cb.mov(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait)); + Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait)); - cb.bind(SkipRegistersDump); + Cb.bind(SkipRegistersDump); } - if (errorDetection) { - this->emitErrorDetectionCode(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); + if (ErrorDetection) { + emitErrorDetectionCode(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2); } - cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH)); - cb.jnz(Loop); + Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH)); + Cb.jnz(Loop); - cb.bind(FunctionExit); + Cb.bind(FunctionExit); - cb.mov(rax, iter_reg); // restore iteration counter + Cb.mov(rax, IterReg); // restore iteration 
counter - cb.emitEpilog(frame); + Cb.emitEpilog(Frame); - cb.finalize(); + Cb.finalize(); // String sb; // cb.dump(sb); - Error err = this->Rt.add(&this->LoadFunction, &code); - if (err) { + Error Err = Rt.add(&LoadFunction, &Code); + if (Err) { workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; } // skip if we could not determine cache size - if (l1i_cache_size != 0) { - auto loopSize = code.labelOffset(FunctionExit) - code.labelOffset(Loop); - auto instructionCachePercentage = 100 * loopSize / l1i_cache_size; + if (L1iCacheSize != 0) { + auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop); + auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize; - if (loopSize > l1i_cache_size) { + if (LoopSize > L1iCacheSize) { workerLog::warn() << "Work-loop is bigger than the L1i-Cache."; } - workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage + workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage << "%) from the L1i-Cache for the work-loop."; - workerLog::trace() << "Sequence size: " << sequence.size(); - workerLog::trace() << "Repetition count: " << repetitions; + workerLog::trace() << "Sequence size: " << Sequence.size(); + workerLog::trace() << "Repetition count: " << Repetitions; } return EXIT_SUCCESS; } -std::list AVXPayload::getAvailableInstructions() const { - std::list instructions; +auto AVXPayload::getAvailableInstructions() const -> std::list { + std::list Instructions; - transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions), - [](const auto& item) { return item.first; }); + transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions), + [](const auto& Item) { return Item.first; }); - return instructions; + return Instructions; } -void AVXPayload::init(uint64_t* 
memoryAddr, uint64_t bufferSize) { - X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10, 1.654738925401e-15); +void AVXPayload::init(uint64_t* MemoryAddr, uint64_t BufferSize) { + X86Payload::init(MemoryAddr, BufferSize, 1.654738925401e-10, 1.654738925401e-15); } + +} // namespace firestarter::environment::x86::payload \ No newline at end of file diff --git a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp index 9df404e2..7dc06a3f 100644 --- a/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/FMA4Payload.cpp @@ -19,412 +19,397 @@ * Contact: daniel.hackenberg@tu-dresden.de *****************************************************************************/ -#include #include -#include -#include -#include +namespace firestarter::environment::x86::payload { -using namespace firestarter::environment::x86::payload; -using namespace asmjit; -using namespace asmjit::x86; +auto FMA4Payload::compilePayload(std::vector> const& Proportion, + unsigned InstructionCacheSize, std::list const& DataCacheBufferSize, + unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters, + bool ErrorDetection) -> int { + using namespace asmjit; + using namespace asmjit::x86; -int FMA4Payload::compilePayload(std::vector> const& proportion, - unsigned instructionCacheSize, std::list const& dataCacheBufferSize, - unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) { // Compute the sequence of instruction groups and the number of its repetions // to reach the desired size - auto sequence = this->generateSequence(proportion); - auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); + auto Sequence = generateSequence(Proportion); + auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread); // compute count of flops and memory access for 
performance report - unsigned flops = 0; - unsigned bytes = 0; + Flops = 0; + Bytes = 0; - for (const auto& item : sequence) { - auto it = this->InstructionFlops.find(item); + for (const auto& Item : Sequence) { + auto It = InstructionFlops.find(Item); - if (it == this->InstructionFlops.end()) { - workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; + if (It == InstructionFlops.end()) { + workerLog::error() << "Instruction group " << Item << " undefined in " << name() << "."; return EXIT_FAILURE; } - flops += it->second; + Flops += It->second; - it = this->InstructionMemory.find(item); + It = InstructionMemory.find(Item); - if (it != this->InstructionMemory.end()) { - bytes += it->second; + if (It != InstructionMemory.end()) { + Bytes += It->second; } } - this->Flops = repetitions * flops; - this->Bytes = repetitions * bytes; - this->Instructions = repetitions * sequence.size() * 4 + 6; + Flops *= Repetitions; + Bytes *= Repetitions; + Instructions = Repetitions * Sequence.size() * 4 + 6; // calculate the buffer sizes - auto l1i_cache_size = instructionCacheSize / thread; - auto dataCacheBufferSizeIterator = dataCacheBufferSize.begin(); - auto l1_size = *dataCacheBufferSizeIterator / thread; - std::advance(dataCacheBufferSizeIterator, 1); - auto l2_size = *dataCacheBufferSizeIterator / thread; - std::advance(dataCacheBufferSizeIterator, 1); - auto l3_size = *dataCacheBufferSizeIterator / thread; - auto ram_size = ramBufferSize / thread; + const auto L1iCacheSize = InstructionCacheSize / Thread; + auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin(); + const auto L1Size = *DataCacheBufferSizeIterator / Thread; + std::advance(DataCacheBufferSizeIterator, 1); + const auto L2Size = *DataCacheBufferSizeIterator / Thread; + std::advance(DataCacheBufferSizeIterator, 1); + const auto L3Size = *DataCacheBufferSizeIterator / Thread; + const auto RamSize = RamBufferSize / Thread; // calculate the reset counters for the buffers - 
auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); - auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); - auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); + const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread); + const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread); + const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread); - CodeHolder code; - code.init(this->Rt.environment()); + CodeHolder Code; + Code.init(Rt.environment()); - if (nullptr != this->LoadFunction) { - this->Rt.release(&this->LoadFunction); + if (nullptr != LoadFunction) { + Rt.release(&LoadFunction); } - Builder cb(&code); - cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | + Builder Cb(&Code); + Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | asmjit::DiagnosticOptions::kValidateIntermediate); - auto pointer_reg = rax; - auto l1_addr = rbx; - auto l2_addr = rcx; - auto l3_addr = r8; - auto ram_addr = r9; - auto l2_count_reg = r10; - auto l3_count_reg = r11; - auto ram_count_reg = r12; - auto temp_reg = r13; - auto temp_reg2 = rbp; - auto offset_reg = r14; - auto addrHigh_reg = r15; - auto iter_reg = mm0; - auto shift_reg = std::vector({rdi, rsi, rdx}); - auto shift_reg32 = std::vector({edi, esi, edx}); - auto nr_shift_regs = 3; - auto mul_regs = 2; - auto add_regs = 9; - auto alt_dst_regs = 3; - auto ram_reg = xmm15; - - FuncDetail func; - func.init(FuncSignatureT(CallConvId::kCDecl), - this->Rt.environment()); - - FuncFrame frame; - frame.init(func); + const auto PointerReg = rax; + const auto L1Addr = rbx; + const auto L2Addr = rcx; + const auto L3Addr = r8; + const auto RamAddr = r9; + const auto L2CountReg = r10; + const auto L3CountReg = r11; + const auto RamCountReg = r12; + const auto TempReg = r13; + const auto TempReg2 = 
rbp; + const auto OffsetReg = r14; + const auto AddrHighReg = r15; + const auto IterReg = mm0; + const auto ShiftReg = std::vector({rdi, rsi, rdx}); + const auto ShiftReg32 = std::vector({edi, esi, edx}); + const auto NbShiftRegs = 3; + const auto MulRegs = 2; + const auto AddRegs = 9; + const auto AltDestRegs = 3; + const auto RamReg = xmm15; + + FuncDetail Func; + Func.init(FuncSignatureT(CallConvId::kCDecl), Rt.environment()); + + FuncFrame Frame; + Frame.init(Func); // make (x|y)mm registers dirty - for (int i = 0; i < 16; i++) { - frame.addDirtyRegs(Ymm(i)); + for (int I = 0; I < 16; I++) { + Frame.addDirtyRegs(Ymm(I)); } - for (int i = 0; i < 8; i++) { - frame.addDirtyRegs(Mm(i)); + for (int I = 0; I < 8; I++) { + Frame.addDirtyRegs(Mm(I)); } // make all other used registers dirty except RAX - frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg, - temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr); - for (const auto& reg : shift_reg) { - frame.addDirtyRegs(reg); + Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg, + AddrHighReg, IterReg, RamAddr); + for (const auto& Reg : ShiftReg) { + Frame.addDirtyRegs(Reg); } - FuncArgsAssignment args(&func); + FuncArgsAssignment Args(&Func); // FIXME: asmjit assigment to mm0 does not seem to be supported - args.assignAll(pointer_reg, addrHigh_reg, temp_reg); - args.updateFuncFrame(frame); - frame.finalize(); + Args.assignAll(PointerReg, AddrHighReg, TempReg); + Args.updateFuncFrame(Frame); + Frame.finalize(); - cb.emitProlog(frame); - cb.emitArgsAssignment(frame, args); + Cb.emitProlog(Frame); + Cb.emitArgsAssignment(Frame, Args); // FIXME: movq from temp_reg to iter_reg - cb.movq(iter_reg, temp_reg); + Cb.movq(IterReg, TempReg); // stop right away if low load is selected - auto FunctionExit = cb.newLabel(); + auto FunctionExit = Cb.newLabel(); - cb.mov(temp_reg, ptr_64(addrHigh_reg)); - 
cb.test(temp_reg, temp_reg); - cb.jz(FunctionExit); + Cb.mov(TempReg, ptr_64(AddrHighReg)); + Cb.test(TempReg, TempReg); + Cb.jz(FunctionExit); - cb.mov(offset_reg, + Cb.mov(OffsetReg, Imm(64)); // increment after each cache/memory access // Initialize registers for shift operations - for (auto const& reg : shift_reg32) { - cb.mov(reg, Imm(0xAAAAAAAA)); + for (auto const& Reg : ShiftReg32) { + Cb.mov(Reg, Imm(0xAAAAAAAA)); } // Initialize AVX-Registers for FMA4 Operations - cb.vmovapd(ymm0, ymmword_ptr(pointer_reg)); - cb.vmovapd(ymm1, ymmword_ptr(pointer_reg)); - auto add_start = mul_regs; - auto add_end = mul_regs + add_regs - 1; - auto trans_start = add_regs + mul_regs; - auto trans_end = add_regs + mul_regs + alt_dst_regs - 1; - for (int i = add_start; i <= trans_end; i++) { - cb.vmovapd(Ymm(i), ymmword_ptr(pointer_reg, 256 + i * 32)); + Cb.vmovapd(ymm0, ymmword_ptr(PointerReg)); + Cb.vmovapd(ymm1, ymmword_ptr(PointerReg)); + auto AddStart = MulRegs; + auto AddEnd = MulRegs + AddRegs - 1; + auto TransStart = AddRegs + MulRegs; + auto TransEnd = AddRegs + MulRegs + AltDestRegs - 1; + for (int I = AddStart; I <= TransEnd; I++) { + Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 256 + I * 32)); } - cb.mov(l1_addr, pointer_reg); // address for L1-buffer - cb.mov(l2_addr, pointer_reg); - cb.add(l2_addr, Imm(l1_size)); // address for L2-buffer - cb.mov(l3_addr, pointer_reg); - cb.add(l3_addr, Imm(l2_size)); // address for L3-buffer - cb.mov(ram_addr, pointer_reg); - cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer - cb.mov(l2_count_reg, Imm(l2_loop_count)); - workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop (" - << l2_size / 1024 << ") KiB"; - cb.mov(l3_count_reg, Imm(l3_loop_count)); - workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop (" - << l3_size / 1024 << ") KiB"; - cb.mov(ram_count_reg, Imm(ram_loop_count)); - workerLog::trace() << "reset 
counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop (" - << ram_size / 1024 << ") KiB"; - - cb.align(AlignMode::kCode, 64); - - auto Loop = cb.newLabel(); - cb.bind(Loop); - - auto shift_pos = 0; - bool left = false; - auto add_dest = add_start + 1; - auto mov_dst = trans_start; - auto mov_src = mov_dst + 1; - unsigned l1_offset = 0; - -#define L1_INCREMENT() \ - l1_offset += 64; \ - if (l1_offset < l1_size * 0.5) { \ - cb.add(l1_addr, offset_reg); \ - } else { \ - l1_offset = 0; \ - cb.mov(l1_addr, pointer_reg); \ - } - -#define L2_INCREMENT() cb.add(l2_addr, offset_reg); - -#define L3_INCREMENT() cb.add(l3_addr, offset_reg) - -#define RAM_INCREMENT() cb.add(ram_addr, offset_reg) - - for (unsigned count = 0; count < repetitions; count++) { - for (const auto& item : sequence) { - if (item == "REG") { - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - cb.vfmaddpd(Xmm(mov_dst), Xmm(mov_dst), xmm1, - Xmm(add_start + (add_dest - add_start + add_regs + 2) % add_regs)); - cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], temp_reg); - mov_dst++; - } else if (item == "L1_L") { - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm1, ymmword_ptr(l1_addr, 32)); - L1_INCREMENT(); - } else if (item == "L1_S") { - cb.vmovapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest)); - cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm0, - Ymm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - L1_INCREMENT(); - } else if (item == "L1_LS") { - cb.vmovapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest)); - cb.vfmaddpd(Ymm(add_dest), Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 32)); - L1_INCREMENT(); - } else if (item == "L2_L") { - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - 
cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm1, xmmword_ptr(l2_addr, 64)); - L2_INCREMENT(); - } else if (item == "L2_S") { - cb.vmovapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest)); - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - L2_INCREMENT(); - } else if (item == "L2_LS") { - cb.vmovapd(xmmword_ptr(l2_addr, 96), Xmm(add_dest)); - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l2_addr, 64)); - L2_INCREMENT(); - } else if (item == "L3_L") { - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm1, xmmword_ptr(l3_addr, 64)); - L3_INCREMENT(); - } else if (item == "L3_S") { - cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - L3_INCREMENT(); - } else if (item == "L3_LS") { - cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l3_addr, 64)); - L3_INCREMENT(); - } else if (item == "L3_P") { - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l1_addr, 32)); - cb.prefetcht2(ptr(l3_addr)); - L3_INCREMENT(); - } else if (item == "RAM_L") { - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - cb.vfmaddpd(ram_reg, ram_reg, xmm1, xmmword_ptr(ram_addr, 64)); - RAM_INCREMENT(); - } else if (item == "RAM_S") { - cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, - Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - RAM_INCREMENT(); - } else if (item == "RAM_LS") { - cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); - cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(ram_addr, 32)); - RAM_INCREMENT(); - } else if (item == "RAM_P") { - 
cb.vfmaddpd(Xmm(add_dest), Xmm(add_dest), xmm0, xmmword_ptr(l1_addr, 32)); - cb.prefetcht2(ptr(ram_addr)); - RAM_INCREMENT(); + Cb.mov(L1Addr, PointerReg); // address for L1-buffer + Cb.mov(L2Addr, PointerReg); + Cb.add(L2Addr, Imm(L1Size)); // address for L2-buffer + Cb.mov(L3Addr, PointerReg); + Cb.add(L3Addr, Imm(L2Size)); // address for L3-buffer + Cb.mov(RamAddr, PointerReg); + Cb.add(RamAddr, Imm(L3Size)); // address for RAM-buffer + Cb.mov(L2CountReg, Imm(L2LoopCount)); + workerLog::trace() << "reset counter for L2-buffer with " << L2LoopCount << " cache line accesses per loop (" + << L2Size / 1024 << ") KiB"; + Cb.mov(L3CountReg, Imm(L3LoopCount)); + workerLog::trace() << "reset counter for L3-buffer with " << L3LoopCount << " cache line accesses per loop (" + << L3Size / 1024 << ") KiB"; + Cb.mov(RamCountReg, Imm(RamLoopCount)); + workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop (" + << RamSize / 1024 << ") KiB"; + + Cb.align(AlignMode::kCode, 64); + + auto Loop = Cb.newLabel(); + Cb.bind(Loop); + + auto ShiftPos = 0; + bool Left = false; + auto AddDest = AddStart + 1; + auto MovDest = TransStart; + auto MovSrc = MovDest + 1; + unsigned L1Offset = 0; + + const auto L1Increment = [&Cb, &L1Offset, &L1Size, &L1Addr, &OffsetReg, &PointerReg]() { + L1Offset += 64; + if (L1Offset < L1Size * 0.5) { + Cb.add(L1Addr, OffsetReg); + } else { + L1Offset = 0; + Cb.mov(L1Addr, PointerReg); + } + }; + const auto L2Increment = [&Cb, &L2Addr, &OffsetReg]() { Cb.add(L2Addr, OffsetReg); }; + const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); }; + const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); }; + + for (unsigned Count = 0; Count < Repetitions; Count++) { + for (const auto& Item : Sequence) { + if (Item == "REG") { + Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs))); + 
Cb.vfmaddpd(Xmm(MovDest), Xmm(MovDest), xmm1, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 2) % AddRegs))); + Cb.xor_(ShiftReg[(ShiftPos + NbShiftRegs - 1) % NbShiftRegs], TempReg); + MovDest++; + } else if (Item == "L1_L") { + Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs))); + Cb.vfmaddpd(Ymm(AddDest), Ymm(AddDest), ymm1, ymmword_ptr(L1Addr, 32)); + L1Increment(); + } else if (Item == "L1_S") { + Cb.vmovapd(xmmword_ptr(L1Addr, 32), Xmm(AddDest)); + Cb.vfmaddpd(Ymm(AddDest), Ymm(AddDest), ymm0, Ymm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs))); + L1Increment(); + } else if (Item == "L1_LS") { + Cb.vmovapd(xmmword_ptr(L1Addr, 64), Xmm(AddDest)); + Cb.vfmaddpd(Ymm(AddDest), Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 32)); + L1Increment(); + } else if (Item == "L2_L") { + Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs))); + Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm1, xmmword_ptr(L2Addr, 64)); + L2Increment(); + } else if (Item == "L2_S") { + Cb.vmovapd(xmmword_ptr(L2Addr, 64), Xmm(AddDest)); + Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs))); + L2Increment(); + } else if (Item == "L2_LS") { + Cb.vmovapd(xmmword_ptr(L2Addr, 96), Xmm(AddDest)); + Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, xmmword_ptr(L2Addr, 64)); + L2Increment(); + } else if (Item == "L3_L") { + Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs))); + Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm1, xmmword_ptr(L3Addr, 64)); + L3Increment(); + } else if (Item == "L3_S") { + Cb.vmovapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest)); + Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs))); + L3Increment(); + } else if (Item == "L3_LS") { + Cb.vmovapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest)); + 
Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, xmmword_ptr(L3Addr, 64)); + L3Increment(); + } else if (Item == "L3_P") { + Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, xmmword_ptr(L1Addr, 32)); + Cb.prefetcht2(ptr(L3Addr)); + L3Increment(); + } else if (Item == "RAM_L") { + Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs))); + Cb.vfmaddpd(RamReg, RamReg, xmm1, xmmword_ptr(RamAddr, 64)); + RamIncrement(); + } else if (Item == "RAM_S") { + Cb.vmovapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest)); + Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, Xmm(AddStart + ((AddDest - AddStart + AddRegs + 1) % AddRegs))); + RamIncrement(); + } else if (Item == "RAM_LS") { + Cb.vmovapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest)); + Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, xmmword_ptr(RamAddr, 32)); + RamIncrement(); + } else if (Item == "RAM_P") { + Cb.vfmaddpd(Xmm(AddDest), Xmm(AddDest), xmm0, xmmword_ptr(L1Addr, 32)); + Cb.prefetcht2(ptr(RamAddr)); + RamIncrement(); } else { - workerLog::error() << "Instruction group " << item << " not found in " << this->name() << "."; + workerLog::error() << "Instruction group " << Item << " not found in " << name() << "."; return EXIT_FAILURE; } - if (left) { - cb.shr(shift_reg32[shift_pos], Imm(1)); + if (Left) { + Cb.shr(ShiftReg32[ShiftPos], Imm(1)); } else { - cb.shl(shift_reg32[shift_pos], Imm(1)); + Cb.shl(ShiftReg32[ShiftPos], Imm(1)); } - add_dest++; - if (add_dest > add_end) { - add_dest = add_start; + AddDest++; + if (AddDest > AddEnd) { + AddDest = AddStart; } - if (mov_dst > trans_end) { - mov_dst = trans_start; + if (MovDest > TransEnd) { + MovDest = TransStart; } - mov_src++; - if (mov_src > trans_end) { - mov_src = trans_start; + MovSrc++; + if (MovSrc > TransEnd) { + MovSrc = TransStart; } - shift_pos++; - if (shift_pos == nr_shift_regs) { - shift_pos = 0; - left = !left; + ShiftPos++; + if (ShiftPos == NbShiftRegs) { + ShiftPos = 0; + Left = !Left; } } } - cb.movq(temp_reg, 
iter_reg); // restore iteration counter - if (this->getRAMSequenceCount(sequence) > 0) { + Cb.movq(TempReg, IterReg); // restore iteration counter + if (getRAMSequenceCount(Sequence) > 0) { // reset RAM counter - auto NoRamReset = cb.newLabel(); - - cb.sub(ram_count_reg, Imm(1)); - cb.jnz(NoRamReset); - cb.mov(ram_count_reg, Imm(ram_loop_count)); - cb.mov(ram_addr, pointer_reg); - cb.add(ram_addr, Imm(l3_size)); - cb.bind(NoRamReset); + auto NoRamReset = Cb.newLabel(); + + Cb.sub(RamCountReg, Imm(1)); + Cb.jnz(NoRamReset); + Cb.mov(RamCountReg, Imm(RamLoopCount)); + Cb.mov(RamAddr, PointerReg); + Cb.add(RamAddr, Imm(L3Size)); + Cb.bind(NoRamReset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - cb.inc(temp_reg); // increment iteration counter - if (this->getL2SequenceCount(sequence) > 0) { + Cb.inc(TempReg); // increment iteration counter + if (getL2SequenceCount(Sequence) > 0) { // reset L2-Cache counter - auto NoL2Reset = cb.newLabel(); - - cb.sub(l2_count_reg, Imm(1)); - cb.jnz(NoL2Reset); - cb.mov(l2_count_reg, Imm(l2_loop_count)); - cb.mov(l2_addr, pointer_reg); - cb.add(l2_addr, Imm(l1_size)); - cb.bind(NoL2Reset); + auto NoL2Reset = Cb.newLabel(); + + Cb.sub(L2CountReg, Imm(1)); + Cb.jnz(NoL2Reset); + Cb.mov(L2CountReg, Imm(L2LoopCount)); + Cb.mov(L2Addr, PointerReg); + Cb.add(L2Addr, Imm(L1Size)); + Cb.bind(NoL2Reset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - cb.movq(iter_reg, temp_reg); // store iteration counter - if (this->getL3SequenceCount(sequence) > 0) { + Cb.movq(IterReg, TempReg); // store iteration counter + if (getL3SequenceCount(Sequence) > 0) { // reset L3-Cache counter - auto NoL3Reset = cb.newLabel(); - - cb.sub(l3_count_reg, Imm(1)); - cb.jnz(NoL3Reset); - cb.mov(l3_count_reg, Imm(l3_loop_count)); - cb.mov(l3_addr, pointer_reg); - cb.add(l3_addr, Imm(l2_size)); - cb.bind(NoL3Reset); + auto NoL3Reset = Cb.newLabel(); + + Cb.sub(L3CountReg, Imm(1)); + 
Cb.jnz(NoL3Reset); + Cb.mov(L3CountReg, Imm(L3LoopCount)); + Cb.mov(L3Addr, PointerReg); + Cb.add(L3Addr, Imm(L2Size)); + Cb.bind(NoL3Reset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - cb.mov(l1_addr, pointer_reg); + Cb.mov(L1Addr, PointerReg); - if (dumpRegisters) { - auto SkipRegistersDump = cb.newLabel(); + if (DumpRegisters) { + auto SkipRegistersDump = Cb.newLabel(); - cb.test(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait)); - cb.jnz(SkipRegistersDump); + Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait)); + Cb.jnz(SkipRegistersDump); // dump all the ymm register - for (int i = 0; i < (int)this->registerCount(); i++) { - cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i)); + for (unsigned I = 0; I < registerCount(); I++) { + Cb.vmovapd(ymmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Ymm(I)); } // set read flag - cb.mov(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait)); + Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait)); - cb.bind(SkipRegistersDump); + Cb.bind(SkipRegistersDump); } - if (errorDetection) { - this->emitErrorDetectionCode(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); + if (ErrorDetection) { + emitErrorDetectionCode(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2); } - cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH)); - cb.jnz(Loop); + Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH)); + Cb.jnz(Loop); - cb.bind(FunctionExit); + Cb.bind(FunctionExit); - cb.movq(rax, iter_reg); + Cb.movq(rax, IterReg); - cb.emitEpilog(frame); + Cb.emitEpilog(Frame); - cb.finalize(); + Cb.finalize(); // String sb; // cb.dump(sb); - Error err = this->Rt.add(&this->LoadFunction, &code); - if (err) { + Error Err = Rt.add(&LoadFunction, &Code); + if (Err) { workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; } // skip if we 
could not determine cache size - if (l1i_cache_size != 0) { - auto loopSize = code.labelOffset(FunctionExit) - code.labelOffset(Loop); - auto instructionCachePercentage = 100 * loopSize / l1i_cache_size; + if (L1iCacheSize != 0) { + auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop); + auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize; - if (loopSize > l1i_cache_size) { + if (LoopSize > L1iCacheSize) { workerLog::warn() << "Work-loop is bigger than the L1i-Cache."; } - workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage + workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage << "%) from the L1i-Cache for the work-loop."; - workerLog::trace() << "Sequence size: " << sequence.size(); - workerLog::trace() << "Repetition count: " << repetitions; + workerLog::trace() << "Sequence size: " << Sequence.size(); + workerLog::trace() << "Repetition count: " << Repetitions; } return EXIT_SUCCESS; } -std::list FMA4Payload::getAvailableInstructions() const { - std::list instructions; +auto FMA4Payload::getAvailableInstructions() const -> std::list { + std::list Instructions; - transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions), - [](const auto& item) { return item.first; }); + transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions), + [](const auto& Item) { return Item.first; }); - return instructions; + return Instructions; } -void FMA4Payload::init(uint64_t* memoryAddr, uint64_t bufferSize) { - X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4); +void FMA4Payload::init(uint64_t* MemoryAddr, uint64_t BufferSize) { + X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4); } + +} // namespace firestarter::environment::x86::payload \ No newline at end of file diff --git 
a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp index ba6534a9..4ecd24ca 100644 --- a/src/firestarter/Environment/X86/Payload/FMAPayload.cpp +++ b/src/firestarter/Environment/X86/Payload/FMAPayload.cpp @@ -20,438 +20,433 @@ *****************************************************************************/ #include -#include -#include -#include +namespace firestarter::environment::x86::payload { -using namespace firestarter::environment::x86::payload; -using namespace asmjit; -using namespace asmjit::x86; +auto FMAPayload::compilePayload(std::vector> const& Proportion, + unsigned InstructionCacheSize, std::list const& DataCacheBufferSize, + unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters, + bool ErrorDetection) -> int { + using namespace asmjit; + using namespace asmjit::x86; -int FMAPayload::compilePayload(std::vector> const& proportion, - unsigned instructionCacheSize, std::list const& dataCacheBufferSize, - unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) { // Compute the sequence of instruction groups and the number of its repetions // to reach the desired size - auto sequence = this->generateSequence(proportion); - auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); + auto Sequence = generateSequence(Proportion); + auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread); // compute count of flops and memory access for performance report - unsigned flops = 0; - unsigned bytes = 0; + Flops = 0; + Bytes = 0; - for (const auto& item : sequence) { - auto it = this->InstructionFlops.find(item); + for (const auto& Item : Sequence) { + auto It = InstructionFlops.find(Item); - if (it == this->InstructionFlops.end()) { - workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; + if (It == InstructionFlops.end()) { + 
workerLog::error() << "Instruction group " << Item << " undefined in " << name() << "."; return EXIT_FAILURE; } - flops += it->second; + Flops += It->second; - it = this->InstructionMemory.find(item); + It = InstructionMemory.find(Item); - if (it != this->InstructionMemory.end()) { - bytes += it->second; + if (It != InstructionMemory.end()) { + Bytes += It->second; } } - this->Flops = repetitions * flops; - this->Bytes = repetitions * bytes; - this->Instructions = repetitions * sequence.size() * 4 + 6; + Flops *= Repetitions; + Bytes *= Repetitions; + Instructions = Repetitions * Sequence.size() * 4 + 6; // calculate the buffer sizes - auto l1i_cache_size = instructionCacheSize / thread; - auto dataCacheBufferSizeIterator = dataCacheBufferSize.begin(); - auto l1_size = *dataCacheBufferSizeIterator / thread; - std::advance(dataCacheBufferSizeIterator, 1); - auto l2_size = *dataCacheBufferSizeIterator / thread; - std::advance(dataCacheBufferSizeIterator, 1); - auto l3_size = *dataCacheBufferSizeIterator / thread; - auto ram_size = ramBufferSize / thread; + const auto L1iCacheSize = InstructionCacheSize / Thread; + auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin(); + const auto L1Size = *DataCacheBufferSizeIterator / Thread; + std::advance(DataCacheBufferSizeIterator, 1); + const auto L2Size = *DataCacheBufferSizeIterator / Thread; + std::advance(DataCacheBufferSizeIterator, 1); + const auto L3Size = *DataCacheBufferSizeIterator / Thread; + const auto RamSize = RamBufferSize / Thread; // calculate the reset counters for the buffers - auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); - auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); - auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); + const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread); + const auto L3LoopCount = getL3LoopCount(Sequence, 
NumberOfLines, L3Size * Thread, Thread); + const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread); - CodeHolder code; - code.init(this->Rt.environment()); + CodeHolder Code; + Code.init(Rt.environment()); - if (nullptr != this->LoadFunction) { - this->Rt.release(&this->LoadFunction); + if (nullptr != LoadFunction) { + Rt.release(&LoadFunction); } - Builder cb(&code); - cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | + Builder Cb(&Code); + Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | asmjit::DiagnosticOptions::kValidateIntermediate); - auto pointer_reg = rax; - auto l1_addr = rbx; - auto l2_addr = rcx; - auto l3_addr = r8; - auto ram_addr = r9; - auto l2_count_reg = r10; - auto l3_count_reg = r11; - auto ram_count_reg = r12; - auto temp_reg = r13; - auto temp_reg2 = rbp; - auto offset_reg = r14; - auto addrHigh_reg = r15; - auto iter_reg = mm0; - auto shift_reg = std::vector({rdi, rsi, rdx}); - auto shift_reg32 = std::vector({edi, esi, edx}); - auto nr_shift_regs = 3; - auto mul_regs = 3; - auto add_regs = 9; - auto alt_dst_regs = 3; - auto ram_reg = ymm15; - - FuncDetail func; - func.init(FuncSignatureT(CallConvId::kCDecl), - this->Rt.environment()); - - FuncFrame frame; - frame.init(func); + const auto PointerReg = rax; + const auto L1Addr = rbx; + const auto L2Addr = rcx; + const auto L3Addr = r8; + const auto RamAddr = r9; + const auto L2CountReg = r10; + const auto L3CountReg = r11; + const auto RamCountReg = r12; + const auto TempReg = r13; + const auto TempReg2 = rbp; + const auto OffsetReg = r14; + const auto AddrHighReg = r15; + const auto IterReg = mm0; + const auto ShiftRegs = std::vector({rdi, rsi, rdx}); + const auto ShiftRegs32 = std::vector({edi, esi, edx}); + const auto NbShiftRegs = 3; + const auto MulRegs = 3; + const auto AddRegs = 9; + const auto AltDestRegs = 3; + const auto RamReg = ymm15; + + FuncDetail Func; + Func.init(FuncSignatureT(CallConvId::kCDecl), 
Rt.environment()); + + FuncFrame Frame; + Frame.init(Func); // make (x|y)mm registers dirty - for (int i = 0; i < 16; i++) { - frame.addDirtyRegs(Ymm(i)); + for (int I = 0; I < 16; I++) { + Frame.addDirtyRegs(Ymm(I)); } - for (int i = 0; i < 8; i++) { - frame.addDirtyRegs(Mm(i)); + for (int I = 0; I < 8; I++) { + Frame.addDirtyRegs(Mm(I)); } // make all other used registers dirty except RAX - frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg, - temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr); - for (const auto& reg : shift_reg) { - frame.addDirtyRegs(reg); + Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg, + AddrHighReg, IterReg, RamAddr); + for (const auto& Reg : ShiftRegs) { + Frame.addDirtyRegs(Reg); } - FuncArgsAssignment args(&func); + FuncArgsAssignment Args(&Func); // FIXME: asmjit assigment to mm0 does not seem to be supported - args.assignAll(pointer_reg, addrHigh_reg, temp_reg); - args.updateFuncFrame(frame); - frame.finalize(); + Args.assignAll(PointerReg, AddrHighReg, TempReg); + Args.updateFuncFrame(Frame); + Frame.finalize(); - cb.emitProlog(frame); - cb.emitArgsAssignment(frame, args); + Cb.emitProlog(Frame); + Cb.emitArgsAssignment(Frame, Args); // FIXME: movq from temp_reg to iter_reg - cb.movq(iter_reg, temp_reg); + Cb.movq(IterReg, TempReg); // stop right away if low load is selected - auto FunctionExit = cb.newLabel(); + auto FunctionExit = Cb.newLabel(); - cb.mov(temp_reg, ptr_64(addrHigh_reg)); - cb.test(temp_reg, temp_reg); - cb.jz(FunctionExit); + Cb.mov(TempReg, ptr_64(AddrHighReg)); + Cb.test(TempReg, TempReg); + Cb.jz(FunctionExit); - cb.mov(offset_reg, + Cb.mov(OffsetReg, Imm(64)); // increment after each cache/memory access // Initialize registers for shift operations - for (auto const& reg : shift_reg32) { - cb.mov(reg, Imm(0xAAAAAAAA)); + for (auto const& Reg : ShiftRegs32) { + Cb.mov(Reg, 
Imm(0xAAAAAAAA)); } // Initialize AVX-Registers for FMA Operations - cb.vmovapd(ymm0, ymmword_ptr(pointer_reg)); - cb.vmovapd(ymm1, ymmword_ptr(pointer_reg, 32)); - cb.vmovapd(ymm2, ymmword_ptr(pointer_reg, 64)); - auto add_start = mul_regs; - auto add_end = mul_regs + add_regs - 1; - auto trans_start = add_regs + mul_regs; - auto trans_end = add_regs + mul_regs + alt_dst_regs - 1; - for (int i = add_start; i <= trans_end; i++) { - cb.vmovapd(Ymm(i), ymmword_ptr(pointer_reg, 256 + i * 32)); + Cb.vmovapd(ymm0, ymmword_ptr(PointerReg)); + Cb.vmovapd(ymm1, ymmword_ptr(PointerReg, 32)); + Cb.vmovapd(ymm2, ymmword_ptr(PointerReg, 64)); + auto AddStart = MulRegs; + auto AddEnd = MulRegs + AddRegs - 1; + auto TransStart = AddRegs + MulRegs; + auto TransEnd = AddRegs + MulRegs + AltDestRegs - 1; + for (int I = AddStart; I <= TransEnd; I++) { + Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 256 + (I * 32))); } - cb.mov(l1_addr, pointer_reg); // address for L1-buffer - cb.mov(l2_addr, pointer_reg); - cb.add(l2_addr, Imm(l1_size)); // address for L2-buffer - cb.mov(l3_addr, pointer_reg); - cb.add(l3_addr, Imm(l2_size)); // address for L3-buffer - cb.mov(ram_addr, pointer_reg); - cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer - cb.mov(l2_count_reg, Imm(l2_loop_count)); - workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop (" - << l2_size / 1024 << ") KiB"; - cb.mov(l3_count_reg, Imm(l3_loop_count)); - workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop (" - << l3_size / 1024 << ") KiB"; - cb.mov(ram_count_reg, Imm(ram_loop_count)); - workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop (" - << ram_size / 1024 << ") KiB"; - - cb.align(AlignMode::kCode, 64); - - auto Loop = cb.newLabel(); - cb.bind(Loop); - - auto shift_pos = 0; - bool left = false; - auto add_dest = add_start + 1; - auto mov_dst = 
trans_start; - auto mov_src = mov_dst + 1; - unsigned l1_offset = 0; - -#define L1_INCREMENT_TIMES(n) \ - l1_offset += n * 64; \ - if (l1_offset < l1_size * 0.5) { \ - cb.add(l1_addr, offset_reg); \ - } else { \ - l1_offset = 0; \ - cb.mov(l1_addr, pointer_reg); \ - } - -#define L1_INCREMENT() L1_INCREMENT_TIMES(1) - -#define L2_INCREMENT_TIMES(n) \ - if (n == 1) { \ - cb.add(l2_addr, offset_reg); \ - } else { \ - cb.add(l2_addr, n * 64); \ - } - -#define L2_INCREMENT() L2_INCREMENT_TIMES(1) - -#define L3_INCREMENT() cb.add(l3_addr, offset_reg) - -#define RAM_INCREMENT() cb.add(ram_addr, offset_reg) - - for (unsigned count = 0; count < repetitions; count++) { - for (const auto& item : sequence) { - if (item == "REG") { - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2); - cb.vfmadd231pd(Ymm(mov_dst), ymm2, ymm1); - cb.xor_(shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs], temp_reg); - mov_dst++; - } else if (item == "L1_L") { - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2); - cb.vfmadd231pd(Ymm(add_dest), ymm1, ymmword_ptr(l1_addr, 32)); - L1_INCREMENT(); - } else if (item == "L1_2L") { - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 32)); - cb.vfmadd231pd(Ymm(mov_dst), ymm1, ymmword_ptr(l1_addr, 64)); - L1_INCREMENT(); - } else if (item == "L1_S") { - cb.vmovapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest)); - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2); - L1_INCREMENT(); - } else if (item == "L1_LS") { - cb.vmovapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest)); - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 32)); - L1_INCREMENT(); - } else if (item == "L1_LS_256") { - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 64)); - cb.vmovapd(ymmword_ptr(l1_addr, 32), Ymm(add_dest)); - L1_INCREMENT(); - } else if (item == "L1_2LS_256") { - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 64)); - cb.vfmadd231pd(Ymm(mov_dst), ymm1, ymmword_ptr(l1_addr, 96)); - cb.vmovapd(ymmword_ptr(l1_addr, 32), Ymm(add_dest)); - L1_INCREMENT_TIMES(2); - } else 
if (item == "L2_L") { - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2); - cb.vfmadd231pd(Ymm(add_dest), ymm1, ymmword_ptr(l2_addr, 64)); - L2_INCREMENT(); - } else if (item == "L2_S") { - cb.vmovapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest)); - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2); - L2_INCREMENT(); - } else if (item == "L2_LS") { - cb.vmovapd(xmmword_ptr(l2_addr, 96), Xmm(add_dest)); - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l2_addr, 64)); - L2_INCREMENT(); - } else if (item == "L2_LS_256") { - cb.vmovapd(ymmword_ptr(l2_addr, 96), Ymm(add_dest)); - cb.vfmadd231pd(Ymm(add_dest), ymm0, ptr(l2_addr, 64)); - L2_INCREMENT(); - } else if (item == "L2_2LS_256") { - cb.vfmadd231pd(Ymm(add_dest), ymm0, ptr(l2_addr, 64)); - cb.vfmadd231pd(Ymm(mov_dst), ymm1, ptr(l2_addr, 96)); - cb.vmovapd(ymmword_ptr(l2_addr, 32), Ymm(add_dest)); - L2_INCREMENT_TIMES(2); - } else if (item == "L3_L") { - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2); - cb.vfmadd231pd(Ymm(add_dest), ymm1, ymmword_ptr(l3_addr, 64)); - L3_INCREMENT(); - } else if (item == "L3_S") { - cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2); - L3_INCREMENT(); - } else if (item == "L3_LS") { - cb.vmovapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l3_addr, 64)); - L3_INCREMENT(); - } else if (item == "L3_LS_256") { - cb.vmovapd(ymmword_ptr(l3_addr, 96), Ymm(add_dest)); - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l3_addr, 64)); - L3_INCREMENT(); - } else if (item == "L3_P") { - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 32)); - cb.prefetcht2(ptr(l3_addr)); - L3_INCREMENT(); - } else if (item == "RAM_L") { - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2); - cb.vfmadd231pd(ram_reg, ymm1, ymmword_ptr(ram_addr, 64)); - RAM_INCREMENT(); - } else if (item == "RAM_S") { - cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymm2); - RAM_INCREMENT(); - } else if (item == 
"RAM_LS") { - cb.vmovapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(ram_addr, 32)); - RAM_INCREMENT(); - } else if (item == "RAM_P") { - cb.vfmadd231pd(Ymm(add_dest), ymm0, ymmword_ptr(l1_addr, 32)); - cb.prefetcht2(ptr(ram_addr)); - RAM_INCREMENT(); + Cb.mov(L1Addr, PointerReg); // address for L1-buffer + Cb.mov(L2Addr, PointerReg); + Cb.add(L2Addr, Imm(L1Size)); // address for L2-buffer + Cb.mov(L3Addr, PointerReg); + Cb.add(L3Addr, Imm(L2Size)); // address for L3-buffer + Cb.mov(RamAddr, PointerReg); + Cb.add(RamAddr, Imm(L3Size)); // address for RAM-buffer + Cb.mov(L2CountReg, Imm(L2LoopCount)); + workerLog::trace() << "reset counter for L2-buffer with " << L2LoopCount << " cache line accesses per loop (" + << L2Size / 1024 << ") KiB"; + Cb.mov(L3CountReg, Imm(L3LoopCount)); + workerLog::trace() << "reset counter for L3-buffer with " << L3LoopCount << " cache line accesses per loop (" + << L3Size / 1024 << ") KiB"; + Cb.mov(RamCountReg, Imm(RamLoopCount)); + workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop (" + << RamSize / 1024 << ") KiB"; + + Cb.align(AlignMode::kCode, 64); + + auto Loop = Cb.newLabel(); + Cb.bind(Loop); + + auto ShiftPos = 0; + bool Left = false; + auto AddDest = AddStart + 1; + auto MovDest = TransStart; + auto MovSrc = MovDest + 1; + unsigned L1Offset = 0; + + const auto L1IncrementTimes = [&Cb, &L1Offset, &L1Size, &L1Addr, &OffsetReg, &PointerReg](unsigned Times) { + L1Offset += Times * 64; + if (L1Offset < L1Size * 0.5) { + Cb.add(L1Addr, OffsetReg); + } else { + L1Offset = 0; + Cb.mov(L1Addr, PointerReg); + } + }; + const auto L1Increment = [&L1IncrementTimes] { L1IncrementTimes(1); }; + const auto L2IncrementTimes = [&Cb, &L2Addr, &OffsetReg](unsigned Times) { + if (Times == 1) { + Cb.add(L2Addr, OffsetReg); + } else { + Cb.add(L2Addr, Times * 64); + } + }; + const auto L2Increment = [&L2IncrementTimes] { L2IncrementTimes(1); 
}; + const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); }; + const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); }; + + for (unsigned Count = 0; Count < Repetitions; Count++) { + for (const auto& Item : Sequence) { + if (Item == "REG") { + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2); + Cb.vfmadd231pd(Ymm(MovDest), ymm2, ymm1); + Cb.xor_(ShiftRegs[(ShiftPos + NbShiftRegs - 1) % NbShiftRegs], TempReg); + MovDest++; + } else if (Item == "L1_L") { + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2); + Cb.vfmadd231pd(Ymm(AddDest), ymm1, ymmword_ptr(L1Addr, 32)); + L1Increment(); + } else if (Item == "L1_2L") { + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 32)); + Cb.vfmadd231pd(Ymm(MovDest), ymm1, ymmword_ptr(L1Addr, 64)); + L1Increment(); + } else if (Item == "L1_S") { + Cb.vmovapd(xmmword_ptr(L1Addr, 32), Xmm(AddDest)); + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2); + L1Increment(); + } else if (Item == "L1_LS") { + Cb.vmovapd(xmmword_ptr(L1Addr, 64), Xmm(AddDest)); + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 32)); + L1Increment(); + } else if (Item == "L1_LS_256") { + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 64)); + Cb.vmovapd(ymmword_ptr(L1Addr, 32), Ymm(AddDest)); + L1Increment(); + } else if (Item == "L1_2LS_256") { + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 64)); + Cb.vfmadd231pd(Ymm(MovDest), ymm1, ymmword_ptr(L1Addr, 96)); + Cb.vmovapd(ymmword_ptr(L1Addr, 32), Ymm(AddDest)); + L1IncrementTimes(2); + } else if (Item == "L2_L") { + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2); + Cb.vfmadd231pd(Ymm(AddDest), ymm1, ymmword_ptr(L2Addr, 64)); + L2Increment(); + } else if (Item == "L2_S") { + Cb.vmovapd(xmmword_ptr(L2Addr, 64), Xmm(AddDest)); + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2); + L2Increment(); + } else if (Item == "L2_LS") { + Cb.vmovapd(xmmword_ptr(L2Addr, 96), Xmm(AddDest)); + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L2Addr, 64)); + L2Increment(); + } 
else if (Item == "L2_LS_256") { + Cb.vmovapd(ymmword_ptr(L2Addr, 96), Ymm(AddDest)); + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ptr(L2Addr, 64)); + L2Increment(); + } else if (Item == "L2_2LS_256") { + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ptr(L2Addr, 64)); + Cb.vfmadd231pd(Ymm(MovDest), ymm1, ptr(L2Addr, 96)); + Cb.vmovapd(ymmword_ptr(L2Addr, 32), Ymm(AddDest)); + L2IncrementTimes(2); + } else if (Item == "L3_L") { + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2); + Cb.vfmadd231pd(Ymm(AddDest), ymm1, ymmword_ptr(L3Addr, 64)); + L3Increment(); + } else if (Item == "L3_S") { + Cb.vmovapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest)); + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2); + L3Increment(); + } else if (Item == "L3_LS") { + Cb.vmovapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest)); + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L3Addr, 64)); + L3Increment(); + } else if (Item == "L3_LS_256") { + Cb.vmovapd(ymmword_ptr(L3Addr, 96), Ymm(AddDest)); + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L3Addr, 64)); + L3Increment(); + } else if (Item == "L3_P") { + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 32)); + Cb.prefetcht2(ptr(L3Addr)); + L3Increment(); + } else if (Item == "RAM_L") { + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2); + Cb.vfmadd231pd(RamReg, ymm1, ymmword_ptr(RamAddr, 64)); + RamIncrement(); + } else if (Item == "RAM_S") { + Cb.vmovapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest)); + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymm2); + RamIncrement(); + } else if (Item == "RAM_LS") { + Cb.vmovapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest)); + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(RamAddr, 32)); + RamIncrement(); + } else if (Item == "RAM_P") { + Cb.vfmadd231pd(Ymm(AddDest), ymm0, ymmword_ptr(L1Addr, 32)); + Cb.prefetcht2(ptr(RamAddr)); + RamIncrement(); } else { - workerLog::error() << "Instruction group " << item << " not found in " << this->name() << "."; + workerLog::error() << "Instruction group " << Item << " not found in " << name() << "."; return EXIT_FAILURE; } - if (item 
!= "L1_2LS_256" && item != "L2_2LS_256") { - if (left) { - cb.shr(shift_reg32[shift_pos], Imm(1)); + if (Item != "L1_2LS_256" && Item != "L2_2LS_256") { + if (Left) { + Cb.shr(ShiftRegs32[ShiftPos], Imm(1)); } else { - cb.shl(shift_reg32[shift_pos], Imm(1)); + Cb.shl(ShiftRegs32[ShiftPos], Imm(1)); } } - add_dest++; - if (add_dest > add_end) { - add_dest = add_start; + AddDest++; + if (AddDest > AddEnd) { + AddDest = AddStart; } - if (mov_dst > trans_end) { - mov_dst = trans_start; + if (MovDest > TransEnd) { + MovDest = TransStart; } - mov_src++; - if (mov_src > trans_end) { - mov_src = trans_start; + MovSrc++; + if (MovSrc > TransEnd) { + MovSrc = TransStart; } - shift_pos++; - if (shift_pos == nr_shift_regs) { - shift_pos = 0; - left = !left; + ShiftPos++; + if (ShiftPos == NbShiftRegs) { + ShiftPos = 0; + Left = !Left; } } } - cb.movq(temp_reg, iter_reg); // restore iteration counter - if (this->getRAMSequenceCount(sequence) > 0) { + Cb.movq(TempReg, IterReg); // restore iteration counter + if (getRAMSequenceCount(Sequence) > 0) { // reset RAM counter - auto NoRamReset = cb.newLabel(); - - cb.sub(ram_count_reg, Imm(1)); - cb.jnz(NoRamReset); - cb.mov(ram_count_reg, Imm(ram_loop_count)); - cb.mov(ram_addr, pointer_reg); - cb.add(ram_addr, Imm(l3_size)); - cb.bind(NoRamReset); + auto NoRamReset = Cb.newLabel(); + + Cb.sub(RamCountReg, Imm(1)); + Cb.jnz(NoRamReset); + Cb.mov(RamCountReg, Imm(RamLoopCount)); + Cb.mov(RamAddr, PointerReg); + Cb.add(RamAddr, Imm(L3Size)); + Cb.bind(NoRamReset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - cb.inc(temp_reg); // increment iteration counter - if (this->getL2SequenceCount(sequence) > 0) { + Cb.inc(TempReg); // increment iteration counter + if (getL2SequenceCount(Sequence) > 0) { // reset L2-Cache counter - auto NoL2Reset = cb.newLabel(); - - cb.sub(l2_count_reg, Imm(1)); - cb.jnz(NoL2Reset); - cb.mov(l2_count_reg, Imm(l2_loop_count)); - cb.mov(l2_addr, pointer_reg); - cb.add(l2_addr, 
Imm(l1_size)); - cb.bind(NoL2Reset); + auto NoL2Reset = Cb.newLabel(); + + Cb.sub(L2CountReg, Imm(1)); + Cb.jnz(NoL2Reset); + Cb.mov(L2CountReg, Imm(L2LoopCount)); + Cb.mov(L2Addr, PointerReg); + Cb.add(L2Addr, Imm(L1Size)); + Cb.bind(NoL2Reset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - cb.movq(iter_reg, temp_reg); // store iteration counter - if (this->getL3SequenceCount(sequence) > 0) { + Cb.movq(IterReg, TempReg); // store iteration counter + if (getL3SequenceCount(Sequence) > 0) { // reset L3-Cache counter - auto NoL3Reset = cb.newLabel(); - - cb.sub(l3_count_reg, Imm(1)); - cb.jnz(NoL3Reset); - cb.mov(l3_count_reg, Imm(l3_loop_count)); - cb.mov(l3_addr, pointer_reg); - cb.add(l3_addr, Imm(l2_size)); - cb.bind(NoL3Reset); + auto NoL3Reset = Cb.newLabel(); + + Cb.sub(L3CountReg, Imm(1)); + Cb.jnz(NoL3Reset); + Cb.mov(L3CountReg, Imm(L3LoopCount)); + Cb.mov(L3Addr, PointerReg); + Cb.add(L3Addr, Imm(L2Size)); + Cb.bind(NoL3Reset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - cb.mov(l1_addr, pointer_reg); + Cb.mov(L1Addr, PointerReg); - if (dumpRegisters) { - auto SkipRegistersDump = cb.newLabel(); + if (DumpRegisters) { + auto SkipRegistersDump = Cb.newLabel(); - cb.test(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait)); - cb.jnz(SkipRegistersDump); + Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait)); + Cb.jnz(SkipRegistersDump); // dump all the ymm register - for (int i = 0; i < (int)this->registerCount(); i++) { - cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i)); + for (unsigned I = 0; I < registerCount(); I++) { + Cb.vmovapd(ymmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Ymm(I)); } // set read flag - cb.mov(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait)); + Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait)); - cb.bind(SkipRegistersDump); + 
Cb.bind(SkipRegistersDump); } - if (errorDetection) { - this->emitErrorDetectionCode(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); + if (ErrorDetection) { + emitErrorDetectionCode(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2); } - cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH)); - cb.jnz(Loop); + Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH)); + Cb.jnz(Loop); - cb.bind(FunctionExit); + Cb.bind(FunctionExit); - cb.movq(rax, iter_reg); + Cb.movq(rax, IterReg); - cb.emitEpilog(frame); + Cb.emitEpilog(Frame); - cb.finalize(); + Cb.finalize(); // String sb; // cb.dump(sb); - Error err = this->Rt.add(&this->LoadFunction, &code); - if (err) { + Error Err = Rt.add(&LoadFunction, &Code); + if (Err) { workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; } // skip if we could not determine cache size - if (l1i_cache_size != 0) { - auto loopSize = code.labelOffset(FunctionExit) - code.labelOffset(Loop); - auto instructionCachePercentage = 100 * loopSize / l1i_cache_size; + if (L1iCacheSize != 0) { + auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop); + auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize; - if (loopSize > l1i_cache_size) { + if (LoopSize > L1iCacheSize) { workerLog::warn() << "Work-loop is bigger than the L1i-Cache."; } - workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage + workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage << "%) from the L1i-Cache for the work-loop."; - workerLog::trace() << "Sequence size: " << sequence.size(); - workerLog::trace() << "Repetition count: " << repetitions; + workerLog::trace() << "Sequence size: " << Sequence.size(); + workerLog::trace() << "Repetition count: " << Repetitions; } return EXIT_SUCCESS; } -std::list FMAPayload::getAvailableInstructions() const { - std::list 
instructions; +auto FMAPayload::getAvailableInstructions() const -> std::list { + std::list Instructions; - transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions), - [](const auto& item) { return item.first; }); + transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions), + [](const auto& Item) { return Item.first; }); - return instructions; + return Instructions; } -void FMAPayload::init(uint64_t* memoryAddr, uint64_t bufferSize) { - X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4); +void FMAPayload::init(uint64_t* MemoryAddr, uint64_t BufferSize) { + X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4); } + +} // namespace firestarter::environment::x86::payload \ No newline at end of file diff --git a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp index 60a98ef1..202cd423 100644 --- a/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/SSE2Payload.cpp @@ -20,423 +20,420 @@ *****************************************************************************/ #include -#include -#include -#include +namespace firestarter::environment::x86::payload { -using namespace firestarter::environment::x86::payload; -using namespace asmjit; -using namespace asmjit::x86; +auto SSE2Payload::compilePayload(std::vector> const& Proportion, + unsigned InstructionCacheSize, std::list const& DataCacheBufferSize, + unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters, + bool ErrorDetection) -> int { + using namespace asmjit; + using namespace asmjit::x86; -int SSE2Payload::compilePayload(std::vector> const& proportion, - unsigned instructionCacheSize, std::list const& dataCacheBufferSize, - unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) { // Compute the sequence 
of instruction groups and the number of its repetions // to reach the desired size - auto sequence = this->generateSequence(proportion); - auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); + auto Sequence = generateSequence(Proportion); + auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread); // compute count of flops and memory access for performance report - unsigned flops = 0; - unsigned bytes = 0; + Flops = 0; + Bytes = 0; - for (const auto& item : sequence) { - auto it = this->InstructionFlops.find(item); + for (const auto& Item : Sequence) { + auto It = InstructionFlops.find(Item); - if (it == this->InstructionFlops.end()) { - workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; + if (It == InstructionFlops.end()) { + workerLog::error() << "Instruction group " << Item << " undefined in " << name() << "."; return EXIT_FAILURE; } - flops += it->second; + Flops += It->second; - it = this->InstructionMemory.find(item); + It = InstructionMemory.find(Item); - if (it != this->InstructionMemory.end()) { - bytes += it->second; + if (It != InstructionMemory.end()) { + Bytes += It->second; } } - this->Flops = repetitions * flops; - this->Bytes = repetitions * bytes; - this->Instructions = repetitions * sequence.size() * 2 + 4; + Flops *= Repetitions; + Bytes *= Repetitions; + Instructions = Repetitions * Sequence.size() * 2 + 4; // calculate the buffer sizes - auto l1i_cache_size = instructionCacheSize / thread; - auto dataCacheBufferSizeIterator = dataCacheBufferSize.begin(); - auto l1_size = *dataCacheBufferSizeIterator / thread; - std::advance(dataCacheBufferSizeIterator, 1); - auto l2_size = *dataCacheBufferSizeIterator / thread; - std::advance(dataCacheBufferSizeIterator, 1); - auto l3_size = *dataCacheBufferSizeIterator / thread; - auto ram_size = ramBufferSize / thread; + const auto L1iCacheSize = InstructionCacheSize / Thread; + auto 
DataCacheBufferSizeIterator = DataCacheBufferSize.begin(); + const auto L1Size = *DataCacheBufferSizeIterator / Thread; + std::advance(DataCacheBufferSizeIterator, 1); + const auto L2Size = *DataCacheBufferSizeIterator / Thread; + std::advance(DataCacheBufferSizeIterator, 1); + const auto L3Size = *DataCacheBufferSizeIterator / Thread; + const auto RamSize = RamBufferSize / Thread; // calculate the reset counters for the buffers - auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); - auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); - auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); + const auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread); + const auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread); + const auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread); - CodeHolder code; - code.init(this->Rt.environment()); + CodeHolder Code; + Code.init(Rt.environment()); - if (nullptr != this->LoadFunction) { - this->Rt.release(&this->LoadFunction); + if (nullptr != LoadFunction) { + Rt.release(&LoadFunction); } - Builder cb(&code); - cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | + Builder Cb(&Code); + Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | asmjit::DiagnosticOptions::kValidateIntermediate); - auto pointer_reg = rax; - auto l1_addr = rbx; - auto l2_addr = rcx; - auto l3_addr = rdx; - auto ram_addr = rdi; - auto l2_count_reg = r8; - auto l3_count_reg = r9; - auto ram_count_reg = r10; - auto temp_reg = r11; - auto temp_reg2 = rbp; - auto offset_reg = r12; - auto addrHigh_reg = r13; - auto iter_reg = r14; - auto mov_regs = 0; - auto add_regs = 14; - auto trans_regs = 2; - - FuncDetail func; - func.init(FuncSignatureT(CallConvId::kCDecl), this->Rt.environment()); - - FuncFrame frame; - frame.init(func); + 
const auto PointerReg = rax; + const auto L1Addr = rbx; + const auto L2Addr = rcx; + const auto L3Addr = rdx; + const auto RamAddr = rdi; + const auto L2CountReg = r8; + const auto L3CountReg = r9; + const auto RamCountReg = r10; + const auto TempReg = r11; + const auto TempReg2 = rbp; + const auto OffsetReg = r12; + const auto AddrHighReg = r13; + const auto IterReg = r14; + const auto MovRegs = 0; + const auto AddRegs = 14; + const auto TransRegs = 2; + + FuncDetail Func; + Func.init(FuncSignatureT(CallConvId::kCDecl), Rt.environment()); + + FuncFrame Frame; + Frame.init(Func); // make xmm registers dirty - for (int i = 0; i < 16; i++) { - frame.addDirtyRegs(Xmm(i)); + for (int I = 0; I < 16; I++) { + Frame.addDirtyRegs(Xmm(I)); } // make mmx registers dirty - for (int i = 0; i < 8; i++) { - frame.addDirtyRegs(Mm(i)); + for (int I = 0; I < 8; I++) { + Frame.addDirtyRegs(Mm(I)); } // make all other used registers dirty except RAX - frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg, - temp_reg2, offset_reg, addrHigh_reg, iter_reg); + Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg, + AddrHighReg, IterReg); - FuncArgsAssignment args(&func); - args.assignAll(pointer_reg, addrHigh_reg, iter_reg); - args.updateFuncFrame(frame); - frame.finalize(); + FuncArgsAssignment Args(&Func); + Args.assignAll(PointerReg, AddrHighReg, IterReg); + Args.updateFuncFrame(Frame); + Frame.finalize(); - cb.emitProlog(frame); - cb.emitArgsAssignment(frame, args); + Cb.emitProlog(Frame); + Cb.emitArgsAssignment(Frame, Args); // stop right away if low load is selected - auto FunctionExit = cb.newLabel(); + auto FunctionExit = Cb.newLabel(); - cb.mov(temp_reg, ptr_64(addrHigh_reg)); - cb.test(temp_reg, temp_reg); - cb.jz(FunctionExit); + Cb.mov(TempReg, ptr_64(AddrHighReg)); + Cb.test(TempReg, TempReg); + Cb.jz(FunctionExit); - cb.mov(offset_reg, + Cb.mov(OffsetReg, 
Imm(64)); // increment after each cache/memory access // Initialize SSE-Registers for Addition - auto add_start = 0; - auto add_end = add_regs - 1; - auto trans_start = add_regs; - auto trans_end = add_regs + trans_regs - 1; - if (add_regs > 0) { - for (int i = add_start; i <= add_end; i++) { - cb.movapd(Xmm(i), xmmword_ptr(pointer_reg, 32 * i)); + const auto AddStart = 0; + const auto AddEnd = AddRegs - 1; + const auto TransStart = AddRegs; + const auto TransEnd = AddRegs + TransRegs - 1; + if (AddRegs > 0) { + for (int I = AddStart; I <= AddEnd; I++) { + Cb.movapd(Xmm(I), xmmword_ptr(PointerReg, 32 * I)); } } // Initialize MMX-Registers for shift operations - auto mov_start = 0; - auto mov_end = mov_regs - 1; - if (mov_regs > 0) { - cb.mov(temp_reg, Imm(0x5555555555555555)); - cb.movq(Mm(mov_start), temp_reg); - for (int i = mov_start + 1; i <= mov_end; i++) { - cb.movq(Mm(i), Mm(mov_start)); + const auto MovStart = 0; + const auto MovEnd = MovRegs - 1; + if (MovRegs > 0) { + Cb.mov(TempReg, Imm(0x5555555555555555)); + Cb.movq(Mm(MovStart), TempReg); + for (int I = MovStart + 1; I <= MovEnd; I++) { + Cb.movq(Mm(I), Mm(MovStart)); } } // Initialize SSE-Registers for Transfer-Operations - if (trans_regs > 0) { - if (trans_start % 2 == 0) { - cb.mov(temp_reg, Imm(0x0F0F0F0F0F0F0F0F)); + if (TransRegs > 0) { + if (TransStart % 2 == 0) { + Cb.mov(TempReg, Imm(0x0F0F0F0F0F0F0F0F)); } else { - cb.mov(temp_reg, Imm(0xF0F0F0F0F0F0F0F0)); + Cb.mov(TempReg, Imm(0xF0F0F0F0F0F0F0F0)); } - cb.pinsrq(Xmm(trans_start), temp_reg, Imm(0)); - cb.pinsrq(Xmm(trans_start), temp_reg, Imm(1)); - for (int i = trans_start + 1; i <= trans_end; i++) { - if (i % 2 == 0) { - cb.shr(temp_reg, Imm(4)); + Cb.pinsrq(Xmm(TransStart), TempReg, Imm(0)); + Cb.pinsrq(Xmm(TransStart), TempReg, Imm(1)); + for (int I = TransStart + 1; I <= TransEnd; I++) { + if (I % 2 == 0) { + Cb.shr(TempReg, Imm(4)); } else { - cb.shl(temp_reg, Imm(4)); + Cb.shl(TempReg, Imm(4)); } - cb.pinsrq(Xmm(i), temp_reg, 
Imm(0)); - cb.pinsrq(Xmm(i), temp_reg, Imm(1)); + Cb.pinsrq(Xmm(I), TempReg, Imm(0)); + Cb.pinsrq(Xmm(I), TempReg, Imm(1)); } } - cb.mov(l1_addr, pointer_reg); // address for L1-buffer - cb.mov(l2_addr, pointer_reg); - cb.add(l2_addr, Imm(l1_size)); // address for L2-buffer - cb.mov(l3_addr, pointer_reg); - cb.add(l3_addr, Imm(l2_size)); // address for L3-buffer - cb.mov(ram_addr, pointer_reg); - cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer - cb.mov(l2_count_reg, Imm(l2_loop_count)); - workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop (" - << l2_size / 1024 << ") KiB"; - cb.mov(l3_count_reg, Imm(l3_loop_count)); - workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count << " cache line accesses per loop (" - << l3_size / 1024 << ") KiB"; - cb.mov(ram_count_reg, Imm(ram_loop_count)); - workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop (" - << ram_size / 1024 << ") KiB"; - - cb.align(AlignMode::kCode, 64); - - auto Loop = cb.newLabel(); - cb.bind(Loop); - - auto movq_dst = mov_start; - auto add_dest = add_start + 1; - auto mov_dst = trans_start; - auto mov_src = mov_dst + 1; - unsigned l1_offset = 0; - -#define L1_INCREMENT() \ - l1_offset += 64; \ - if (l1_offset < l1_size * 0.5) { \ - cb.add(l1_addr, offset_reg); \ - } else { \ - l1_offset = 0; \ - cb.mov(l1_addr, pointer_reg); \ - } - -#define L2_INCREMENT() cb.add(l2_addr, offset_reg); - -#define L3_INCREMENT() cb.add(l3_addr, offset_reg) - -#define RAM_INCREMENT() cb.add(ram_addr, offset_reg) - - for (unsigned count = 0; count < repetitions; count++) { - for (const auto& item : sequence) { - if (item == "REG") { - cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs + 1) % add_regs)); - cb.movdqa(Xmm(mov_dst), Xmm(mov_src)); - } else if (item == "L1_L") { - cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32)); - L1_INCREMENT(); - } else if (item 
== "L1_S") { - cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); - cb.movapd(xmmword_ptr(l1_addr, 32), Xmm(add_dest)); - L1_INCREMENT(); - this->Instructions++; - } else if (item == "L1_LS") { - cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32)); - cb.movapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest)); - L1_INCREMENT(); - this->Instructions++; - } else if (item == "L2_L") { - cb.addpd(Xmm(add_dest), xmmword_ptr(l2_addr, 64)); - L2_INCREMENT(); - } else if (item == "L2_S") { - cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); - cb.movapd(xmmword_ptr(l2_addr, 64), Xmm(add_dest)); - L2_INCREMENT(); - this->Instructions++; - } else if (item == "L2_LS") { - cb.addpd(Xmm(add_dest), xmmword_ptr(l2_addr, 64)); - cb.movapd(xmmword_ptr(l2_addr, 96), Xmm(add_dest)); - L2_INCREMENT(); - this->Instructions++; - } else if (item == "L3_L") { - cb.addpd(Xmm(add_dest), xmmword_ptr(l3_addr, 64)); - L3_INCREMENT(); - } else if (item == "L3_S") { - cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); - cb.movapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); - L3_INCREMENT(); - this->Instructions++; - } else if (item == "L3_LS") { - cb.addpd(Xmm(add_dest), xmmword_ptr(l3_addr, 64)); - cb.movapd(xmmword_ptr(l3_addr, 96), Xmm(add_dest)); - L3_INCREMENT(); - this->Instructions++; - } else if (item == "L3_P") { - cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32)); - cb.prefetcht0(ptr(l3_addr)); - L3_INCREMENT(); - this->Instructions++; - } else if (item == "RAM_L") { - cb.addpd(Xmm(add_dest), xmmword_ptr(ram_addr, 64)); - RAM_INCREMENT(); - } else if (item == "RAM_S") { - cb.addpd(Xmm(add_dest), Xmm(add_start + (add_dest - add_start + add_regs - 1) % add_regs)); - cb.movapd(xmmword_ptr(ram_addr, 64), Xmm(add_dest)); - RAM_INCREMENT(); - this->Instructions++; - } else if (item == "RAM_LS") { - cb.addpd(Xmm(add_dest), xmmword_ptr(l3_addr, 64)); - cb.movapd(xmmword_ptr(ram_addr, 
64), Xmm(add_dest)); - RAM_INCREMENT(); - this->Instructions++; - } else if (item == "RAM_P") { - cb.addpd(Xmm(add_dest), xmmword_ptr(l1_addr, 32)); - cb.prefetcht2(ptr(ram_addr)); - RAM_INCREMENT(); - this->Instructions++; + Cb.mov(L1Addr, PointerReg); // address for L1-buffer + Cb.mov(L2Addr, PointerReg); + Cb.add(L2Addr, Imm(L1Size)); // address for L2-buffer + Cb.mov(L3Addr, PointerReg); + Cb.add(L3Addr, Imm(L2Size)); // address for L3-buffer + Cb.mov(RamAddr, PointerReg); + Cb.add(RamAddr, Imm(L3Size)); // address for RAM-buffer + Cb.mov(L2CountReg, Imm(L2LoopCount)); + workerLog::trace() << "reset counter for L2-buffer with " << L2LoopCount << " cache line accesses per loop (" + << L2Size / 1024 << ") KiB"; + Cb.mov(L3CountReg, Imm(L3LoopCount)); + workerLog::trace() << "reset counter for L3-buffer with " << L3LoopCount << " cache line accesses per loop (" + << L3Size / 1024 << ") KiB"; + Cb.mov(RamCountReg, Imm(RamLoopCount)); + workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop (" + << RamSize / 1024 << ") KiB"; + + Cb.align(AlignMode::kCode, 64); + + auto Loop = Cb.newLabel(); + Cb.bind(Loop); + + auto MovqDest = MovStart; + auto AddDest = AddStart + 1; + auto MovDest = TransStart; + auto MovSrc = MovDest + 1; + unsigned L1Offset = 0; + + const auto L1Increment = [&Cb, &L1Offset, &L1Size, &L1Addr, &OffsetReg, &PointerReg]() { + L1Offset += 64; + if (L1Offset < L1Size * 0.5) { + Cb.add(L1Addr, OffsetReg); + } else { + L1Offset = 0; + Cb.mov(L1Addr, PointerReg); + } + }; + const auto L2Increment = [&Cb, &L2Addr, &OffsetReg]() { Cb.add(L2Addr, OffsetReg); }; + const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); }; + const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); }; + + for (unsigned Count = 0; Count < Repetitions; Count++) { + for (const auto& Item : Sequence) { + if (Item == "REG") { + Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest 
- AddStart + AddRegs + 1) % AddRegs))); + Cb.movdqa(Xmm(MovDest), Xmm(MovSrc)); + } else if (Item == "L1_L") { + Cb.addpd(Xmm(AddDest), xmmword_ptr(L1Addr, 32)); + L1Increment(); + } else if (Item == "L1_S") { + Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs))); + Cb.movapd(xmmword_ptr(L1Addr, 32), Xmm(AddDest)); + L1Increment(); + Instructions++; + } else if (Item == "L1_LS") { + Cb.addpd(Xmm(AddDest), xmmword_ptr(L1Addr, 32)); + Cb.movapd(xmmword_ptr(L1Addr, 64), Xmm(AddDest)); + L1Increment(); + Instructions++; + } else if (Item == "L2_L") { + Cb.addpd(Xmm(AddDest), xmmword_ptr(L2Addr, 64)); + L2Increment(); + } else if (Item == "L2_S") { + Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs))); + Cb.movapd(xmmword_ptr(L2Addr, 64), Xmm(AddDest)); + L2Increment(); + Instructions++; + } else if (Item == "L2_LS") { + Cb.addpd(Xmm(AddDest), xmmword_ptr(L2Addr, 64)); + Cb.movapd(xmmword_ptr(L2Addr, 96), Xmm(AddDest)); + L2Increment(); + Instructions++; + } else if (Item == "L3_L") { + Cb.addpd(Xmm(AddDest), xmmword_ptr(L3Addr, 64)); + L3Increment(); + } else if (Item == "L3_S") { + Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs))); + Cb.movapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest)); + L3Increment(); + Instructions++; + } else if (Item == "L3_LS") { + Cb.addpd(Xmm(AddDest), xmmword_ptr(L3Addr, 64)); + Cb.movapd(xmmword_ptr(L3Addr, 96), Xmm(AddDest)); + L3Increment(); + Instructions++; + } else if (Item == "L3_P") { + Cb.addpd(Xmm(AddDest), xmmword_ptr(L1Addr, 32)); + Cb.prefetcht0(ptr(L3Addr)); + L3Increment(); + Instructions++; + } else if (Item == "RAM_L") { + Cb.addpd(Xmm(AddDest), xmmword_ptr(RamAddr, 64)); + RamIncrement(); + } else if (Item == "RAM_S") { + Cb.addpd(Xmm(AddDest), Xmm(AddStart + ((AddDest - AddStart + AddRegs - 1) % AddRegs))); + Cb.movapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest)); + RamIncrement(); + Instructions++; + } else if (Item == 
"RAM_LS") { + Cb.addpd(Xmm(AddDest), xmmword_ptr(L3Addr, 64)); + Cb.movapd(xmmword_ptr(RamAddr, 64), Xmm(AddDest)); + RamIncrement(); + Instructions++; + } else if (Item == "RAM_P") { + Cb.addpd(Xmm(AddDest), xmmword_ptr(L1Addr, 32)); + Cb.prefetcht2(ptr(RamAddr)); + RamIncrement(); + Instructions++; } else { - workerLog::error() << "Instruction group " << item << " not found in " << this->name() << "."; + workerLog::error() << "Instruction group " << Item << " not found in " << name() << "."; return EXIT_FAILURE; } - if (mov_regs > 0) { - this->Instructions++; - cb.movq(Mm(mov_start + (movq_dst - mov_start + mov_regs - 1) % mov_regs), Mm(movq_dst)); + if (MovRegs > 0) { + Instructions++; + Cb.movq(Mm(MovStart + ((MovqDest - MovStart + MovRegs - 1) % MovRegs)), Mm(MovqDest)); } - add_dest++; - if (add_dest > add_end) { + AddDest++; + if (AddDest > AddEnd) { // DO NOT REMOVE the + 1. It serves for the good of ymm0. If it was to // be overriden, the values in the other registers would rise up to inf. 
- add_dest = add_start + 1; + AddDest = AddStart + 1; } - mov_dst++; - if (mov_dst > trans_end) { - mov_dst = trans_start; + MovDest++; + if (MovDest > TransEnd) { + MovDest = TransStart; } - mov_src++; - if (mov_src > trans_end) { - mov_src = trans_start; + MovSrc++; + if (MovSrc > TransEnd) { + MovSrc = TransStart; } - if (mov_regs > 0) { - movq_dst++; - if (movq_dst > mov_end) { - movq_dst = mov_start; + if (MovRegs > 0) { + MovqDest++; + if (MovqDest > MovEnd) { + MovqDest = MovStart; } } } } - if (this->getRAMSequenceCount(sequence) > 0) { + if (getRAMSequenceCount(Sequence) > 0) { // reset RAM counter - auto NoRamReset = cb.newLabel(); - - cb.sub(ram_count_reg, Imm(1)); - cb.jnz(NoRamReset); - cb.mov(ram_count_reg, Imm(ram_loop_count)); - cb.mov(ram_addr, pointer_reg); - cb.add(ram_addr, Imm(l3_size)); - cb.bind(NoRamReset); + auto NoRamReset = Cb.newLabel(); + + Cb.sub(RamCountReg, Imm(1)); + Cb.jnz(NoRamReset); + Cb.mov(RamCountReg, Imm(RamLoopCount)); + Cb.mov(RamAddr, PointerReg); + Cb.add(RamAddr, Imm(L3Size)); + Cb.bind(NoRamReset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - if (this->getL2SequenceCount(sequence) > 0) { + if (getL2SequenceCount(Sequence) > 0) { // reset L2-Cache counter - auto NoL2Reset = cb.newLabel(); - - cb.sub(l2_count_reg, Imm(1)); - cb.jnz(NoL2Reset); - cb.mov(l2_count_reg, Imm(l2_loop_count)); - cb.mov(l2_addr, pointer_reg); - cb.add(l2_addr, Imm(l1_size)); - cb.bind(NoL2Reset); + auto NoL2Reset = Cb.newLabel(); + + Cb.sub(L2CountReg, Imm(1)); + Cb.jnz(NoL2Reset); + Cb.mov(L2CountReg, Imm(L2LoopCount)); + Cb.mov(L2Addr, PointerReg); + Cb.add(L2Addr, Imm(L1Size)); + Cb.bind(NoL2Reset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - if (this->getL3SequenceCount(sequence) > 0) { + if (getL3SequenceCount(Sequence) > 0) { // reset L3-Cache counter - auto NoL3Reset = cb.newLabel(); - - cb.sub(l3_count_reg, Imm(1)); - cb.jnz(NoL3Reset); - cb.mov(l3_count_reg, 
Imm(l3_loop_count)); - cb.mov(l3_addr, pointer_reg); - cb.add(l3_addr, Imm(l2_size)); - cb.bind(NoL3Reset); + auto NoL3Reset = Cb.newLabel(); + + Cb.sub(L3CountReg, Imm(1)); + Cb.jnz(NoL3Reset); + Cb.mov(L3CountReg, Imm(L3LoopCount)); + Cb.mov(L3Addr, PointerReg); + Cb.add(L3Addr, Imm(L2Size)); + Cb.bind(NoL3Reset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - cb.inc(iter_reg); // increment iteration counter - cb.mov(l1_addr, pointer_reg); + Cb.inc(IterReg); // increment iteration counter + Cb.mov(L1Addr, PointerReg); - if (dumpRegisters) { - auto SkipRegistersDump = cb.newLabel(); + if (DumpRegisters) { + auto SkipRegistersDump = Cb.newLabel(); - cb.test(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait)); - cb.jnz(SkipRegistersDump); + Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait)); + Cb.jnz(SkipRegistersDump); // dump all the xmm register - for (int i = 0; i < (int)this->registerCount(); i++) { - cb.movapd(xmmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Xmm(i)); + for (unsigned I = 0; I < registerCount(); I++) { + Cb.movapd(xmmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Xmm(I)); } // set read flag - cb.mov(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait)); + Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait)); - cb.bind(SkipRegistersDump); + Cb.bind(SkipRegistersDump); } - if (errorDetection) { - this->emitErrorDetectionCode(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); + if (ErrorDetection) { + emitErrorDetectionCode(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2); } - cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH)); - cb.jnz(Loop); + Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH)); + Cb.jnz(Loop); - cb.bind(FunctionExit); + Cb.bind(FunctionExit); - cb.mov(rax, iter_reg); // restore iteration counter + Cb.mov(rax, IterReg); // restore iteration counter - cb.emitEpilog(frame); + Cb.emitEpilog(Frame); - 
cb.finalize(); + Cb.finalize(); // String sb; // cb.dump(sb); - Error err = this->Rt.add(&this->LoadFunction, &code); - if (err) { + Error Err = Rt.add(&LoadFunction, &Code); + if (Err) { workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; } // skip if we could not determine cache size - if (l1i_cache_size != 0) { - auto loopSize = code.labelOffset(FunctionExit) - code.labelOffset(Loop); - auto instructionCachePercentage = 100 * loopSize / l1i_cache_size; + if (L1iCacheSize != 0) { + auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop); + auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize; - if (loopSize > l1i_cache_size) { + if (LoopSize > L1iCacheSize) { workerLog::warn() << "Work-loop is bigger than the L1i-Cache."; } - workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage + workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage << "%) from the L1i-Cache for the work-loop."; - workerLog::trace() << "Sequence size: " << sequence.size(); - workerLog::trace() << "Repetition count: " << repetitions; + workerLog::trace() << "Sequence size: " << Sequence.size(); + workerLog::trace() << "Repetition count: " << Repetitions; } return EXIT_SUCCESS; } -std::list SSE2Payload::getAvailableInstructions() const { - std::list instructions; +auto SSE2Payload::getAvailableInstructions() const -> std::list { + std::list Instructions; - transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions), - [](const auto& item) { return item.first; }); + transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions), + [](const auto& Item) { return Item.first; }); - return instructions; + return Instructions; } -void SSE2Payload::init(uint64_t* memoryAddr, uint64_t bufferSize) { - 
X86Payload::init(memoryAddr, bufferSize, 1.654738925401e-10, 1.654738925401e-15); +void SSE2Payload::init(uint64_t* MemoryAddr, uint64_t BufferSize) { + X86Payload::init(MemoryAddr, BufferSize, 1.654738925401e-10, 1.654738925401e-15); } + +} // namespace firestarter::environment::x86::payload \ No newline at end of file diff --git a/src/firestarter/Environment/X86/Payload/X86Payload.cpp b/src/firestarter/Environment/X86/Payload/X86Payload.cpp index 93458d25..d0cedd61 100644 --- a/src/firestarter/Environment/X86/Payload/X86Payload.cpp +++ b/src/firestarter/Environment/X86/Payload/X86Payload.cpp @@ -22,7 +22,6 @@ #include #include #include -#include #ifdef _MSC_VER #include @@ -31,436 +30,59 @@ #include -using namespace firestarter::environment::x86::payload; +namespace firestarter::environment::x86::payload { -void X86Payload::lowLoadFunction(volatile uint64_t* addrHigh, uint64_t period) { - int nap; -#ifdef _MSC_VER - std::array cpuid; -#endif +void X86Payload::lowLoadFunction(volatile uint64_t* AddrHigh, uint64_t Period) { + int Nap = Period / 100; - nap = period / 100; #ifndef _MSC_VER __asm__ __volatile__("mfence;" "cpuid;" :: : "eax", "ebx", "ecx", "edx"); #else + std::array Cpuid; _mm_mfence(); - __cpuid(cpuid.data(), 0); + __cpuid(Cpuid.data(), 0); #endif + // while signal low load - while (*addrHigh == LOAD_LOW) { + while (*AddrHigh == LOAD_LOW) { #ifndef _MSC_VER __asm__ __volatile__("mfence;" "cpuid;" :: : "eax", "ebx", "ecx", "edx"); #else _mm_mfence(); - __cpuid(cpuid.data(), 0); + __cpuid(Cpuid.data(), 0); #endif - std::this_thread::sleep_for(std::chrono::microseconds(nap)); + std::this_thread::sleep_for(std::chrono::microseconds(Nap)); #ifndef _MSC_VER __asm__ __volatile__("mfence;" "cpuid;" :: : "eax", "ebx", "ecx", "edx"); #else _mm_mfence(); - __cpuid(cpuid.data(), 0); + __cpuid(Cpuid.data(), 0); #endif } } -void X86Payload::init(uint64_t* memoryAddr, uint64_t bufferSize, double firstValue, double lastValue) { +void X86Payload::init(uint64_t* 
MemoryAddr, uint64_t BufferSize, double FirstValue, double LastValue) { uint64_t i = 0; - for (; i < INIT_BLOCKSIZE; i++) - *((double*)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * firstValue; - for (; i <= bufferSize - INIT_BLOCKSIZE; i += INIT_BLOCKSIZE) - std::memcpy(memoryAddr + i, memoryAddr + i - INIT_BLOCKSIZE, sizeof(uint64_t) * INIT_BLOCKSIZE); - for (; i < bufferSize; i++) - *((double*)(memoryAddr + i)) = 0.25 + (double)i * 8.0 * lastValue; -} - -uint64_t X86Payload::highLoadFunction(uint64_t* addrMem, volatile uint64_t* addrHigh, uint64_t iterations) { - return this->LoadFunction(addrMem, addrHigh, iterations); -} - -// add MM regs to dirty regs -// zmm31 is used for backup if VectorReg is of type asmjit::x86::Zmm -template -void X86Payload::emitErrorDetectionCode(asmjit::x86::Builder& Cb, IterRegT IterReg, asmjit::x86::Gpq addrHigh_reg, - asmjit::x86::Gpq pointer_reg, asmjit::x86::Gpq temp_reg, - asmjit::x86::Gpq temp_reg2) { - // we don't want anything to break... so we use asserts for everything that - // could break it - static_assert(std::is_base_of::value, "VectorReg must be of asmjit::asmjit::x86::Vec"); - static_assert(std::is_same::value || - std::is_same::value || - std::is_same::value, - "VectorReg ist not of any supported type"); - static_assert(std::is_same::value || std::is_same::value, - "IterReg is not of any supported type"); - - if constexpr (std::is_same::value) { - assert((IterReg == asmjit::x86::mm0, "iter_reg must be mm0")); + for (; i < INIT_BLOCKSIZE; i++) { + reinterpret_cast(MemoryAddr)[i] = 0.25 + static_cast(i) * 8.0 * FirstValue; } - - assert((IterReg != temp_reg, "iter_reg must be != temp_reg")); - assert((temp_reg != temp_reg2, "temp_reg must be != temp_reg2")); - assert((temp_reg != addrHigh_reg, "temp_reg must be != addrHigh_reg")); - assert((temp_reg != pointer_reg, "temp_reg must be != pointer_reg")); - - assert((IterReg != asmjit::x86::r8, "iter_reg must be != r8")); - assert((IterReg != asmjit::x86::r9, "iter_reg must 
be != r9")); - assert((IterReg != asmjit::x86::rax, "iter_reg must be != rax")); - assert((IterReg != asmjit::x86::rbx, "iter_reg must be != rbx")); - assert((IterReg != asmjit::x86::rcx, "iter_reg must be != rcx")); - assert((IterReg != asmjit::x86::rdx, "iter_reg must be != rdx")); - - assert((temp_reg != asmjit::x86::r8, "temp_reg must be != r8")); - assert((temp_reg != asmjit::x86::r9, "temp_reg must be != r9")); - assert((temp_reg != asmjit::x86::rax, "temp_reg must be != rax")); - assert((temp_reg != asmjit::x86::rbx, "temp_reg must be != rbx")); - assert((temp_reg != asmjit::x86::rcx, "temp_reg must be != rcx")); - assert((temp_reg != asmjit::x86::rdx, "temp_reg must be != rdx")); - - assert((temp_reg2 != asmjit::x86::r8, "temp_reg2 must be != r8")); - assert((temp_reg2 != asmjit::x86::r9, "temp_reg2 must be != r9")); - assert((temp_reg2 != asmjit::x86::rax, "temp_reg2 must be != rax")); - assert((temp_reg2 != asmjit::x86::rbx, "temp_reg2 must be != rbx")); - assert((temp_reg2 != asmjit::x86::rcx, "temp_reg2 must be != rcx")); - assert((temp_reg2 != asmjit::x86::rdx, "temp_reg2 must be != rdx")); - - assert((addrHigh_reg != asmjit::x86::r8, "addrHigh_reg must be != r8")); - assert((addrHigh_reg != asmjit::x86::r9, "addrHigh_reg must be != r9")); - assert((addrHigh_reg != asmjit::x86::rax, "addrHigh_reg must be != rax")); - assert((addrHigh_reg != asmjit::x86::rbx, "addrHigh_reg must be != rbx")); - assert((addrHigh_reg != asmjit::x86::rcx, "addrHigh_reg must be != rcx")); - assert((addrHigh_reg != asmjit::x86::rdx, "addrHigh_reg must be != rdx")); - - auto SkipErrorDetection = Cb.newLabel(); - - if constexpr (std::is_same::value) { - Cb.movq(temp_reg, IterReg); - } else { - Cb.mov(temp_reg, IterReg); + for (; i <= BufferSize - INIT_BLOCKSIZE; i += INIT_BLOCKSIZE) { + std::memcpy(MemoryAddr + i, MemoryAddr + i - INIT_BLOCKSIZE, sizeof(uint64_t) * INIT_BLOCKSIZE); } - // round about 50-100 Hz - // more or less, but this isn't really that relevant - 
Cb.and_(temp_reg, asmjit::Imm(0x3fff)); - Cb.test(temp_reg, temp_reg); - Cb.jnz(SkipErrorDetection); - - Cb.mov(temp_reg, asmjit::Imm(0xffffffff)); - - int registerCount = (int)this->registerCount(); - - // Create a backup of VectorReg(0) - if constexpr (std::is_same::value) { - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.push(temp_reg2); - Cb.crc32(temp_reg, temp_reg2); - Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.push(temp_reg2); - Cb.crc32(temp_reg, temp_reg2); - - } else if constexpr (std::is_same::value && - std::is_same::value) { - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.movq(asmjit::x86::Mm(7), temp_reg2); - Cb.crc32(temp_reg, temp_reg2); - Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.movq(asmjit::x86::Mm(6), temp_reg2); - Cb.crc32(temp_reg, temp_reg2); - - Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1)); - - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.movq(asmjit::x86::Mm(5), temp_reg2); - Cb.crc32(temp_reg, temp_reg2); - Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.movq(asmjit::x86::Mm(4), temp_reg2); - Cb.crc32(temp_reg, temp_reg2); - } else if constexpr (std::is_same::value && - std::is_same::value) { - // We use vector registers zmm31 for our backup - Cb.vmovapd(asmjit::x86::zmm31, asmjit::x86::zmm0); - registerCount--; + for (; i < BufferSize; i++) { + reinterpret_cast(MemoryAddr)[i] = 0.25 + static_cast(i) * 8.0 * LastValue; } - - // Calculate the hash of the remaining VectorReg - // use VectorReg(0) as a temporary place to unpack values - for (int i = 1; i < registerCount; i++) { - if constexpr (std::is_same::value) { - Cb.vmovapd(asmjit::x86::xmm0, asmjit::x86::Xmm(i)); - - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.crc32(temp_reg, temp_reg2); - Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.crc32(temp_reg, temp_reg2); - } 
else if constexpr (std::is_same::value) { - Cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(i)); - - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.crc32(temp_reg, temp_reg2); - Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.crc32(temp_reg, temp_reg2); - - Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1)); - - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.crc32(temp_reg, temp_reg2); - Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.crc32(temp_reg, temp_reg2); - } else if constexpr (std::is_same::value) { - Cb.vmovapd(asmjit::x86::ymm0, asmjit::x86::Ymm(i)); - - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.crc32(temp_reg, temp_reg2); - Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.crc32(temp_reg, temp_reg2); - - Cb.vextractf128(asmjit::x86::xmm0, asmjit::x86::ymm0, asmjit::Imm(1)); - - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.crc32(temp_reg, temp_reg2); - Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.crc32(temp_reg, temp_reg2); - - Cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(i), asmjit::Imm(2)); - - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.crc32(temp_reg, temp_reg2); - Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.crc32(temp_reg, temp_reg2); - - Cb.vextractf32x4(asmjit::x86::xmm0, asmjit::x86::Zmm(i), asmjit::Imm(3)); - - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.crc32(temp_reg, temp_reg2); - Cb.movhlps(asmjit::x86::xmm0, asmjit::x86::xmm0); - Cb.movq(temp_reg2, asmjit::x86::xmm0); - Cb.crc32(temp_reg, temp_reg2); - } - } - - // Restore VectorReg(0) from backup - if constexpr (std::is_same::value) { - Cb.pop(temp_reg2); - Cb.movq(asmjit::x86::xmm0, temp_reg2); - Cb.movlhps(asmjit::x86::xmm0, asmjit::x86::xmm0); - Cb.pop(temp_reg2); - Cb.pinsrw(asmjit::x86::xmm0, temp_reg2.r32(), 
asmjit::Imm(0)); - Cb.shr(temp_reg2, asmjit::Imm(32)); - Cb.movd(temp_reg2.r32(), asmjit::x86::Mm(7)); - Cb.pinsrw(asmjit::x86::xmm0, temp_reg2.r32(), asmjit::Imm(1)); - } else if constexpr (std::is_same::value && - std::is_same::value) { - Cb.movq(temp_reg2, asmjit::x86::Mm(5)); - Cb.movq(asmjit::x86::xmm0, temp_reg2); - Cb.movq(temp_reg2, asmjit::x86::Mm(4)); - Cb.pinsrq(asmjit::x86::xmm0, temp_reg2, asmjit::Imm(1)); - - Cb.vinsertf128(asmjit::x86::ymm0, asmjit::x86::ymm0, asmjit::x86::xmm0, asmjit::Imm(1)); - - Cb.movq(temp_reg2, asmjit::x86::Mm(7)); - Cb.movq(asmjit::x86::xmm0, temp_reg2); - Cb.movq(temp_reg2, asmjit::x86::Mm(6)); - Cb.pinsrq(asmjit::x86::xmm0, temp_reg2, asmjit::Imm(1)); - } else if constexpr (std::is_same::value && - std::is_same::value) { - // We use vector registers zmm31 for our backup - Cb.vmovapd(asmjit::x86::zmm0, asmjit::x86::zmm31); - } - - // before starting the communication, backup r8, r9, rax, rbx, rcx and rdx - if constexpr (std::is_same::value) { - Cb.movq(asmjit::x86::Mm(7), asmjit::x86::rax); - Cb.movq(asmjit::x86::Mm(6), asmjit::x86::rbx); - Cb.movq(asmjit::x86::Mm(5), asmjit::x86::rcx); - Cb.movq(asmjit::x86::Mm(4), asmjit::x86::rdx); - Cb.movq(asmjit::x86::Mm(3), asmjit::x86::r8); - Cb.movq(asmjit::x86::Mm(2), asmjit::x86::r9); - } else { - Cb.push(asmjit::x86::rax); - Cb.push(asmjit::x86::rbx); - Cb.push(asmjit::x86::rcx); - Cb.push(asmjit::x86::rdx); - Cb.push(asmjit::x86::r8); - Cb.push(asmjit::x86::r9); - } - - // do the actual communication - // temp_reg contains our hash - - // save the pointer_reg. it might be any of r8, r9, rax, rbx, rcx or rdx - Cb.mov(temp_reg2, pointer_reg); - - // Don't touch me! - // This sychronization and communication works even if the threads run at - // different (changing) speed, with just one "lock cmpxchg16b" Brought to you - // by a few hours of headache for two people. 
- auto communication = [&](auto offset) { - // communication - Cb.mov(asmjit::x86::r8, asmjit::x86::ptr_64(temp_reg2, offset)); - - // temp data - Cb.mov(asmjit::x86::r9, temp_reg2); - Cb.add(asmjit::x86::r9, asmjit::Imm(offset + 8)); - - Cb.mov(asmjit::x86::rdx, asmjit::x86::ptr_64(asmjit::x86::r9, 0)); - Cb.mov(asmjit::x86::rax, asmjit::x86::ptr_64(asmjit::x86::r9, 8)); - - auto L0 = Cb.newLabel(); - Cb.bind(L0); - - Cb.lock(); - Cb.cmpxchg16b(asmjit::x86::ptr(asmjit::x86::r8)); - - auto L1 = Cb.newLabel(); - Cb.jnz(L1); - - Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx); - Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx); - Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0)); - Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0)); - - Cb.mov(asmjit::x86::rax, asmjit::Imm(2)); - - auto L6 = Cb.newLabel(); - Cb.jmp(L6); - - Cb.bind(L1); - - Cb.cmp(asmjit::x86::rcx, asmjit::x86::rdx); - - auto L2 = Cb.newLabel(); - Cb.jle(L2); - - Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 0), asmjit::x86::rcx); - Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 8), asmjit::x86::rbx); - - Cb.jmp(L0); - - Cb.bind(L2); - - auto L3 = Cb.newLabel(); - - Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::Imm(0)); - Cb.jne(L3); - Cb.cmp(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0)); - Cb.jne(L3); - - Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), asmjit::x86::rdx); - Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::x86::rax); - - Cb.bind(L3); - - Cb.cmp(asmjit::x86::rcx, asmjit::x86::ptr_64(asmjit::x86::r9, 16)); - Cb.mov(asmjit::x86::rax, asmjit::Imm(4)); - Cb.jne(L6); - - Cb.cmp(asmjit::x86::rbx, asmjit::x86::ptr_64(asmjit::x86::r9, 24)); - auto L4 = Cb.newLabel(); - Cb.jne(L4); - - Cb.mov(asmjit::x86::rax, asmjit::Imm(0)); - - auto L5 = Cb.newLabel(); - Cb.jmp(L5); - - Cb.bind(L4); - - Cb.mov(asmjit::x86::rax, asmjit::Imm(1)); - - Cb.bind(L5); - - Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 16), 
asmjit::Imm(0)); - Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 24), asmjit::Imm(0)); - - Cb.bind(L6); - - // if check failed - Cb.cmp(asmjit::x86::rax, asmjit::Imm(1)); - auto L7 = Cb.newLabel(); - Cb.jne(L7); - - // write the error flag - Cb.mov(asmjit::x86::ptr_64(asmjit::x86::r9, 32), asmjit::Imm(1)); - - // stop the execution after some time - Cb.mov(asmjit::x86::ptr_64(addrHigh_reg), asmjit::Imm(LOAD_STOP)); - Cb.mfence(); - - Cb.bind(L7); - - auto L9 = Cb.newLabel(); - Cb.jmp(L9); - }; - - // left communication - // move hash - Cb.mov(asmjit::x86::rbx, temp_reg); - // move iterations counter - if constexpr (std::is_same::value) { - Cb.movq(asmjit::x86::rcx, IterReg); - } else { - Cb.mov(asmjit::x86::rcx, IterReg); - } - - communication(-128); - - // right communication - // move hash - Cb.mov(asmjit::x86::rbx, temp_reg); - // move iterations counter - if constexpr (std::is_same::value) { - Cb.movq(asmjit::x86::rcx, IterReg); - } else { - Cb.mov(asmjit::x86::rcx, IterReg); - } - - communication(-64); - - // restore r8, r9, rax, rbx, rcx and rdx - if constexpr (std::is_same::value) { - Cb.movq(asmjit::x86::rax, asmjit::x86::Mm(7)); - Cb.movq(asmjit::x86::rbx, asmjit::x86::Mm(6)); - Cb.movq(asmjit::x86::rcx, asmjit::x86::Mm(5)); - Cb.movq(asmjit::x86::rdx, asmjit::x86::Mm(4)); - Cb.movq(asmjit::x86::r8, asmjit::x86::Mm(3)); - Cb.movq(asmjit::x86::r9, asmjit::x86::Mm(2)); - } else { - Cb.pop(asmjit::x86::r9); - Cb.pop(asmjit::x86::r8); - Cb.pop(asmjit::x86::rdx); - Cb.pop(asmjit::x86::rcx); - Cb.pop(asmjit::x86::rbx); - Cb.pop(asmjit::x86::rax); - } - - Cb.bind(SkipErrorDetection); } -template void X86Payload::emitErrorDetectionCode( - asmjit::x86::Builder& cb, asmjit::x86::Gpq iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg, - asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2); -template void X86Payload::emitErrorDetectionCode( - asmjit::x86::Builder& cb, asmjit::x86::Gpq iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq 
pointer_reg, - asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2); +auto X86Payload::highLoadFunction(uint64_t* AddrMem, volatile uint64_t* AddrHigh, uint64_t Iterations) -> uint64_t { + return this->LoadFunction(AddrMem, AddrHigh, Iterations); +} -template void X86Payload::emitErrorDetectionCode( - asmjit::x86::Builder& cb, asmjit::x86::Mm iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg, - asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2); -template void X86Payload::emitErrorDetectionCode( - asmjit::x86::Builder& cb, asmjit::x86::Mm iter_reg, asmjit::x86::Gpq addrHigh_reg, asmjit::x86::Gpq pointer_reg, - asmjit::x86::Gpq temp_reg, asmjit::x86::Gpq temp_reg2); +}; // namespace firestarter::environment::x86::payload \ No newline at end of file diff --git a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp index ac7550e1..01c62777 100644 --- a/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp +++ b/src/firestarter/Environment/X86/Payload/ZENFMAPayload.cpp @@ -20,389 +20,385 @@ *****************************************************************************/ #include -#include -#include -#include +namespace firestarter::environment::x86::payload { -using namespace firestarter::environment::x86::payload; -using namespace asmjit; -using namespace asmjit::x86; +auto ZENFMAPayload::compilePayload(std::vector> const& Proportion, + unsigned InstructionCacheSize, std::list const& DataCacheBufferSize, + unsigned RamBufferSize, unsigned Thread, unsigned NumberOfLines, bool DumpRegisters, + bool ErrorDetection) -> int { + using namespace asmjit; + using namespace asmjit::x86; -int ZENFMAPayload::compilePayload(std::vector> const& proportion, - unsigned instructionCacheSize, std::list const& dataCacheBufferSize, - unsigned ramBufferSize, unsigned thread, unsigned numberOfLines, bool dumpRegisters, - bool errorDetection) { // Compute the sequence of instruction groups and the 
number of its repetions // to reach the desired size - auto sequence = this->generateSequence(proportion); - auto repetitions = this->getNumberOfSequenceRepetitions(sequence, numberOfLines / thread); + auto Sequence = generateSequence(Proportion); + auto Repetitions = getNumberOfSequenceRepetitions(Sequence, NumberOfLines / Thread); // compute count of flops and memory access for performance report - unsigned flops = 0; - unsigned bytes = 0; + Flops = 0; + Bytes = 0; - for (const auto& item : sequence) { - auto it = this->InstructionFlops.find(item); + for (const auto& Item : Sequence) { + auto It = InstructionFlops.find(Item); - if (it == this->InstructionFlops.end()) { - workerLog::error() << "Instruction group " << item << " undefined in " << name() << "."; + if (It == InstructionFlops.end()) { + workerLog::error() << "Instruction group " << Item << " undefined in " << name() << "."; return EXIT_FAILURE; } - flops += it->second; + Flops += It->second; - it = this->InstructionMemory.find(item); + It = InstructionMemory.find(Item); - if (it != this->InstructionMemory.end()) { - bytes += it->second; + if (It != InstructionMemory.end()) { + Bytes += It->second; } } - this->Flops = repetitions * flops; - this->Bytes = repetitions * bytes; - this->Instructions = repetitions * sequence.size() * 4 + 6; + Flops *= Repetitions; + Bytes *= Repetitions; + Instructions = Repetitions * Sequence.size() * 4 + 6; // calculate the buffer sizes - auto l1i_cache_size = instructionCacheSize / thread; - auto dataCacheBufferSizeIterator = dataCacheBufferSize.begin(); - auto l1_size = *dataCacheBufferSizeIterator / thread; - std::advance(dataCacheBufferSizeIterator, 1); - auto l2_size = *dataCacheBufferSizeIterator / thread; - std::advance(dataCacheBufferSizeIterator, 1); - auto l3_size = *dataCacheBufferSizeIterator / thread; - auto ram_size = ramBufferSize / thread; + auto L1iCacheSize = InstructionCacheSize / Thread; + auto DataCacheBufferSizeIterator = DataCacheBufferSize.begin(); 
+ auto L1Size = *DataCacheBufferSizeIterator / Thread; + std::advance(DataCacheBufferSizeIterator, 1); + auto L2Size = *DataCacheBufferSizeIterator / Thread; + std::advance(DataCacheBufferSizeIterator, 1); + auto L3Size = *DataCacheBufferSizeIterator / Thread; + auto RamSize = RamBufferSize / Thread; // calculate the reset counters for the buffers - auto l2_loop_count = getL2LoopCount(sequence, numberOfLines, l2_size * thread, thread); - auto l3_loop_count = getL3LoopCount(sequence, numberOfLines, l3_size * thread, thread); - auto ram_loop_count = getRAMLoopCount(sequence, numberOfLines, ram_size * thread, thread); + auto L2LoopCount = getL2LoopCount(Sequence, NumberOfLines, L2Size * Thread, Thread); + auto L3LoopCount = getL3LoopCount(Sequence, NumberOfLines, L3Size * Thread, Thread); + auto RamLoopCount = getRAMLoopCount(Sequence, NumberOfLines, RamSize * Thread, Thread); - CodeHolder code; - code.init(this->Rt.environment()); + CodeHolder Code; + Code.init(Rt.environment()); - if (nullptr != this->LoadFunction) { - this->Rt.release(&this->LoadFunction); + if (nullptr != LoadFunction) { + Rt.release(&LoadFunction); } - Builder cb(&code); - cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | + Builder Cb(&Code); + Cb.addDiagnosticOptions(asmjit::DiagnosticOptions::kValidateAssembler | asmjit::DiagnosticOptions::kValidateIntermediate); - auto pointer_reg = rax; - auto l1_addr = rbx; - auto l2_addr = rcx; - auto l3_addr = r8; - auto ram_addr = r9; - auto l2_count_reg = r10; - auto l3_count_reg = r11; - auto ram_count_reg = r12; - auto temp_reg = r13; - auto temp_reg2 = rbp; - auto offset_reg = r14; - auto addrHigh_reg = r15; - auto iter_reg = mm0; - auto shift_reg = std::vector({rdi, rsi, rdx}); - auto nr_shift_regs = 3; - auto nr_add_regs = 11; - auto ram_reg = ymm15; - - FuncDetail func; - func.init(FuncSignatureT(CallConvId::kCDecl), - this->Rt.environment()); - - FuncFrame frame; - frame.init(func); + auto PointerReg = rax; + auto L1Addr = 
rbx; + auto L2Addr = rcx; + auto L3Addr = r8; + auto RamAddr = r9; + auto L2CountReg = r10; + auto L3CountReg = r11; + auto RamCountReg = r12; + auto TempReg = r13; + auto TempReg2 = rbp; + auto OffsetReg = r14; + auto AddrHighReg = r15; + auto IterReg = mm0; + auto ShiftRegs = std::vector({rdi, rsi, rdx}); + auto NbShiftRegs = 3; + auto NbAddRegs = 11; + auto RamReg = ymm15; + + FuncDetail Func; + Func.init(FuncSignatureT(CallConvId::kCDecl), Rt.environment()); + + FuncFrame Frame; + Frame.init(Func); // make (x|y)mm registers dirty - for (int i = 0; i < 16; i++) { - frame.addDirtyRegs(Ymm(i)); + for (int I = 0; I < 16; I++) { + Frame.addDirtyRegs(Ymm(I)); } - for (int i = 0; i < 8; i++) { - frame.addDirtyRegs(Mm(i)); + for (int I = 0; I < 8; I++) { + Frame.addDirtyRegs(Mm(I)); } // make all other used registers dirty except RAX - frame.addDirtyRegs(l1_addr, l2_addr, l3_addr, ram_addr, l2_count_reg, l3_count_reg, ram_count_reg, temp_reg, - temp_reg2, offset_reg, addrHigh_reg, iter_reg, ram_addr); - for (const auto& reg : shift_reg) { - frame.addDirtyRegs(reg); + Frame.addDirtyRegs(L1Addr, L2Addr, L3Addr, RamAddr, L2CountReg, L3CountReg, RamCountReg, TempReg, TempReg2, OffsetReg, + AddrHighReg, IterReg, RamAddr); + for (const auto& Reg : ShiftRegs) { + Frame.addDirtyRegs(Reg); } - FuncArgsAssignment args(&func); + FuncArgsAssignment Args(&Func); // FIXME: asmjit assigment to mm0 does not seem to be supported - args.assignAll(pointer_reg, addrHigh_reg, temp_reg); - args.updateFuncFrame(frame); - frame.finalize(); + Args.assignAll(PointerReg, AddrHighReg, TempReg); + Args.updateFuncFrame(Frame); + Frame.finalize(); - cb.emitProlog(frame); - cb.emitArgsAssignment(frame, args); + Cb.emitProlog(Frame); + Cb.emitArgsAssignment(Frame, Args); // FIXME: movq from temp_reg to iter_reg - cb.movq(iter_reg, temp_reg); + Cb.movq(IterReg, TempReg); // stop right away if low load is selected - auto FunctionExit = cb.newLabel(); + auto FunctionExit = Cb.newLabel(); - 
cb.mov(temp_reg, ptr_64(addrHigh_reg)); - cb.test(temp_reg, temp_reg); - cb.jz(FunctionExit); + Cb.mov(TempReg, ptr_64(AddrHighReg)); + Cb.test(TempReg, TempReg); + Cb.jz(FunctionExit); - cb.mov(offset_reg, + Cb.mov(OffsetReg, Imm(64)); // increment after each cache/memory access // Initialize registers for shift operations - for (auto const& reg : shift_reg) { - cb.mov(reg, Imm(0xAAAAAAAAAAAAAAAA)); + for (auto const& Reg : ShiftRegs) { + Cb.mov(Reg, Imm(0xAAAAAAAAAAAAAAAA)); } // Initialize AVX-Registers for FMA Operations - cb.vmovapd(ymm0, ymmword_ptr(pointer_reg)); - cb.vmovapd(ymm1, ymmword_ptr(pointer_reg, 32)); + Cb.vmovapd(ymm0, ymmword_ptr(PointerReg)); + Cb.vmovapd(ymm1, ymmword_ptr(PointerReg, 32)); - auto add_regs_start = 2; - auto add_regs_end = add_regs_start + nr_add_regs - 1; - for (int i = add_regs_start; i <= add_regs_end; i++) { - cb.vmovapd(Ymm(i), ymmword_ptr(pointer_reg, 256 + i * 32)); + auto AddRegsStart = 2; + auto AddRegsEnd = AddRegsStart + NbAddRegs - 1; + for (int I = AddRegsStart; I <= AddRegsEnd; I++) { + Cb.vmovapd(Ymm(I), ymmword_ptr(PointerReg, 256 + I * 32)); } // Initialize xmm14 for shift operation // cb.mov(temp_reg, Imm(1)); // cb.movd(temp_reg, Xmm(14)); - cb.movd(shift_reg[0], Xmm(13)); - cb.vbroadcastss(Xmm(13), Xmm(13)); - cb.vmovapd(Xmm(14), Xmm(13)); - cb.vpsrlq(Xmm(14), Xmm(14), Imm(1)); - - cb.mov(l1_addr, pointer_reg); // address for L1-buffer - cb.mov(l2_addr, pointer_reg); - cb.add(l2_addr, Imm(l1_size)); // address for L2-buffer - cb.mov(l3_addr, pointer_reg); - cb.add(l3_addr, Imm(l2_size)); // address for L3-buffer - cb.mov(ram_addr, pointer_reg); - cb.add(ram_addr, Imm(l3_size)); // address for RAM-buffer - cb.mov(l2_count_reg, Imm(l2_loop_count)); - workerLog::trace() << "reset counter for L2-buffer with " << l2_loop_count << " cache line accesses per loop (" - << l2_size / 1024 << ") KiB"; - cb.mov(l3_count_reg, Imm(l3_loop_count)); - workerLog::trace() << "reset counter for L3-buffer with " << l3_loop_count 
<< " cache line accesses per loop (" - << l3_size / 1024 << ") KiB"; - cb.mov(ram_count_reg, Imm(ram_loop_count)); - workerLog::trace() << "reset counter for RAM-buffer with " << ram_loop_count << " cache line accesses per loop (" - << ram_size / 1024 << ") KiB"; - - cb.align(AlignMode::kCode, 64); - - auto Loop = cb.newLabel(); - cb.bind(Loop); - - auto shift_pos = 0; - bool left = false; - auto itemCount = 0; - auto add_dest = add_regs_start; - unsigned l1_offset = 0; - -#define L1_INCREMENT() \ - l1_offset += 64; \ - if (l1_offset < l1_size * 0.5) { \ - cb.add(l1_addr, offset_reg); \ - } else { \ - l1_offset = 0; \ - cb.mov(l1_addr, pointer_reg); \ - } - -#define L2_INCREMENT() cb.add(l2_addr, offset_reg); - -#define L3_INCREMENT() cb.add(l3_addr, offset_reg) - -#define RAM_INCREMENT() cb.add(ram_addr, offset_reg) + Cb.movd(ShiftRegs[0], Xmm(13)); + Cb.vbroadcastss(Xmm(13), Xmm(13)); + Cb.vmovapd(Xmm(14), Xmm(13)); + Cb.vpsrlq(Xmm(14), Xmm(14), Imm(1)); + + Cb.mov(L1Addr, PointerReg); // address for L1-buffer + Cb.mov(L2Addr, PointerReg); + Cb.add(L2Addr, Imm(L1Size)); // address for L2-buffer + Cb.mov(L3Addr, PointerReg); + Cb.add(L3Addr, Imm(L2Size)); // address for L3-buffer + Cb.mov(RamAddr, PointerReg); + Cb.add(RamAddr, Imm(L3Size)); // address for RAM-buffer + Cb.mov(L2CountReg, Imm(L2LoopCount)); + workerLog::trace() << "reset counter for L2-buffer with " << L2LoopCount << " cache line accesses per loop (" + << L2Size / 1024 << ") KiB"; + Cb.mov(L3CountReg, Imm(L3LoopCount)); + workerLog::trace() << "reset counter for L3-buffer with " << L3LoopCount << " cache line accesses per loop (" + << L3Size / 1024 << ") KiB"; + Cb.mov(RamCountReg, Imm(RamLoopCount)); + workerLog::trace() << "reset counter for RAM-buffer with " << RamLoopCount << " cache line accesses per loop (" + << RamSize / 1024 << ") KiB"; + + Cb.align(AlignMode::kCode, 64); + + auto Loop = Cb.newLabel(); + Cb.bind(Loop); + + auto ShiftPos = 0; + bool Left = false; + unsigned ItemCount = 0; + 
auto AddDest = AddRegsStart; + unsigned L1Offset = 0; + + const auto L1Increment = [&Cb, &L1Offset, &L1Size, &L1Addr, &OffsetReg, &PointerReg]() { + L1Offset += 64; + if (L1Offset < L1Size * 0.5) { + Cb.add(L1Addr, OffsetReg); + } else { + L1Offset = 0; + Cb.mov(L1Addr, PointerReg); + } + }; + const auto L2Increment = [&Cb, &L2Addr, &OffsetReg]() { Cb.add(L2Addr, OffsetReg); }; + const auto L3Increment = [&Cb, &L3Addr, &OffsetReg]() { Cb.add(L3Addr, OffsetReg); }; + const auto RamIncrement = [&Cb, &RamAddr, &OffsetReg]() { Cb.add(RamAddr, OffsetReg); }; - for (unsigned count = 0; count < repetitions; count++) { - for (const auto& item : sequence) { + for (unsigned Count = 0; Count < Repetitions; Count++) { + for (const auto& Item : Sequence) { // swap second and third param of fma instruction to force bitchanges on // the pipes to its execution units - Ymm secondParam; - Ymm thirdParam; - if (0 == itemCount % 2) { - secondParam = ymm0; - thirdParam = ymm1; + Ymm SecondParam; + Ymm ThirdParam; + if (0 == ItemCount % 2) { + SecondParam = ymm0; + ThirdParam = ymm1; } else { - secondParam = ymm1; - thirdParam = ymm0; + SecondParam = ymm1; + ThirdParam = ymm0; } - if (item == "REG") { - cb.vfmadd231pd(Ymm(add_dest), secondParam, thirdParam); - cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]); - if (left) { - cb.shr(shift_reg[shift_pos], Imm(1)); + if (Item == "REG") { + Cb.vfmadd231pd(Ymm(AddDest), SecondParam, ThirdParam); + Cb.xor_(TempReg, ShiftRegs[(ShiftPos + NbShiftRegs - 1) % NbShiftRegs]); + if (Left) { + Cb.shr(ShiftRegs[ShiftPos], Imm(1)); } else { - cb.shl(shift_reg[shift_pos], Imm(1)); + Cb.shl(ShiftRegs[ShiftPos], Imm(1)); } - } else if (item == "L1_LS") { - cb.vfmadd231pd(Ymm(add_dest), secondParam, ymmword_ptr(l1_addr, 32)); - cb.vmovapd(xmmword_ptr(l1_addr, 64), Xmm(add_dest)); - L1_INCREMENT(); - } else if (item == "L2_L") { - cb.vfmadd231pd(Ymm(add_dest), secondParam, ymmword_ptr(l2_addr, 64)); - cb.xor_(temp_reg, 
shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]); - L2_INCREMENT(); - } else if (item == "L3_L") { - cb.vfmadd231pd(Ymm(add_dest), secondParam, ymmword_ptr(l3_addr, 64)); - cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]); - L3_INCREMENT(); - } else if (item == "RAM_L") { - cb.vfmadd231pd(Ymm(ram_reg), secondParam, ymmword_ptr(ram_addr, 32)); - cb.xor_(temp_reg, shift_reg[(shift_pos + nr_shift_regs - 1) % nr_shift_regs]); - RAM_INCREMENT(); + } else if (Item == "L1_LS") { + Cb.vfmadd231pd(Ymm(AddDest), SecondParam, ymmword_ptr(L1Addr, 32)); + Cb.vmovapd(xmmword_ptr(L1Addr, 64), Xmm(AddDest)); + L1Increment(); + } else if (Item == "L2_L") { + Cb.vfmadd231pd(Ymm(AddDest), SecondParam, ymmword_ptr(L2Addr, 64)); + Cb.xor_(TempReg, ShiftRegs[(ShiftPos + NbShiftRegs - 1) % NbShiftRegs]); + L2Increment(); + } else if (Item == "L3_L") { + Cb.vfmadd231pd(Ymm(AddDest), SecondParam, ymmword_ptr(L3Addr, 64)); + Cb.xor_(TempReg, ShiftRegs[(ShiftPos + NbShiftRegs - 1) % NbShiftRegs]); + L3Increment(); + } else if (Item == "RAM_L") { + Cb.vfmadd231pd(Ymm(RamReg), SecondParam, ymmword_ptr(RamAddr, 32)); + Cb.xor_(TempReg, ShiftRegs[(ShiftPos + NbShiftRegs - 1) % NbShiftRegs]); + RamIncrement(); } else { - workerLog::error() << "Instruction group " << item << " not found in " << this->name() << "."; + workerLog::error() << "Instruction group " << Item << " not found in " << name() << "."; return EXIT_FAILURE; } // make sure the shifts do could end up shifting out the data one end. 
- if (itemCount < (int)(sequence.size() * repetitions - (sequence.size() * repetitions) % 4)) { - switch (itemCount % 4) { + if (ItemCount < (Sequence.size() * Repetitions) - ((Sequence.size() * Repetitions) % 4)) { + switch (ItemCount % 4) { case 0: - cb.vpsrlq(Xmm(13), Xmm(13), Imm(1)); + Cb.vpsrlq(Xmm(13), Xmm(13), Imm(1)); break; case 1: - cb.vpsllq(Xmm(14), Xmm(14), Imm(1)); + Cb.vpsllq(Xmm(14), Xmm(14), Imm(1)); break; case 2: - cb.vpsllq(Xmm(13), Xmm(13), Imm(1)); + Cb.vpsllq(Xmm(13), Xmm(13), Imm(1)); break; case 3: - cb.vpsrlq(Xmm(14), Xmm(14), Imm(1)); + Cb.vpsrlq(Xmm(14), Xmm(14), Imm(1)); break; } } - itemCount++; + ItemCount++; - add_dest++; - if (add_dest > add_regs_end) { - add_dest = add_regs_start; + AddDest++; + if (AddDest > AddRegsEnd) { + AddDest = AddRegsStart; } - shift_pos++; - if (shift_pos == nr_shift_regs) { - shift_pos = 0; - left = !left; + ShiftPos++; + if (ShiftPos == NbShiftRegs) { + ShiftPos = 0; + Left = !Left; } } } - cb.movq(temp_reg, iter_reg); // restore iteration counter - if (this->getRAMSequenceCount(sequence) > 0) { + Cb.movq(TempReg, IterReg); // restore iteration counter + if (getRAMSequenceCount(Sequence) > 0) { // reset RAM counter - auto NoRamReset = cb.newLabel(); - - cb.sub(ram_count_reg, Imm(1)); - cb.jnz(NoRamReset); - cb.mov(ram_count_reg, Imm(ram_loop_count)); - cb.mov(ram_addr, pointer_reg); - cb.add(ram_addr, Imm(l3_size)); - cb.bind(NoRamReset); + auto NoRamReset = Cb.newLabel(); + + Cb.sub(RamCountReg, Imm(1)); + Cb.jnz(NoRamReset); + Cb.mov(RamCountReg, Imm(RamLoopCount)); + Cb.mov(RamAddr, PointerReg); + Cb.add(RamAddr, Imm(L3Size)); + Cb.bind(NoRamReset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - cb.inc(temp_reg); // increment iteration counter - if (this->getL2SequenceCount(sequence) > 0) { + Cb.inc(TempReg); // increment iteration counter + if (getL2SequenceCount(Sequence) > 0) { // reset L2-Cache counter - auto NoL2Reset = cb.newLabel(); - - cb.sub(l2_count_reg, 
Imm(1)); - cb.jnz(NoL2Reset); - cb.mov(l2_count_reg, Imm(l2_loop_count)); - cb.mov(l2_addr, pointer_reg); - cb.add(l2_addr, Imm(l1_size)); - cb.bind(NoL2Reset); + auto NoL2Reset = Cb.newLabel(); + + Cb.sub(L2CountReg, Imm(1)); + Cb.jnz(NoL2Reset); + Cb.mov(L2CountReg, Imm(L2LoopCount)); + Cb.mov(L2Addr, PointerReg); + Cb.add(L2Addr, Imm(L1Size)); + Cb.bind(NoL2Reset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - cb.movq(iter_reg, temp_reg); // store iteration counter - if (this->getL3SequenceCount(sequence) > 0) { + Cb.movq(IterReg, TempReg); // store iteration counter + if (getL3SequenceCount(Sequence) > 0) { // reset L3-Cache counter - auto NoL3Reset = cb.newLabel(); - - cb.sub(l3_count_reg, Imm(1)); - cb.jnz(NoL3Reset); - cb.mov(l3_count_reg, Imm(l3_loop_count)); - cb.mov(l3_addr, pointer_reg); - cb.add(l3_addr, Imm(l2_size)); - cb.bind(NoL3Reset); + auto NoL3Reset = Cb.newLabel(); + + Cb.sub(L3CountReg, Imm(1)); + Cb.jnz(NoL3Reset); + Cb.mov(L3CountReg, Imm(L3LoopCount)); + Cb.mov(L3Addr, PointerReg); + Cb.add(L3Addr, Imm(L2Size)); + Cb.bind(NoL3Reset); // adds always two instruction - this->Instructions += 2; + Instructions += 2; } - cb.mov(l1_addr, pointer_reg); + Cb.mov(L1Addr, PointerReg); - if (dumpRegisters) { - auto SkipRegistersDump = cb.newLabel(); + if (DumpRegisters) { + auto SkipRegistersDump = Cb.newLabel(); - cb.test(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait)); - cb.jnz(SkipRegistersDump); + Cb.test(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait)); + Cb.jnz(SkipRegistersDump); // dump all the ymm register - for (int i = 0; i < (int)this->registerCount(); i++) { - cb.vmovapd(ymmword_ptr(pointer_reg, -64 - this->registerSize() * 8 * (i + 1)), Ymm(i)); + for (unsigned I = 0; I < registerCount(); I++) { + Cb.vmovapd(ymmword_ptr(PointerReg, -64 - (registerSize() * 8 * (I + 1))), Ymm(I)); } // set read flag - cb.mov(ptr_64(pointer_reg, -8), Imm(firestarter::DumpVariable::Wait)); + 
Cb.mov(ptr_64(PointerReg, -8), Imm(firestarter::DumpVariable::Wait)); - cb.bind(SkipRegistersDump); + Cb.bind(SkipRegistersDump); } - if (errorDetection) { - this->emitErrorDetectionCode(cb, iter_reg, addrHigh_reg, pointer_reg, temp_reg, temp_reg2); + if (ErrorDetection) { + emitErrorDetectionCode(Cb, IterReg, AddrHighReg, PointerReg, TempReg, TempReg2); } - cb.test(ptr_64(addrHigh_reg), Imm(LOAD_HIGH)); - cb.jnz(Loop); + Cb.test(ptr_64(AddrHighReg), Imm(LOAD_HIGH)); + Cb.jnz(Loop); - cb.bind(FunctionExit); + Cb.bind(FunctionExit); - cb.movq(rax, iter_reg); + Cb.movq(rax, IterReg); - cb.emitEpilog(frame); + Cb.emitEpilog(Frame); - cb.finalize(); + Cb.finalize(); // String sb; // cb.dump(sb); - Error err = this->Rt.add(&this->LoadFunction, &code); - if (err) { + Error Err = Rt.add(&LoadFunction, &Code); + if (Err) { workerLog::error() << "Asmjit adding Assembler to JitRuntime failed in " << __FILE__ << " at " << __LINE__; return EXIT_FAILURE; } // skip if we could not determine cache size - if (l1i_cache_size != 0) { - auto loopSize = code.labelOffset(FunctionExit) - code.labelOffset(Loop); - auto instructionCachePercentage = 100 * loopSize / l1i_cache_size; + if (L1iCacheSize != 0) { + auto LoopSize = Code.labelOffset(FunctionExit) - Code.labelOffset(Loop); + auto InstructionCachePercentage = 100 * LoopSize / L1iCacheSize; - if (loopSize > l1i_cache_size) { + if (LoopSize > L1iCacheSize) { workerLog::warn() << "Work-loop is bigger than the L1i-Cache."; } - workerLog::trace() << "Using " << loopSize << " of " << l1i_cache_size << " Bytes (" << instructionCachePercentage + workerLog::trace() << "Using " << LoopSize << " of " << L1iCacheSize << " Bytes (" << InstructionCachePercentage << "%) from the L1i-Cache for the work-loop."; - workerLog::trace() << "Sequence size: " << sequence.size(); - workerLog::trace() << "Repetition count: " << repetitions; + workerLog::trace() << "Sequence size: " << Sequence.size(); + workerLog::trace() << "Repetition count: " << 
Repetitions; } return EXIT_SUCCESS; } -std::list ZENFMAPayload::getAvailableInstructions() const { - std::list instructions; +auto ZENFMAPayload::getAvailableInstructions() const -> std::list { + std::list Instructions; - transform(this->InstructionFlops.begin(), this->InstructionFlops.end(), back_inserter(instructions), - [](const auto& item) { return item.first; }); + transform(InstructionFlops.begin(), InstructionFlops.end(), back_inserter(Instructions), + [](const auto& Item) { return Item.first; }); - return instructions; + return Instructions; } -void ZENFMAPayload::init(uint64_t* memoryAddr, uint64_t bufferSize) { - X86Payload::init(memoryAddr, bufferSize, 0.27948995982e-4, 0.27948995982e-4); +void ZENFMAPayload::init(uint64_t* MemoryAddr, uint64_t BufferSize) { + X86Payload::init(MemoryAddr, BufferSize, 0.27948995982e-4, 0.27948995982e-4); } + +} // namespace firestarter::environment::x86::payload \ No newline at end of file diff --git a/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp b/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp index 0cc5abef..cbf977df 100644 --- a/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp +++ b/src/firestarter/Environment/X86/Platform/X86PlatformConfig.cpp @@ -21,5 +21,5 @@ // This file exists to get an entry in the compile commands database. Clangd will interpolate the include directories // for header files based on the source file with the best matching score. This file should be the best score for the -// included header. Therefore the we should not see any errors in this file for missing includes. For more infomation +// included header. Therefore we should not see any errors in this file for missing includes. 
For more infomation // look in the LLVM code base: clang/lib/Tooling/InterpolatingCompilationDatabase.cpp \ No newline at end of file diff --git a/src/firestarter/Environment/X86/X86CPUTopology.cpp b/src/firestarter/Environment/X86/X86CPUTopology.cpp index 283e7f61..df579c38 100644 --- a/src/firestarter/Environment/X86/X86CPUTopology.cpp +++ b/src/firestarter/Environment/X86/X86CPUTopology.cpp @@ -148,13 +148,13 @@ auto X86CPUTopology::clockrate() const -> uint64_t { ClockT::time_point EndTime; #if not(defined(__APPLE__) || defined(_WIN32)) - auto governor = scalingGovernor(); - if (governor.empty()) { + auto Governor = scalingGovernor(); + if (Governor.empty()) { return CPUTopology::clockrate(); } /* non invariant TSCs can be used if CPUs run at fixed frequency */ - if (!hasInvariantRdtsc() && governor.compare("performance") && governor.compare("powersave")) { + if (!hasInvariantRdtsc() && Governor.compare("performance") && Governor.compare("powersave")) { return CPUTopology::clockrate(); } diff --git a/src/firestarter/Environment/X86/X86Environment.cpp b/src/firestarter/Environment/X86/X86Environment.cpp index 2c2dabb0..2f24b683 100644 --- a/src/firestarter/Environment/X86/X86Environment.cpp +++ b/src/firestarter/Environment/X86/X86Environment.cpp @@ -42,14 +42,14 @@ void X86Environment::evaluateFunctions() { } auto X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePayload) -> int { - unsigned id = 1; - std::string defaultPayloadName(""); + unsigned Id = 1; + std::string DefaultPayloadName; // if functionId is 0 get the default or fallback for (const auto& Config : PlatformConfigs) { for (auto const& [thread, functionName] : Config->getThreadMap()) { // the selected function - if (id == FunctionId) { + if (Id == FunctionId) { if (!Config->isAvailable()) { log::error() << "Function " << FunctionId << " (\"" << functionName << "\") requires " << Config->payload().name() << ", which is not supported by the processor."; @@ -68,21 +68,20 @@ auto 
X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(*Config, thread, topology().instructionCacheSize()); return EXIT_SUCCESS; - } else { - defaultPayloadName = Config->payload().name(); } + DefaultPayloadName = Config->payload().name(); } - id++; + Id++; } } // no default found // use fallback if (0 == FunctionId) { - if (!defaultPayloadName.empty()) { + if (!DefaultPayloadName.empty()) { // default payload available, but number of threads per core is not // supported - log::warn() << "No " << defaultPayloadName << " code path for " << topology().numThreadsPerCore() + log::warn() << "No " << DefaultPayloadName << " code path for " << topology().numThreadsPerCore() << " threads per core!"; } log::warn() << topology().vendor() << " " << topology().model() @@ -93,21 +92,21 @@ auto X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa // fallback for (const auto& Config : FallbackPlatformConfigs) { if (Config->isAvailable()) { - auto selectedThread = 0; - auto selectedFunctionName = std::string(""); - for (auto const& [thread, functionName] : Config->getThreadMap()) { - if (thread == topology().numThreadsPerCore()) { - selectedThread = thread; - selectedFunctionName = functionName; + auto SelectedThread = 0; + auto SelectedFunctionName = std::string(""); + for (auto const& [Thread, FunctionName] : Config->getThreadMap()) { + if (Thread == topology().numThreadsPerCore()) { + SelectedThread = Thread; + SelectedFunctionName = FunctionName; } } - if (selectedThread == 0) { - selectedThread = Config->getThreadMap().begin()->first; - selectedFunctionName = Config->getThreadMap().begin()->second; + if (SelectedThread == 0) { + SelectedThread = Config->getThreadMap().begin()->first; + SelectedFunctionName = Config->getThreadMap().begin()->second; } - SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(*Config, selectedThread, + 
SelectedConfig = new ::firestarter::environment::platform::RuntimeConfig(*Config, SelectedThread, topology().instructionCacheSize()); - log::warn() << "Using function " << selectedFunctionName << " as fallback.\n" + log::warn() << "Using function " << SelectedFunctionName << " as fallback.\n" << "You can use the parameter --function to try other " "functions."; return EXIT_SUCCESS; @@ -124,68 +123,68 @@ auto X86Environment::selectFunction(unsigned FunctionId, bool AllowUnavailablePa return EXIT_FAILURE; } -int X86Environment::selectInstructionGroups(std::string groups) { - const std::string delimiter = ","; - const std::regex re("^(\\w+):(\\d+)$"); - const auto availableInstructionGroups = selectedConfig().platformConfig().payload().getAvailableInstructions(); +auto X86Environment::selectInstructionGroups(std::string Groups) -> int { + const std::string Delimiter = ","; + const std::regex Re("^(\\w+):(\\d+)$"); + const auto AvailableInstructionGroups = selectedConfig().platformConfig().payload().getAvailableInstructions(); - std::stringstream ss(groups); - std::vector> payloadSettings = {}; + std::stringstream Ss(Groups); + std::vector> PayloadSettings = {}; - while (ss.good()) { - std::string token; - std::smatch m; - std::getline(ss, token, ','); + while (Ss.good()) { + std::string Token; + std::smatch M; + std::getline(Ss, Token, ','); - if (std::regex_match(token, m, re)) { - if (std::find(availableInstructionGroups.begin(), availableInstructionGroups.end(), m[1].str()) == - availableInstructionGroups.end()) { - log::error() << "Invalid instruction-group: " << m[1].str() + if (std::regex_match(Token, M, Re)) { + if (std::find(AvailableInstructionGroups.begin(), AvailableInstructionGroups.end(), M[1].str()) == + AvailableInstructionGroups.end()) { + log::error() << "Invalid instruction-group: " << M[1].str() << "\n --run-instruction-groups format: multiple INST:VAL " "pairs comma-seperated"; return EXIT_FAILURE; } - int num = std::stoul(m[2].str()); - if (num == 
0) { + int Num = std::stoul(M[2].str()); + if (Num == 0) { log::error() << "instruction-group VAL may not contain number 0" << "\n --run-instruction-groups format: multiple INST:VAL " "pairs comma-seperated"; return EXIT_FAILURE; } - payloadSettings.push_back(std::make_pair(m[1].str(), num)); + PayloadSettings.emplace_back(M[1].str(), Num); } else { - log::error() << "Invalid symbols in instruction-group: " << token + log::error() << "Invalid symbols in instruction-group: " << Token << "\n --run-instruction-groups format: multiple INST:VAL " "pairs comma-seperated"; return EXIT_FAILURE; } } - selectedConfig().setPayloadSettings(payloadSettings); + selectedConfig().setPayloadSettings(PayloadSettings); - log::info() << " Running custom instruction group: " << groups; + log::info() << " Running custom instruction group: " << Groups; return EXIT_SUCCESS; } void X86Environment::printAvailableInstructionGroups() { - std::stringstream ss; + std::stringstream Ss; - for (auto const& item : selectedConfig().platformConfig().payload().getAvailableInstructions()) { - ss << item << ","; + for (auto const& Item : selectedConfig().platformConfig().payload().getAvailableInstructions()) { + Ss << Item << ","; } - auto s = ss.str(); - if (s.size() > 0) { - s.pop_back(); + auto S = Ss.str(); + if (S.size() > 0) { + S.pop_back(); } log::info() << " available instruction-groups for payload " << selectedConfig().platformConfig().payload().name() << ":\n" - << " " << s; + << " " << S; } -void X86Environment::setLineCount(unsigned lineCount) { selectedConfig().setLineCount(lineCount); } +void X86Environment::setLineCount(unsigned LineCount) { selectedConfig().setLineCount(LineCount); } void X86Environment::printSelectedCodePathSummary() { selectedConfig().printCodePathSummary(); } @@ -198,19 +197,19 @@ void X86Environment::printFunctionSummary() { "-------------------------------------------------------------" "-----------------------------"; - unsigned id = 1; - - for (auto const& config 
: PlatformConfigs) { - for (auto const& [thread, functionName] : config->getThreadMap()) { - const char* available = config->isAvailable() ? "yes" : "no"; - const char* fmt = " %4u | %-30s | %-24s | %s"; - int sz = std::snprintf(nullptr, 0, fmt, id, functionName.c_str(), available, - config->getDefaultPayloadSettingsString().c_str()); - std::vector buf(sz + 1); - std::snprintf(&buf[0], buf.size(), fmt, id, functionName.c_str(), available, - config->getDefaultPayloadSettingsString().c_str()); - log::info() << std::string(&buf[0]); - id++; + unsigned Id = 1; + + for (auto const& Config : PlatformConfigs) { + for (auto const& [thread, functionName] : Config->getThreadMap()) { + const char* Available = Config->isAvailable() ? "yes" : "no"; + const char* Fmt = " %4u | %-30s | %-24s | %s"; + int Sz = std::snprintf(nullptr, 0, Fmt, Id, functionName.c_str(), Available, + Config->getDefaultPayloadSettingsString().c_str()); + std::vector Buf(Sz + 1); + std::snprintf(Buf.data(), Buf.size(), Fmt, Id, functionName.c_str(), Available, + Config->getDefaultPayloadSettingsString().c_str()); + log::info() << std::string(Buf.data()); + Id++; } } } diff --git a/src/firestarter/Firestarter.cpp b/src/firestarter/Firestarter.cpp index 0df2c6c3..40e2f690 100644 --- a/src/firestarter/Firestarter.cpp +++ b/src/firestarter/Firestarter.cpp @@ -19,33 +19,32 @@ * Contact: daniel.hackenberg@tu-dresden.de *****************************************************************************/ +#include #include #include #if defined(linux) || defined(__linux__) +#include #include #include #include -extern "C" { -#include -} #endif #include #include -#include +#include #ifdef _MSC_VER #include #endif -using namespace firestarter; +namespace firestarter { Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds const& Timeout, unsigned LoadPercent, std::chrono::microseconds const& Period, unsigned RequestedNumThreads, std::string const& CpuBind, bool PrintFunctionSummary, unsigned 
FunctionId, bool ListInstructionGroups, std::string const& InstructionGroups, unsigned LineCount, bool AllowUnavailablePayload, bool DumpRegisters, - std::chrono::seconds const& DumpRegistersTimeDelta, std::string const& DumpRegistersOutpath, + std::chrono::seconds const& DumpRegistersTimeDelta, std::string DumpRegistersOutpath, bool ErrorDetection, int Gpus, unsigned GpuMatrixSize, bool GpuUseFloat, bool GpuUseDouble, bool ListMetrics, bool Measurement, std::chrono::milliseconds const& StartDelta, std::chrono::milliseconds const& StopDelta, @@ -54,7 +53,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds bool Optimize, std::chrono::seconds const& Preheat, std::string const& OptimizationAlgorithm, std::vector const& OptimizationMetrics, std::chrono::seconds const& EvaluationDuration, unsigned Individuals, - std::string const& OptimizeOutfile, unsigned Generations, double Nsga2Cr, double Nsga2M) + std::string OptimizeOutfile, unsigned Generations, double Nsga2Cr, double Nsga2M) : Argc(Argc) , Argv(Argv) , Timeout(Timeout) @@ -62,7 +61,7 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds , Period(Period) , DumpRegisters(DumpRegisters) , DumpRegistersTimeDelta(DumpRegistersTimeDelta) - , DumpRegistersOutpath(DumpRegistersOutpath) + , DumpRegistersOutpath(std::move(DumpRegistersOutpath)) , ErrorDetection(ErrorDetection) , Gpus(Gpus) , GpuMatrixSize(GpuMatrixSize) @@ -77,11 +76,11 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds , OptimizationMetrics(OptimizationMetrics) , EvaluationDuration(EvaluationDuration) , Individuals(Individuals) - , OptimizeOutfile(OptimizeOutfile) + , OptimizeOutfile(std::move(OptimizeOutfile)) , Generations(Generations) , Nsga2Cr(Nsga2Cr) , Nsga2M(Nsga2M) { - int returnCode; + int ReturnCode = 0; Load = (Period * LoadPercent) / 100; if (LoadPercent == 100 || Load == std::chrono::microseconds::zero()) { @@ -97,11 +96,11 @@ 
Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds #endif #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) - this->Environment = new environment::x86::X86Environment(); + Environment = new environment::x86::X86Environment(); #endif - if (EXIT_SUCCESS != (returnCode = this->environment().evaluateCpuAffinity(RequestedNumThreads, CpuBind))) { - std::exit(returnCode); + if (EXIT_SUCCESS != (ReturnCode = environment().evaluateCpuAffinity(RequestedNumThreads, CpuBind))) { + std::exit(ReturnCode); } #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) @@ -114,42 +113,42 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds } #endif - if (ErrorDetection && this->environment().requestedNumThreads() < 2) { + if (ErrorDetection && environment().requestedNumThreads() < 2) { throw std::invalid_argument("Option --error-detection must run with 2 or more threads. Number of " "threads is " + - std::to_string(this->environment().requestedNumThreads()) + "\n"); + std::to_string(environment().requestedNumThreads()) + "\n"); } - this->environment().evaluateFunctions(); + environment().evaluateFunctions(); if (PrintFunctionSummary) { - this->environment().printFunctionSummary(); + environment().printFunctionSummary(); std::exit(EXIT_SUCCESS); } - if (EXIT_SUCCESS != (returnCode = this->environment().selectFunction(FunctionId, AllowUnavailablePayload))) { - std::exit(returnCode); + if (EXIT_SUCCESS != (ReturnCode = environment().selectFunction(FunctionId, AllowUnavailablePayload))) { + std::exit(ReturnCode); } if (ListInstructionGroups) { - this->environment().printAvailableInstructionGroups(); + environment().printAvailableInstructionGroups(); std::exit(EXIT_SUCCESS); } if (!InstructionGroups.empty()) { - if (EXIT_SUCCESS != (returnCode = this->environment().selectInstructionGroups(InstructionGroups))) { - std::exit(returnCode); + if (EXIT_SUCCESS != (ReturnCode 
= environment().selectInstructionGroups(InstructionGroups))) { + std::exit(ReturnCode); } } if (LineCount != 0) { - this->environment().setLineCount(LineCount); + environment().setLineCount(LineCount); } #if defined(linux) || defined(__linux__) if (Measurement || ListMetrics || Optimize) { MeasurementWorker = std::make_shared( - MeasurementInterval, this->environment().requestedNumThreads(), MetricPaths, StdinMetrics); + MeasurementInterval, environment().requestedNumThreads(), MetricPaths, StdinMetrics); if (ListMetrics) { log::info() << MeasurementWorker->availableMetrics(); @@ -157,112 +156,108 @@ Firestarter::Firestarter(const int Argc, const char** Argv, std::chrono::seconds } // init all metrics - auto all = MeasurementWorker->metricNames(); - auto initialized = MeasurementWorker->initMetrics(all); + auto All = MeasurementWorker->metricNames(); + auto Initialized = MeasurementWorker->initMetrics(All); - if (initialized.size() == 0) { + if (Initialized.size() == 0) { log::error() << "No metrics initialized"; std::exit(EXIT_FAILURE); } // check if selected metrics are initialized - for (auto const& optimizationMetric : OptimizationMetrics) { - auto nameEqual = [optimizationMetric](auto const& name) { - auto invertedName = "-" + name; - return name.compare(optimizationMetric) == 0 || invertedName.compare(optimizationMetric) == 0; + for (auto const& OptimizationMetric : OptimizationMetrics) { + auto NameEqual = [OptimizationMetric](auto const& Name) { + auto InvertedName = "-" + Name; + return Name.compare(OptimizationMetric) == 0 || InvertedName.compare(OptimizationMetric) == 0; }; // metric name is not found - if (std::find_if(all.begin(), all.end(), nameEqual) == all.end()) { - log::error() << "Metric \"" << optimizationMetric << "\" does not exist."; + if (std::find_if(All.begin(), All.end(), NameEqual) == All.end()) { + log::error() << "Metric \"" << OptimizationMetric << "\" does not exist."; std::exit(EXIT_FAILURE); } // metric has not initialized properly 
- if (std::find_if(initialized.begin(), initialized.end(), nameEqual) == initialized.end()) { - log::error() << "Metric \"" << optimizationMetric << "\" failed to initialize."; + if (std::find_if(Initialized.begin(), Initialized.end(), NameEqual) == Initialized.end()) { + log::error() << "Metric \"" << OptimizationMetric << "\" failed to initialize."; std::exit(EXIT_FAILURE); } } } if (Optimize) { - auto applySettings = std::bind( - [this](std::vector> const& setting) { + auto ApplySettings = std::bind( + [this](std::vector> const& Setting) { using Clock = std::chrono::high_resolution_clock; - auto start = Clock::now(); + auto Start = Clock::now(); - for (auto& thread : this->LoadThreads) { - auto td = thread.second; + for (auto& Thread : LoadThreads) { + auto Td = Thread.second; - td->config().setPayloadSettings(setting); + Td->config().setPayloadSettings(Setting); } - for (auto const& thread : this->LoadThreads) { - auto td = thread.second; + for (auto const& Thread : LoadThreads) { + auto Td = Thread.second; - td->Mutex.lock(); + Td->Mutex.lock(); } - for (auto const& thread : this->LoadThreads) { - auto td = thread.second; + for (auto const& Thread : LoadThreads) { + auto Td = Thread.second; - td->Comm = THREAD_SWITCH; - td->Mutex.unlock(); + Td->Comm = THREAD_SWITCH; + Td->Mutex.unlock(); } - this->LoadVar = LOAD_SWITCH; + LoadVar = LOAD_SWITCH; - for (auto const& thread : this->LoadThreads) { - auto td = thread.second; - bool ack; + for (auto const& Thread : LoadThreads) { + auto Td = Thread.second; + bool Ack = false; do { - td->Mutex.lock(); - ack = td->Ack; - td->Mutex.unlock(); - } while (!ack); - - td->Mutex.lock(); - td->Ack = false; - td->Mutex.unlock(); + Td->Mutex.lock(); + Ack = Td->Ack; + Td->Mutex.unlock(); + } while (!Ack); + + Td->Mutex.lock(); + Td->Ack = false; + Td->Mutex.unlock(); } - this->LoadVar = LOAD_HIGH; + LoadVar = LOAD_HIGH; - this->signalWork(); + signalWork(); - uint64_t startTimestamp = 0xffffffffffffffff; - uint64_t 
stopTimestamp = 0; + uint64_t StartTimestamp = 0xffffffffffffffff; + uint64_t StopTimestamp = 0; - for (auto const& thread : this->LoadThreads) { - auto td = thread.second; + for (auto const& Thread : LoadThreads) { + auto Td = Thread.second; - if (startTimestamp > td->LastStartTsc) { - startTimestamp = td->LastStartTsc; - } - if (stopTimestamp < td->LastStopTsc) { - stopTimestamp = td->LastStopTsc; - } + StartTimestamp = std::min(StartTimestamp, Td->LastStartTsc); + StopTimestamp = std::max(StopTimestamp, Td->LastStopTsc); } - for (auto const& thread : this->LoadThreads) { - auto td = thread.second; - ipcEstimateMetricInsert((double)td->LastIterations * - (double)this->LoadThreads.front().second->config().payload().instructions() / - (double)(stopTimestamp - startTimestamp)); + for (auto const& Thread : LoadThreads) { + auto Td = Thread.second; + ipcEstimateMetricInsert((double)Td->LastIterations * + static_cast(LoadThreads.front().second->config().payload().instructions()) / + static_cast(StopTimestamp - StartTimestamp)); } - auto end = Clock::now(); + auto End = Clock::now(); log::trace() << "Switching payload took " - << std::chrono::duration_cast(end - start).count() << "ms"; + << std::chrono::duration_cast(End - Start).count() << "ms"; }, std::placeholders::_1); - auto prob = std::make_shared( - std::move(applySettings), MeasurementWorker, OptimizationMetrics, EvaluationDuration, StartDelta, StopDelta, - this->environment().selectedConfig().payloadItems()); + auto Prob = std::make_shared( + std::move(ApplySettings), MeasurementWorker, OptimizationMetrics, EvaluationDuration, StartDelta, StopDelta, + environment().selectedConfig().payloadItems()); - Population = firestarter::optimizer::Population(std::move(prob)); + Population = firestarter::optimizer::Population(std::move(Prob)); if (OptimizationAlgorithm == "NSGA2") { Algorithm = std::make_unique(Generations, Nsga2Cr, Nsga2M); @@ -274,14 +269,14 @@ Firestarter::Firestarter(const int Argc, const char** Argv, 
std::chrono::seconds } #endif - this->environment().printSelectedCodePathSummary(); + environment().printSelectedCodePathSummary(); - log::info() << this->environment().topology(); + log::info() << environment().topology(); // setup thread with either high or low load configured at the start // low loads has to know the length of the period - if (EXIT_SUCCESS != (returnCode = this->initLoadWorkers((LoadPercent == 0), Period.count()))) { - std::exit(returnCode); + if (EXIT_SUCCESS != (ReturnCode = initLoadWorkers((LoadPercent == 0), Period.count()))) { + std::exit(ReturnCode); } // add some signal handler for aborting FIRESTARTER @@ -305,14 +300,14 @@ Firestarter::~Firestarter() { } void Firestarter::mainThread() { - this->environment().printThreadSummary(); + environment().printThreadSummary(); #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_HIP) - _cuda = std::make_unique(&this->loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus); + _cuda = std::make_unique(&loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus); #endif #ifdef FIRESTARTER_BUILD_ONEAPI - _oneapi = std::make_unique(&this->loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus); + _oneapi = std::make_unique(&loadVar, _gpuUseFloat, _gpuUseDouble, _gpuMatrixSize, _gpus); #endif #if defined(linux) || defined(__linux__) @@ -322,24 +317,24 @@ void Firestarter::mainThread() { } #endif - this->signalWork(); + signalWork(); #ifdef FIRESTARTER_DEBUG_FEATURES if (DumpRegisters) { - int returnCode; - if (EXIT_SUCCESS != (returnCode = this->initDumpRegisterWorker(DumpRegistersTimeDelta, DumpRegistersOutpath))) { - std::exit(returnCode); + int ReturnCode = 0; + if (EXIT_SUCCESS != (ReturnCode = initDumpRegisterWorker(DumpRegistersTimeDelta, DumpRegistersOutpath))) { + std::exit(ReturnCode); } } #endif // worker thread for load control - this->watchdogWorker(Period, Load, Timeout); + watchdogWorker(Period, Load, Timeout); #if defined(linux) || defined(__linux__) // check if 
optimization is selected if (Optimize) { - auto startTime = optimizer::History::getTime(); + auto StartTime = optimizer::History::getTime(); Firestarter::Optimizer = std::make_unique(std::move(Algorithm), Population, OptimizationAlgorithm, Individuals, Preheat); @@ -347,12 +342,12 @@ void Firestarter::mainThread() { // wait here until optimizer thread terminates Firestarter::Optimizer->join(); - auto payloadItems = this->environment().selectedConfig().payloadItems(); + auto PayloadItems = environment().selectedConfig().payloadItems(); - firestarter::optimizer::History::save(OptimizeOutfile, startTime, payloadItems, Argc, Argv); + firestarter::optimizer::History::save(OptimizeOutfile, StartTime, PayloadItems, Argc, Argv); // print the best 20 according to each metric - firestarter::optimizer::History::printBest(OptimizationMetrics, payloadItems); + firestarter::optimizer::History::printBest(OptimizationMetrics, PayloadItems); // stop all the load threads std::raise(SIGTERM); @@ -360,15 +355,15 @@ void Firestarter::mainThread() { #endif // wait for watchdog to timeout or until user terminates - this->joinLoadWorkers(); + joinLoadWorkers(); #ifdef FIRESTARTER_DEBUG_FEATURES if (DumpRegisters) { - this->joinDumpRegisterWorker(); + joinDumpRegisterWorker(); } #endif if (!Optimize) { - this->printPerformanceReport(); + printPerformanceReport(); } #if defined(linux) || defined(__linux__) @@ -384,13 +379,13 @@ void Firestarter::mainThread() { #endif if (ErrorDetection) { - this->printThreadErrorReport(); + printThreadErrorReport(); } } -void Firestarter::setLoad(uint64_t value) { +void Firestarter::setLoad(uint64_t Value) { // signal load change to workers - Firestarter::LoadVar = value; + Firestarter::LoadVar = Value; #if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) #ifndef _MSC_VER __asm__ __volatile__("mfence;"); @@ -402,17 +397,17 @@ void Firestarter::setLoad(uint64_t value) { #endif } -void Firestarter::sigalrmHandler(int signum) { 
(void)signum; } +void Firestarter::sigalrmHandler(int Signum) { (void)Signum; } -void Firestarter::sigtermHandler(int signum) { - (void)signum; +void Firestarter::sigtermHandler(int Signum) { + (void)Signum; Firestarter::setLoad(LOAD_STOP); // exit loop // used in case of 0 < load < 100 // or interrupt sleep for timeout { - std::lock_guard lk(Firestarter::WatchdogTerminateMutex); + std::lock_guard Lk(Firestarter::WatchdogTerminateMutex); Firestarter::WatchdogTerminate = true; } Firestarter::WatchdogTerminateAlert.notify_all(); @@ -424,3 +419,5 @@ void Firestarter::sigtermHandler(int signum) { } #endif } + +} // namespace firestarter \ No newline at end of file diff --git a/src/firestarter/LoadWorker.cpp b/src/firestarter/LoadWorker.cpp index c5a998c5..09a68464 100644 --- a/src/firestarter/LoadWorker.cpp +++ b/src/firestarter/LoadWorker.cpp @@ -19,15 +19,14 @@ * Contact: daniel.hackenberg@tu-dresden.de *****************************************************************************/ +#include #include #include #include #include #if defined(linux) || defined(__linux__) -extern "C" { #include -} #endif #ifdef ENABLE_VTRACING @@ -39,133 +38,135 @@ extern "C" { #include #include -#include #include -using namespace firestarter; +namespace { +auto AlignedFreeDeleter = [](void* P) { ALIGNED_FREE(P); }; + +} -auto aligned_free_deleter = [](void* p) { ALIGNED_FREE(p); }; +namespace firestarter { -int Firestarter::initLoadWorkers(bool lowLoad, uint64_t period) { - int returnCode; +auto Firestarter::initLoadWorkers(bool LowLoad, uint64_t Period) -> int { + auto ReturnCode = environment().setCpuAffinity(0); - if (EXIT_SUCCESS != (returnCode = this->environment().setCpuAffinity(0))) { + if (EXIT_SUCCESS != ReturnCode) { return EXIT_FAILURE; } // setup load variable to execute low or high load once the threads switch to // work. - this->LoadVar = lowLoad ? LOAD_LOW : LOAD_HIGH; + LoadVar = LowLoad ? 
LOAD_LOW : LOAD_HIGH; - auto numThreads = this->environment().requestedNumThreads(); + auto NumThreads = environment().requestedNumThreads(); // create a std::vector> of requestenNumThreads() // communication pointers and add these to the threaddata if (ErrorDetection) { - for (uint64_t i = 0; i < numThreads; i++) { - auto commPtr = reinterpret_cast(ALIGNED_MALLOC(2 * sizeof(uint64_t), 64)); - assert(commPtr); - this->ErrorCommunication.push_back(std::shared_ptr(commPtr, aligned_free_deleter)); - log::debug() << "Threads " << (i + numThreads - 1) % numThreads << " and " << i << " commPtr = 0x" - << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex << (uint64_t)commPtr; + for (uint64_t I = 0; I < NumThreads; I++) { + auto* CommPtr = reinterpret_cast(ALIGNED_MALLOC(2 * sizeof(uint64_t), 64)); + assert(CommPtr); + ErrorCommunication.push_back(std::shared_ptr(CommPtr, AlignedFreeDeleter)); + log::debug() << "Threads " << (I + NumThreads - 1) % NumThreads << " and " << I << " commPtr = 0x" + << std::setfill('0') << std::setw(sizeof(uint64_t) * 2) << std::hex + << reinterpret_cast(CommPtr); } } - for (uint64_t i = 0; i < numThreads; i++) { - auto td = - std::make_shared(i, this->environment(), &this->LoadVar, period, DumpRegisters, ErrorDetection); + for (uint64_t I = 0; I < NumThreads; I++) { + auto Td = std::make_shared(I, environment(), &LoadVar, Period, DumpRegisters, ErrorDetection); if (ErrorDetection) { // distribute pointers for error deteciton. (set threads in a ring) // give this thread the left pointer i and right pointer (i+1) % // requestedNumThreads(). 
- td->setErrorCommunication(this->ErrorCommunication[i], this->ErrorCommunication[(i + 1) % numThreads]); + Td->setErrorCommunication(ErrorCommunication[I], ErrorCommunication[(I + 1) % NumThreads]); } - auto dataCacheSizeIt = td->config().platformConfig().dataCacheBufferSize().begin(); - auto ramBufferSize = td->config().platformConfig().ramBufferSize(); + auto DataCacheSizeIt = Td->config().platformConfig().dataCacheBufferSize().begin(); + auto RamBufferSize = Td->config().platformConfig().ramBufferSize(); - td->BuffersizeMem = - (*dataCacheSizeIt + *std::next(dataCacheSizeIt, 1) + *std::next(dataCacheSizeIt, 2) + ramBufferSize) / - td->config().thread() / sizeof(uint64_t); + Td->BuffersizeMem = + (*DataCacheSizeIt + *std::next(DataCacheSizeIt, 1) + *std::next(DataCacheSizeIt, 2) + RamBufferSize) / + Td->config().thread() / sizeof(uint64_t); // create the thread - std::thread t(Firestarter::loadThreadWorker, td); + std::thread T(Firestarter::loadThreadWorker, Td); - log::trace() << "Created thread #" << i << " with ID: " << t.get_id(); + log::trace() << "Created thread #" << I << " with ID: " << T.get_id(); - if (i == 0) { + if (I == 0) { // only show error for all worker threads except first. 
- firestarter::logging::FirstWorkerThreadFilter::setFirstThread(t.get_id()); + firestarter::logging::FirstWorkerThreadFilter::setFirstThread(T.get_id()); } - this->LoadThreads.push_back(std::make_pair(std::move(t), td)); + LoadThreads.emplace_back(std::move(T), Td); } - this->signalLoadWorkers(THREAD_INIT); + signalLoadWorkers(THREAD_INIT); return EXIT_SUCCESS; } -void Firestarter::signalLoadWorkers(int comm) { - bool ack; +void Firestarter::signalLoadWorkers(int Comm) { + bool Ack = false; // start the work - for (auto const& thread : this->LoadThreads) { - auto td = thread.second; + for (auto const& Thread : LoadThreads) { + auto Td = Thread.second; - td->Mutex.lock(); + Td->Mutex.lock(); } - for (auto const& thread : this->LoadThreads) { - auto td = thread.second; + for (auto const& Thread : LoadThreads) { + auto Td = Thread.second; - td->Comm = comm; - td->Mutex.unlock(); + Td->Comm = Comm; + Td->Mutex.unlock(); } - for (auto const& thread : this->LoadThreads) { - auto td = thread.second; + for (auto const& Thread : LoadThreads) { + auto Td = Thread.second; do { - td->Mutex.lock(); - ack = td->Ack; - td->Mutex.unlock(); - } while (!ack); - - td->Mutex.lock(); - td->Ack = false; - td->Mutex.unlock(); + Td->Mutex.lock(); + Ack = Td->Ack; + Td->Mutex.unlock(); + } while (!Ack); + + Td->Mutex.lock(); + Td->Ack = false; + Td->Mutex.unlock(); } } void Firestarter::joinLoadWorkers() { // wait for threads after watchdog has requested termination - for (auto& thread : this->LoadThreads) { - thread.first.join(); + for (auto& Thread : LoadThreads) { + Thread.first.join(); } } void Firestarter::printThreadErrorReport() { if (ErrorDetection) { - auto maxSize = this->LoadThreads.size(); + auto MaxSize = LoadThreads.size(); - std::vector errors(maxSize, false); + std::vector Errors(MaxSize, false); - for (decltype(maxSize) i = 0; i < maxSize; i++) { - auto errorDetectionStruct = this->LoadThreads[i].second->errorDetectionStruct(); + for (decltype(MaxSize) I = 0; I < MaxSize; 
I++) { + const auto* ErrorDetectionStructPtr = LoadThreads[I].second->errorDetectionStruct(); - if (errorDetectionStruct->ErrorLeft) { - errors[(i + maxSize - 1) % maxSize] = true; + if (ErrorDetectionStructPtr->ErrorLeft) { + Errors[(I + MaxSize - 1) % MaxSize] = true; } - if (errorDetectionStruct->ErrorRight) { - errors[i] = true; + if (ErrorDetectionStructPtr->ErrorRight) { + Errors[I] = true; } } - for (decltype(maxSize) i = 0; i < maxSize; i++) { - if (errors[i]) { - log::fatal() << "Data mismatch between Threads " << i << " and " << (i + 1) % maxSize + for (decltype(MaxSize) I = 0; I < MaxSize; I++) { + if (Errors[I]) { + log::fatal() << "Data mismatch between Threads " << I << " and " << (I + 1) % MaxSize << ".\n This may be caused by bit-flips in the hardware."; } } @@ -174,70 +175,61 @@ void Firestarter::printThreadErrorReport() { void Firestarter::printPerformanceReport() { // performance report - uint64_t startTimestamp = 0xffffffffffffffff; - uint64_t stopTimestamp = 0; + uint64_t StartTimestamp = 0xffffffffffffffff; + uint64_t StopTimestamp = 0; - uint64_t iterations = 0; + uint64_t Iterations = 0; log::debug() << "\nperformance report:\n"; - for (auto const& thread : this->LoadThreads) { - auto td = thread.second; + for (auto const& Thread : LoadThreads) { + auto Td = Thread.second; - log::debug() << "Thread " << td->id() << ": " << td->Iterations - << " iterations, tsc_delta: " << td->StopTsc - td->StartTsc; + log::debug() << "Thread " << Td->id() << ": " << Td->Iterations + << " iterations, tsc_delta: " << Td->StopTsc - Td->StartTsc; - if (startTimestamp > td->StartTsc) { - startTimestamp = td->StartTsc; - } - if (stopTimestamp < td->StopTsc) { - stopTimestamp = td->StopTsc; - } + StartTimestamp = std::min(StartTimestamp, Td->StartTsc); + StopTimestamp = std::max(StopTimestamp, Td->StopTsc); - iterations += td->Iterations; + Iterations += Td->Iterations; } - double runtime = (double)(stopTimestamp - startTimestamp) / 
(double)this->environment().topology().clockrate(); - double gFlops = - (double)this->LoadThreads.front().second->config().payload().flops() * 0.000000001 * (double)iterations / runtime; - double bandwidth = - (double)this->LoadThreads.front().second->config().payload().bytes() * 0.000000001 * (double)iterations / runtime; + double Runtime = + static_cast(StopTimestamp - StartTimestamp) / static_cast(environment().topology().clockrate()); + double GFlops = static_cast(LoadThreads.front().second->config().payload().flops()) * 0.000000001 * + static_cast(Iterations) / Runtime; + double Bandwidth = static_cast(LoadThreads.front().second->config().payload().bytes()) * 0.000000001 * + static_cast(Iterations) / Runtime; // insert values for ipc-estimate metric // if we are on linux #if defined(linux) || defined(__linux__) if (Measurement) { - for (auto const& thread : this->LoadThreads) { - auto td = thread.second; - ipcEstimateMetricInsert((double)td->Iterations * - (double)this->LoadThreads.front().second->config().payload().instructions() / - (double)(stopTimestamp - startTimestamp)); + for (auto const& Thread : LoadThreads) { + auto Td = Thread.second; + ipcEstimateMetricInsert(static_cast(Td->Iterations) * + static_cast(LoadThreads.front().second->config().payload().instructions()) / + static_cast(StopTimestamp - StartTimestamp)); } } #endif // format runtime, gflops and bandwidth %.2f - const char* fmt = "%.2f"; - int size; - -#define FORMAT(input) \ - size = std::snprintf(nullptr, 0, fmt, input); \ - std::vector input##Vector(size + 1); \ - std::snprintf(&input##Vector[0], input##Vector.size(), fmt, input); \ - auto input##String = std::string(&input##Vector[0]) + const auto FormatString = [](double Value) -> std::string { + const char* Fmt = "%.2f"; - FORMAT(runtime); - FORMAT(gFlops); - FORMAT(bandwidth); - -#undef FORMAT + auto Size = std::snprintf(nullptr, 0, Fmt, Value); + std::vector CharVec(Size + 1); + std::snprintf(CharVec.data(), CharVec.size(), Fmt, 
Value); + return {std::string(CharVec.data())}; + }; log::debug() << "\n" - << "total iterations: " << iterations << "\n" - << "runtime: " << runtimeString << " seconds (" << stopTimestamp - startTimestamp << " cycles)\n" + << "total iterations: " << Iterations << "\n" + << "runtime: " << FormatString(Runtime) << " seconds (" << StopTimestamp - StartTimestamp << " cycles)\n" << "\n" - << "estimated floating point performance: " << gFlopsString << " GFLOPS\n" - << "estimated memory bandwidth*: " << bandwidthString << " GB/s\n" + << "estimated floating point performance: " << FormatString(GFlops) << " GFLOPS\n" + << "estimated memory bandwidth*: " << FormatString(Bandwidth) << " GB/s\n" << "\n" << "* this estimate is highly unreliable if --function is used in order " "to " @@ -247,80 +239,80 @@ void Firestarter::printPerformanceReport() { << " executed on an unsupported architecture!"; } -void Firestarter::loadThreadWorker(std::shared_ptr td) { +void Firestarter::loadThreadWorker(std::shared_ptr Td) { - int old = THREAD_WAIT; + int Old = THREAD_WAIT; #if defined(linux) || defined(__linux__) pthread_setname_np(pthread_self(), "LoadWorker"); #endif for (;;) { - td->Mutex.lock(); - int comm = td->Comm; - td->Mutex.unlock(); + Td->Mutex.lock(); + int Comm = Td->Comm; + Td->Mutex.unlock(); - if (comm != old) { - old = comm; + if (Comm != Old) { + Old = Comm; - td->Mutex.lock(); - td->Ack = true; - td->Mutex.unlock(); + Td->Mutex.lock(); + Td->Ack = true; + Td->Mutex.unlock(); } else { std::this_thread::sleep_for(std::chrono::microseconds(1)); continue; } - switch (comm) { + switch (Comm) { // allocate and initialize memory case THREAD_INIT: // set affinity - td->environment().setCpuAffinity(td->id()); + Td->environment().setCpuAffinity(Td->id()); // compile payload - td->config().payload().compilePayload(td->config().payloadSettings(), td->config().instructionCacheSize(), - td->config().dataCacheBufferSize(), td->config().ramBufferSize(), - td->config().thread(), 
td->config().lines(), td->DumpRegisters, - td->ErrorDetection); + Td->config().payload().compilePayload(Td->config().payloadSettings(), Td->config().instructionCacheSize(), + Td->config().dataCacheBufferSize(), Td->config().ramBufferSize(), + Td->config().thread(), Td->config().lines(), Td->DumpRegisters, + Td->ErrorDetection); // allocate memory // if we should dump some registers, we use the first part of the memory // for them. - td->AddrMem = - reinterpret_cast(ALIGNED_MALLOC((td->BuffersizeMem + td->AddrOffset) * sizeof(uint64_t), 64)) + - td->AddrOffset; + Td->AddrMem = + reinterpret_cast(ALIGNED_MALLOC((Td->BuffersizeMem + Td->AddrOffset) * sizeof(uint64_t), 64)) + + Td->AddrOffset; // exit application on error - if (td->AddrMem - td->AddrOffset == nullptr) { - workerLog::error() << "Could not allocate memory for CPU load thread " << td->id() << "\n"; + if (Td->AddrMem - Td->AddrOffset == nullptr) { + workerLog::error() << "Could not allocate memory for CPU load thread " << Td->id() << "\n"; exit(ENOMEM); } - if (td->DumpRegisters) { - reinterpret_cast(td->AddrMem - td->AddrOffset)->DumpVar = DumpVariable::Wait; + if (Td->DumpRegisters) { + reinterpret_cast(Td->AddrMem - Td->AddrOffset)->DumpVar = DumpVariable::Wait; } - if (td->ErrorDetection) { - auto errorDetectionStruct = reinterpret_cast(td->AddrMem - td->AddrOffset); + if (Td->ErrorDetection) { + auto* ErrorDetectionStructPtr = reinterpret_cast(Td->AddrMem - Td->AddrOffset); - std::memset(errorDetectionStruct, 0, sizeof(ErrorDetectionStruct)); + std::memset(ErrorDetectionStructPtr, 0, sizeof(ErrorDetectionStruct)); // distribute left and right communication pointers - errorDetectionStruct->CommunicationLeft = td->CommunicationLeft.get(); - errorDetectionStruct->CommunicationRight = td->CommunicationRight.get(); + ErrorDetectionStructPtr->CommunicationLeft = Td->CommunicationLeft.get(); + ErrorDetectionStructPtr->CommunicationRight = Td->CommunicationRight.get(); // do first touch memset 0 for the 
communication pointers - std::memset((void*)errorDetectionStruct->CommunicationLeft, 0, sizeof(uint64_t) * 2); + std::memset((void*)ErrorDetectionStructPtr->CommunicationLeft, 0, sizeof(uint64_t) * 2); } // call init function - td->config().payload().init(td->AddrMem, td->BuffersizeMem); + Td->config().payload().init(Td->AddrMem, Td->BuffersizeMem); break; // perform stress test case THREAD_WORK: // record threads start timestamp - td->StartTsc = td->environment().topology().timestamp(); + Td->StartTsc = Td->environment().topology().timestamp(); // will be terminated by watchdog for (;;) { @@ -331,7 +323,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { #ifdef ENABLE_SCOREP SCOREP_USER_REGION_BY_NAME_BEGIN("HIGH", SCOREP_USER_REGION_TYPE_COMMON); #endif - td->Iterations = td->config().payload().highLoadFunction(td->AddrMem, td->AddrHigh, td->Iterations); + Td->Iterations = Td->config().payload().highLoadFunction(Td->AddrMem, Td->AddrHigh, Td->Iterations); // call low load function #ifdef ENABLE_VTRACING @@ -342,7 +334,7 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { SCOREP_USER_REGION_BY_NAME_END("HIGH"); SCOREP_USER_REGION_BY_NAME_BEGIN("LOW", SCOREP_USER_REGION_TYPE_COMMON); #endif - td->config().payload().lowLoadFunction(td->AddrHigh, td->Period); + Td->config().payload().lowLoadFunction(Td->AddrHigh, Td->Period); #ifdef ENABLE_VTRACING VT_USER_END("LOW_LOAD_FUNC"); #endif @@ -351,14 +343,14 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { #endif // terminate if master signals end of run and record stop timestamp - if (*td->AddrHigh == LOAD_STOP) { - td->StopTsc = td->environment().topology().timestamp(); + if (*Td->AddrHigh == LOAD_STOP) { + Td->StopTsc = Td->environment().topology().timestamp(); return; } - if (*td->AddrHigh == LOAD_SWITCH) { - td->StopTsc = td->environment().topology().timestamp(); + if (*Td->AddrHigh == LOAD_SWITCH) { + Td->StopTsc = Td->environment().topology().timestamp(); break; } @@ -366,19 +358,19 @@ 
void Firestarter::loadThreadWorker(std::shared_ptr td) { break; case THREAD_SWITCH: // compile payload - td->config().payload().compilePayload(td->config().payloadSettings(), td->config().instructionCacheSize(), - td->config().dataCacheBufferSize(), td->config().ramBufferSize(), - td->config().thread(), td->config().lines(), td->DumpRegisters, - td->ErrorDetection); + Td->config().payload().compilePayload(Td->config().payloadSettings(), Td->config().instructionCacheSize(), + Td->config().dataCacheBufferSize(), Td->config().ramBufferSize(), + Td->config().thread(), Td->config().lines(), Td->DumpRegisters, + Td->ErrorDetection); // call init function - td->config().payload().init(td->AddrMem, td->BuffersizeMem); + Td->config().payload().init(Td->AddrMem, Td->BuffersizeMem); // save old iteration count - td->LastIterations = td->Iterations; - td->LastStartTsc = td->StartTsc; - td->LastStopTsc = td->StopTsc; - td->Iterations = 0; + Td->LastIterations = Td->Iterations; + Td->LastStartTsc = Td->StartTsc; + Td->LastStopTsc = Td->StopTsc; + Td->Iterations = 0; break; case THREAD_WAIT: break; @@ -389,3 +381,5 @@ void Firestarter::loadThreadWorker(std::shared_ptr td) { } } } + +} // namespace firestarter \ No newline at end of file diff --git a/src/firestarter/Main.cpp b/src/firestarter/Main.cpp index 51b53177..9627adb2 100644 --- a/src/firestarter/Main.cpp +++ b/src/firestarter/Main.cpp @@ -27,7 +27,7 @@ #include struct Config { - inline static const std::vector> optionsMap = { + inline static const std::vector> OptionsMap = { {"information", "Information Options:\n"}, {"general", "General Options:\n"}, {"specialized-workloads", "Specialized workloads:\n"}, @@ -41,53 +41,55 @@ struct Config { }; // default parameters - std::chrono::seconds timeout; - unsigned loadPercent; - std::chrono::microseconds period; - unsigned requestedNumThreads; - std::string cpuBind = ""; - bool printFunctionSummary; - unsigned functionId; - bool listInstructionGroups; - std::string 
instructionGroups; - unsigned lineCount = 0; + std::chrono::seconds Timeout{}; + unsigned LoadPercent; + std::chrono::microseconds Period{}; + unsigned RequestedNumThreads; + std::string CpuBind; + bool PrintFunctionSummary; + unsigned FunctionId; + bool ListInstructionGroups; + std::string InstructionGroups; + unsigned LineCount = 0; // debug features - bool allowUnavailablePayload = false; - bool dumpRegisters = false; - std::chrono::seconds dumpRegistersTimeDelta = std::chrono::seconds(0); - std::string dumpRegistersOutpath = ""; - bool errorDetection = false; + bool AllowUnavailablePayload = false; + bool DumpRegisters = false; + std::chrono::seconds DumpRegistersTimeDelta = std::chrono::seconds(0); + std::string DumpRegistersOutpath; + bool ErrorDetection = false; // CUDA parameters - int gpus = 0; - unsigned gpuMatrixSize = 0; - bool gpuUseFloat = false; - bool gpuUseDouble = false; + int Gpus = 0; + unsigned GpuMatrixSize = 0; + bool GpuUseFloat = false; + bool GpuUseDouble = false; // linux features - bool listMetrics = false; - bool measurement = false; - std::chrono::milliseconds startDelta = std::chrono::milliseconds(0); - std::chrono::milliseconds stopDelta = std::chrono::milliseconds(0); - std::chrono::milliseconds measurementInterval = std::chrono::milliseconds(0); - std::vector stdinMetrics; + bool ListMetrics = false; + bool Measurement = false; + std::chrono::milliseconds StartDelta = std::chrono::milliseconds(0); + std::chrono::milliseconds StopDelta = std::chrono::milliseconds(0); + std::chrono::milliseconds MeasurementInterval = std::chrono::milliseconds(0); + std::vector StdinMetrics; // linux and dynamic linked binary - std::vector metricPaths; + std::vector MetricPaths; // optimization - bool optimize = false; - std::chrono::seconds preheat; - std::string optimizationAlgorithm; - std::vector optimizationMetrics; - std::chrono::seconds evaluationDuration; - unsigned individuals; - std::string optimizeOutfile = ""; - unsigned generations; - 
double nsga2_cr; - double nsga2_m; - - Config(int argc, const char** argv); + bool Optimize = false; + std::chrono::seconds Preheat{}; + std::string OptimizationAlgorithm; + std::vector OptimizationMetrics; + std::chrono::seconds EvaluationDuration{}; + unsigned Individuals; + std::string OptimizeOutfile; + unsigned Generations; + double Nsga2Cr; + double Nsga2M; + + Config(int Argc, const char** Argv); }; -void print_copyright() { +namespace { + +void printCopyright() { firestarter::log::info() << "This program is free software: you can redistribute it and/or " "modify\n" << "it under the terms of the GNU General Public License as published " @@ -100,7 +102,7 @@ void print_copyright() { ".\n"; } -void print_warranty() { +void printWarranty() { firestarter::log::info() << "This program is distributed in the hope that it will be useful,\n" << "but WITHOUT ANY WARRANTY; without even the implied warranty of\n" << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" @@ -111,20 +113,20 @@ void print_warranty() { ".\n"; } -void print_help(cxxopts::Options const& parser, std::string const& section) { - std::vector> options(Config::optionsMap.size()); +void printHelp(cxxopts::Options const& Parser, std::string const& Section) { + std::vector> Options(Config::OptionsMap.size()); - if (section.size() == 0) { - std::copy(Config::optionsMap.begin(), Config::optionsMap.end(), options.begin()); + if (Section.size() == 0) { + std::copy(Config::OptionsMap.begin(), Config::OptionsMap.end(), Options.begin()); } else { - auto findSection = [&](std::pair const& pair) { return pair.first == section; }; - auto it = std::copy_if(Config::optionsMap.begin(), Config::optionsMap.end(), options.begin(), findSection); - options.resize(std::distance(options.begin(), it)); + auto FindSection = [&](std::pair const& Pair) { return Pair.first == Section; }; + auto It = std::copy_if(Config::OptionsMap.begin(), Config::OptionsMap.end(), Options.begin(), FindSection); + 
Options.resize(std::distance(Options.begin(), It)); } // clang-format off firestarter::log::info() - << parser.help(options) + << Parser.help(Options) << "Examples:\n" << " ./FIRESTARTER starts FIRESTARTER without timeout\n" << " ./FIRESTARTER -t 300 starts a 5 minute run of FIRESTARTER\n" @@ -155,12 +157,15 @@ void print_help(cxxopts::Options const& parser, std::string const& section) { // clang-format on } -Config::Config(int argc, const char** argv) { +} // namespace + +Config::Config(int Argc, const char** Argv) { + const auto* ExecutableName = *Argv; - cxxopts::Options parser(argv[0]); + cxxopts::Options Parser(ExecutableName); // clang-format off - parser.add_options("information") + Parser.add_options("information") ("h,help", "Display usage information. SECTION can be any of: information | general | specialized-workloads" #ifdef FIRESTARTER_DEBUG_FEATURES " | debug" @@ -178,7 +183,7 @@ Config::Config(int argc, const char** argv) { ("debug", "Print debug output") ("a,avail", "List available functions"); - parser.add_options("general") + Parser.add_options("general") ("i,function", "Specify integer ID of the load-function to be\nused (as listed by --avail)", cxxopts::value()->default_value("0"), "ID") #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_ONEAPI) || defined(FIRESTARTER_BUILD_HIP) @@ -206,7 +211,7 @@ Config::Config(int argc, const char** argv) { #endif ("error-detection", "Enable error detection. This aborts execution when the calculated data is corruped by errors. FIRESTARTER must run with 2 or more threads for this feature. Cannot be used with -l | --load and --optimize."); - parser.add_options("specialized-workloads") + Parser.add_options("specialized-workloads") ("list-instruction-groups", "List the available instruction groups for the\npayload of the current platform.") ("run-instruction-groups", "Run the payload with the specified\ninstruction groups. 
GROUPS format: multiple INST:VAL\npairs comma-seperated.", cxxopts::value()->default_value(""), "GROUPS") @@ -214,7 +219,7 @@ Config::Config(int argc, const char** argv) { cxxopts::value()); #ifdef FIRESTARTER_DEBUG_FEATURES - parser.add_options("debug") + Parser.add_options("debug") ("allow-unavailable-payload", "") ("dump-registers", "Dump the working registers on the first\nthread. Depending on the payload these are mm, xmm,\nymm or zmm. Only use it without a timeout and\n100 percent load. DELAY between dumps in secs. Cannot be used with --error-detection.", cxxopts::value()->implicit_value("10"), "DELAY") @@ -223,7 +228,7 @@ Config::Config(int argc, const char** argv) { #endif #if defined(linux) || defined(__linux__) - parser.add_options("measurement") + Parser.add_options("measurement") ("list-metrics", "List the available metrics.") #ifndef FIRESTARTER_LINK_STATIC ("metric-path", "Add a path to a shared library representing an interface for a metric. This option can be specified multiple times.", @@ -241,7 +246,7 @@ Config::Config(int argc, const char** argv) { ("preheat", "Preheat for N seconds, default: 240", cxxopts::value()->default_value("240"), "N"); - parser.add_options("optimization") + Parser.add_options("optimization") ("optimize", "Run the optimization with one of these algorithms: NSGA2.\nCannot be combined with --measurement.", cxxopts::value()) ("optimize-outfile", "Dump the output of the optimization into this\nfile, default: $PWD/$HOSTNAME_$DATE.json", @@ -260,176 +265,176 @@ Config::Config(int argc, const char** argv) { // clang-format on try { - auto options = parser.parse(argc, argv); + auto Options = Parser.parse(Argc, Argv); - if (options.count("quiet")) { + if (Options.count("quiet")) { firestarter::logging::Filter::set_severity(nitro::log::severity_level::warn); - } else if (options.count("report")) { + } else if (Options.count("report")) { firestarter::logging::Filter::set_severity(nitro::log::severity_level::debug); - } else if 
(options.count("debug")) { + } else if (Options.count("debug")) { firestarter::logging::Filter::set_severity(nitro::log::severity_level::trace); } else { firestarter::logging::Filter::set_severity(nitro::log::severity_level::info); } - if (options.count("version")) { + if (Options.count("version")) { std::exit(EXIT_SUCCESS); } - if (options.count("copyright")) { - print_copyright(); + if (Options.count("copyright")) { + printCopyright(); std::exit(EXIT_SUCCESS); } - if (options.count("warranty")) { - print_warranty(); + if (Options.count("warranty")) { + printWarranty(); std::exit(EXIT_SUCCESS); } - firestarter::log::info() << "This program comes with ABSOLUTELY NO WARRANTY; for details run `" << argv[0] + firestarter::log::info() << "This program comes with ABSOLUTELY NO WARRANTY; for details run `" << ExecutableName << " -w`.\n" << "This is free software, and you are welcome to redistribute it\n" - << "under certain conditions; run `" << argv[0] << " -c` for details.\n"; + << "under certain conditions; run `" << ExecutableName << " -c` for details.\n"; - if (options.count("help")) { - auto section = options["help"].as(); + if (Options.count("help")) { + auto Section = Options["help"].as(); // section not found - auto findSection = [&](std::pair const& pair) { return pair.first == section; }; - if (std::find_if(optionsMap.begin(), optionsMap.end(), findSection) == optionsMap.end() && section.size() != 0) { - throw std::invalid_argument("Section \"" + section + "\" not found in help."); + auto FindSection = [&](std::pair const& Pair) { return Pair.first == Section; }; + if (std::find_if(OptionsMap.begin(), OptionsMap.end(), FindSection) == OptionsMap.end() && Section.size() != 0) { + throw std::invalid_argument("Section \"" + Section + "\" not found in help."); } - print_help(parser, section); + printHelp(Parser, Section); std::exit(EXIT_SUCCESS); } - timeout = std::chrono::seconds(options["timeout"].as()); - loadPercent = options["load"].as(); - period = 
std::chrono::microseconds(options["period"].as()); + Timeout = std::chrono::seconds(Options["timeout"].as()); + LoadPercent = Options["load"].as(); + Period = std::chrono::microseconds(Options["period"].as()); - if (loadPercent > 100) { + if (LoadPercent > 100) { throw std::invalid_argument("Option -l/--load may not be above 100."); } - errorDetection = options.count("error-detection"); - if (errorDetection && loadPercent != 100) { + ErrorDetection = Options.count("error-detection"); + if (ErrorDetection && LoadPercent != 100) { throw std::invalid_argument("Option --error-detection may only be used " "with -l/--load equal 100."); } #ifdef FIRESTARTER_DEBUG_FEATURES - allowUnavailablePayload = options.count("allow-unavailable-payload"); - dumpRegisters = options.count("dump-registers"); - if (dumpRegisters) { - dumpRegistersTimeDelta = std::chrono::seconds(options["dump-registers"].as()); - if (timeout != std::chrono::microseconds::zero() && loadPercent != 100) { + AllowUnavailablePayload = Options.count("allow-unavailable-payload"); + DumpRegisters = Options.count("dump-registers"); + if (DumpRegisters) { + DumpRegistersTimeDelta = std::chrono::seconds(Options["dump-registers"].as()); + if (Timeout != std::chrono::microseconds::zero() && LoadPercent != 100) { throw std::invalid_argument("Option --dump-registers may only be used " "without a timeout and full load."); } - if (errorDetection) { + if (ErrorDetection) { throw std::invalid_argument("Options --dump-registers and --error-detection cannot be used " "together."); } } #endif - requestedNumThreads = options["threads"].as(); + RequestedNumThreads = Options["threads"].as(); #if (defined(linux) || defined(__linux__)) && defined(FIRESTARTER_THREAD_AFFINITY) - cpuBind = options["bind"].as(); - if (!cpuBind.empty()) { - if (requestedNumThreads != 0) { + CpuBind = Options["bind"].as(); + if (!CpuBind.empty()) { + if (RequestedNumThreads != 0) { throw std::invalid_argument("Options -b/--bind and -n/--threads cannot be 
used together."); } } #endif #if defined(FIRESTARTER_BUILD_CUDA) || defined(FIRESTARTER_BUILD_ONEAPI) || defined(FIRESTARTER_BUILD_HIP) - gpuUseFloat = options.count("usegpufloat"); - gpuUseDouble = options.count("usegpudouble"); + GpuUseFloat = Options.count("usegpufloat"); + GpuUseDouble = Options.count("usegpudouble"); - if (gpuUseFloat && gpuUseDouble) { + if (GpuUseFloat && GpuUseDouble) { throw std::invalid_argument("Options -f/--usegpufloat and " "-d/--usegpudouble cannot be used together."); } - gpuMatrixSize = options["matrixsize"].as(); - if (gpuMatrixSize > 0 && gpuMatrixSize < 64) { + GpuMatrixSize = Options["matrixsize"].as(); + if (GpuMatrixSize > 0 && GpuMatrixSize < 64) { throw std::invalid_argument("Option -m/--matrixsize may not be below 64."); } - gpus = options["gpus"].as(); + Gpus = Options["gpus"].as(); #endif - printFunctionSummary = options.count("avail"); + PrintFunctionSummary = Options.count("avail"); - functionId = options["function"].as(); + FunctionId = Options["function"].as(); - listInstructionGroups = options.count("list-instruction-groups"); - instructionGroups = options["run-instruction-groups"].as(); - if (options.count("set-line-count")) { - lineCount = options["set-line-count"].as(); + ListInstructionGroups = Options.count("list-instruction-groups"); + InstructionGroups = Options["run-instruction-groups"].as(); + if (Options.count("set-line-count")) { + LineCount = Options["set-line-count"].as(); } #if defined(linux) || defined(__linux__) - startDelta = std::chrono::milliseconds(options["start-delta"].as()); - stopDelta = std::chrono::milliseconds(options["stop-delta"].as()); - measurementInterval = std::chrono::milliseconds(options["measurement-interval"].as()); + StartDelta = std::chrono::milliseconds(Options["start-delta"].as()); + StopDelta = std::chrono::milliseconds(Options["stop-delta"].as()); + MeasurementInterval = std::chrono::milliseconds(Options["measurement-interval"].as()); #ifndef FIRESTARTER_LINK_STATIC - 
metricPaths = options["metric-path"].as>(); + MetricPaths = Options["metric-path"].as>(); #endif - if (options.count("metric-from-stdin")) { - stdinMetrics = options["metric-from-stdin"].as>(); + if (Options.count("metric-from-stdin")) { + StdinMetrics = Options["metric-from-stdin"].as>(); } - measurement = options.count("measurement"); - listMetrics = options.count("list-metrics"); + Measurement = Options.count("measurement"); + ListMetrics = Options.count("list-metrics"); - if ((optimize = options.count("optimize"))) { - if (errorDetection) { + if ((Optimize = Options.count("optimize"))) { + if (ErrorDetection) { throw std::invalid_argument("Options --error-detection and --optimize " "cannot be used together."); } - if (measurement) { + if (Measurement) { throw std::invalid_argument("Options --measurement and --optimize cannot be used together."); } - preheat = std::chrono::seconds(options["preheat"].as()); - optimizationAlgorithm = options["optimize"].as(); - if (options.count("optimization-metric")) { - optimizationMetrics = options["optimization-metric"].as>(); + Preheat = std::chrono::seconds(Options["preheat"].as()); + OptimizationAlgorithm = Options["optimize"].as(); + if (Options.count("optimization-metric")) { + OptimizationMetrics = Options["optimization-metric"].as>(); } - if (loadPercent != 100) { + if (LoadPercent != 100) { throw std::invalid_argument("Options -p | --period and -l | --load are " "not compatible with --optimize."); } - if (timeout == std::chrono::seconds::zero()) { + if (Timeout == std::chrono::seconds::zero()) { throw std::invalid_argument("Option -t | --timeout must be specified for optimization."); } - evaluationDuration = timeout; + EvaluationDuration = Timeout; // this will deactivate the watchdog worker - timeout = std::chrono::seconds::zero(); - individuals = options["individuals"].as(); - if (options.count("optimize-outfile")) { - optimizeOutfile = options["optimize-outfile"].as(); + Timeout = std::chrono::seconds::zero(); + 
Individuals = Options["individuals"].as(); + if (Options.count("optimize-outfile")) { + OptimizeOutfile = Options["optimize-outfile"].as(); } - generations = options["generations"].as(); - nsga2_cr = options["nsga2-cr"].as(); - nsga2_m = options["nsga2-m"].as(); + Generations = Options["generations"].as(); + Nsga2Cr = Options["nsga2-cr"].as(); + Nsga2M = Options["nsga2-m"].as(); - if (optimizationAlgorithm != "NSGA2") { + if (OptimizationAlgorithm != "NSGA2") { throw std::invalid_argument("Option --optimize must be any of: NSGA2"); } } #endif - } catch (std::exception& e) { - firestarter::log::error() << e.what() << "\n"; - print_help(parser, ""); + } catch (std::exception& E) { + firestarter::log::error() << E.what() << "\n"; + printHelp(Parser, ""); std::exit(EXIT_FAILURE); } } -int main(int argc, const char** argv) { +auto main(int argc, const char** argv) -> int { firestarter::log::info() << "FIRESTARTER - A Processor Stress Test Utility, Version " << _FIRESTARTER_VERSION_STRING << "\n" @@ -444,22 +449,22 @@ int main(int argc, const char** argv) { << "\n"; #endif - Config cfg{argc, argv}; + Config Cfg{argc, argv}; try { - firestarter::Firestarter firestarter( - argc, argv, cfg.timeout, cfg.loadPercent, cfg.period, cfg.requestedNumThreads, cfg.cpuBind, - cfg.printFunctionSummary, cfg.functionId, cfg.listInstructionGroups, cfg.instructionGroups, cfg.lineCount, - cfg.allowUnavailablePayload, cfg.dumpRegisters, cfg.dumpRegistersTimeDelta, cfg.dumpRegistersOutpath, - cfg.errorDetection, cfg.gpus, cfg.gpuMatrixSize, cfg.gpuUseFloat, cfg.gpuUseDouble, cfg.listMetrics, - cfg.measurement, cfg.startDelta, cfg.stopDelta, cfg.measurementInterval, cfg.metricPaths, cfg.stdinMetrics, - cfg.optimize, cfg.preheat, cfg.optimizationAlgorithm, cfg.optimizationMetrics, cfg.evaluationDuration, - cfg.individuals, cfg.optimizeOutfile, cfg.generations, cfg.nsga2_cr, cfg.nsga2_m); - - firestarter.mainThread(); - - } catch (std::exception const& e) { - firestarter::log::error() << 
e.what(); + firestarter::Firestarter Firestarter( + argc, argv, Cfg.Timeout, Cfg.LoadPercent, Cfg.Period, Cfg.RequestedNumThreads, Cfg.CpuBind, + Cfg.PrintFunctionSummary, Cfg.FunctionId, Cfg.ListInstructionGroups, Cfg.InstructionGroups, Cfg.LineCount, + Cfg.AllowUnavailablePayload, Cfg.DumpRegisters, Cfg.DumpRegistersTimeDelta, Cfg.DumpRegistersOutpath, + Cfg.ErrorDetection, Cfg.Gpus, Cfg.GpuMatrixSize, Cfg.GpuUseFloat, Cfg.GpuUseDouble, Cfg.ListMetrics, + Cfg.Measurement, Cfg.StartDelta, Cfg.StopDelta, Cfg.MeasurementInterval, Cfg.MetricPaths, Cfg.StdinMetrics, + Cfg.Optimize, Cfg.Preheat, Cfg.OptimizationAlgorithm, Cfg.OptimizationMetrics, Cfg.EvaluationDuration, + Cfg.Individuals, Cfg.OptimizeOutfile, Cfg.Generations, Cfg.Nsga2Cr, Cfg.Nsga2M); + + Firestarter.mainThread(); + + } catch (std::exception const& E) { + firestarter::log::error() << E.what(); return EXIT_FAILURE; } diff --git a/src/firestarter/Measurement/MeasurementWorker.cpp b/src/firestarter/Measurement/MeasurementWorker.cpp index 0c880bbb..e6d3305b 100644 --- a/src/firestarter/Measurement/MeasurementWorker.cpp +++ b/src/firestarter/Measurement/MeasurementWorker.cpp @@ -21,10 +21,7 @@ #include #include - -#include #include -#include #ifndef FIRESTARTER_LINK_STATIC extern "C" { @@ -32,99 +29,99 @@ extern "C" { } #endif -void insertCallback(void* cls, const char* metricName, int64_t timeSinceEpoch, double value) { - static_cast(cls)->insertCallback(metricName, timeSinceEpoch, value); +void insertCallback(void* Cls, const char* MetricName, int64_t TimeSinceEpoch, double Value) { + static_cast(Cls)->insertCallback(MetricName, TimeSinceEpoch, Value); } -using namespace firestarter::measurement; +namespace firestarter::measurement { -MeasurementWorker::MeasurementWorker(std::chrono::milliseconds updateInterval, uint64_t numThreads, - std::vector const& metricDylibs, - std::vector const& stdinMetrics) - : UpdateInterval(updateInterval) - , NumThreads(numThreads) { 
+MeasurementWorker::MeasurementWorker(std::chrono::milliseconds UpdateInterval, uint64_t NumThreads, + std::vector const& MetricDylibs, + std::vector const& StdinMetrics) + : UpdateInterval(UpdateInterval) + , NumThreads(NumThreads) { #ifndef FIRESTARTER_LINK_STATIC // open dylibs and find metric symbol. // create an entry in _metricDylibs with handle from dlopen and // metric_interface_t structure. add this structe as a pointer to metrics. - for (auto const& dylib : metricDylibs) { - void* handle; - const char* filename = dylib.c_str(); + for (auto const& Dylib : MetricDylibs) { + void* Handle = nullptr; + const char* Filename = Dylib.c_str(); - handle = dlopen(dylib.c_str(), RTLD_NOW | RTLD_LOCAL); + Handle = dlopen(Dylib.c_str(), RTLD_NOW | RTLD_LOCAL); - if (!handle) { - firestarter::log::error() << filename << ": " << dlerror(); + if (!Handle) { + firestarter::log::error() << Filename << ": " << dlerror(); continue; } // clear existing error dlerror(); - metric_interface_t* metric = nullptr; + MetricInterface* Metric = nullptr; - metric = (metric_interface_t*)dlsym(handle, "metric"); + Metric = static_cast(dlsym(Handle, "metric")); - char* error; - if ((error = dlerror()) != NULL) { - firestarter::log::error() << filename << ": " << error; - dlclose(handle); + char* Error = nullptr; + if ((Error = dlerror()) != nullptr) { + firestarter::log::error() << Filename << ": " << Error; + dlclose(Handle); continue; } - if (this->findMetricByName(metric->name) != nullptr) { - firestarter::log::error() << "A metric named \"" << metric->name << "\" is already loaded."; - dlclose(handle); + if (this->findMetricByName(Metric->Name) != nullptr) { + firestarter::log::error() << "A metric named \"" << Metric->Name << "\" is already loaded."; + dlclose(Handle); continue; } // lets push our metric object and the handle - this->_metricDylibs.push_back(handle); - this->metrics.push_back(metric); + this->MetricDylibs.push_back(Handle); + this->Metrics.push_back(Metric); } #else - 
(void)metricDylibs; + (void)MetricDylibs; #endif // setup metric objects for metric names passed from stdin. - for (auto const& name : stdinMetrics) { - if (this->findMetricByName(name) != nullptr) { - firestarter::log::error() << "A metric named \"" << name << "\" is already loaded."; + for (auto const& Name : StdinMetrics) { + if (this->findMetricByName(Name) != nullptr) { + firestarter::log::error() << "A metric named \"" << Name << "\" is already loaded."; continue; } - this->StdinMetrics.push_back(name); + this->StdinMetrics.push_back(Name); } - std::stringstream ss; - unsigned maxLength = 0; - std::map available; + std::stringstream Ss; + unsigned MaxLength = 0; + std::map Available; - for (auto const& metric : this->Metrics) { - std::string name(metric->Name); - maxLength = maxLength < name.size() ? name.size() : maxLength; - int returnCode = metric->Init(); - metric->Fini(); - available[name] = returnCode == EXIT_SUCCESS ? true : false; + for (auto const& Metric : this->Metrics) { + std::string Name(Metric->Name); + MaxLength = MaxLength < Name.size() ? Name.size() : MaxLength; + auto ReturnCode = Metric->Init(); + Metric->Fini(); + Available[Name] = ReturnCode == EXIT_SUCCESS; } - unsigned padding = maxLength > 6 ? maxLength - 6 : 0; - ss << " METRIC" << std::string(padding + 1, ' ') << "| available\n"; - ss << " " << std::string(padding + 7, '-') << "-----------\n"; - for (auto const& [key, value] : available) { - ss << " " << key << std::string(padding + 7 - key.size(), ' ') << "| "; - ss << (value ? "yes" : "no") << "\n"; + unsigned Padding = MaxLength > 6 ? MaxLength - 6 : 0; + Ss << " METRIC" << std::string(Padding + 1, ' ') << "| available\n"; + Ss << " " << std::string(Padding + 7, '-') << "-----------\n"; + for (auto const& [key, value] : Available) { + Ss << " " << key << std::string(Padding + 7 - key.size(), ' ') << "| "; + Ss << (value ? 
"yes" : "no") << "\n"; } - this->AvailableMetricsString = ss.str(); + this->AvailableMetricsString = Ss.str(); - pthread_create(&this->WorkerThread, NULL, + pthread_create(&this->WorkerThread, nullptr, reinterpret_cast(MeasurementWorker::dataAcquisitionWorker), this); // create a worker for getting metric values from stdin if (this->StdinMetrics.size() > 0) { - pthread_create(&this->StdinThread, NULL, + pthread_create(&this->StdinThread, nullptr, reinterpret_cast(MeasurementWorker::stdinDataAcquisitionWorker), this); } } @@ -132,39 +129,39 @@ MeasurementWorker::MeasurementWorker(std::chrono::milliseconds updateInterval, u MeasurementWorker::~MeasurementWorker() { pthread_cancel(this->WorkerThread); - pthread_join(this->WorkerThread, NULL); + pthread_join(this->WorkerThread, nullptr); if (this->StdinMetrics.size() > 0) { pthread_cancel(this->StdinThread); - pthread_join(this->StdinThread, NULL); + pthread_join(this->StdinThread, nullptr); } for (auto const& [key, value] : this->Values) { - auto metric = this->findMetricByName(key); - if (metric == nullptr) { + const auto* Metric = this->findMetricByName(key); + if (Metric == nullptr) { continue; } - metric->Fini(); + Metric->Fini(); } #ifndef FIRESTARTER_LINK_STATIC - for (auto handle : this->_metricDylibs) { - dlclose(handle); + for (auto* Handle : this->MetricDylibs) { + dlclose(Handle); } #endif } -std::vector MeasurementWorker::metricNames() { - std::vector metrics; - std::transform(this->Metrics.begin(), this->Metrics.end(), std::back_inserter(metrics), - [](auto& metric) -> std::string { return std::string(metric->Name); }); - for (auto const& name : this->StdinMetrics) { - metrics.push_back(name); +auto MeasurementWorker::metricNames() -> std::vector { + std::vector Metrics; + std::transform(this->Metrics.begin(), this->Metrics.end(), std::back_inserter(Metrics), + [](auto& Metric) -> std::string { return std::string(Metric->Name); }); + for (auto const& Name : this->StdinMetrics) { + Metrics.push_back(Name); 
} - return metrics; + return Metrics; } auto MeasurementWorker::findMetricByName(std::string MetricName) -> const MetricInterface* { @@ -184,49 +181,49 @@ auto MeasurementWorker::findMetricByName(std::string MetricName) -> const Metric auto MeasurementWorker::initMetrics(std::vector const& MetricNames) -> std::vector { this->ValuesMutex.lock(); - std::vector initialized = {}; + std::vector Initialized = {}; // try to find each metric and initialize it - for (auto const& metricName : MetricNames) { + for (auto const& MetricName : MetricNames) { // init values map with empty vector - auto name_equal = [metricName](auto const& pair) { return metricName.compare(pair.first) == 0; }; - auto pair = std::find_if(this->Values.begin(), this->Values.end(), name_equal); - if (pair != this->Values.end()) { - pair->second.clear(); + auto NameEqual = [MetricName](auto const& Pair) { return MetricName.compare(Pair.first) == 0; }; + auto Pair = std::find_if(this->Values.begin(), this->Values.end(), NameEqual); + if (Pair != this->Values.end()) { + Pair->second.clear(); } else { - auto metric = this->findMetricByName(metricName); - if (metric != nullptr) { - int returnValue = metric->Init(); - if (returnValue != EXIT_SUCCESS) { - log::error() << "Metric " << metric->Name << ": " << metric->GetError(); + const auto* Metric = this->findMetricByName(MetricName); + if (Metric != nullptr) { + int ReturnValue = Metric->Init(); + if (ReturnValue != EXIT_SUCCESS) { + log::error() << "Metric " << Metric->Name << ": " << Metric->GetError(); continue; } } - this->Values[metricName] = std::vector(); - if (metric != nullptr) { - if (metric->Type.InsertCallback) { - metric->RegisterInsertCallback(::insertCallback, this); + this->Values[MetricName] = std::vector(); + if (Metric != nullptr) { + if (Metric->Type.InsertCallback) { + Metric->RegisterInsertCallback(::insertCallback, this); } } - initialized.push_back(metricName); + Initialized.push_back(MetricName); } } this->ValuesMutex.unlock(); - 
return initialized; + return Initialized; } -void MeasurementWorker::insertCallback(const char* metricName, int64_t timeSinceEpoch, double value) { +void MeasurementWorker::insertCallback(const char* MetricName, int64_t TimeSinceEpoch, double Value) { this->ValuesMutex.lock(); using Duration = std::chrono::duration; - auto time = std::chrono::time_point(Duration(timeSinceEpoch)); - auto name_equal = [metricName](auto const& pair) { return std::string(metricName).compare(pair.first) == 0; }; - auto pair = std::find_if(this->Values.begin(), this->Values.end(), name_equal); + auto Time = std::chrono::time_point(Duration(TimeSinceEpoch)); + auto NameEqual = [MetricName](auto const& Pair) { return std::string(MetricName).compare(Pair.first) == 0; }; + auto Pair = std::find_if(this->Values.begin(), this->Values.end(), NameEqual); - if (pair != this->Values.end()) { - pair->second.push_back(TimeValue(time, value)); + if (Pair != this->Values.end()) { + Pair->second.emplace_back(Time, Value); } this->ValuesMutex.unlock(); @@ -234,171 +231,173 @@ void MeasurementWorker::insertCallback(const char* metricName, int64_t timeSince void MeasurementWorker::startMeasurement() { this->StartTime = std::chrono::high_resolution_clock::now(); } -std::map MeasurementWorker::getValues(std::chrono::milliseconds startDelta, - std::chrono::milliseconds stopDelta) { - std::map measurment = {}; +auto MeasurementWorker::getValues(std::chrono::milliseconds StartDelta, std::chrono::milliseconds StopDelta) + -> std::map { + std::map Measurment = {}; this->ValuesMutex.lock(); for (auto& [key, values] : this->Values) { - auto startTime = this->StartTime; - auto endTime = std::chrono::high_resolution_clock::now(); - auto metric = this->findMetricByName(key); + auto StartTime = this->StartTime; + auto EndTime = std::chrono::high_resolution_clock::now(); + const auto* Metric = this->findMetricByName(key); - MetricType type; - std::memset(&type, 0, sizeof(type)); - if (metric == nullptr) { - 
type.Absolute = 1; + MetricType Type; + std::memset(&Type, 0, sizeof(Type)); + if (Metric == nullptr) { + Type.Absolute = 1; - startTime += startDelta; - endTime -= stopDelta; + StartTime += StartDelta; + EndTime -= StopDelta; } else { - std::memcpy(&type, &metric->Type, sizeof(type)); + std::memcpy(&Type, &Metric->Type, sizeof(Type)); - if (metric->Type.IgnoreStartStopDelta == 0) { - startTime += startDelta; - endTime -= stopDelta; + if (Metric->Type.IgnoreStartStopDelta == 0) { + StartTime += StartDelta; + EndTime -= StopDelta; } } - decltype(values) croppedValues(values.size()); + decltype(values) CroppedValues(values.size()); - auto findAll = [startTime, endTime](auto const& tv) { return startTime <= tv.Time && tv.Time <= endTime; }; - auto it = std::copy_if(values.begin(), values.end(), croppedValues.begin(), findAll); - croppedValues.resize(std::distance(croppedValues.begin(), it)); + auto FindAll = [StartTime, EndTime](auto const& Tv) { return StartTime <= Tv.Time && Tv.Time <= EndTime; }; + auto It = std::copy_if(values.begin(), values.end(), CroppedValues.begin(), FindAll); + CroppedValues.resize(std::distance(CroppedValues.begin(), It)); - Summary sum = Summary::calculate(croppedValues.begin(), croppedValues.end(), type, this->NumThreads); + Summary Sum = Summary::calculate(CroppedValues.begin(), CroppedValues.end(), Type, this->NumThreads); - measurment[key] = sum; + Measurment[key] = Sum; } this->ValuesMutex.unlock(); - return measurment; + return Measurment; } -int* MeasurementWorker::dataAcquisitionWorker(void* measurementWorker) { +auto MeasurementWorker::dataAcquisitionWorker(void* MeasurementWorker) -> int* { - pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); + pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, nullptr); - auto _this = reinterpret_cast(measurementWorker); + auto* This = reinterpret_cast(MeasurementWorker); #ifndef __APPLE__ pthread_setname_np(pthread_self(), "DataAcquisition"); #endif - using clock = 
std::chrono::high_resolution_clock; + using Clock = std::chrono::high_resolution_clock; - using callbackTuple = + using CallbackTuple = std::tuple; - auto callbackTupleComparator = [](callbackTuple left, callbackTuple right) { - return std::get<2>(left) > std::get<2>(right); + auto CallbackTupleComparator = [](CallbackTuple Left, CallbackTuple Right) { + return std::get<2>(Left) > std::get<2>(Right); }; // this datastructure holds a tuple of our callback, the callback frequency // and the next timepoint. it will be sorted, so the pop function will give // back the next callback - std::priority_queue, decltype(callbackTupleComparator)> callbackQueue( - callbackTupleComparator); + std::priority_queue, decltype(CallbackTupleComparator)> CallbackQueue( + CallbackTupleComparator); - _this->ValuesMutex.lock(); + This->ValuesMutex.lock(); - for (auto const& [key, value] : _this->Values) { - auto metric_interface = _this->findMetricByName(key); + for (auto const& [key, value] : This->Values) { + const auto* MetricInterface = This->findMetricByName(key); - if (metric_interface == nullptr) { + if (MetricInterface == nullptr) { continue; } - auto callbackTime = std::chrono::microseconds(metric_interface->CallbackTime); - if (callbackTime.count() == 0) { + auto CallbackTime = std::chrono::microseconds(MetricInterface->CallbackTime); + if (CallbackTime.count() == 0) { continue; } - auto currentTime = clock::now(); + auto CurrentTime = Clock::now(); - callbackQueue.push(std::make_tuple(metric_interface->Callback, callbackTime, currentTime)); + CallbackQueue.emplace(MetricInterface->Callback, CallbackTime, CurrentTime); } - _this->ValuesMutex.unlock(); + This->ValuesMutex.unlock(); - auto nextFetch = clock::now() + _this->UpdateInterval; + auto NextFetch = Clock::now() + This->UpdateInterval; for (;;) { - auto now = clock::now(); + auto Now = Clock::now(); - if (nextFetch <= now) { - _this->ValuesMutex.lock(); + if (NextFetch <= Now) { + This->ValuesMutex.lock(); - for (auto& 
[metricName, values] : _this->Values) { - auto metric_interface = _this->findMetricByName(metricName); + for (auto& [metricName, values] : This->Values) { + const auto* MetricInterface = This->findMetricByName(metricName); - if (metric_interface == nullptr) { + if (MetricInterface == nullptr) { continue; } - double value; + double Value = NAN; - if (!metric_interface->Type.InsertCallback && metric_interface->GetReading != nullptr) { - if (EXIT_SUCCESS == metric_interface->GetReading(&value)) { - auto tv = TimeValue(std::chrono::high_resolution_clock::now(), value); - values.push_back(tv); + if (!MetricInterface->Type.InsertCallback && MetricInterface->GetReading != nullptr) { + if (EXIT_SUCCESS == MetricInterface->GetReading(&Value)) { + auto Tv = TimeValue(std::chrono::high_resolution_clock::now(), Value); + values.push_back(Tv); } } } - _this->ValuesMutex.unlock(); + This->ValuesMutex.unlock(); - nextFetch = now + _this->UpdateInterval; + NextFetch = Now + This->UpdateInterval; } - auto nextWake = nextFetch; + auto NextWake = NextFetch; - if (!callbackQueue.empty()) { - auto [callbackFunction, callbackTime, nextCallback] = callbackQueue.top(); + if (!CallbackQueue.empty()) { + auto [callbackFunction, callbackTime, nextCallback] = CallbackQueue.top(); - if (nextCallback <= now) { + if (nextCallback <= Now) { // remove the elment from the queue - callbackQueue.pop(); + CallbackQueue.pop(); // call our callback callbackFunction(); // add it with the updated callback time to the queue again - nextCallback = now + callbackTime; - callbackQueue.push(std::make_tuple(callbackFunction, callbackTime, nextCallback)); + nextCallback = Now + callbackTime; + CallbackQueue.emplace(callbackFunction, callbackTime, nextCallback); } - nextWake = nextCallback < nextWake ? nextCallback : nextWake; + NextWake = nextCallback < NextWake ? 
nextCallback : NextWake; } - std::this_thread::sleep_for(nextWake - clock::now()); + std::this_thread::sleep_for(NextWake - Clock::now()); } } -int* MeasurementWorker::stdinDataAcquisitionWorker(void* measurementWorker) { +auto MeasurementWorker::stdinDataAcquisitionWorker(void* MeasurementWorker) -> int* { - pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); + pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, nullptr); - auto _this = reinterpret_cast(measurementWorker); + auto* This = reinterpret_cast(MeasurementWorker); #ifndef __APPLE__ pthread_setname_np(pthread_self(), "StdinDataAcquis"); #endif - for (std::string line; std::getline(std::cin, line);) { - int64_t time; - double value; - char name[128]; - if (std::sscanf(line.c_str(), "%127s %ld %lf", name, &time, &value) == 3) { - auto name_equal = [name](auto const& allowedName) { return allowedName.compare(std::string(name)) == 0; }; - auto item = std::find_if(_this->stdinMetrics().begin(), _this->stdinMetrics().end(), name_equal); + for (std::string Line; std::getline(std::cin, Line);) { + int64_t Time = 0; + double Value = NAN; + char Name[128]; + if (std::sscanf(Line.c_str(), "%127s %ld %lf", Name, &Time, &Value) == 3) { + auto NameEqual = [Name](auto const& AllowedName) { return AllowedName.compare(std::string(Name)) == 0; }; + auto Item = std::find_if(This->stdinMetrics().begin(), This->stdinMetrics().end(), NameEqual); // metric name is allowed - if (item != _this->stdinMetrics().end()) { - _this->insertCallback(name, time, value); + if (Item != This->stdinMetrics().end()) { + This->insertCallback(Name, Time, Value); } } } - return NULL; + return nullptr; } + +} // namespace firestarter::measurement \ No newline at end of file diff --git a/src/firestarter/Measurement/Metric/IPCEstimate.cpp b/src/firestarter/Measurement/Metric/IPCEstimate.cpp index 9e18a6be..6bd5f7d9 100644 --- a/src/firestarter/Measurement/Metric/IPCEstimate.cpp +++ b/src/firestarter/Measurement/Metric/IPCEstimate.cpp @@ -28,48 
+28,48 @@ extern "C" { #include } -static std::string errorString = ""; +static std::string ErrorString; -static void (*callback)(void*, const char*, int64_t, double) = nullptr; -static void* callback_arg = nullptr; +static void (*Callback)(void*, const char*, int64_t, double) = nullptr; +static void* CallbackArg = nullptr; -static int32_t fini(void) { - callback = nullptr; - callback_arg = nullptr; +static auto fini() -> int32_t { + Callback = nullptr; + CallbackArg = nullptr; return EXIT_SUCCESS; } -static int32_t init(void) { - errorString = ""; +static auto init() -> int32_t { + ErrorString = ""; return EXIT_SUCCESS; } -static const char* get_error(void) { - const char* errorCString = errorString.c_str(); - return errorCString; +static auto getError() -> const char* { + const char* ErrorCString = ErrorString.c_str(); + return ErrorCString; } -static int32_t register_insert_callback(void (*c)(void*, const char*, int64_t, double), void* arg) { - callback = c; - callback_arg = arg; +static auto registerInsertCallback(void (*C)(void*, const char*, int64_t, double), void* Arg) -> int32_t { + Callback = C; + CallbackArg = Arg; return EXIT_SUCCESS; } void ipcEstimateMetricInsert(double Value) { - if (callback == nullptr || callback_arg == nullptr) { + if (Callback == nullptr || CallbackArg == nullptr) { return; } - int64_t t = + int64_t T = std::chrono::duration_cast(std::chrono::high_resolution_clock::now().time_since_epoch()) .count(); - callback(callback_arg, "ipc-estimate", t, Value); + Callback(CallbackArg, "ipc-estimate", T, Value); } -MetricInterface IpcEstimateMetric = { +const MetricInterface IpcEstimateMetric = { .Name = "ipc-estimate", .Type = {.Absolute = 1, .Accumalative = 0, @@ -83,6 +83,6 @@ MetricInterface IpcEstimateMetric = { .Init = init, .Fini = fini, .GetReading = nullptr, - .GetError = get_error, - .RegisterInsertCallback = register_insert_callback, + .GetError = getError, + .RegisterInsertCallback = registerInsertCallback, }; diff --git 
a/src/firestarter/Measurement/Metric/Perf.cpp b/src/firestarter/Measurement/Metric/Perf.cpp index 0d7a0225..d49cc0a0 100644 --- a/src/firestarter/Measurement/Metric/Perf.cpp +++ b/src/firestarter/Measurement/Metric/Perf.cpp @@ -31,66 +31,67 @@ extern "C" { #include #include -#define PERF_EVENT_PARANOID "/proc/sys/kernel/perf_event_paranoid" +static const std::string PerfEventParanoidFile = "/proc/sys/kernel/perf_event_paranoid"; -struct read_format { - uint64_t nr; +struct ReadFormat { + uint64_t Nr; struct { - uint64_t value; - uint64_t id; - } values[2]; + uint64_t Value; + uint64_t Id; + } Values[2]; }; -static std::string errorString = ""; +static std::string ErrorString; -static int cpu_cycles_fd = -1; -static int instructions_fd = -1; -static uint64_t cpu_cycles_id; -static uint64_t instructions_id; -static bool init_done = false; -static int32_t init_value; +static int CpuCyclesFd = -1; +static int InstructionsFd = -1; +static uint64_t CpuCyclesId; +static uint64_t InstructionsId; +static bool InitDone = false; +static int32_t InitValue; -static struct read_format last; +static struct ReadFormat Last; -static long perf_event_open(struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { - return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); +static auto perfEventOpen(struct perf_event_attr* HwEvent, pid_t Pid, int Cpu, int GroupFd, unsigned long Flags) + -> long { + return syscall(__NR_perf_event_open, HwEvent, Pid, Cpu, GroupFd, Flags); } -static int32_t fini(void) { - if (!(cpu_cycles_fd < 0)) { - close(cpu_cycles_fd); - cpu_cycles_fd = -1; +static auto fini() -> int32_t { + if (!(CpuCyclesFd < 0)) { + close(CpuCyclesFd); + CpuCyclesFd = -1; } - if (!(instructions_fd < 0)) { - close(instructions_fd); - instructions_fd = -1; + if (!(InstructionsFd < 0)) { + close(InstructionsFd); + InstructionsFd = -1; } - init_done = false; + InitDone = false; return EXIT_SUCCESS; } -static int32_t init(void) { - if 
(init_done) { - return init_value; +static auto init() -> int32_t { + if (InitDone) { + return InitValue; } - if (access(PERF_EVENT_PARANOID, F_OK) == -1) { + if (access(PerfEventParanoidFile.c_str(), F_OK) == -1) { // https://man7.org/linux/man-pages/man2/perf_event_open.2.html // The official way of knowing if perf_event_open() support is enabled // is checking for the existence of the file // /proc/sys/kernel/perf_event_paranoid. - errorString = "syscall perf_event_open not supported or file " PERF_EVENT_PARANOID " does not exist"; - init_value = EXIT_FAILURE; - init_done = true; + ErrorString = "syscall perf_event_open not supported or file " + PerfEventParanoidFile + " does not exist"; + InitValue = EXIT_FAILURE; + InitDone = true; return EXIT_FAILURE; } - struct perf_event_attr cpu_cycles_attr; - std::memset(&cpu_cycles_attr, 0, sizeof(struct perf_event_attr)); - cpu_cycles_attr.type = PERF_TYPE_HARDWARE; - cpu_cycles_attr.size = sizeof(struct perf_event_attr); - cpu_cycles_attr.config = PERF_COUNT_HW_CPU_CYCLES; - cpu_cycles_attr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; + struct perf_event_attr CpuCyclesAttr {}; + std::memset(&CpuCyclesAttr, 0, sizeof(struct perf_event_attr)); + CpuCyclesAttr.type = PERF_TYPE_HARDWARE; + CpuCyclesAttr.size = sizeof(struct perf_event_attr); + CpuCyclesAttr.config = PERF_COUNT_HW_CPU_CYCLES; + CpuCyclesAttr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; // https://man7.org/linux/man-pages/man2/perf_event_open.2.html // inherit // The inherit bit specifies that this counter should count @@ -110,28 +111,28 @@ static int32_t init(void) { // changed the check // - if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) // + if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ)) - cpu_cycles_attr.inherit = 1; - cpu_cycles_attr.exclude_kernel = 1; - cpu_cycles_attr.exclude_hv = 1; - - if ((cpu_cycles_fd = perf_event_open(&cpu_cycles_attr, - // pid == 0 and cpu == -1 - // This measures the calling 
process/thread on any CPU. - 0, -1, - // The group_fd argument allows event groups to be created. An event - // group has one event which is the group leader. The leader is - // created first, with group_fd = -1. The rest of the group members - // are created with subsequent perf_event_open() calls with group_fd - // being set to the file descriptor of the group leader. - -1, 0)) < 0) { + CpuCyclesAttr.inherit = 1; + CpuCyclesAttr.exclude_kernel = 1; + CpuCyclesAttr.exclude_hv = 1; + + if ((CpuCyclesFd = perfEventOpen(&CpuCyclesAttr, + // pid == 0 and cpu == -1 + // This measures the calling process/thread on any CPU. + 0, -1, + // The group_fd argument allows event groups to be created. An event + // group has one event which is the group leader. The leader is + // created first, with group_fd = -1. The rest of the group members + // are created with subsequent perf_event_open() calls with group_fd + // being set to the file descriptor of the group leader. + -1, 0)) < 0) { fini(); - errorString = "perf_event_open failed for PERF_COUNT_HW_CPU_CYCLES"; - init_value = EXIT_FAILURE; - init_done = true; + ErrorString = "perf_event_open failed for PERF_COUNT_HW_CPU_CYCLES"; + InitValue = EXIT_FAILURE; + InitDone = true; return EXIT_FAILURE; } - ioctl(cpu_cycles_fd, PERF_EVENT_IOC_ID, &cpu_cycles_id); + ioctl(CpuCyclesFd, PERF_EVENT_IOC_ID, &CpuCyclesId); struct perf_event_attr instructions_attr; std::memset(&instructions_attr, 0, sizeof(struct perf_event_attr)); @@ -143,94 +144,94 @@ static int32_t init(void) { instructions_attr.exclude_kernel = 1; instructions_attr.exclude_hv = 1; - if ((instructions_fd = perf_event_open(&instructions_attr, - // pid == 0 and cpu == -1 - // This measures the calling process/thread on any CPU. - 0, -1, - // The group_fd argument allows event groups to be created. An event - // group has one event which is the group leader. The leader is - // created first, with group_fd = -1. 
The rest of the group members - // are created with subsequent perf_event_open() calls with group_fd - // being set to the file descriptor of the group leader. - cpu_cycles_fd, 0)) < 0) { + if ((InstructionsFd = perfEventOpen(&instructions_attr, + // pid == 0 and cpu == -1 + // This measures the calling process/thread on any CPU. + 0, -1, + // The group_fd argument allows event groups to be created. An event + // group has one event which is the group leader. The leader is + // created first, with group_fd = -1. The rest of the group members + // are created with subsequent perf_event_open() calls with group_fd + // being set to the file descriptor of the group leader. + CpuCyclesFd, 0)) < 0) { fini(); - errorString = "perf_event_open failed for PERF_COUNT_HW_INSTRUCTIONS"; - init_value = EXIT_FAILURE; - init_done = true; + ErrorString = "perf_event_open failed for PERF_COUNT_HW_INSTRUCTIONS"; + InitValue = EXIT_FAILURE; + InitDone = true; return EXIT_FAILURE; } - ioctl(instructions_fd, PERF_EVENT_IOC_ID, &instructions_id); + ioctl(InstructionsFd, PERF_EVENT_IOC_ID, &InstructionsId); - ioctl(cpu_cycles_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP); - ioctl(cpu_cycles_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP); + ioctl(CpuCyclesFd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP); + ioctl(CpuCyclesFd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP); - if (0 == read(cpu_cycles_fd, &last, sizeof(last))) { + if (0 == read(CpuCyclesFd, &Last, sizeof(Last))) { fini(); - errorString = "group read failed in init"; - init_value = EXIT_FAILURE; - init_done = true; + ErrorString = "group read failed in init"; + InitValue = EXIT_FAILURE; + InitDone = true; return EXIT_FAILURE; } - init_value = EXIT_SUCCESS; - init_done = true; + InitValue = EXIT_SUCCESS; + InitDone = true; return EXIT_SUCCESS; } -static uint64_t value_from_id(struct read_format* values, uint64_t id) { - for (decltype(values->nr) i = 0; i < values->nr; ++i) { - if (id == values->values[i].id) { - return 
values->values[i].value; +static auto valueFromId(struct ReadFormat* Values, uint64_t Id) -> uint64_t { + for (decltype(Values->Nr) I = 0; I < Values->Nr; ++I) { + if (Id == Values->Values[I].Id) { + return Values->Values[I].Value; } } return 0; } -static int32_t get_reading(double* ipc_value, double* freq_value) { +static auto getReading(double* IpcValue, double* FreqValue) -> int32_t { - if (cpu_cycles_fd < 0 || instructions_fd < 0) { + if (CpuCyclesFd < 0 || InstructionsFd < 0) { fini(); return EXIT_FAILURE; } - struct read_format read_values; + struct ReadFormat ReadValues {}; - if (0 == read(cpu_cycles_fd, &read_values, sizeof(read_values))) { + if (0 == read(CpuCyclesFd, &ReadValues, sizeof(ReadValues))) { fini(); - errorString = "group read failed"; + ErrorString = "group read failed"; return EXIT_FAILURE; } - if (ipc_value != nullptr) { - uint64_t diff[2]; - diff[0] = value_from_id(&read_values, instructions_id) - value_from_id(&last, instructions_id); - diff[1] = value_from_id(&read_values, cpu_cycles_id) - value_from_id(&last, cpu_cycles_id); + if (IpcValue != nullptr) { + uint64_t Diff[2]; + Diff[0] = valueFromId(&ReadValues, InstructionsId) - valueFromId(&Last, InstructionsId); + Diff[1] = valueFromId(&ReadValues, CpuCyclesId) - valueFromId(&Last, CpuCyclesId); - std::memcpy(&last, &read_values, sizeof(last)); + std::memcpy(&Last, &ReadValues, sizeof(Last)); - *ipc_value = (double)diff[0] / (double)diff[1]; + *IpcValue = (double)Diff[0] / (double)Diff[1]; } - if (freq_value != nullptr) { - *freq_value = (double)value_from_id(&read_values, cpu_cycles_id) / 1e9; + if (FreqValue != nullptr) { + *FreqValue = (double)valueFromId(&ReadValues, CpuCyclesId) / 1e9; } return EXIT_SUCCESS; } -static int32_t get_reading_ipc(double* value) { return get_reading(value, nullptr); } +static auto getReadingIpc(double* Value) -> int32_t { return getReading(Value, nullptr); } -static int32_t get_reading_freq(double* value) { return get_reading(nullptr, value); } +static 
auto getReadingFreq(double* Value) -> int32_t { return getReading(nullptr, Value); } -static const char* get_error(void) { - const char* errorCString = errorString.c_str(); - return errorCString; +static auto getError() -> const char* { + const char* ErrorCString = ErrorString.c_str(); + return ErrorCString; } } -MetricInterface PerfIpcMetric = { +const MetricInterface PerfIpcMetric = { .Name = "perf-ipc", .Type = {.Absolute = 1, .Accumalative = 0, @@ -243,12 +244,12 @@ MetricInterface PerfIpcMetric = { .Callback = nullptr, .Init = init, .Fini = fini, - .GetReading = get_reading_ipc, - .GetError = get_error, + .GetReading = getReadingIpc, + .GetError = getError, .RegisterInsertCallback = nullptr, }; -MetricInterface PerfFreqMetric = { +const MetricInterface PerfFreqMetric = { .Name = "perf-freq", .Type = {.Absolute = 0, .Accumalative = 1, @@ -261,7 +262,7 @@ MetricInterface PerfFreqMetric = { .Callback = nullptr, .Init = init, .Fini = fini, - .GetReading = get_reading_freq, - .GetError = get_error, + .GetReading = getReadingFreq, + .GetError = getError, .RegisterInsertCallback = nullptr, }; diff --git a/src/firestarter/Measurement/Metric/RAPL.cpp b/src/firestarter/Measurement/Metric/RAPL.cpp index c73ef004..e9910fe7 100644 --- a/src/firestarter/Measurement/Metric/RAPL.cpp +++ b/src/firestarter/Measurement/Metric/RAPL.cpp @@ -32,42 +32,42 @@ extern "C" { #include -#define RAPL_PATH "/sys/class/powercap" +static const std::string RaplPath = "/sys/class/powercap"; -static std::string errorString = ""; +static std::string errorString; -struct reader_def { - char* path; - long long int last_reading; - long long int overflow; - long long int max; +struct ReaderDef { + char* Path; + long long int LastReading; + long long int Overflow; + long long int Max; }; -struct reader_def_free { - void operator()(struct reader_def* def) { - if (def != nullptr) { - if (((void*)def->path) != nullptr) { - free((void*)def->path); +struct ReaderDefFree { + void operator()(struct 
ReaderDef* Def) { + if (Def != nullptr) { + if (((void*)Def->Path) != nullptr) { + free((void*)Def->Path); } - free((void*)def); + free((void*)Def); } } }; -static std::vector> readers = {}; +static std::vector> Readers = {}; -static int32_t fini(void) { - readers.clear(); +static auto fini() -> int32_t { + Readers.clear(); return EXIT_SUCCESS; } -static int32_t init(void) { +static auto init() -> int32_t { errorString = ""; - DIR* raplDir = opendir(RAPL_PATH); - if (raplDir == NULL) { - errorString = "Could not open " RAPL_PATH; + DIR* RaplDir = opendir(RaplPath.c_str()); + if (RaplDir == nullptr) { + errorString = "Could not open " + RaplPath; return EXIT_FAILURE; } @@ -76,104 +76,104 @@ static int32_t init(void) { // and finally package only. // contains an empty path if it is not found - std::string psysPath = ""; + std::string PsysPath; // a vector of all paths to package and dram - std::vector paths = {}; + std::vector Paths = {}; - struct dirent* dir; - while ((dir = readdir(raplDir)) != NULL) { - std::stringstream path; - std::stringstream namePath; - path << RAPL_PATH << "/" << dir->d_name; - namePath << path.str() << "/name"; + struct dirent* Dir = nullptr; + while ((Dir = readdir(RaplDir)) != nullptr) { + std::stringstream Path; + std::stringstream NamePath; + Path << RaplPath << "/" << Dir->d_name; + NamePath << Path.str() << "/name"; - std::ifstream nameStream(namePath.str()); - if (!nameStream.good()) { + std::ifstream NameStream(NamePath.str()); + if (!NameStream.good()) { // an error opening the file occured continue; } - std::string name; - std::getline(nameStream, name); + std::string Name; + std::getline(NameStream, Name); - if (name == "psys") { + if (Name == "psys") { // found psys - psysPath = path.str(); - } else if (0 == name.rfind("package", 0) || name == "dram") { + PsysPath = Path.str(); + } else if (0 == Name.rfind("package", 0) || Name == "dram") { // find all package and dram - paths.push_back(path.str()); + 
Paths.push_back(Path.str()); } } - closedir(raplDir); + closedir(RaplDir); // make psys the only value if available - if (!psysPath.empty()) { - paths.clear(); - paths.push_back(psysPath); + if (!PsysPath.empty()) { + Paths.clear(); + Paths.push_back(PsysPath); } // paths now contains all interesting nodes - if (paths.size() == 0) { - errorString = "No valid entries in " RAPL_PATH; + if (Paths.size() == 0) { + errorString = "No valid entries in " + RaplPath; return EXIT_FAILURE; } - for (auto const& path : paths) { - std::stringstream energyUjPath; - energyUjPath << path << "/energy_uj"; - std::ifstream energyReadingStream(energyUjPath.str()); - if (!energyReadingStream.good()) { + for (auto const& Path : Paths) { + std::stringstream EnergyUjPath; + EnergyUjPath << Path << "/energy_uj"; + std::ifstream EnergyReadingStream(EnergyUjPath.str()); + if (!EnergyReadingStream.good()) { errorString = "Could not read energy_uj"; break; } - std::stringstream maxEnergyUjRangePath; - maxEnergyUjRangePath << path << "/max_energy_range_uj"; - std::ifstream maxEnergyReadingStream(maxEnergyUjRangePath.str()); - if (!maxEnergyReadingStream.good()) { + std::stringstream MaxEnergyUjRangePath; + MaxEnergyUjRangePath << Path << "/max_energy_range_uj"; + std::ifstream MaxEnergyReadingStream(MaxEnergyUjRangePath.str()); + if (!MaxEnergyReadingStream.good()) { errorString = "Could not read max_energy_range_uj"; break; } - uint64_t reading; - uint64_t max; - std::string buffer; - int read; + uint64_t Reading = 0; + uint64_t Max = 0; + std::string Buffer; + int Read = 0; - std::getline(energyReadingStream, buffer); - read = std::sscanf(buffer.c_str(), "%lu", &reading); + std::getline(EnergyReadingStream, Buffer); + Read = std::sscanf(Buffer.c_str(), "%lu", &Reading); - if (read == 0) { - std::stringstream ss; - ss << "Contents in file " << energyUjPath.str() << " do not conform to mask (uint64_t)"; - errorString = ss.str(); + if (Read == 0) { + std::stringstream Ss; + Ss << "Contents in 
file " << EnergyUjPath.str() << " do not conform to mask (uint64_t)"; + errorString = Ss.str(); break; } - std::getline(maxEnergyReadingStream, buffer); - read = std::sscanf(buffer.c_str(), "%lu", &max); + std::getline(MaxEnergyReadingStream, Buffer); + Read = std::sscanf(Buffer.c_str(), "%lu", &Max); - if (read == 0) { + if (Read == 0) { std::stringstream ss; - ss << "Contents in file " << maxEnergyUjRangePath.str() << " do not conform to mask (uint64_t)"; + ss << "Contents in file " << MaxEnergyUjRangePath.str() << " do not conform to mask (uint64_t)"; errorString = ss.str(); break; } - std::shared_ptr def(reinterpret_cast(malloc(sizeof(struct reader_def))), - reader_def_free()); - auto pathName = path.c_str(); - size_t size = (strlen(pathName) + 1) * sizeof(char); - void* name = malloc(size); - memcpy(name, pathName, size); - def->path = (char*)name; - def->max = max; - def->last_reading = reading; - def->overflow = 0; - - readers.push_back(def); + std::shared_ptr Def(reinterpret_cast(malloc(sizeof(struct ReaderDef))), + ReaderDefFree()); + const auto* PathName = Path.c_str(); + size_t Size = (strlen(PathName) + 1) * sizeof(char); + void* Name = malloc(Size); + memcpy(Name, PathName, Size); + Def->Path = (char*)Name; + Def->Max = Max; + Def->LastReading = Reading; + Def->Overflow = 0; + + Readers.push_back(Def); } if (errorString.size() != 0) { @@ -184,46 +184,46 @@ static int32_t init(void) { return EXIT_SUCCESS; } -static int32_t get_reading(double* value) { - double finalReading = 0.0; +static auto getReading(double* Value) -> int32_t { + double FinalReading = 0.0; - for (auto& def : readers) { - long long int reading; - std::string buffer; + for (auto& Def : Readers) { + long long int Reading = 0; + std::string Buffer; - std::stringstream energyUjPath; - energyUjPath << def->path << "/energy_uj"; - std::ifstream energyReadingStream(energyUjPath.str()); - std::getline(energyReadingStream, buffer); - std::sscanf(buffer.c_str(), "%llu", &reading); + 
std::stringstream EnergyUjPath; + EnergyUjPath << Def->Path << "/energy_uj"; + std::ifstream EnergyReadingStream(EnergyUjPath.str()); + std::getline(EnergyReadingStream, Buffer); + std::sscanf(Buffer.c_str(), "%llu", &Reading); - if (reading < def->last_reading) { - def->overflow += 1; + if (Reading < Def->LastReading) { + Def->Overflow += 1; } - def->last_reading = reading; + Def->LastReading = Reading; - finalReading += 1.0E-6 * (double)(def->overflow * def->max + def->last_reading); + FinalReading += 1.0E-6 * (double)((Def->Overflow * Def->Max) + Def->LastReading); } - if (value != nullptr) { - *value = finalReading; + if (Value != nullptr) { + *Value = FinalReading; } return EXIT_SUCCESS; } -static const char* get_error(void) { - const char* errorCString = errorString.c_str(); - return errorCString; +static auto getError() -> const char* { + const char* ErrorCString = errorString.c_str(); + return ErrorCString; } // this function will be called periodically to make sure we do not miss an // overflow of the counter -static void callback() { get_reading(nullptr); } +static void callback() { getReading(nullptr); } } -MetricInterface RaplMetric = { +const MetricInterface RaplMetric = { .Name = "sysfs-powercap-rapl", .Type = {.Absolute = 0, .Accumalative = 1, @@ -236,7 +236,7 @@ MetricInterface RaplMetric = { .Callback = callback, .Init = init, .Fini = fini, - .GetReading = get_reading, - .GetError = get_error, + .GetReading = getReading, + .GetError = getError, .RegisterInsertCallback = nullptr, }; diff --git a/src/firestarter/Measurement/Summary.cpp b/src/firestarter/Measurement/Summary.cpp index 730775be..da626e9e 100644 --- a/src/firestarter/Measurement/Summary.cpp +++ b/src/firestarter/Measurement/Summary.cpp @@ -24,7 +24,7 @@ #include #include -using namespace firestarter::measurement; +namespace firestarter::measurement { // this functions borows a lot of code from // https://github.com/metricq/metricq-cpp/blob/master/tools/metricq-summary/src/summary.cpp @@ 
-35,34 +35,34 @@ auto Summary::calculate(std::vector::iterator Begin, std::vector