From 8f4ef1332fbf72532f0836b8549707bf2a85c891 Mon Sep 17 00:00:00 2001 From: Finn Wilkinson <56131608+FinnWilkinson@users.noreply.github.com> Date: Tue, 17 Dec 2024 17:27:19 +0000 Subject: [PATCH] DI unit balanced port allocation fix (#431) * Made more balanced RS allocation in DI unit by not stalling on first port allocation failure but instead cycling through all possible RSs. * Added port allocator config option. --- configs/DEMO_RISCV.yaml | 2 + configs/a64fx.yaml | 2 + configs/a64fx_SME.yaml | 2 + configs/m1_firestorm.yaml | 2 + configs/sst-cores/a64fx-sst.yaml | 2 + configs/sst-cores/m1_firestorm-sst.yaml | 2 + configs/sst-cores/tx2-sst.yaml | 2 + configs/tx2.yaml | 2 + docs/sphinx/user/configuring_simeng.rst | 7 ++++ src/include/simeng/CoreInstance.hh | 1 + .../simeng/pipeline/M1PortAllocator.hh | 4 +- src/lib/CoreInstance.cc | 33 ++++++++++++++- src/lib/config/ModelConfig.cc | 7 ++++ src/lib/pipeline/DispatchIssueUnit.cc | 41 ++++++++++++------- src/lib/pipeline/M1PortAllocator.cc | 2 +- test/integration/ConfigTest.cc | 6 ++- test/unit/GenericPredictorTest.cc | 7 +++- test/unit/pipeline/DispatchIssueUnitTest.cc | 4 +- test/unit/pipeline/M1PortAllocatorTest.cc | 2 +- 19 files changed, 103 insertions(+), 27 deletions(-) diff --git a/configs/DEMO_RISCV.yaml b/configs/DEMO_RISCV.yaml index 2ca8c35c36..0d64ca296e 100644 --- a/configs/DEMO_RISCV.yaml +++ b/configs/DEMO_RISCV.yaml @@ -26,6 +26,8 @@ Queue-Sizes: ROB: 180 Load: 64 Store: 36 +Port-Allocator: + Type: Balanced Branch-Predictor: Type: "Perceptron" BTB-Tag-Bits: 11 diff --git a/configs/a64fx.yaml b/configs/a64fx.yaml index 36d09a42c9..c3285a22b7 100644 --- a/configs/a64fx.yaml +++ b/configs/a64fx.yaml @@ -28,6 +28,8 @@ Queue-Sizes: ROB: 128 Load: 40 Store: 24 +Port-Allocator: + Type: A64FX Branch-Predictor: Type: "Perceptron" BTB-Tag-Bits: 11 diff --git a/configs/a64fx_SME.yaml b/configs/a64fx_SME.yaml index 7b1442cc32..b10f955f6f 100644 --- a/configs/a64fx_SME.yaml +++ b/configs/a64fx_SME.yaml @@ -30,6 
+30,8 @@ Queue-Sizes: ROB: 128 Load: 40 Store: 24 +Port-Allocator: + Type: A64FX Branch-Predictor: Type: "Perceptron" BTB-Tag-Bits: 11 diff --git a/configs/m1_firestorm.yaml b/configs/m1_firestorm.yaml index a593500685..822856dac5 100644 --- a/configs/m1_firestorm.yaml +++ b/configs/m1_firestorm.yaml @@ -24,6 +24,8 @@ Queue-Sizes: ROB: 630 Load: 130 Store: 60 +Port-Allocator: + Type: M1 Branch-Predictor: Type: "Perceptron" BTB-Tag-Bits: 11 diff --git a/configs/sst-cores/a64fx-sst.yaml b/configs/sst-cores/a64fx-sst.yaml index fd503c668d..b984c63970 100644 --- a/configs/sst-cores/a64fx-sst.yaml +++ b/configs/sst-cores/a64fx-sst.yaml @@ -28,6 +28,8 @@ Queue-Sizes: ROB: 128 Load: 40 Store: 24 +Port-Allocator: + Type: A64FX Branch-Predictor: Type: "Perceptron" BTB-Tag-Bits: 11 diff --git a/configs/sst-cores/m1_firestorm-sst.yaml b/configs/sst-cores/m1_firestorm-sst.yaml index e7bc241b8f..ce0302ecc8 100644 --- a/configs/sst-cores/m1_firestorm-sst.yaml +++ b/configs/sst-cores/m1_firestorm-sst.yaml @@ -24,6 +24,8 @@ Queue-Sizes: ROB: 630 Load: 130 Store: 60 +Port-Allocator: + Type: M1 Branch-Predictor: Type: "Perceptron" BTB-Tag-Bits: 11 diff --git a/configs/sst-cores/tx2-sst.yaml b/configs/sst-cores/tx2-sst.yaml index e3d1e3231c..174b30f732 100644 --- a/configs/sst-cores/tx2-sst.yaml +++ b/configs/sst-cores/tx2-sst.yaml @@ -26,6 +26,8 @@ Queue-Sizes: ROB: 180 Load: 64 Store: 36 +Port-Allocator: + Type: Balanced Branch-Predictor: Type: "Perceptron" BTB-Tag-Bits: 11 diff --git a/configs/tx2.yaml b/configs/tx2.yaml index a5e28807f9..45a8bb498b 100644 --- a/configs/tx2.yaml +++ b/configs/tx2.yaml @@ -26,6 +26,8 @@ Queue-Sizes: ROB: 180 Load: 64 Store: 36 +Port-Allocator: + Type: Balanced Branch-Predictor: Type: "Perceptron" BTB-Tag-Bits: 11 diff --git a/docs/sphinx/user/configuring_simeng.rst b/docs/sphinx/user/configuring_simeng.rst index a021369ea6..765e8c7e45 100644 --- a/docs/sphinx/user/configuring_simeng.rst +++ b/docs/sphinx/user/configuring_simeng.rst @@ -140,6 
+140,13 @@ Load Store The size of the store queue within the load/store queue unit. +Port-Allocator +-------------- + +This section allows a user to select which Port Allocator to use. The available options are: + +Type + The specific allocator algorithm to use. The current options are ``Balanced``, ``A64FX``, and ``M1``. The first implements a round-robin style algorithm, allocating instructions to compatible ports evenly. The latter two implement the port allocation algorithms found in the respective hardware as per their names. Branch-Predictor ---------------- diff --git a/src/include/simeng/CoreInstance.hh b/src/include/simeng/CoreInstance.hh index 64e2f9e1f5..2cc739f3f9 100644 --- a/src/include/simeng/CoreInstance.hh +++ b/src/include/simeng/CoreInstance.hh @@ -20,6 +20,7 @@ #include "simeng/models/outoforder/Core.hh" #include "simeng/pipeline/A64FXPortAllocator.hh" #include "simeng/pipeline/BalancedPortAllocator.hh" +#include "simeng/pipeline/M1PortAllocator.hh" namespace simeng { diff --git a/src/include/simeng/pipeline/M1PortAllocator.hh b/src/include/simeng/pipeline/M1PortAllocator.hh index 136c7636fb..7bfaa94817 100644 --- a/src/include/simeng/pipeline/M1PortAllocator.hh +++ b/src/include/simeng/pipeline/M1PortAllocator.hh @@ -19,7 +19,7 @@ class M1PortAllocator : public PortAllocator { * a port type which denotes the matching requirements of said instruction * groups. */ M1PortAllocator(const std::vector>& portArrangement, - std::vector> rsArrangement); + std::vector> rsArrangement); /** Allocate the lowest weighted port available for the specified instruction * group. Returns the allocated port, and increases the weight of the port. 
@@ -56,7 +56,7 @@ class M1PortAllocator : public PortAllocator { std::function&)> rsSizes_; /** Mapping from port index to reservation station */ - std::vector> rsArrangement_; + std::vector> rsArrangement_; }; } // namespace pipeline diff --git a/src/lib/CoreInstance.cc b/src/lib/CoreInstance.cc index 45832347ce..46f8638286 100644 --- a/src/lib/CoreInstance.cc +++ b/src/lib/CoreInstance.cc @@ -236,8 +236,37 @@ void CoreInstance::createCore() { portArrangement[i].push_back(grp); } } - portAllocator_ = - std::make_unique(portArrangement); + + // Initialise the desired port allocator + std::string portAllocatorType = + config_["Port-Allocator"]["Type"].as(); + if (portAllocatorType == "Balanced") { + portAllocator_ = + std::make_unique(portArrangement); + } else if (portAllocatorType == "A64FX") { + portAllocator_ = + std::make_unique(portArrangement); + } else if (portAllocatorType == "M1") { + // Extract the reservation station arrangement from the config file + auto config_rs = config_["Reservation-Stations"]; + std::vector> rsArrangement; + for (size_t i = 0; i < config_rs.num_children(); i++) { + auto config_rs_ports = config_rs[i]["Port-Nums"]; + for (size_t j = 0; j < config_rs_ports.num_children(); j++) { + uint16_t port = config_rs_ports[j].as(); + if (static_cast(rsArrangement.size()) < port + 1) { + rsArrangement.resize(port + 1); + } + rsArrangement[port] = {i, config_rs[i]["Size"].as()}; + } + } + portAllocator_ = std::make_unique( + portArrangement, rsArrangement); + } else { + std::cout << "[SimEng:CoreInstnce] Invalid Port Allocator type selected." 
+ << std::endl; + exit(EXIT_FAILURE); + } // Construct the core object based on the defined simulation mode uint64_t entryPoint = process_->getEntryPoint(); diff --git a/src/lib/config/ModelConfig.cc b/src/lib/config/ModelConfig.cc index 6d6152ced4..1b12e629d7 100644 --- a/src/lib/config/ModelConfig.cc +++ b/src/lib/config/ModelConfig.cc @@ -497,6 +497,13 @@ void ModelConfig::setExpectations(bool isDefault) { ExpectationNode::createExpectation(16, "Store")); expectations_["Queue-Sizes"]["Store"].setValueBounds(1, UINT32_MAX); + // Port-Allocator + expectations_.addChild(ExpectationNode::createExpectation("Port-Allocator")); + expectations_["Port-Allocator"].addChild( + ExpectationNode::createExpectation("Balanced", "Type")); + expectations_["Port-Allocator"]["Type"].setValueSet( + std::vector{"Balanced", "A64FX", "M1"}); + // Branch-Predictor expectations_.addChild( ExpectationNode::createExpectation("Branch-Predictor")); diff --git a/src/lib/pipeline/DispatchIssueUnit.cc b/src/lib/pipeline/DispatchIssueUnit.cc index b3712715ed..ca2ca44a88 100644 --- a/src/lib/pipeline/DispatchIssueUnit.cc +++ b/src/lib/pipeline/DispatchIssueUnit.cc @@ -67,31 +67,42 @@ void DispatchIssueUnit::tick() { continue; } - const std::vector& supportedPorts = uop->getSupportedPorts(); + std::vector supportedPorts = uop->getSupportedPorts(); if (uop->exceptionEncountered()) { // Exception; mark as ready to commit, and remove from pipeline uop->setCommitReady(); input_.getHeadSlots()[slot] = nullptr; continue; } - // Allocate issue port to uop - uint16_t port = portAllocator_.allocate(supportedPorts); - uint16_t RS_Index = portMapping_[port].first; - uint16_t RS_Port = portMapping_[port].second; - assert(RS_Index < reservationStations_.size() && - "Allocated port inaccessible"); - ReservationStation& rs = reservationStations_[RS_Index]; - // When appropriate, stall uop or input buffer if stall buffer full - if (rs.currentSize == rs.capacity || - dispatches_[RS_Index] == rs.dispatchRate) { - 
// Deallocate port given - portAllocator_.deallocate(port); + // Loop through all ports and remove any whose RS is at capacity or dispatch + // rate has been met + auto portIt = supportedPorts.begin(); + while (portIt != supportedPorts.end()) { + uint16_t RS_Index = portMapping_[*portIt].first; + ReservationStation* rs = &reservationStations_[RS_Index]; + if (rs->currentSize == rs->capacity || + dispatches_[RS_Index] == rs->dispatchRate) { + portIt = supportedPorts.erase(portIt); + } else { + portIt++; + } + } + // If no ports left, stall and return + if (supportedPorts.size() == 0) { input_.stall(true); rsStalls_++; return; } + // Find an available RS + uint16_t port = portAllocator_.allocate(supportedPorts); + uint16_t RS_Index = portMapping_[port].first; + uint16_t RS_Port = portMapping_[port].second; + assert(RS_Index < reservationStations_.size() && + "Allocated port inaccessible"); + ReservationStation* rs = &reservationStations_[RS_Index]; + // Assume the uop will be ready bool ready = true; @@ -123,10 +134,10 @@ void DispatchIssueUnit::tick() { // Increment dispatches made and RS occupied entries size dispatches_[RS_Index]++; - rs.currentSize++; + rs->currentSize++; if (ready) { - rs.ports[RS_Port].ready.push_back(std::move(uop)); + rs->ports[RS_Port].ready.push_back(std::move(uop)); } input_.getHeadSlots()[slot] = nullptr; diff --git a/src/lib/pipeline/M1PortAllocator.cc b/src/lib/pipeline/M1PortAllocator.cc index 94a2f18563..5d26b6d550 100644 --- a/src/lib/pipeline/M1PortAllocator.cc +++ b/src/lib/pipeline/M1PortAllocator.cc @@ -9,7 +9,7 @@ namespace pipeline { M1PortAllocator::M1PortAllocator( const std::vector>& portArrangement, - std::vector> rsArrangement) + std::vector> rsArrangement) : weights(portArrangement.size(), 0), rsArrangement_(rsArrangement) {} uint16_t M1PortAllocator::allocate(const std::vector& ports) { diff --git a/test/integration/ConfigTest.cc b/test/integration/ConfigTest.cc index 12c295d2d4..4e6ac2ad68 100644 --- 
a/test/integration/ConfigTest.cc +++ b/test/integration/ConfigTest.cc @@ -49,7 +49,8 @@ TEST(ConfigTest, Default) { "'FloatingPoint/SVE-Count': 38\n 'Predicate-Count': 17\n " "'Conditional-Count': 1\n 'Matrix-Count': 1\n'Pipeline-Widths':\n " "Commit: 1\n FrontEnd: 1\n 'LSQ-Completion': 1\n'Queue-Sizes':\n ROB: " - "32\n Load: 16\n Store: 16\n'Branch-Predictor':\n Type: Perceptron\n " + "32\n Load: 16\n Store: 16\n'Port-Allocator':\n Type: " + "Balanced\n'Branch-Predictor':\n Type: Perceptron\n " "'BTB-Tag-Bits': 8\n 'Global-History-Length': 8\n 'RAS-entries': " "8\n'L1-Data-Memory':\n 'Interface-Type': " "Flat\n'L1-Instruction-Memory':\n 'Interface-Type': " @@ -103,7 +104,8 @@ TEST(ConfigTest, Default) { "100000\n'Register-Set':\n 'GeneralPurpose-Count': 38\n " "'FloatingPoint-Count': 38\n'Pipeline-Widths':\n Commit: 1\n FrontEnd: " "1\n 'LSQ-Completion': 1\n'Queue-Sizes':\n ROB: 32\n Load: 16\n " - "Store: 16\n'Branch-Predictor':\n Type: Perceptron\n 'BTB-Tag-Bits': " + "Store: 16\n'Port-Allocator':\n Type: Balanced\n'Branch-Predictor':\n " + "Type: Perceptron\n 'BTB-Tag-Bits': " "8\n 'Global-History-Length': 8\n 'RAS-entries': " "8\n'L1-Data-Memory':\n 'Interface-Type': " "Flat\n'L1-Instruction-Memory':\n 'Interface-Type': " diff --git a/test/unit/GenericPredictorTest.cc b/test/unit/GenericPredictorTest.cc index c7d6011c29..66ec9155c7 100644 --- a/test/unit/GenericPredictorTest.cc +++ b/test/unit/GenericPredictorTest.cc @@ -1,3 +1,4 @@ +#include "ConfigInit.hh" #include "MockInstruction.hh" #include "gtest/gtest.h" #include "simeng/branchpredictors/GenericPredictor.hh" @@ -18,7 +19,8 @@ class GenericPredictorTest : public testing::Test { // Tests that a GenericPredictor will predict the correct direction on a // miss TEST_F(GenericPredictorTest, Miss) { - simeng::config::SimInfo::addToConfig( + ConfigInit configInit = ConfigInit( + config::ISA::AArch64, "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 11, " "Saturating-Count-Bits: 2, Global-History-Length: 10, 
RAS-entries: 5, " "Fallback-Static-Predictor: Always-Taken}}"); @@ -26,7 +28,8 @@ TEST_F(GenericPredictorTest, Miss) { auto prediction = predictor.predict(0, BranchType::Conditional, 0); EXPECT_TRUE(prediction.isTaken); - simeng::config::SimInfo::addToConfig( + configInit = ConfigInit( + config::ISA::AArch64, "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 11, " "Saturating-Count-Bits: 2, Global-History-Length: 10, RAS-entries: 5, " "Fallback-Static-Predictor: Always-Not-Taken}}"); diff --git a/test/unit/pipeline/DispatchIssueUnitTest.cc b/test/unit/pipeline/DispatchIssueUnitTest.cc index bd3f981463..f7ecb2b9b6 100644 --- a/test/unit/pipeline/DispatchIssueUnitTest.cc +++ b/test/unit/pipeline/DispatchIssueUnitTest.cc @@ -9,6 +9,7 @@ namespace simeng { namespace pipeline { +using ::testing::_; using ::testing::Return; using ::testing::ReturnRef; @@ -269,8 +270,7 @@ TEST_F(PipelineDispatchIssueUnitTest, singleInstr_rsFull) { // All expected calls to instruction during tick() EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); // All expected calls to portAllocator during tick() - EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); - EXPECT_CALL(portAlloc, deallocate(EAGA)); + EXPECT_CALL(portAlloc, allocate(_)).Times(0); input.getHeadSlots()[0] = uopPtr; diUnit.tick(); // Ensure Reservation station sizes have stayed the same diff --git a/test/unit/pipeline/M1PortAllocatorTest.cc b/test/unit/pipeline/M1PortAllocatorTest.cc index b0adbed8f1..69786bfed9 100644 --- a/test/unit/pipeline/M1PortAllocatorTest.cc +++ b/test/unit/pipeline/M1PortAllocatorTest.cc @@ -24,7 +24,7 @@ class M1PortAllocatorTest : public testing::Test { {0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, {13}}; // Representation of the M1 Firestorm Reservation Station Arrangement // std::pair = - std::vector> rsArrangement = { + std::vector> rsArrangement = { {0, 24}, {1, 26}, {2, 16}, {3, 12}, {4, 28}, {5, 28}, {6, 12}, {7, 12}, {8, 12}, {9, 12}, 
{10, 36}, {11, 36}, {12, 36}, {13, 36}};