From 8f4ef1332fbf72532f0836b8549707bf2a85c891 Mon Sep 17 00:00:00 2001
From: Finn Wilkinson <56131608+FinnWilkinson@users.noreply.github.com>
Date: Tue, 17 Dec 2024 17:27:19 +0000
Subject: [PATCH] DI unit balanced port allocation fix (#431)

* Made more balanced RS allocation in DI unit by not stalling on first port allocation failure but instead cycling through all possible RSs.

* Added port allocator config option.
---
 configs/DEMO_RISCV.yaml                       |  2 +
 configs/a64fx.yaml                            |  2 +
 configs/a64fx_SME.yaml                        |  2 +
 configs/m1_firestorm.yaml                     |  2 +
 configs/sst-cores/a64fx-sst.yaml              |  2 +
 configs/sst-cores/m1_firestorm-sst.yaml       |  2 +
 configs/sst-cores/tx2-sst.yaml                |  2 +
 configs/tx2.yaml                              |  2 +
 docs/sphinx/user/configuring_simeng.rst       |  7 ++++
 src/include/simeng/CoreInstance.hh            |  1 +
 .../simeng/pipeline/M1PortAllocator.hh        |  4 +-
 src/lib/CoreInstance.cc                       | 33 ++++++++++++++-
 src/lib/config/ModelConfig.cc                 |  7 ++++
 src/lib/pipeline/DispatchIssueUnit.cc         | 41 ++++++++++++-------
 src/lib/pipeline/M1PortAllocator.cc           |  2 +-
 test/integration/ConfigTest.cc                |  6 ++-
 test/unit/GenericPredictorTest.cc             |  7 +++-
 test/unit/pipeline/DispatchIssueUnitTest.cc   |  4 +-
 test/unit/pipeline/M1PortAllocatorTest.cc     |  2 +-
 19 files changed, 103 insertions(+), 27 deletions(-)

diff --git a/configs/DEMO_RISCV.yaml b/configs/DEMO_RISCV.yaml
index 2ca8c35c36..0d64ca296e 100644
--- a/configs/DEMO_RISCV.yaml
+++ b/configs/DEMO_RISCV.yaml
@@ -26,6 +26,8 @@ Queue-Sizes:
   ROB: 180
   Load: 64
   Store: 36
+Port-Allocator:
+  Type: Balanced
 Branch-Predictor:
   Type: "Perceptron"
   BTB-Tag-Bits: 11
diff --git a/configs/a64fx.yaml b/configs/a64fx.yaml
index 36d09a42c9..c3285a22b7 100644
--- a/configs/a64fx.yaml
+++ b/configs/a64fx.yaml
@@ -28,6 +28,8 @@ Queue-Sizes:
   ROB: 128
   Load: 40
   Store: 24
+Port-Allocator:
+  Type: A64FX
 Branch-Predictor:
   Type: "Perceptron"
   BTB-Tag-Bits: 11
diff --git a/configs/a64fx_SME.yaml b/configs/a64fx_SME.yaml
index 7b1442cc32..b10f955f6f 100644
--- a/configs/a64fx_SME.yaml
+++ b/configs/a64fx_SME.yaml
@@ -30,6 +30,8 @@ Queue-Sizes:
   ROB: 128
   Load: 40
   Store: 24
+Port-Allocator:
+  Type: A64FX
 Branch-Predictor:
   Type: "Perceptron"
   BTB-Tag-Bits: 11
diff --git a/configs/m1_firestorm.yaml b/configs/m1_firestorm.yaml
index a593500685..822856dac5 100644
--- a/configs/m1_firestorm.yaml
+++ b/configs/m1_firestorm.yaml
@@ -24,6 +24,8 @@ Queue-Sizes:
   ROB: 630
   Load: 130
   Store: 60
+Port-Allocator:
+  Type: M1
 Branch-Predictor:
   Type: "Perceptron"
   BTB-Tag-Bits: 11
diff --git a/configs/sst-cores/a64fx-sst.yaml b/configs/sst-cores/a64fx-sst.yaml
index fd503c668d..b984c63970 100644
--- a/configs/sst-cores/a64fx-sst.yaml
+++ b/configs/sst-cores/a64fx-sst.yaml
@@ -28,6 +28,8 @@ Queue-Sizes:
   ROB: 128
   Load: 40
   Store: 24
+Port-Allocator:
+  Type: A64FX
 Branch-Predictor:
   Type: "Perceptron"
   BTB-Tag-Bits: 11
diff --git a/configs/sst-cores/m1_firestorm-sst.yaml b/configs/sst-cores/m1_firestorm-sst.yaml
index e7bc241b8f..ce0302ecc8 100644
--- a/configs/sst-cores/m1_firestorm-sst.yaml
+++ b/configs/sst-cores/m1_firestorm-sst.yaml
@@ -24,6 +24,8 @@ Queue-Sizes:
   ROB: 630
   Load: 130
   Store: 60
+Port-Allocator:
+  Type: M1
 Branch-Predictor:
   Type: "Perceptron"
   BTB-Tag-Bits: 11 
diff --git a/configs/sst-cores/tx2-sst.yaml b/configs/sst-cores/tx2-sst.yaml
index e3d1e3231c..174b30f732 100644
--- a/configs/sst-cores/tx2-sst.yaml
+++ b/configs/sst-cores/tx2-sst.yaml
@@ -26,6 +26,8 @@ Queue-Sizes:
   ROB: 180
   Load: 64
   Store: 36
+Port-Allocator:
+  Type: Balanced
 Branch-Predictor:
   Type: "Perceptron"
   BTB-Tag-Bits: 11
diff --git a/configs/tx2.yaml b/configs/tx2.yaml
index a5e28807f9..45a8bb498b 100644
--- a/configs/tx2.yaml
+++ b/configs/tx2.yaml
@@ -26,6 +26,8 @@ Queue-Sizes:
   ROB: 180
   Load: 64
   Store: 36
+Port-Allocator:
+  Type: Balanced
 Branch-Predictor:
   Type: "Perceptron"
   BTB-Tag-Bits: 11
diff --git a/docs/sphinx/user/configuring_simeng.rst b/docs/sphinx/user/configuring_simeng.rst
index a021369ea6..765e8c7e45 100644
--- a/docs/sphinx/user/configuring_simeng.rst
+++ b/docs/sphinx/user/configuring_simeng.rst
@@ -140,6 +140,13 @@ Load
 Store
     The size of the store queue within the load/store queue unit.
 
+Port-Allocator
+--------------
+
+This section allows a user to select which Port Allocator to use. The available options are:
+
+Type
+    The specific allocator algorithm to use. The current options are ``Balanced``, ``A64FX``, and ``M1``. The first implements a round-robin style algorithm, allocating instructions to compatible ports evenly. The latter two implement the port allocation algorithms found in the respective hardware as per their names.
 
 Branch-Predictor
 ----------------
diff --git a/src/include/simeng/CoreInstance.hh b/src/include/simeng/CoreInstance.hh
index 64e2f9e1f5..2cc739f3f9 100644
--- a/src/include/simeng/CoreInstance.hh
+++ b/src/include/simeng/CoreInstance.hh
@@ -20,6 +20,7 @@
 #include "simeng/models/outoforder/Core.hh"
 #include "simeng/pipeline/A64FXPortAllocator.hh"
 #include "simeng/pipeline/BalancedPortAllocator.hh"
+#include "simeng/pipeline/M1PortAllocator.hh"
 
 namespace simeng {
 
diff --git a/src/include/simeng/pipeline/M1PortAllocator.hh b/src/include/simeng/pipeline/M1PortAllocator.hh
index 136c7636fb..7bfaa94817 100644
--- a/src/include/simeng/pipeline/M1PortAllocator.hh
+++ b/src/include/simeng/pipeline/M1PortAllocator.hh
@@ -19,7 +19,7 @@ class M1PortAllocator : public PortAllocator {
    * a port type which denotes the matching requirements of said instruction
    * groups. */
   M1PortAllocator(const std::vector<std::vector<uint16_t>>& portArrangement,
-                  std::vector<std::pair<uint8_t, uint64_t>> rsArrangement);
+                  std::vector<std::pair<uint16_t, uint64_t>> rsArrangement);
 
   /** Allocate the lowest weighted port available for the specified instruction
    * group. Returns the allocated port, and increases the weight of the port.
@@ -56,7 +56,7 @@ class M1PortAllocator : public PortAllocator {
   std::function<void(std::vector<uint32_t>&)> rsSizes_;
 
   /** Mapping from port index to reservation station <index, size> */
-  std::vector<std::pair<uint8_t, uint64_t>> rsArrangement_;
+  std::vector<std::pair<uint16_t, uint64_t>> rsArrangement_;
 };
 
 }  // namespace pipeline
diff --git a/src/lib/CoreInstance.cc b/src/lib/CoreInstance.cc
index 45832347ce..46f8638286 100644
--- a/src/lib/CoreInstance.cc
+++ b/src/lib/CoreInstance.cc
@@ -236,8 +236,37 @@ void CoreInstance::createCore() {
       portArrangement[i].push_back(grp);
     }
   }
-  portAllocator_ =
-      std::make_unique<pipeline::BalancedPortAllocator>(portArrangement);
+
+  // Initialise the desired port allocator
+  std::string portAllocatorType =
+      config_["Port-Allocator"]["Type"].as<std::string>();
+  if (portAllocatorType == "Balanced") {
+    portAllocator_ =
+        std::make_unique<pipeline::BalancedPortAllocator>(portArrangement);
+  } else if (portAllocatorType == "A64FX") {
+    portAllocator_ =
+        std::make_unique<pipeline::A64FXPortAllocator>(portArrangement);
+  } else if (portAllocatorType == "M1") {
+    // Extract the reservation station arrangement from the config file
+    auto config_rs = config_["Reservation-Stations"];
+    std::vector<std::pair<uint16_t, uint64_t>> rsArrangement;
+    for (size_t i = 0; i < config_rs.num_children(); i++) {
+      auto config_rs_ports = config_rs[i]["Port-Nums"];
+      for (size_t j = 0; j < config_rs_ports.num_children(); j++) {
+        uint16_t port = config_rs_ports[j].as<uint16_t>();
+        if (static_cast<uint16_t>(rsArrangement.size()) < port + 1) {
+          rsArrangement.resize(port + 1);
+        }
+        rsArrangement[port] = {i, config_rs[i]["Size"].as<uint64_t>()};
+      }
+    }
+    portAllocator_ = std::make_unique<pipeline::M1PortAllocator>(
+        portArrangement, rsArrangement);
+  } else {
+    std::cout << "[SimEng:CoreInstnce] Invalid Port Allocator type selected."
+              << std::endl;
+    exit(EXIT_FAILURE);
+  }
 
   // Construct the core object based on the defined simulation mode
   uint64_t entryPoint = process_->getEntryPoint();
diff --git a/src/lib/config/ModelConfig.cc b/src/lib/config/ModelConfig.cc
index 6d6152ced4..1b12e629d7 100644
--- a/src/lib/config/ModelConfig.cc
+++ b/src/lib/config/ModelConfig.cc
@@ -497,6 +497,13 @@ void ModelConfig::setExpectations(bool isDefault) {
       ExpectationNode::createExpectation<uint32_t>(16, "Store"));
   expectations_["Queue-Sizes"]["Store"].setValueBounds<uint32_t>(1, UINT32_MAX);
 
+  // Port-Allocator
+  expectations_.addChild(ExpectationNode::createExpectation("Port-Allocator"));
+  expectations_["Port-Allocator"].addChild(
+      ExpectationNode::createExpectation<std::string>("Balanced", "Type"));
+  expectations_["Port-Allocator"]["Type"].setValueSet(
+      std::vector<std::string>{"Balanced", "A64FX", "M1"});
+
   // Branch-Predictor
   expectations_.addChild(
       ExpectationNode::createExpectation("Branch-Predictor"));
diff --git a/src/lib/pipeline/DispatchIssueUnit.cc b/src/lib/pipeline/DispatchIssueUnit.cc
index b3712715ed..ca2ca44a88 100644
--- a/src/lib/pipeline/DispatchIssueUnit.cc
+++ b/src/lib/pipeline/DispatchIssueUnit.cc
@@ -67,31 +67,42 @@ void DispatchIssueUnit::tick() {
       continue;
     }
 
-    const std::vector<uint16_t>& supportedPorts = uop->getSupportedPorts();
+    std::vector<uint16_t> supportedPorts = uop->getSupportedPorts();
     if (uop->exceptionEncountered()) {
       // Exception; mark as ready to commit, and remove from pipeline
       uop->setCommitReady();
       input_.getHeadSlots()[slot] = nullptr;
       continue;
     }
-    // Allocate issue port to uop
-    uint16_t port = portAllocator_.allocate(supportedPorts);
-    uint16_t RS_Index = portMapping_[port].first;
-    uint16_t RS_Port = portMapping_[port].second;
-    assert(RS_Index < reservationStations_.size() &&
-           "Allocated port inaccessible");
-    ReservationStation& rs = reservationStations_[RS_Index];
 
-    // When appropriate, stall uop or input buffer if stall buffer full
-    if (rs.currentSize == rs.capacity ||
-        dispatches_[RS_Index] == rs.dispatchRate) {
-      // Deallocate port given
-      portAllocator_.deallocate(port);
+    // Loop through all ports and remove any whose RS is at capacity or whose
+    // dispatch rate has been met
+    auto portIt = supportedPorts.begin();
+    while (portIt != supportedPorts.end()) {
+      uint16_t RS_Index = portMapping_[*portIt].first;
+      ReservationStation* rs = &reservationStations_[RS_Index];
+      if (rs->currentSize == rs->capacity ||
+          dispatches_[RS_Index] == rs->dispatchRate) {
+        portIt = supportedPorts.erase(portIt);
+      } else {
+        portIt++;
+      }
+    }
+    // If no ports left, stall and return
+    if (supportedPorts.size() == 0) {
       input_.stall(true);
       rsStalls_++;
       return;
     }
 
+    // Find an available RS
+    uint16_t port = portAllocator_.allocate(supportedPorts);
+    uint16_t RS_Index = portMapping_[port].first;
+    uint16_t RS_Port = portMapping_[port].second;
+    assert(RS_Index < reservationStations_.size() &&
+           "Allocated port inaccessible");
+    ReservationStation* rs = &reservationStations_[RS_Index];
+
     // Assume the uop will be ready
     bool ready = true;
 
@@ -123,10 +134,10 @@ void DispatchIssueUnit::tick() {
 
     // Increment dispatches made and RS occupied entries size
     dispatches_[RS_Index]++;
-    rs.currentSize++;
+    rs->currentSize++;
 
     if (ready) {
-      rs.ports[RS_Port].ready.push_back(std::move(uop));
+      rs->ports[RS_Port].ready.push_back(std::move(uop));
     }
 
     input_.getHeadSlots()[slot] = nullptr;
diff --git a/src/lib/pipeline/M1PortAllocator.cc b/src/lib/pipeline/M1PortAllocator.cc
index 94a2f18563..5d26b6d550 100644
--- a/src/lib/pipeline/M1PortAllocator.cc
+++ b/src/lib/pipeline/M1PortAllocator.cc
@@ -9,7 +9,7 @@ namespace pipeline {
 
 M1PortAllocator::M1PortAllocator(
     const std::vector<std::vector<uint16_t>>& portArrangement,
-    std::vector<std::pair<uint8_t, uint64_t>> rsArrangement)
+    std::vector<std::pair<uint16_t, uint64_t>> rsArrangement)
     : weights(portArrangement.size(), 0), rsArrangement_(rsArrangement) {}
 
 uint16_t M1PortAllocator::allocate(const std::vector<uint16_t>& ports) {
diff --git a/test/integration/ConfigTest.cc b/test/integration/ConfigTest.cc
index 12c295d2d4..4e6ac2ad68 100644
--- a/test/integration/ConfigTest.cc
+++ b/test/integration/ConfigTest.cc
@@ -49,7 +49,8 @@ TEST(ConfigTest, Default) {
       "'FloatingPoint/SVE-Count': 38\n  'Predicate-Count': 17\n  "
       "'Conditional-Count': 1\n  'Matrix-Count': 1\n'Pipeline-Widths':\n  "
       "Commit: 1\n  FrontEnd: 1\n  'LSQ-Completion': 1\n'Queue-Sizes':\n  ROB: "
-      "32\n  Load: 16\n  Store: 16\n'Branch-Predictor':\n  Type: Perceptron\n  "
+      "32\n  Load: 16\n  Store: 16\n'Port-Allocator':\n  Type: "
+      "Balanced\n'Branch-Predictor':\n  Type: Perceptron\n  "
       "'BTB-Tag-Bits': 8\n  'Global-History-Length': 8\n  'RAS-entries': "
       "8\n'L1-Data-Memory':\n  'Interface-Type': "
       "Flat\n'L1-Instruction-Memory':\n  'Interface-Type': "
@@ -103,7 +104,8 @@ TEST(ConfigTest, Default) {
       "100000\n'Register-Set':\n  'GeneralPurpose-Count': 38\n  "
       "'FloatingPoint-Count': 38\n'Pipeline-Widths':\n  Commit: 1\n  FrontEnd: "
       "1\n  'LSQ-Completion': 1\n'Queue-Sizes':\n  ROB: 32\n  Load: 16\n  "
-      "Store: 16\n'Branch-Predictor':\n  Type: Perceptron\n  'BTB-Tag-Bits': "
+      "Store: 16\n'Port-Allocator':\n  Type: Balanced\n'Branch-Predictor':\n  "
+      "Type: Perceptron\n  'BTB-Tag-Bits': "
       "8\n  'Global-History-Length': 8\n  'RAS-entries': "
       "8\n'L1-Data-Memory':\n  'Interface-Type': "
       "Flat\n'L1-Instruction-Memory':\n  'Interface-Type': "
diff --git a/test/unit/GenericPredictorTest.cc b/test/unit/GenericPredictorTest.cc
index c7d6011c29..66ec9155c7 100644
--- a/test/unit/GenericPredictorTest.cc
+++ b/test/unit/GenericPredictorTest.cc
@@ -1,3 +1,4 @@
+#include "ConfigInit.hh"
 #include "MockInstruction.hh"
 #include "gtest/gtest.h"
 #include "simeng/branchpredictors/GenericPredictor.hh"
@@ -18,7 +19,8 @@ class GenericPredictorTest : public testing::Test {
 // Tests that a GenericPredictor will predict the correct direction on a
 // miss
 TEST_F(GenericPredictorTest, Miss) {
-  simeng::config::SimInfo::addToConfig(
+  ConfigInit configInit = ConfigInit(
+      config::ISA::AArch64,
       "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 11, "
       "Saturating-Count-Bits: 2, Global-History-Length: 10, RAS-entries: 5, "
       "Fallback-Static-Predictor: Always-Taken}}");
@@ -26,7 +28,8 @@ TEST_F(GenericPredictorTest, Miss) {
   auto prediction = predictor.predict(0, BranchType::Conditional, 0);
   EXPECT_TRUE(prediction.isTaken);
 
-  simeng::config::SimInfo::addToConfig(
+  configInit = ConfigInit(
+      config::ISA::AArch64,
       "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 11, "
       "Saturating-Count-Bits: 2, Global-History-Length: 10, RAS-entries: 5, "
       "Fallback-Static-Predictor: Always-Not-Taken}}");
diff --git a/test/unit/pipeline/DispatchIssueUnitTest.cc b/test/unit/pipeline/DispatchIssueUnitTest.cc
index bd3f981463..f7ecb2b9b6 100644
--- a/test/unit/pipeline/DispatchIssueUnitTest.cc
+++ b/test/unit/pipeline/DispatchIssueUnitTest.cc
@@ -9,6 +9,7 @@
 namespace simeng {
 namespace pipeline {
 
+using ::testing::_;
 using ::testing::Return;
 using ::testing::ReturnRef;
 
@@ -269,8 +270,7 @@ TEST_F(PipelineDispatchIssueUnitTest, singleInstr_rsFull) {
   // All expected calls to instruction during tick()
   EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts));
   // All expected calls to portAllocator during tick()
-  EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA));
-  EXPECT_CALL(portAlloc, deallocate(EAGA));
+  EXPECT_CALL(portAlloc, allocate(_)).Times(0);
   input.getHeadSlots()[0] = uopPtr;
   diUnit.tick();
   // Ensure Reservation station sizes have stayed the same
diff --git a/test/unit/pipeline/M1PortAllocatorTest.cc b/test/unit/pipeline/M1PortAllocatorTest.cc
index b0adbed8f1..69786bfed9 100644
--- a/test/unit/pipeline/M1PortAllocatorTest.cc
+++ b/test/unit/pipeline/M1PortAllocatorTest.cc
@@ -24,7 +24,7 @@ class M1PortAllocatorTest : public testing::Test {
       {0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, {13}};
   // Representation of the M1 Firestorm Reservation Station Arrangement
   // std::pair<uint8_t, uint64_t> = <rsIndex, rsSize>
-  std::vector<std::pair<uint8_t, uint64_t>> rsArrangement = {
+  std::vector<std::pair<uint16_t, uint64_t>> rsArrangement = {
       {0, 24}, {1, 26}, {2, 16}, {3, 12},  {4, 28},  {5, 28},  {6, 12},
       {7, 12}, {8, 12}, {9, 12}, {10, 36}, {11, 36}, {12, 36}, {13, 36}};