riscv-software-src
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 3 additions & 1 deletion
diff --git a/arches/fusion.yaml b/arches/fusion.yaml
Lines changed: 13 additions & 0 deletions
diff --git a/arches/fusion/dhrystone.json b/arches/fusion/dhrystone.json
Lines changed: 37 additions & 0 deletions
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
Lines changed: 1 addition & 0 deletions
diff --git a/core/Decode.cpp b/core/Decode.cpp
Lines changed: 124 additions & 22 deletions
@@ -61,13 +61,15 @@ set(CMAKE_CXX_FLAGS_DEBUG     "-O0 -g")
 
 # Include directories
 include_directories (core mss sim)
+include_directories (SYSTEM fusion)  # fusion headers, added as SYSTEM like the mavis and stf_lib entries below
 include_directories (SYSTEM mavis)
 include_directories (SYSTEM stf_lib)
 
-# Mavis, the Core, MSS, and the simulator
+# Subprojects built from this tree: Mavis, the Core, MSS, the simulator and Fusion
 add_subdirectory (mavis)
 add_subdirectory (core)
 add_subdirectory (mss)
+add_subdirectory (fusion)  # processes fusion/CMakeLists.txt
 
 # Add STF library to the build
 add_subdirectory (${STF_LIB_BASE})
 
@@ -0,0 +1,13 @@
+include: big_core.yaml
+top.cpu.core0:
+  decode:
+    params:
+      num_to_decode: 8                    # upper bound on instructions decoded per decode pass
+      fusion_enable: true                 # master switch: Decode only builds and runs the fuser when true
+      fusion_debug:  false                # when true (and fusion is enabled) Decode prints instruction info to stdout
+      fusion_enable_register: 0xFFFFFFFF  # NOTE(review): presumably a per-feature/group enable mask -- confirm against the fusion code
+      fusion_max_latency:     8           # max number of decode passes that may be deferred while waiting for a full group
+      fusion_match_max_tries: 1023        # watchdog: Decode throws once this many match iterations are reached
+      fusion_max_group_size:  8           # decode is deferred while fewer than this many instructions can be decoded
+      fusion_summary_report:    fusion_summary.txt                # NOTE(review): report file name; the writer is not visible in this change
+      fusion_group_definitions: [ arches/fusion/dhrystone.json ]  # group-definition files passed to the fuser's constructor
@@ -0,0 +1,37 @@
+{
+  "fusiongroups" : [
+    { "name" : "uf039", "uids" : ["0xd","0xa"], "tx" : "dfltXform_" },
+    { "name" : "uf038", "uids" : ["0x3","0xe"], "tx" : "dfltXform_" },
+    { "name" : "uf037", "uids" : ["0x20","0x4"], "tx" : "dfltXform_" },
+    { "name" : "uf036", "uids" : ["0x9","0x2d"], "tx" : "dfltXform_" },
+    { "name" : "uf035", "uids" : ["0x18","0xe"], "tx" : "dfltXform_" },
+    { "name" : "uf034", "uids" : ["0x20","0x18"], "tx" : "dfltXform_" },
+    { "name" : "uf033", "uids" : ["0xe","0xd","0xa"], "tx" : "dfltXform_" },
+    { "name" : "uf032", "uids" : ["0x10","0x10"], "tx" : "dfltXform_" },
+    { "name" : "uf031", "uids" : ["0x18","0x20"], "tx" : "dfltXform_" },
+    { "name" : "uf030", "uids" : ["0x22","0x26"], "tx" : "dfltXform_" },
+    { "name" : "uf029", "uids" : ["0x26","0x34"], "tx" : "dfltXform_" },
+    { "name" : "uf028", "uids" : ["0x21","0x20"], "tx" : "dfltXform_" },
+    { "name" : "uf027", "uids" : ["0x34","0x35"], "tx" : "dfltXform_" },
+    { "name" : "uf026", "uids" : ["0x2d","0x22"], "tx" : "dfltXform_" },
+    { "name" : "uf025", "uids" : ["0x2e","0x2d"], "tx" : "dfltXform_" },
+    { "name" : "uf024", "uids" : ["0x2e","0x21"], "tx" : "dfltXform_" },
+    { "name" : "uf023", "uids" : ["0xd","0xa","0x22"], "tx" : "dfltXform_" },
+    { "name" : "uf022", "uids" : ["0x26","0x34","0x9"], "tx" : "dfltXform_" },
+    { "name" : "uf021", "uids" : ["0xa","0x22","0x26"], "tx" : "dfltXform_" },
+    { "name" : "uf020", "uids" : ["0x18","0x20","0x4"], "tx" : "dfltXform_" },
+    { "name" : "uf019", "uids" : ["0x22","0x26","0x34"], "tx" : "dfltXform_" },
+    { "name" : "uf018", "uids" : ["0x2e","0x21","0x20"], "tx" : "dfltXform_" },
+    { "name" : "uf017", "uids" : ["0x21","0x20","0x18"], "tx" : "dfltXform_" },
+    { "name" : "uf016", "uids" : ["0x20","0x18","0x20"], "tx" : "dfltXform_" },
+    { "name" : "uf008", "uids" : ["0xd","0x35"], "tx" : "dfltXform_" },
+    { "name" : "uf007", "uids" : ["0xa","0x22"], "tx" : "dfltXform_" },
+    { "name" : "uf005", "uids" : ["0xe","0xd"], "tx" : "dfltXform_" },
+    { "name" : "uf004", "uids" : ["0xe","0x34"], "tx" : "dfltXform_" },
+    { "name" : "uf003", "uids" : ["0x34","0x9"], "tx" : "dfltXform_" },
+    { "name" : "uf002", "uids" : ["0x2e","0x35"], "tx" : "dfltXform_" },
+    { "name" : "uf001", "uids" : ["0x35","0x35"], "tx" : "dfltXform_" },
+    { "name" : "uf213", "uids" : ["0x2e","0x2e"], "tx" : "dfltXform_" },
+    { "name" : "uf000", "uids" : ["0x35","0x2e"], "tx" : "dfltXform_" }
+  ]
+}
@@ -1,5 +1,6 @@
 project (core)
 add_library(core
+  FusionDecode.cpp
   Core.cpp
   SimpleBranchPred.cpp
   Fetch.cpp
 
@@ -1,45 +1,94 @@
 // <Decode.cpp> -*- C++ -*-
 
-
-#include <algorithm>
-
 #include "Decode.hpp"
+#include "fusion/FusionTypes.hpp"
 
 #include "sparta/events/StartupEvent.hpp"
 #include "sparta/utils/LogUtils.hpp"
 
+#include <algorithm>
+#include <iostream>
+
+using namespace std;
+
 namespace olympia
 {
     constexpr char Decode::name[];
 
-    Decode::Decode(sparta::TreeNode * node,
-                   const DecodeParameterSet * p) :
+    Decode::Decode(sparta::TreeNode* node, const DecodeParameterSet* p) : // sets up the fetch queue, fusion stats/params and port handlers
         sparta::Unit(node),
+        // Queue of received instructions awaiting decode, constructed with the fetch_queue_size parameter
         fetch_queue_("FetchQueue", p->fetch_queue_size, node->getClock(), &unit_stat_set_),
-        num_to_decode_(p->num_to_decode)
+        // Fusion counters, all registered with this unit's statistic set
+        fusion_num_fuse_instructions_(&unit_stat_set_, "fusion_num_fuse_instructions",
+                                      "The number of custom instructions created by fusion",
+                                      sparta::Counter::COUNT_NORMAL),
+
+        fusion_num_ghost_instructions_(&unit_stat_set_, "fusion_num_ghost_instructions",
+                                       "The number of instructions eliminated by fusion",
+                                       sparta::Counter::COUNT_NORMAL),
+        // COUNT_LATEST counter: assigned the size of the fuser's group container in initializeFusion_()
+        fusion_num_groups_defined_(&unit_stat_set_, "fusion_num_groups_defined",
+                                   "Number of fusion groups compiled or read at run time",
+                                   sparta::Counter::COUNT_LATEST),
+
+        fusion_num_groups_utilized_(&unit_stat_set_, "fusion_num_groups_utilized",
+                                   "Incremented on first use of a fusion group",
+                                   sparta::Counter::COUNT_LATEST),
+
+        fusion_pred_cycles_saved_(&unit_stat_set_, "fusion_pred_cycles_saved",
+                                  "Optimistic prediction of the cycles saved by fusion",
+                                  sparta::Counter::COUNT_NORMAL),
+        // Local copies of the decode and fusion parameters
+        num_to_decode_(p->num_to_decode),
+        fusion_enable_(p->fusion_enable),
+        fusion_debug_(p->fusion_debug),
+        fusion_enable_register_(p->fusion_enable_register),
+        fusion_max_latency_(p->fusion_max_latency),
+        fusion_match_max_tries_(p->fusion_match_max_tries),
+        fusion_max_group_size_(p->fusion_max_group_size),
+        fusion_summary_report_(p->fusion_summary_report),
+        fusion_group_definitions_(p->fusion_group_definitions)
     {
+        initializeFusion_(); // creates the fuser when fusion_enable_ is set, otherwise leaves fuser_ null
+
         fetch_queue_.enableCollection(node);
 
-        fetch_queue_write_in_.
-            registerConsumerHandler(CREATE_SPARTA_HANDLER_WITH_DATA(Decode, fetchBufferAppended_, InstGroupPtr));
-        uop_queue_credits_in_.
-            registerConsumerHandler(CREATE_SPARTA_HANDLER_WITH_DATA(Decode, receiveUopQueueCredits_, uint32_t));
-        in_reorder_flush_.
-            registerConsumerHandler(CREATE_SPARTA_HANDLER_WITH_DATA(Decode, handleFlush_, FlushManager::FlushingCriteria));
+        fetch_queue_write_in_.registerConsumerHandler( // incoming instruction groups -> fetchBufferAppended_
+            CREATE_SPARTA_HANDLER_WITH_DATA(Decode, fetchBufferAppended_, InstGroupPtr));
+        uop_queue_credits_in_.registerConsumerHandler( // returned uop queue credits -> receiveUopQueueCredits_
+            CREATE_SPARTA_HANDLER_WITH_DATA(Decode, receiveUopQueueCredits_, uint32_t));
+        in_reorder_flush_.registerConsumerHandler( // flush criteria -> handleFlush_
+            CREATE_SPARTA_HANDLER_WITH_DATA(Decode, handleFlush_, FlushManager::FlushingCriteria));
 
         sparta::StartupEvent(node, CREATE_SPARTA_HANDLER(Decode, sendInitialCredits_));
     }
 
     // Send fetch the initial credit count
-    void Decode::sendInitialCredits_()
+    void Decode::sendInitialCredits_() { fetch_queue_credits_outp_.send(fetch_queue_.capacity()); }
+
+    // Set up fusion support. When fusion_enable_ is set, build the fuser from the group
+    // definition files and record how many groups it holds; otherwise leave fuser_ null.
+    void Decode::initializeFusion_()
     {
-        fetch_queue_credits_outp_.send(fetch_queue_.capacity());
+        if (fusion_enable_)
+        {
+            fuser_  = std::make_unique<FusionType>(fusion_group_definitions_); // built from the configured definition files
+            hcache_ = fusion::HCache(FusionGroupType::jenkins_1aat); // NOTE(review): presumably the Jenkins one-at-a-time hash -- confirm in fusion/
+            fusion_num_groups_defined_ = fuser_->getFusionGroupContainer().size(); // publish the group count through the stat counter
+        }
+        else
+        {
+            fuser_ = nullptr; // fusion disabled: no fuser is created
+        }
     }
 
     // Receive Uop credits from Dispatch
-    void Decode::receiveUopQueueCredits_(const uint32_t & credits) {
+    void Decode::receiveUopQueueCredits_(const uint32_t & credits)
+    {
         uop_queue_credits_ += credits;
-        if (fetch_queue_.size() > 0) {
+        if (fetch_queue_.size() > 0)
+        {
             ev_decode_insts_event_.schedule(sparta::Clock::Cycle(0));
         }
 
@@ -52,12 +101,13 @@ namespace olympia
     void Decode::fetchBufferAppended_(const InstGroupPtr & insts)
     {
         // Cache the instructions in the instruction queue if we can't decode this cycle
-        for(auto & i : *insts)
+        for (auto & i : *insts) // push every instruction of the incoming group into fetch_queue_
         {
             fetch_queue_.push(i);
             ILOG("Received: " << i);
         }
-        if (uop_queue_credits_ > 0) {
+        if (uop_queue_credits_ > 0) // with uop queue credits in hand, schedule decode with a 0-cycle delay
+        {
             ev_decode_insts_event_.schedule(sparta::Clock::Cycle(0));
         }
     }
@@ -76,25 +126,76 @@ namespace olympia
         uint32_t num_decode = std::min(uop_queue_credits_, fetch_queue_.size());
         num_decode = std::min(num_decode, num_to_decode_);
 
-        if(num_decode > 0)
+        // Defer decoding to improve the chance of a fusion group match: wait until a full
+        // group (fusion_max_group_size_) can be decoded, for at most fusion_max_latency_ deferrals
+        if (fusion_enable_)
+        {
+            if (num_decode < fusion_max_group_size_ && latency_count_ < fusion_max_latency_)
+            {
+                ++latency_count_;
+                return;
+            }
+        }
+
+        latency_count_ = 0;
+
+        if (num_decode > 0)
         {
             InstGroupPtr insts =
                 sparta::allocate_sparta_shared_pointer<InstGroup>(instgroup_allocator);
+
+            InstUidListType uids;
             // Send instructions on their way to rename
-            for(uint32_t i = 0; i < num_decode; ++i) {
+            for (uint32_t i = 0; i < num_decode; ++i)
+            {
                 const auto & inst = fetch_queue_.read(0);
                 insts->emplace_back(inst);
                 inst->setStatus(Inst::Status::DECODED);
 
+                if (fusion_enable_)
+                {
+                    uids.push_back(inst->getMavisUid());
+                }
+
                 ILOG("Decoded: " << inst);
 
                 fetch_queue_.pop();
             }
 
+            if (fusion_enable_)
+            {
+                MatchInfoListType matches;
+                uint32_t max_itrs = 0;
+                FusionGroupContainerType & container = fuser_->getFusionGroupContainer();
+                do
+                {
+                    matchFusionGroups_(matches, insts, uids, container);
+                    processMatches_(matches, insts, uids);
+                    // Future feature whereIsEgon(insts,numGhosts);
+                    ++max_itrs;
+                } while (matches.size() > 0 && max_itrs < fusion_match_max_tries_);
+
+                if (max_itrs >= fusion_match_max_tries_)
+                {
+                    throw sparta::SpartaException("Fusion group match watch dog exceeded.");
+                }
+            }
+
+            // Debug statement
+            if (fusion_debug_ && fusion_enable_)
+                infoInsts_(cout, insts);
             // Send decoded instructions to rename
             uop_queue_outp_.send(insts);
 
+            // TODO: whereIsEgon() would remove the ghost instructions.
+            // It is commented out for now; in practice insts
+            // would be smaller due to the fused ops.
+            // uint32_t unfusedInstsSize = insts->size();
+
             // Decrement internal Uop Queue credits
+            sparta_assert(uop_queue_credits_ >= insts->size(),
+                 "Attempt to decrement d0q credits below what is available");
+
             uop_queue_credits_ -= insts->size();
 
             // Send credits back to Fetch to get more instructions
@@ -103,8 +204,9 @@ namespace olympia
 
         // If we still have credits to send instructions as well as
         // instructions in the queue, schedule another decode session
-        if(uop_queue_credits_ > 0 && fetch_queue_.size() > 0) {
+        if (uop_queue_credits_ > 0 && fetch_queue_.size() > 0)
+        {
             ev_decode_insts_event_.schedule(1);
         }
     }
-}
+} // namespace olympia