Skip to content

Commit a0f965f

Browse files
jeffnye-gh and Jeff Nye
authored
1st commit of fusion capable decoder, json support (#163)
Fusion decoder implementation, suitable for analysis. Adds fusion parameters to fusion.yaml and specifies external file definitions of fusion groups using JSON. New statistics for fused instructions and fusion group stats. HCache as a separate structure in fusion/fusion. Includes the FSL.md domain language description. Inst.hpp modified to add FUSED and FUSION_GHOST as extended status values. Support for non-sequential program ID: each instruction has a PID increment value and associated methods. Formatting and clean-up of Inst.hpp. JSON support added to Fusion.hpp. New exceptions for JSON errors. Fusion default transform no longer modifies the input buffer. HCache testbench functions. --------- Co-authored-by: Jeff Nye <jeff@www.condorcomputing.com>
1 parent 2ed051b commit a0f965f

38 files changed

+3958
-964
lines changed

CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,15 @@ set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
6161

6262
# Include directories
6363
include_directories (core mss sim)
64+
include_directories (SYSTEM fusion)
6465
include_directories (SYSTEM mavis)
6566
include_directories (SYSTEM stf_lib)
6667

67-
# Mavis, the Core, MSS, and the simulator
68+
# Mavis, the Core, MSS, the simulator and Fusion
6869
add_subdirectory (mavis)
6970
add_subdirectory (core)
7071
add_subdirectory (mss)
72+
add_subdirectory (fusion)
7173

7274
# Add STF library to the build
7375
add_subdirectory (${STF_LIB_BASE})

arches/fusion.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
include: big_core.yaml
2+
top.cpu.core0:
3+
decode:
4+
params:
5+
num_to_decode: 8
6+
fusion_enable: true
7+
fusion_debug: false
8+
fusion_enable_register: 0xFFFFFFFF
9+
fusion_max_latency: 8
10+
fusion_match_max_tries: 1023
11+
fusion_max_group_size: 8
12+
fusion_summary_report: fusion_summary.txt
13+
fusion_group_definitions: [ arches/fusion/dhrystone.json ]

arches/fusion/dhrystone.json

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"fusiongroups" : [
3+
{ "name" : "uf039", "uids" : ["0xd","0xa"], "tx" : "dfltXform_" },
4+
{ "name" : "uf038", "uids" : ["0x3","0xe"], "tx" : "dfltXform_" },
5+
{ "name" : "uf037", "uids" : ["0x20","0x4"], "tx" : "dfltXform_" },
6+
{ "name" : "uf036", "uids" : ["0x9","0x2d"], "tx" : "dfltXform_" },
7+
{ "name" : "uf035", "uids" : ["0x18","0xe"], "tx" : "dfltXform_" },
8+
{ "name" : "uf034", "uids" : ["0x20","0x18"], "tx" : "dfltXform_" },
9+
{ "name" : "uf033", "uids" : ["0xe","0xd","0xa"], "tx" : "dfltXform_" },
10+
{ "name" : "uf032", "uids" : ["0x10","0x10"], "tx" : "dfltXform_" },
11+
{ "name" : "uf031", "uids" : ["0x18","0x20"], "tx" : "dfltXform_" },
12+
{ "name" : "uf030", "uids" : ["0x22","0x26"], "tx" : "dfltXform_" },
13+
{ "name" : "uf029", "uids" : ["0x26","0x34"], "tx" : "dfltXform_" },
14+
{ "name" : "uf028", "uids" : ["0x21","0x20"], "tx" : "dfltXform_" },
15+
{ "name" : "uf027", "uids" : ["0x34","0x35"], "tx" : "dfltXform_" },
16+
{ "name" : "uf026", "uids" : ["0x2d","0x22"], "tx" : "dfltXform_" },
17+
{ "name" : "uf025", "uids" : ["0x2e","0x2d"], "tx" : "dfltXform_" },
18+
{ "name" : "uf024", "uids" : ["0x2e","0x21"], "tx" : "dfltXform_" },
19+
{ "name" : "uf023", "uids" : ["0xd","0xa","0x22"], "tx" : "dfltXform_" },
20+
{ "name" : "uf022", "uids" : ["0x26","0x34","0x9"], "tx" : "dfltXform_" },
21+
{ "name" : "uf021", "uids" : ["0xa","0x22","0x26"], "tx" : "dfltXform_" },
22+
{ "name" : "uf020", "uids" : ["0x18","0x20","0x4"], "tx" : "dfltXform_" },
23+
{ "name" : "uf019", "uids" : ["0x22","0x26","0x34"], "tx" : "dfltXform_" },
24+
{ "name" : "uf018", "uids" : ["0x2e","0x21","0x20"], "tx" : "dfltXform_" },
25+
{ "name" : "uf017", "uids" : ["0x21","0x20","0x18"], "tx" : "dfltXform_" },
26+
{ "name" : "uf016", "uids" : ["0x20","0x18","0x20"], "tx" : "dfltXform_" },
27+
{ "name" : "uf008", "uids" : ["0xd","0x35"], "tx" : "dfltXform_" },
28+
{ "name" : "uf007", "uids" : ["0xa","0x22"], "tx" : "dfltXform_" },
29+
{ "name" : "uf005", "uids" : ["0xe","0xd"], "tx" : "dfltXform_" },
30+
{ "name" : "uf004", "uids" : ["0xe","0x34"], "tx" : "dfltXform_" },
31+
{ "name" : "uf003", "uids" : ["0x34","0x9"], "tx" : "dfltXform_" },
32+
{ "name" : "uf002", "uids" : ["0x2e","0x35"], "tx" : "dfltXform_" },
33+
{ "name" : "uf001", "uids" : ["0x35","0x35"], "tx" : "dfltXform_" },
34+
{ "name" : "uf213", "uids" : ["0x2e","0x2e"], "tx" : "dfltXform_" },
35+
{ "name" : "uf000", "uids" : ["0x35","0x2e"], "tx" : "dfltXform_" }
36+
]
37+
}

core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
project (core)
22
add_library(core
3+
FusionDecode.cpp
34
Core.cpp
45
SimpleBranchPred.cpp
56
Fetch.cpp

core/Decode.cpp

Lines changed: 124 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,94 @@
11
// <Decode.cpp> -*- C++ -*-
22

3-
4-
#include <algorithm>
5-
63
#include "Decode.hpp"
4+
#include "fusion/FusionTypes.hpp"
75

86
#include "sparta/events/StartupEvent.hpp"
97
#include "sparta/utils/LogUtils.hpp"
108

9+
#include <algorithm>
10+
#include <iostream>
11+
12+
using namespace std;
13+
1114
namespace olympia
1215
{
1316
constexpr char Decode::name[];
1417

15-
Decode::Decode(sparta::TreeNode * node,
16-
const DecodeParameterSet * p) :
18+
Decode::Decode(sparta::TreeNode* node, const DecodeParameterSet* p) :
1719
sparta::Unit(node),
20+
1821
fetch_queue_("FetchQueue", p->fetch_queue_size, node->getClock(), &unit_stat_set_),
19-
num_to_decode_(p->num_to_decode)
22+
23+
fusion_num_fuse_instructions_(&unit_stat_set_, "fusion_num_fuse_instructions",
24+
"The number of custom instructions created by fusion",
25+
sparta::Counter::COUNT_NORMAL),
26+
27+
fusion_num_ghost_instructions_(&unit_stat_set_, "fusion_num_ghost_instructions",
28+
"The number of instructions eliminated by fusion",
29+
sparta::Counter::COUNT_NORMAL),
30+
31+
fusion_num_groups_defined_(&unit_stat_set_, "fusion_num_groups_defined",
32+
"Number of fusion groups compiled or read at run time",
33+
sparta::Counter::COUNT_LATEST),
34+
35+
fusion_num_groups_utilized_(&unit_stat_set_, "fusion_num_groups_utilized",
36+
"Incremented on first use of a fusion group",
37+
sparta::Counter::COUNT_LATEST),
38+
39+
fusion_pred_cycles_saved_(&unit_stat_set_, "fusion_pred_cycles_saved",
40+
"Optimistic prediction of the cycles saved by fusion",
41+
sparta::Counter::COUNT_NORMAL),
42+
43+
num_to_decode_(p->num_to_decode),
44+
fusion_enable_(p->fusion_enable),
45+
fusion_debug_(p->fusion_debug),
46+
fusion_enable_register_(p->fusion_enable_register),
47+
fusion_max_latency_(p->fusion_max_latency),
48+
fusion_match_max_tries_(p->fusion_match_max_tries),
49+
fusion_max_group_size_(p->fusion_max_group_size),
50+
fusion_summary_report_(p->fusion_summary_report),
51+
fusion_group_definitions_(p->fusion_group_definitions)
2052
{
53+
initializeFusion_();
54+
2155
fetch_queue_.enableCollection(node);
2256

23-
fetch_queue_write_in_.
24-
registerConsumerHandler(CREATE_SPARTA_HANDLER_WITH_DATA(Decode, fetchBufferAppended_, InstGroupPtr));
25-
uop_queue_credits_in_.
26-
registerConsumerHandler(CREATE_SPARTA_HANDLER_WITH_DATA(Decode, receiveUopQueueCredits_, uint32_t));
27-
in_reorder_flush_.
28-
registerConsumerHandler(CREATE_SPARTA_HANDLER_WITH_DATA(Decode, handleFlush_, FlushManager::FlushingCriteria));
57+
fetch_queue_write_in_.registerConsumerHandler(
58+
CREATE_SPARTA_HANDLER_WITH_DATA(Decode, fetchBufferAppended_, InstGroupPtr));
59+
uop_queue_credits_in_.registerConsumerHandler(
60+
CREATE_SPARTA_HANDLER_WITH_DATA(Decode, receiveUopQueueCredits_, uint32_t));
61+
in_reorder_flush_.registerConsumerHandler(
62+
CREATE_SPARTA_HANDLER_WITH_DATA(Decode, handleFlush_, FlushManager::FlushingCriteria));
2963

3064
sparta::StartupEvent(node, CREATE_SPARTA_HANDLER(Decode, sendInitialCredits_));
3165
}
3266

3367
// Send fetch the initial credit count
34-
void Decode::sendInitialCredits_()
68+
void Decode::sendInitialCredits_() { fetch_queue_credits_outp_.send(fetch_queue_.capacity()); }
69+
70+
// -------------------------------------------------------------------
71+
// -------------------------------------------------------------------
72+
void Decode::initializeFusion_()
3573
{
36-
fetch_queue_credits_outp_.send(fetch_queue_.capacity());
74+
if (fusion_enable_)
75+
{
76+
fuser_ = std::make_unique<FusionType>(fusion_group_definitions_);
77+
hcache_ = fusion::HCache(FusionGroupType::jenkins_1aat);
78+
fusion_num_groups_defined_ = fuser_->getFusionGroupContainer().size();
79+
}
80+
else
81+
{
82+
fuser_ = nullptr;
83+
}
3784
}
3885

3986
// Receive Uop credits from Dispatch
40-
void Decode::receiveUopQueueCredits_(const uint32_t & credits) {
87+
void Decode::receiveUopQueueCredits_(const uint32_t & credits)
88+
{
4189
uop_queue_credits_ += credits;
42-
if (fetch_queue_.size() > 0) {
90+
if (fetch_queue_.size() > 0)
91+
{
4392
ev_decode_insts_event_.schedule(sparta::Clock::Cycle(0));
4493
}
4594

@@ -52,12 +101,13 @@ namespace olympia
52101
void Decode::fetchBufferAppended_(const InstGroupPtr & insts)
53102
{
54103
// Cache the instructions in the instruction queue if we can't decode this cycle
55-
for(auto & i : *insts)
104+
for (auto & i : *insts)
56105
{
57106
fetch_queue_.push(i);
58107
ILOG("Received: " << i);
59108
}
60-
if (uop_queue_credits_ > 0) {
109+
if (uop_queue_credits_ > 0)
110+
{
61111
ev_decode_insts_event_.schedule(sparta::Clock::Cycle(0));
62112
}
63113
}
@@ -76,25 +126,76 @@ namespace olympia
76126
uint32_t num_decode = std::min(uop_queue_credits_, fetch_queue_.size());
77127
num_decode = std::min(num_decode, num_to_decode_);
78128

79-
if(num_decode > 0)
129+
// buffer to maximize the chances of a group match limited
130+
// by max allowed latency, bounded by max group size
131+
if (fusion_enable_)
132+
{
133+
if (num_decode < fusion_max_group_size_ && latency_count_ < fusion_max_latency_)
134+
{
135+
++latency_count_;
136+
return;
137+
}
138+
}
139+
140+
latency_count_ = 0;
141+
142+
if (num_decode > 0)
80143
{
81144
InstGroupPtr insts =
82145
sparta::allocate_sparta_shared_pointer<InstGroup>(instgroup_allocator);
146+
147+
InstUidListType uids;
83148
// Send instructions on their way to rename
84-
for(uint32_t i = 0; i < num_decode; ++i) {
149+
for (uint32_t i = 0; i < num_decode; ++i)
150+
{
85151
const auto & inst = fetch_queue_.read(0);
86152
insts->emplace_back(inst);
87153
inst->setStatus(Inst::Status::DECODED);
88154

155+
if (fusion_enable_)
156+
{
157+
uids.push_back(inst->getMavisUid());
158+
}
159+
89160
ILOG("Decoded: " << inst);
90161

91162
fetch_queue_.pop();
92163
}
93164

165+
if (fusion_enable_)
166+
{
167+
MatchInfoListType matches;
168+
uint32_t max_itrs = 0;
169+
FusionGroupContainerType & container = fuser_->getFusionGroupContainer();
170+
do
171+
{
172+
matchFusionGroups_(matches, insts, uids, container);
173+
processMatches_(matches, insts, uids);
174+
// Future feature whereIsEgon(insts,numGhosts);
175+
++max_itrs;
176+
} while (matches.size() > 0 && max_itrs < fusion_match_max_tries_);
177+
178+
if (max_itrs >= fusion_match_max_tries_)
179+
{
180+
throw sparta::SpartaException("Fusion group match watch dog exceeded.");
181+
}
182+
}
183+
184+
// Debug statement
185+
if (fusion_debug_ && fusion_enable_)
186+
infoInsts_(cout, insts);
94187
// Send decoded instructions to rename
95188
uop_queue_outp_.send(insts);
96189

190+
// TODO: whereisegon() would remove the ghosts,
191+
// Commented out for now, in practice insts
192+
// would be smaller due to the fused ops
193+
// uint32_t unfusedInstsSize = insts->size();
194+
97195
// Decrement internal Uop Queue credits
196+
sparta_assert(uop_queue_credits_ >= insts->size(),
197+
"Attempt to decrement d0q credits below what is available");
198+
98199
uop_queue_credits_ -= insts->size();
99200

100201
// Send credits back to Fetch to get more instructions
@@ -103,8 +204,9 @@ namespace olympia
103204

104205
// If we still have credits to send instructions as well as
105206
// instructions in the queue, schedule another decode session
106-
if(uop_queue_credits_ > 0 && fetch_queue_.size() > 0) {
207+
if (uop_queue_credits_ > 0 && fetch_queue_.size() > 0)
208+
{
107209
ev_decode_insts_event_.schedule(1);
108210
}
109211
}
110-
}
212+
} // namespace olympia

0 commit comments

Comments
 (0)