Skip to content

Commit

Permalink
Improve hierarchical DPI wrapper scheduling performance (verilator#2583
Browse files Browse the repository at this point in the history
  • Loading branch information
b-chmiel authored Jan 20, 2025
1 parent f4a01eb commit 0507fb4
Show file tree
Hide file tree
Showing 12 changed files with 456 additions and 9 deletions.
7 changes: 7 additions & 0 deletions docs/guide/exe_verilator.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2154,6 +2154,13 @@ The grammar of configuration commands is as follows:
:option:`/*verilator&32;public_flat*/`, etc., metacomments. See
also :ref:`VPI Example`.

.. option:: profile_data -hier-dpi "<function_name>" -cost <cost_value>

Internal profiling data inserted during :vlopt:`--hierarchical`; specifies
the execution cost of hierarchical DPI wrappers for modules with the
:option:`/*verilator&32;hier_block*/` metacomment. See
:ref:`Hierarchical Verilation`.

.. option:: profile_data -mtask "<mtask_hash>" -cost <cost_value>

Feeds profile-guided optimization data into the Verilator algorithms in
Expand Down
7 changes: 4 additions & 3 deletions src/V3AstNodeOther.h
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,7 @@ class AstCFunc final : public AstNode {
bool m_dpiImportWrapper : 1; // Wrapper for invoking DPI import prototype from generated code
bool m_needProcess : 1; // Needs access to VlProcess of the caller
bool m_recursive : 1; // Recursive or part of recursion
int m_cost; // Function call cost
public:
AstCFunc(FileLine* fl, const string& name, AstScope* scopep, const string& rtnType = "")
: ASTGEN_SUPER_CFunc(fl) {
Expand Down Expand Up @@ -671,6 +672,7 @@ class AstCFunc final : public AstNode {
m_dpiImportPrototype = false;
m_dpiImportWrapper = false;
m_recursive = false;
m_cost = v3Global.opt.instrCountDpi(); // As proxy for unknown general DPI cost
}
ASTGEN_MEMBERS_AstCFunc;
string name() const override VL_MT_STABLE { return m_name; }
Expand All @@ -685,9 +687,7 @@ class AstCFunc final : public AstNode {
}
//
void name(const string& name) override { m_name = name; }
int instrCount() const override {
return dpiImportPrototype() ? v3Global.opt.instrCountDpi() : 0;
}
// Returns the function call cost stored in m_cost
int instrCount() const override { return m_cost; }
VBoolOrUnknown isConst() const { return m_isConst; }
void isConst(bool flag) { m_isConst.setTrueOrFalse(flag); }
void isConst(VBoolOrUnknown flag) { m_isConst = flag; }
Expand Down Expand Up @@ -746,6 +746,7 @@ class AstCFunc final : public AstNode {
bool isCoroutine() const { return m_rtnType == "VlCoroutine"; }
void recursive(bool flag) { m_recursive = flag; }
bool recursive() const { return m_recursive; }
// Set the function call cost (m_cost), which instrCount() returns
void cost(int cost) { m_cost = cost; }
// Special methods
bool emptyBody() const {
return argsp() == nullptr && initsp() == nullptr && stmtsp() == nullptr
Expand Down
26 changes: 25 additions & 1 deletion src/V3Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -533,11 +533,13 @@ class V3ConfigScopeTraceResolver final {
// Resolve modules and files in the design

class V3ConfigResolver final {
enum ProfileDataMode : uint8_t { NONE = 0, MTASK = 1, HIER_DPI = 2 };
V3ConfigModuleResolver m_modules; // Access to module names (with wildcards)
V3ConfigFileResolver m_files; // Access to file names (with wildcards)
V3ConfigScopeTraceResolver m_scopeTraces; // Regexp to trace enables
std::unordered_map<string, std::unordered_map<string, uint64_t>>
m_profileData; // Access to profile_data records
uint8_t m_mode = NONE;
FileLine* m_profileFileLine = nullptr;

V3ConfigResolver() = default;
Expand All @@ -552,10 +554,21 @@ class V3ConfigResolver final {
V3ConfigFileResolver& files() { return m_files; }
V3ConfigScopeTraceResolver& scopeTraces() { return m_scopeTraces; }

void addProfileData(FileLine* fl, const string& model, const string& key, uint64_t cost) {
// Record the cost of the hierarchical DPI wrapper named 'hierDpi'.
// Delegates to the general overload, using the wrapper name as the model,
// an empty string as the key, and HIER_DPI as the record mode.
void addProfileData(FileLine* fl, const string& hierDpi, uint64_t cost) {
// Empty key for hierarchical DPI wrapper costs.
addProfileData(fl, hierDpi, "", cost, HIER_DPI);
}
// Add one profile_data record.
//   fl    - location of the record; only the first one seen is remembered
//   model - outer lookup key of the record
//   key   - inner lookup key of the record
//   cost  - cost to accumulate under [model][key]
//   mode  - kind of record (defaults to MTASK); OR-ed into m_mode
void addProfileData(FileLine* fl, const string& model, const string& key, uint64_t cost,
                    ProfileDataMode mode = MTASK) {
    // Keep the file line of the very first record only
    if (m_profileFileLine == nullptr) m_profileFileLine = fl;
    // Cost 0 means delete (or no data), so a supplied zero is stored as 1
    const uint64_t storedCost = (cost == 0) ? 1 : cost;
    // Repeated records for the same model/key accumulate
    m_profileData[model][key] += storedCost;
    // Track which kinds of records have been seen
    m_mode |= mode;
}
// True when the MTASK bit is set in m_mode, i.e. a record was added with mode MTASK
bool containsMTaskProfileData() const { return (m_mode & MTASK) != 0; }
// Look up the cost recorded for the hierarchical DPI wrapper named 'hierDpi'.
// Delegates to the two-argument overload with the wrapper name as the model
// and an empty string as the key.
uint64_t getProfileData(const string& hierDpi) const {
// Empty key for hierarchical DPI wrapper costs.
return getProfileData(hierDpi, "");
}
uint64_t getProfileData(const string& model, const string& key) const {
const auto mit = m_profileData.find(model);
Expand Down Expand Up @@ -619,6 +632,10 @@ void V3Config::addModulePragma(const string& module, VPragmaType pragma) {
V3ConfigResolver::s().modules().at(module).addModulePragma(pragma);
}

// Forward a hierarchical DPI wrapper cost record to the resolver returned by
// V3ConfigResolver::s()
void V3Config::addProfileData(FileLine* fl, const string& hierDpi, uint64_t cost) {
V3ConfigResolver::s().addProfileData(fl, hierDpi, cost);
}

void V3Config::addProfileData(FileLine* fl, const string& model, const string& key,
uint64_t cost) {
V3ConfigResolver::s().addProfileData(fl, model, key, cost);
Expand Down Expand Up @@ -724,6 +741,9 @@ void V3Config::applyVarAttr(AstNodeModule* modulep, AstNodeFTask* ftaskp, AstVar
if (vp) vp->apply(varp);
}

// Return the cost recorded for hierarchical DPI wrapper 'hierDpi', as reported by
// the resolver returned by V3ConfigResolver::s()
uint64_t V3Config::getProfileData(const string& hierDpi) {
return V3ConfigResolver::s().getProfileData(hierDpi);
}
// Return the cost recorded under 'model' and 'key', as reported by the resolver
// returned by V3ConfigResolver::s()
uint64_t V3Config::getProfileData(const string& model, const string& key) {
return V3ConfigResolver::s().getProfileData(model, key);
}
Expand All @@ -736,6 +756,10 @@ bool V3Config::getScopeTraceOn(const string& scope) {

void V3Config::contentsPushText(const string& text) { return WildcardContents::pushText(text); }

// Report whether the resolver returned by V3ConfigResolver::s() has seen any
// profile_data record added in MTASK mode
bool V3Config::containsMTaskProfileData() {
return V3ConfigResolver::s().containsMTaskProfileData();
}

bool V3Config::waive(FileLine* filelinep, V3ErrorCode code, const string& message) {
V3ConfigFile* filep = V3ConfigResolver::s().files().resolve(filelinep->filename());
if (!filep) return false;
Expand Down
4 changes: 4 additions & 0 deletions src/V3Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class V3Config final {
const string& match);
static void addInline(FileLine* fl, const string& module, const string& ftask, bool on);
static void addModulePragma(const string& module, VPragmaType pragma);
static void addProfileData(FileLine* fl, const string& hierDpi, uint64_t cost);
static void addProfileData(FileLine* fl, const string& model, const string& key,
uint64_t cost);
static void addScopeTraceOn(bool on, const string& scope, int levels);
Expand All @@ -51,12 +52,15 @@ class V3Config final {
static void applyModule(AstNodeModule* modulep);
static void applyVarAttr(AstNodeModule* modulep, AstNodeFTask* ftaskp, AstVar* varp);

static uint64_t getProfileData(const string& hierDpi);
static uint64_t getProfileData(const string& model, const string& key);
static FileLine* getProfileDataFileLine();
static bool getScopeTraceOn(const string& scope);

static void contentsPushText(const string& text);

static bool containsMTaskProfileData();

static bool waive(FileLine* filelinep, V3ErrorCode code, const string& message);
};

Expand Down
8 changes: 5 additions & 3 deletions src/V3ExecGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -528,9 +528,11 @@ void fillinCosts(V3Graph* execMTaskGraphp) {

if (missingProfiles) {
if (FileLine* const fl = V3Config::getProfileDataFileLine()) {
fl->v3warn(PROFOUTOFDATE, "Profile data for mtasks may be out of date. "
<< missingProfiles << " of " << totalEstimates
<< " mtasks had no data");
if (V3Config::containsMTaskProfileData()) {
fl->v3warn(PROFOUTOFDATE, "Profile data for mtasks may be out of date. "
<< missingProfiles << " of " << totalEstimates
<< " mtasks had no data");
}
}
}
}
Expand Down
6 changes: 5 additions & 1 deletion src/V3OrderParallel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1710,7 +1710,11 @@ class DpiImportCallVisitor final : public VNVisitor {
if (nodep->dpiImportWrapper()) {
if (nodep->dpiPure() ? !v3Global.opt.threadsDpiPure()
: !v3Global.opt.threadsDpiUnpure()) {
m_hasDpiHazard = true;
// If hierarchical DPI wrapper cost is not found or is of a 0 cost,
// we have a normal DPI which induces DPI hazard by default.
m_hasDpiHazard = V3Config::getProfileData(nodep->cname()) == 0;
UINFO(9, "DPI wrapper '" << nodep->cname()
<< "' has dpi hazard = " << m_hasDpiHazard << endl);
}
}
iterateChildren(nodep);
Expand Down
30 changes: 30 additions & 0 deletions src/V3ProtectLib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "V3ProtectLib.h"

#include "V3Hasher.h"
#include "V3InstrCount.h"
#include "V3String.h"
#include "V3Task.h"

Expand Down Expand Up @@ -98,6 +99,32 @@ class ProtectVisitor final : public VNVisitor {
txtp->addNodesp(new AstComment{fl, comment});
}

// Append a `verilator_config section, guarded by `ifdef VERILATOR, carrying
// profile_data -hier-dpi cost records for this library's protectlib DPI functions.
//   modp - module searched for the `_eval` function
//   txtp - text block receiving the generated lines
//   fl   - file line attached to every emitted text node
void configSection(AstNodeModule* modp, AstTextBlock* txtp, FileLine* fl) {
    txtp->addText(fl, "\n`ifdef VERILATOR\n");
    txtp->addText(fl, "`verilator_config\n");

    // Both update functions call `eval`, but they are created by text bashing, so their
    // cost is taken from `_eval`, the first function with a real cost in the AST.
    uint32_t evalCost = 0;
    modp->foreach([&evalCost](AstCFunc* funcp) {
        if (funcp->name() != "_eval") return;
        evalCost = V3InstrCount::count(funcp, false);
    });

    // Pieces shared by the emitted records
    const std::string recordPrefix = "profile_data -hier-dpi \"" + m_libName;
    const std::string evalCostSuffix = "\" -cost 64'd" + std::to_string(evalCost) + "\n";

    txtp->addText(fl, recordPrefix + "_protectlib_combo_update" + evalCostSuffix);
    txtp->addText(fl, recordPrefix + "_protectlib_seq_update" + evalCostSuffix);

    // The remaining protectlib wrapper DPI is deliberately given a non-zero cost so
    // that it is marked as non-hazardous.
    txtp->addText(fl, recordPrefix + "_protectlib_combo_ignore\" -cost 64'd1\n");

    txtp->addText(fl, "`verilog\n");
    txtp->addText(fl, "`endif\n");
}

// Add the fixed comment describing the .sv wrapper / library agreement check to 'txtp'
void hashComment(AstTextBlock* txtp, FileLine* fl) {
addComment(txtp, fl, "Checks to make sure the .sv wrapper and library agree");
}
Expand Down Expand Up @@ -283,6 +310,9 @@ class ProtectVisitor final : public VNVisitor {
txtp->addText(fl, "final " + m_libName + "_protectlib_final(handle__V);\n\n");

txtp->addText(fl, "endmodule\n");

configSection(modp, txtp, fl);

m_vfilep->tblockp(txtp);
}

Expand Down
15 changes: 14 additions & 1 deletion src/V3Task.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "V3Task.h"

#include "V3Config.h"
#include "V3Const.h"
#include "V3EmitCBase.h"
#include "V3Graph.h"
Expand Down Expand Up @@ -372,6 +373,7 @@ class TaskVisitor final : public VNVisitor {
// STATE - across all visitors
DpiCFuncs m_dpiNames; // Map of all created DPI functions
VDouble0 m_statInlines; // Statistic tracking
VDouble0 m_statHierDpisWithCosts; // Statistic tracking

// METHODS

Expand Down Expand Up @@ -980,6 +982,11 @@ class TaskVisitor final : public VNVisitor {
funcp->isMethod(false);
funcp->protect(false);
funcp->dpiPure(nodep->dpiPure());

const int cost = static_cast<int>(V3Config::getProfileData(funcp->name()));
m_statHierDpisWithCosts += (cost != 0);
funcp->cost(cost);

// Add DPI Import to top, since it's a global function
m_topScopep->scopep()->addBlocksp(funcp);
makePortList(nodep, funcp);
Expand Down Expand Up @@ -1259,6 +1266,8 @@ class TaskVisitor final : public VNVisitor {
if (nodep->name() == "new") cfuncp->isConstructor(true);
if (cfuncp->dpiExportImpl()) cfuncp->cname(nodep->cname());

if (cfuncp->dpiImportWrapper()) cfuncp->cname(nodep->cname());

if (!nodep->dpiImport() && !nodep->taskPublic()) {
// Need symbol table
cfuncp->argTypes(EmitCBase::symClassVar());
Expand Down Expand Up @@ -1613,7 +1622,11 @@ class TaskVisitor final : public VNVisitor {
: m_statep{statep} {
iterate(nodep);
}
~TaskVisitor() { V3Stats::addStat("Optimizations, Functions inlined", m_statInlines); }
// On destruction, report the statistics collected by this visitor
~TaskVisitor() {
// Number of functions inlined
V3Stats::addStat("Optimizations, Functions inlined", m_statInlines);
// Number of DPI import prototypes whose profile_data lookup gave a non-zero cost
V3Stats::addStat("Optimizations, Hierarchical DPI wrappers with costs",
m_statHierDpisWithCosts);
}
};

//######################################################################
Expand Down
1 change: 1 addition & 0 deletions src/verilog.l
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ vnum {vnum1}|{vnum2}|{vnum3}|{vnum4}|{vnum5}
-?"-cost" { FL; return yVLT_D_COST; }
-?"-file" { FL; return yVLT_D_FILE; }
-?"-function" { FL; return yVLT_D_FUNCTION; }
-?"-hier-dpi" { FL; return yVLT_D_HIER_DPI; }
-?"-levels" { FL; return yVLT_D_LEVELS; }
-?"-lines" { FL; return yVLT_D_LINES; }
-?"-match" { FL; return yVLT_D_MATCH; }
Expand Down
7 changes: 7 additions & 0 deletions src/verilog.y
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ BISONPRE_VERSION(3.7,%define api.header.include {"V3ParseBison.h"})
%token<fl> yVLT_D_COST "--cost"
%token<fl> yVLT_D_FILE "--file"
%token<fl> yVLT_D_FUNCTION "--function"
%token<fl> yVLT_D_HIER_DPI "--hier-dpi"
%token<fl> yVLT_D_LEVELS "--levels"
%token<fl> yVLT_D_LINES "--lines"
%token<fl> yVLT_D_MATCH "--match"
Expand Down Expand Up @@ -7655,6 +7656,8 @@ vltItem:
{ V3Config::addCaseParallel(*$2, 0); }
| yVLT_PARALLEL_CASE vltDFile yVLT_D_LINES yaINTNUM
{ V3Config::addCaseParallel(*$2, $4->toUInt()); }
| yVLT_PROFILE_DATA vltDHierDpi vltDCost
{ V3Config::addProfileData($<fl>1, *$2, $3->toUQuad()); }
| yVLT_PROFILE_DATA vltDModel vltDMtask vltDCost
{ V3Config::addProfileData($<fl>1, *$2, *$3, $4->toUQuad()); }
;
Expand Down Expand Up @@ -7699,6 +7702,10 @@ vltDFile<strp>: // --file <arg>
yVLT_D_FILE str { $$ = $2; }
;

// Yields the string that follows the yVLT_D_HIER_DPI option token
vltDHierDpi<strp>: // --hier-dpi <arg>
yVLT_D_HIER_DPI str { $$ = $2; }
;

vltDLevels<nump>: // --levels <arg>
yVLT_D_LEVELS yaINTNUM { $$ = $2; }
;
Expand Down
40 changes: 40 additions & 0 deletions test_regress/t/t_hier_block_perf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python3
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
#
# Copyright 2025 by Wilson Snyder. This program is free software; you
# can redistribute it and/or modify it under the terms of either the GNU
# Lesser General Public License Version 3 or the Perl Artistic License
# Version 2.0.
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0

import vltest_bootstrap

test.scenarios('vlt_all')
test.init_benchmarksim()

# Use the benchmark value as the cycle count when it is set, else 1000000
test.cycles = int(test.benchmark) if test.benchmark else 1000000
test.sim_time = test.cycles * 10 + 1000

# SIM_THREADS in the environment overrides the default of 2
THREADS = int(os.environ.get("SIM_THREADS", 2))

compile_flags = [
    "+define+SIM_CYCLES=" + str(test.cycles),
    "--prof-exec",
    "--hierarchical",
    "--stats",
]
test.compile(benchmarksim=1,
             v_flags2=compile_flags,
             threads=(THREADS if test.vltmt else 1))

# Check the "Hierarchical DPI wrappers with costs" statistic in the __hier.dir stats file
# (3 is passed as the expected value)
stats_file = test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt"
test.file_grep(stats_file, r'Optimizations, Hierarchical DPI wrappers with costs\s+(\d+)', 3)

profile_exec_dat = test.obj_dir + "/profile_exec.dat"
test.execute(all_run_flags=[
    "+verilator+prof+exec+start+2",
    " +verilator+prof+exec+window+2",
    " +verilator+prof+exec+file+" + profile_exec_dat,
    " +verilator+prof+vlt+file+" + test.obj_dir + "/profile.vlt",
])  # yapf:disable

# Run verilator_gantt on the collected execution profile
gantt_tool = os.environ["VERILATOR_ROOT"] + "/bin/verilator_gantt"
test.run(cmd=[
    gantt_tool,
    profile_exec_dat,
    "--vcd " + test.obj_dir + "/profile_exec.vcd",
    "| tee " + test.obj_dir + "/gantt.log",
])

test.passes()
Loading

0 comments on commit 0507fb4

Please sign in to comment.