Skip to content

Commit 8dfab86

Browse files
allight authored and
copybara-github committed
Model reduction ops logical effort using a binary tree for delay estimation.
This is not quite the optimal way to model them, but it should be better than the current implementation, which is linear. PiperOrigin-RevId: 807867919
1 parent 62022c1 commit 8dfab86

File tree

3 files changed

+50
-11
lines changed

3 files changed

+50
-11
lines changed

xls/estimators/delay_model/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ cc_library(
3939
srcs = ["delay_estimator.cc"],
4040
hdrs = ["delay_estimator.h"],
4141
deps = [
42+
"//xls/common:math_util",
4243
"//xls/common:test_macros",
4344
"//xls/common/status:status_macros",
4445
"//xls/ir",

xls/estimators/delay_model/delay_estimator.cc

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "absl/status/statusor.h"
3131
#include "absl/strings/str_format.h"
3232
#include "absl/strings/str_join.h"
33+
#include "xls/common/math_util.h"
3334
#include "xls/common/status/status_macros.h"
3435
#include "xls/ir/node.h"
3536
#include "xls/ir/nodes.h"
@@ -117,21 +118,51 @@ namespace {
117118
kind, node->operands().size()));
118119
return std::ceil(invert ? base_effort + int64_t{1} : base_effort);
119120
};
120-
auto get_reduction_logical_effort =
121-
[node](netlist::CellKind kind, bool invert) -> absl::StatusOr<int64_t> {
122-
const int64_t bit_count = node->operand(0)->BitCountOrDie();
121+
// Calculate the reduction logical effort. We use the lower of a binary-tree
122+
// reduction and a linear reduction.
123+
//
124+
// TODO(allight): A binary tree is not actually always the best architecture
125+
// for a reduction and the correspondence between effort and delay is not
126+
// always simple. For now a binary-tree is a good approximation however. See
127+
// chapter 11 of "Sutherland, I., et al., Logical Effort: Designing Fast CMOS
128+
// Circuits" (and frankly the rest of the book) for more information. For a
129+
// follow up we can find the optimal branching factors and use them. See
130+
// chapter 11.2 for more information.
131+
auto get_reduction_logical_effort_of =
132+
[](netlist::CellKind kind, bool invert,
133+
int64_t bit_count) -> absl::StatusOr<int64_t> {
123134
if (bit_count < 2) {
124135
return 0;
125136
}
126137
XLS_ASSIGN_OR_RETURN(
127138
double base_effort,
128139
netlist::logical_effort::GetLogicalEffort(kind, bit_count));
129-
return std::ceil(invert ? base_effort + int64_t{1} : base_effort);
140+
int64_t effort_raw =
141+
std::ceil(invert ? base_effort + int64_t{1} : base_effort);
142+
uint64_t tree_depth = CeilOfLog2(bit_count);
143+
XLS_ASSIGN_OR_RETURN(double base_bin_tree_effort,
144+
netlist::logical_effort::GetLogicalEffort(kind, 2));
145+
int64_t tree_node_effort_raw = std::ceil(
146+
invert ? base_bin_tree_effort + int64_t{1} : base_bin_tree_effort);
147+
int64_t effort_tree = tree_node_effort_raw * tree_depth;
148+
return std::min(effort_raw, effort_tree);
149+
};
150+
auto get_reduction_logical_effort =
151+
[&](netlist::CellKind kind, bool invert) -> absl::StatusOr<int64_t> {
152+
const int64_t bit_count = node->operand(0)->BitCountOrDie();
153+
return get_reduction_logical_effort_of(kind, invert, bit_count);
130154
};
131155
switch (node->op()) {
132156
// TODO(leary): 2019-09-24 Collect real numbers for these.
133157
case Op::kGate:
134158
return get_logical_effort(netlist::CellKind::kNand, /*invert=*/true);
159+
// TODO(allight): The nary ops (And, Nand, Nor, Or, Xor) should all be
160+
// modeled using a tree structure too. Note that the current tree
161+
// implementation won't work with nand/nor since they are not associative
162+
// and care will need to be taken to ensure they are modeled accurately.
163+
// Currently this models it as though a single cell is always used but this
164+
// is simply not true (especially as the synthesizer trades off area and
165+
// delay).
135166
case Op::kAnd:
136167
return get_logical_effort(netlist::CellKind::kNand, /*invert=*/true);
137168
case Op::kNand:
@@ -144,6 +175,11 @@ namespace {
144175
return get_logical_effort(netlist::CellKind::kXor, /*invert=*/false);
145176
case Op::kNot:
146177
return get_logical_effort(netlist::CellKind::kInverter, /*invert=*/false);
178+
// NB By modeling the reduction ops as a binary tree we are assuming the
179+
// synthesizer takes the area/delay tradeoff inherent in which cells it
180+
// chooses in a particular direction. This choice is likely not the optimal
181+
// one for either area or delay in real process nodes; however, it is a
182+
// reasonable approximation.
147183
case Op::kAndReduce:
148184
return get_reduction_logical_effort(netlist::CellKind::kNand,
149185
/*invert=*/true);
@@ -162,11 +198,13 @@ namespace {
162198
// A 2-bit or less encode simply passes through the MSB.
163199
return 0;
164200
}
165-
XLS_ASSIGN_OR_RETURN(
166-
int64_t nor_delay,
167-
netlist::logical_effort::GetLogicalEffort(netlist::CellKind::kNor,
168-
(operand_width + 1) / 2));
169-
return std::ceil(nor_delay + 1);
201+
int64_t bit_count = (operand_width + 1) / 2;
202+
XLS_ASSIGN_OR_RETURN(int64_t nor_delay, get_reduction_logical_effort_of(
203+
netlist::CellKind::kNor,
204+
/*invert=*/true,
205+
/*bit_count=*/bit_count));
206+
// The last not isn't needed so remove it.
207+
return std::ceil(nor_delay - 1);
170208
}
171209
case Op::kOneHotSel: {
172210
// This should synthesize to something quite similar to a two-level NAND

xls/estimators/delay_model/delay_estimator_test.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ TEST_F(DelayEstimatorTest, LogicalEffortForAndReduces) {
105105
fb.BuildWithReturnValue(fb.AndReduce(x)));
106106
EXPECT_THAT(
107107
DelayEstimator::GetLogicalEffortDelayInPs(f->return_value(), 10),
108-
IsOkAndHolds(350));
108+
IsOkAndHolds(210));
109109
}
110110
}
111111

@@ -131,7 +131,7 @@ TEST_F(DelayEstimatorTest, LogicalEffortForOrReduces) {
131131
fb.BuildWithReturnValue(fb.OrReduce(x)));
132132
EXPECT_THAT(
133133
DelayEstimator::GetLogicalEffortDelayInPs(f->return_value(), 10),
134-
IsOkAndHolds(680));
134+
IsOkAndHolds(210));
135135
}
136136
}
137137

0 commit comments

Comments
 (0)