 #include "absl/status/statusor.h"
 #include "absl/strings/str_format.h"
 #include "absl/strings/str_join.h"
+#include "xls/common/math_util.h"
 #include "xls/common/status/status_macros.h"
 #include "xls/ir/node.h"
 #include "xls/ir/nodes.h"
@@ -117,21 +118,51 @@ namespace {
                              kind, node->operands().size()));
     return std::ceil(invert ? base_effort + int64_t{1} : base_effort);
   };
-  auto get_reduction_logical_effort =
-      [node](netlist::CellKind kind, bool invert) -> absl::StatusOr<int64_t> {
-    const int64_t bit_count = node->operand(0)->BitCountOrDie();
+  // Calculate the reduction logical effort. We use the lower of a binary-tree
+  // reduction and a linear reduction.
+  //
+  // TODO(allight): A binary tree is not actually always the best architecture
+  // for a reduction, and the correspondence between effort and delay is not
+  // always simple; for now, however, a binary tree is a good approximation.
+  // See chapter 11 of "Sutherland, I., et al., Logical Effort: Designing Fast
+  // CMOS Circuits" (and frankly the rest of the book) for more information.
+  // As a follow-up we could find the optimal branching factors and use them;
+  // see chapter 11.2.
+  auto get_reduction_logical_effort_of =
+      [](netlist::CellKind kind, bool invert,
+         int64_t bit_count) -> absl::StatusOr<int64_t> {
     if (bit_count < 2) {
       return 0;
     }
     XLS_ASSIGN_OR_RETURN(
         double base_effort,
         netlist::logical_effort::GetLogicalEffort(kind, bit_count));
-    return std::ceil(invert ? base_effort + int64_t{1} : base_effort);
+    int64_t effort_raw =
+        std::ceil(invert ? base_effort + int64_t{1} : base_effort);
+    uint64_t tree_depth = CeilOfLog2(bit_count);
+    XLS_ASSIGN_OR_RETURN(double base_bin_tree_effort,
+                         netlist::logical_effort::GetLogicalEffort(kind, 2));
+    int64_t tree_node_effort_raw = std::ceil(
+        invert ? base_bin_tree_effort + int64_t{1} : base_bin_tree_effort);
+    int64_t effort_tree = tree_node_effort_raw * tree_depth;
+    return std::min(effort_raw, effort_tree);
+  };
+  auto get_reduction_logical_effort =
+      [&](netlist::CellKind kind, bool invert) -> absl::StatusOr<int64_t> {
+    const int64_t bit_count = node->operand(0)->BitCountOrDie();
+    return get_reduction_logical_effort_of(kind, invert, bit_count);
   };
   switch (node->op()) {
     // TODO(leary): 2019-09-24 Collect real numbers for these.
     case Op::kGate:
       return get_logical_effort(netlist::CellKind::kNand, /*invert=*/true);
+    // TODO(allight): The nary ops (And, Nand, Nor, Or, Xor) should all be
+    // modeled using a tree structure too. Note that the current tree
+    // implementation won't work with nand/nor, since they are not associative,
+    // and care will need to be taken to ensure they are modeled accurately.
+    // Currently this models them as though a single cell is always used, but
+    // that is simply not true (especially as the synthesizer trades off area
+    // and delay).
     case Op::kAnd:
       return get_logical_effort(netlist::CellKind::kNand, /*invert=*/true);
     case Op::kNand:
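As a rough illustration of what the new get_reduction_logical_effort_of lambda computes, here is a minimal standalone sketch. It substitutes the textbook per-stage effort of an n-input NAND, (n + 2) / 3 (Sutherland et al.), for XLS's netlist::logical_effort::GetLogicalEffort, so the exact numbers are only illustrative; CeilLog2, NandStageEffort, and ReductionEffort are hypothetical helpers, not part of this change.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Smallest d such that 2^d >= n, i.e. the depth of a binary reduction tree.
int64_t CeilLog2(int64_t n) {
  int64_t d = 0;
  while ((int64_t{1} << d) < n) ++d;
  return d;
}

// Effort of one n-input NAND stage, plus an inverter when `invert` is set.
// (n + 2) / 3 is the textbook logical effort of an n-input NAND.
int64_t NandStageEffort(int64_t n, bool invert) {
  double base = (n + 2.0) / 3.0;
  return static_cast<int64_t>(std::ceil(invert ? base + 1.0 : base));
}

// Mirrors the lambda above: take the cheaper of one wide gate and a
// ceil(log2(n))-deep tree of 2-input gates.
int64_t ReductionEffort(int64_t bit_count, bool invert) {
  if (bit_count < 2) return 0;
  int64_t linear = NandStageEffort(bit_count, invert);
  int64_t tree = NandStageEffort(2, invert) * CeilLog2(bit_count);
  return std::min(linear, tree);
}

int main() {
  // 16-bit and-reduce: linear = ceil(18/3 + 1) = 7, tree = 3 * 4 = 12, so the
  // linear estimate wins here; the tree bound matters most for cells whose
  // effort grows quickly with fan-in (e.g. XOR).
  std::cout << ReductionEffort(16, /*invert=*/true) << "\n";  // prints 7
}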
@@ -144,6 +175,11 @@ namespace {
       return get_logical_effort(netlist::CellKind::kXor, /*invert=*/false);
     case Op::kNot:
       return get_logical_effort(netlist::CellKind::kInverter, /*invert=*/false);
+    // NB: By modeling the reduction ops as a binary tree we are assuming the
+    // synthesizer takes the area/delay tradeoff inherent in its cell choices
+    // in a particular direction. This choice is likely not optimal for either
+    // area or delay on real process nodes, but it is a reasonable
+    // approximation.
     case Op::kAndReduce:
       return get_reduction_logical_effort(netlist::CellKind::kNand,
                                           /*invert=*/true);
@@ -162,11 +198,13 @@ namespace {
         // A 2-bit or less encode simply passes through the MSB.
         return 0;
       }
-      XLS_ASSIGN_OR_RETURN(
-          int64_t nor_delay,
-          netlist::logical_effort::GetLogicalEffort(netlist::CellKind::kNor,
-                                                    (operand_width + 1) / 2));
-      return std::ceil(nor_delay + 1);
+      int64_t bit_count = (operand_width + 1) / 2;
+      XLS_ASSIGN_OR_RETURN(int64_t nor_delay, get_reduction_logical_effort_of(
+                                                  netlist::CellKind::kNor,
+                                                  /*invert=*/true,
+                                                  /*bit_count=*/bit_count));
+      // The last NOT isn't needed, so subtract its effort.
+      return std::ceil(nor_delay - 1);
     }
     case Op::kOneHotSel: {
       // This should synthesize to something quite similar to a two-level NAND
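For the Op::kEncode branch above, a hypothetical walk-through of the new cost for an 8-bit operand, again using the textbook NOR effort (2n + 1) / 3 instead of the real GetLogicalEffort, so the numbers are only illustrative:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Textbook logical effort of an n-input NOR, (2n + 1) / 3, plus an inverter
// when `invert` is set.
int64_t NorStageEffort(int64_t n, bool invert) {
  double base = (2.0 * n + 1.0) / 3.0;
  return static_cast<int64_t>(std::ceil(invert ? base + 1.0 : base));
}

int main() {
  const int64_t operand_width = 8;
  // Each output bit of the encode ORs together roughly half the input bits.
  const int64_t bit_count = (operand_width + 1) / 2;  // 4
  // Cheaper of one wide NOR+INV and a two-level tree of 2-input NOR+INV cells
  // (ceil(log2(4)) == 2 levels).
  int64_t linear = NorStageEffort(bit_count, /*invert=*/true);  // ceil(3 + 1) = 4
  int64_t tree = NorStageEffort(2, /*invert=*/true) * 2;        // 3 * 2 = 6
  int64_t nor_delay = std::min(linear, tree);                   // 4
  // The trailing inverter folds into the output, hence the -1.
  std::cout << nor_delay - 1 << "\n";  // prints 3
}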