From 58dfc0f8eb2760779415796c2239cd419150031c Mon Sep 17 00:00:00 2001
From: Xuanda Yang
Date: Wed, 1 Oct 2025 18:14:35 -0700
Subject: [PATCH] add random pipeline generator

---
 apps/CMakeLists.txt                 |    1 +
 apps/random_pipeline/CMakeLists.txt |   23 +
 .../random_pipeline_generator.cpp   | 1058 +++++++++++++++++
 3 files changed, 1082 insertions(+)
 create mode 100644 apps/random_pipeline/CMakeLists.txt
 create mode 100644 apps/random_pipeline/random_pipeline_generator.cpp

diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt
index 6ffd7966a991..aeec711f85e8 100644
--- a/apps/CMakeLists.txt
+++ b/apps/CMakeLists.txt
@@ -65,3 +65,4 @@ add_app(stencil_chain)
 add_app(unsharp)
 add_app(wavelet)
 add_app(HelloBaremetal)
+add_app(random_pipeline)
diff --git a/apps/random_pipeline/CMakeLists.txt b/apps/random_pipeline/CMakeLists.txt
new file mode 100644
index 000000000000..71de66749651
--- /dev/null
+++ b/apps/random_pipeline/CMakeLists.txt
@@ -0,0 +1,23 @@
+cmake_minimum_required(VERSION 3.28)
+project(random_pipeline)
+
+enable_testing()
+
+# Set up language settings
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED YES)
+set(CMAKE_CXX_EXTENSIONS NO)
+
+# Find Halide
+find_package(Halide REQUIRED)
+
+# Generator
+add_halide_generator(random_pipeline.generator
+                     SOURCES random_pipeline_generator.cpp
+                     LINK_LIBRARIES Halide::Tools)
+
+# Filters
+add_halide_library(random_pipeline FROM random_pipeline.generator)
+add_halide_library(random_pipeline_auto_schedule FROM random_pipeline.generator
+                   GENERATOR random_pipeline
+                   AUTOSCHEDULER Halide::Mullapudi2016)
diff --git a/apps/random_pipeline/random_pipeline_generator.cpp b/apps/random_pipeline/random_pipeline_generator.cpp
new file mode 100644
index 000000000000..e8362ef1f4c3
--- /dev/null
+++ b/apps/random_pipeline/random_pipeline_generator.cpp
@@ -0,0 +1,1058 @@
+#include "Halide.h"
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <random>
+#include <unordered_map>
+
+using namespace Halide;
+using namespace Halide::Internal;
+using std::unordered_map;
+using std::vector;
+
+// Convert a vector of Vars to Exprs. Useful for generating references
+// to Funcs.
+vector<Expr> make_arguments(vector<Var> vars) {
+    vector<Expr> result;
+    for (Var i : vars) {
+        result.push_back(i);
+    }
+    return result;
+}
+
+std::mt19937 rng;
+
+// Helpers to generate random values.
+int rand_int(int min, int max) {
+    return (rng() % (max - min + 1)) + min;
+}
+bool rand_bool() {
+    return rng() % 2 == 0;
+}
+float rand_float() {
+    return rand_int(0, 1 << 30) / (float)(1 << 30);
+}
+
+// Generate random expressions. Given a vector of expressions and a
+// tree depth, recursively generates an expression by combining
+// subexpressions. At the base case where depth is 0, we just return
+// a randomly chosen input.
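+//
+// Illustrative only: with inputs {f(x), f(x+1)} and depth 2, one
+// plausible result is
+//     select(f(x) < f(x+1),
+//            cast<uint16_t>(f(x)) * f(x+1),
+//            cast<uint16_t>(f(x) + f(x+1)))
+// where the second cast is inserted because both branches of a select
+// must have the same type (see random_expr_inner below).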
+
+Type expr_types[] = {UInt(8), UInt(16), UInt(32), Int(8), Int(16), Int(32), Float(32)};
+const int expr_type_count = sizeof(expr_types) / sizeof(expr_types[0]);
+
+typedef Expr (*make_bin_op_fn)(Expr, Expr);
+
+make_bin_op_fn make_bin_op[] = {
+    (make_bin_op_fn)operator+,
+    (make_bin_op_fn)operator-,
+    (make_bin_op_fn)operator*,
+    (make_bin_op_fn)min,
+    (make_bin_op_fn)max,
+    (make_bin_op_fn)operator/,
+    (make_bin_op_fn)operator%,
+};
+
+make_bin_op_fn make_bool_bin_op[] = {
+    (make_bin_op_fn)operator&&,
+    (make_bin_op_fn)operator||,
+};
+
+make_bin_op_fn make_comp_bin_op[] = {
+    (make_bin_op_fn)operator==,
+    (make_bin_op_fn)operator!=,
+    (make_bin_op_fn)operator<,
+    (make_bin_op_fn)operator<=,
+    (make_bin_op_fn)operator>,
+    (make_bin_op_fn)operator>=,
+};
+
+const int bin_op_count = sizeof(make_bin_op) / sizeof(make_bin_op[0]);
+const int bool_bin_op_count = sizeof(make_bool_bin_op) / sizeof(make_bool_bin_op[0]);
+const int comp_bin_op_count = sizeof(make_comp_bin_op) / sizeof(make_comp_bin_op[0]);
+
+Type random_type() {
+    Type T = expr_types[rng() % expr_type_count];
+    return T;
+}
+
+Expr random_expr_inner(vector<Expr> inputs, int depth, int func_size);
+
+Expr random_condition(vector<Expr> inputs, int depth, int func_size) {
+    Expr a = random_expr_inner(inputs, depth, func_size);
+    Expr b = random_expr_inner(inputs, depth, func_size);
+    int op = rng() % comp_bin_op_count;
+    return make_comp_bin_op[op](a, b);
+}
+
+// Takes a vector of inputs (points in functions) and returns one of
+// them, chosen at random.
+Expr make_leaf(vector<Expr> inputs) {
+    auto chosen_input = inputs[rand_int(0, inputs.size() - 1)];
+    return chosen_input;
+}
+
+Expr random_expr_inner(vector<Expr> inputs, int depth, int func_size) {
+    const int op_count = bin_op_count + bool_bin_op_count + 9;
+    const int func_size_thresh = 1e4;  // if input is too large do not use trig functions
+
+    if (depth <= 0) {
+        return make_leaf(inputs);
+    }
+
+    // Pick a random operation to combine exprs
+    int op = rng() % op_count;  // op_count must match the number of cases handled below
+    switch (op) {
+    case 0:  // casting
+    {
+        // Get a random type
+        Type convertT = random_type();
+        auto e1 = random_expr_inner(inputs, depth, func_size);
+        return cast(convertT, e1);
+    }
+    case 1:  // select operation
+    {
+        auto c = random_condition(inputs, depth - 2, func_size);  // arbitrarily chose to make condition expression shorter
+        auto e1 = random_expr_inner(inputs, depth - 1, func_size);
+        auto e2 = random_expr_inner(inputs, depth - 2, func_size);
+        // make sure e1 and e2 have the same type
+        if (e1.type() != e2.type()) {
+            e2 = cast(e1.type(), e2);
+        }
+        return select(c, e1, e2);
+    }
+    case 2:  // unary boolean op
+    {
+        auto e1 = random_expr_inner(inputs, depth - 1, func_size);
+        if (e1.type().is_bool()) {
+            return !e1;
+        }
+        break;
+    }
+    case 3:  // sin
+    {
+        if (func_size > func_size_thresh) {
+            // Don't use expensive ops if the function is very large
+            break;
+        }
+        auto e1 = random_expr_inner(inputs, depth - 1, func_size);
+        return sin(cast<float>(e1));
+    }
+    case 4:  // tanh
+    {
+        if (func_size > func_size_thresh) {
+            // Don't use expensive ops if the function is very large
+            break;
+        }
+        auto e1 = random_expr_inner(inputs, depth - 1, func_size);
+        return tanh(cast<float>(e1));
+    }
+    case 5:  // exp
+    {
+        auto e1 = random_expr_inner(inputs, depth - 1, func_size);
+        return fast_exp(cast<float>(e1));
+    }
+    case 6:  // sqrt
+    {
+        auto e1 = random_expr_inner(inputs, depth - 1, func_size);
+        return sqrt(cast<float>(e1));
+    }
+    case 7:  // log
+    {
+        auto e1 = random_expr_inner(inputs, depth - 1, func_size);
+        return fast_log(cast<float>(e1));
+    }
+    case 8:  // condition
+    {
+        return random_condition(inputs, depth - 1, func_size);
+    }
+    default:  // binary op
+        make_bin_op_fn maker;
+        auto e1 = random_expr_inner(inputs, depth - 1, func_size);
+        auto e2 = random_expr_inner(inputs, depth - 2, func_size);
+        if (e1.type().is_bool() && e2.type().is_bool()) {
+            maker = make_bool_bin_op[op % bool_bin_op_count];
+        } else {
+            maker = make_bin_op[op % bin_op_count];
+        }
+        return maker(e1, e2);
+    }
+
+    // The selected case did not return an expression; try again
+    return random_expr_inner(inputs, depth, func_size);
+}
+
+Expr rand_value(Type t) {
+    if (t.is_bool()) {
+        return cast(t, rand_int(0, 1));
+    } else if (t.is_int() || t.is_uint()) {
+        return cast(t, rand_int(1, 127));
+    } else if (t.is_float()) {
+        return cast(t, rand_float());
+    } else {
+        // Shouldn't get here.
+        assert(false);
+        return undef(t);
+    }
+}
+
+Expr random_expr(vector<Expr> inputs, int depth, int func_size) {
+    for (auto &e : inputs) {
+        e = Internal::simplify(e);
+    }
+
+    for (int attempts = 0; attempts < 10; attempts++) {
+        Expr result =
+            Internal::simplify(Internal::common_subexpression_elimination(random_expr_inner(inputs, depth, func_size)));
+
+        class Checker : public Internal::IRMutator {
+        public:
+            Expr mutate(const Expr &e) override {
+                exprs_to_find.erase(e);
+                return IRMutator::mutate(e);
+            }
+            using Internal::IRMutator::mutate;
+            std::set<Expr, Internal::IRDeepCompare> exprs_to_find;
+            Checker(const vector<Expr> &inputs) {
+                for (const auto &e : inputs) {
+                    exprs_to_find.insert(e);
+                }
+            }
+        } checker(inputs);
+
+        checker.mutate(result);
+
+        // Double check all the inputs are used
+        if (!checker.exprs_to_find.empty()) {
+            std::cerr << "In random expression: " << result << "\n"
+                      << "The following expressions were unused:\n";
+            for (auto &e : checker.exprs_to_find) {
+                std::cerr << e << "\n";
+            }
+        } else {
+            return result;
+        }
+    }
+
+    // We're having a hard time generating an expression that uses all the inputs. Just sum them.
+    Type t = inputs[0].type();
+    if (t.is_bool()) {
+        t = UInt(8);
+    }
+    Expr result = cast(t, 0);
+    for (const auto &e : inputs) {
+        result += e;
+    }
+    return result;
+}
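+
+// Illustrative sketch (comment only, not used by the generator) of how
+// random_expr might be driven directly; the names here are hypothetical.
+//
+//     Var x;
+//     Func f("f");
+//     f(x) = cast<uint8_t>(x);
+//     vector<Expr> taps = {f(x - 1), f(x), f(x + 1)};
+//     // Depth-3 tree; the Checker above retries until every tap is
+//     // referenced, falling back to a plain sum after 10 attempts.
+//     Expr e = random_expr(taps, 3, 100);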
+
+// Generator to produce a random pipeline. The generated pipeline will
+// be solely a function of the seed and the number of stages.
+class RandomPipeline : public Halide::Generator<RandomPipeline> {
+public:
+    int num_stage_types = 18;
+    // The random seed to use to generate the pipeline.
+    GeneratorParam<int> seed{"seed", 1};
+    // The approximate max number of stages to generate in the random pipeline.
+    GeneratorParam<int> max_stages{"max_stages", 20};
+
+    Input<Buffer<uint8_t>> input{"input", 3};
+    Input<Buffer<uint8_t>> uint8_weights{"uint8_weights", 4};
+    Input<Buffer<uint16_t>> uint16_weights{"uint16_weights", 4};
+    Input<Buffer<uint32_t>> uint32_weights{"uint32_weights", 4};
+    Input<Buffer<int8_t>> int8_weights{"int8_weights", 4};
+    Input<Buffer<int16_t>> int16_weights{"int16_weights", 4};
+    Input<Buffer<int32_t>> int32_weights{"int32_weights", 4};
+    Input<Buffer<float>> float32_weights{"float32_weights", 4};
+
+    Output<Buffer<uint8_t>> output{"output", 3};
+
+    void set_upcast_types(Type input_type, Type &mult_type, Type &sum_type) {
+        if (input_type.is_bool()) {
+            mult_type = UInt(8);
+            sum_type = UInt(8);
+        } else if (!input_type.is_float() && rand_int(0, 1)) {
+            int input_bits = input_type.bits();
+            int mult_bits = std::min(32, 2 * input_bits);
+            int sum_bits = std::min(32, 2 * mult_bits);
+            mult_type = input_type.with_bits(mult_bits);
+            sum_type = input_type.with_bits(sum_bits);
+        } else {
+            mult_type = input_type;
+            sum_type = input_type;
+        }
+    }
+
+    void set_downcast_type(Type input_type, Type &output_type) {
+        if (input_type.is_int() && rand_int(0, 1)) {
+            int input_bits = input_type.bits();
+            int factor = rand_int(1, 2) * 2;
+            int output_bits = std::max(8, input_bits / factor);
+            output_type = Int(output_bits);
+        } else {
+            output_type = input_type;
+        }
+    }
+
+    Func get_conv_weights(Type t) {
+        if (t == UInt(8) || t == Bool()) {
+            return uint8_weights;
+        } else if (t == UInt(16)) {
+            return uint16_weights;
+        } else if (t == UInt(32)) {
+            return uint32_weights;
+        } else if (t == Int(8)) {
+            return int8_weights;
+        } else if (t == Int(16)) {
+            return int16_weights;
+        } else if (t == Int(32)) {
+            return int32_weights;
+        } else {
+            assert(t == Float(32));
+            return float32_weights;
+        }
+    }
+
+    struct Stage {
+        Func func;
+
+        // Approximate width, height, and channels. Used to preserve
+        // spatial scale when combining stages, and to track the total
+        // sizes of things.
+        int w, h, c;
+
+        static constexpr int max_size = 100000000;
+        static constexpr int min_size = 100;
+        static constexpr int max_stride = 3;  // for convs and pools
+
+        int size() const {
+            return w * h * c;
+        }
+
+        bool may_increase_size() const {
+            return size() < max_size && w <= 8000 && h <= 8000 && c <= 512;
+        }
+
+        bool may_reduce_size() const {
+            return size() > min_size;
+        }
+
+        int random_size_increase_factor() const {
+            int sz = size();
+            int max_factor = (max_size + sz - 1) / sz;
+            if (max_factor <= 1) return 1;
+            int log_max_factor = std::ceil(std::log(max_factor) / std::log(2));
+            int factor = 1 << rand_int(std::max(1, log_max_factor - 3), log_max_factor);
+            return factor;
+        }
+
+        int random_size_reduce_factor() const {
+            int sz = size();
+            int max_factor = (sz + min_size - 1) / min_size;
+            if (max_factor <= 1) return 1;
+            return std::min(8, 1 << rand_int(1, std::ceil(std::log(max_factor) / std::log(2))));
+        }
+
+        int random_out_channels() const {
+            int min = (min_size + w * h - 1) / (w * h);
+            int max = std::min(512, max_size / (w * h));
+            if (min >= max) return min;
+            return rand_int(min, max);
+        }
+    };
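+
+    // Worked example of the size heuristics above (comment only): a
+    // 2000x2000x3 stage has size() == 12,000,000, so
+    // random_size_reduce_factor() can return at most min(8, ...) == 8,
+    // while random_size_increase_factor() draws a power of two no larger
+    // than 2^ceil(log2(max_size / size())) == 2^4 == 16.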
+
+    // Generate a random convolution of one dimension of f, statically unrolled.
+    Stage convolve(Stage f, int dim, int kernel_min, int kernel_max) {
+        vector<Var> args = f.func.args();
+
+        // Generate a random expression using potentially all values in the stencil
+        vector<Expr> inputs;
+        for (int i = kernel_min; i <= kernel_max; i++) {
+            vector<Expr> coords = make_arguments(f.func.args());
+            coords[dim] += i;
+            inputs.push_back(f.func(coords));
+        }
+        int min_depth = std::floor(std::log(kernel_max - kernel_min + 1));
+        int max_depth = min_depth + 1;
+        Expr def = random_expr(inputs, rand_int(min_depth, max_depth), f.size());
+        std::cerr << def << "\n";
+
+        Func conv("conv_" + args[dim].name());
+        conv(args) = def;
+
+        return {conv, f.w, f.h, f.c};
+    }
+
+    // Generate a random convolution of one dimension of f using a reduction.
+    Stage convolve_r(Stage f, int dim, int kernel_min, int kernel_max) {
+        vector<Var> args = f.func.args();
+
+        Func conv("conv_r_" + args[dim].name());
+        RDom r(kernel_min, kernel_max - kernel_min + 1);
+        vector<Expr> coords = make_arguments(f.func.args());
+        coords[dim] += r;
+        conv(args) += rand_value(f.func.value().type()) * f.func(coords);
+
+        return {conv, f.w, f.h, f.c};
+    }
+
+    // Generate a random convolution of one dimension of f using a reduction
+    // with a wrapper
+    Stage convolve_w(Stage f, int dim, int kernel_min, int kernel_max) {
+        vector<Var> args = f.func.args();
+
+        Func conv("conv_w_" + args[dim].name());
+        RDom r(kernel_min, kernel_max - kernel_min + 1);
+        vector<Expr> coords = make_arguments(f.func.args());
+        coords[dim] += r;
+        conv(args) = sum(rand_value(f.func.value().type()) * f.func(coords));
+
+        return {conv, f.w, f.h, f.c};
+    }
+
+    // Generate a padding layer (a zero boundary condition)
+    Stage padding(Stage f) {
+        Region bounds(3);  // assuming all stages have 3 dims
+        bounds[0].min = 0;
+        bounds[0].extent = f.w;
+        bounds[1].min = 0;
+        bounds[1].extent = f.h;
+        bounds[2].min = 0;
+        bounds[2].extent = f.c;
+        Expr zero = cast(f.func.value().type(), 0);
+        return {BoundaryConditions::constant_exterior(f.func, zero, bounds), f.w, f.h, f.c};
+    }
+
+    Stage convolve2D(Stage f, int kernel_min, int kernel_max) {
+        int conv_type = rand_int(0, 2);
+        if (conv_type == 0) {
+            return convolve2D_unrolled(f, kernel_min, kernel_max);
+        } else if (conv_type == 1) {
+            return convolve2D_w(f, kernel_min, kernel_max);
+        } else {
+            return convolve2D_r(f, kernel_min, kernel_max);
+        }
+    }
+
+    Stage pool2D(Stage f, int kernel_min, int kernel_max) {
+        int pool_type = rand_int(0, 2);
+        if (pool_type == 0) {
+            return pool2D_unrolled(f, kernel_min, kernel_max);
+        } else if (pool_type == 1) {
+            return pool2D_w(f, kernel_min, kernel_max);
+        } else {
+            return pool2D_r(f, kernel_min, kernel_max);
+        }
+    }
+
+    Stage activation(Stage f) {
+        return relu_layer(f);
+    }
+
+    Stage relu_layer(Stage f) {
+        Func activation("relu");
+        // If the input type is int, downcast with 50% probability
+        Type input_type = f.func.value().type();
+        Type output_type;
+        set_downcast_type(input_type, output_type);
+
+        vector<Expr> coords = make_arguments(f.func.args());
+        activation(f.func.args()) = max(cast(output_type, 0), cast(output_type, f.func(coords)));
+        return {activation, f.w, f.h, f.c};
+    }
+
+    Stage tanh_layer(Stage f) {
+        Func activation("tanh");
+        // If the input type is int, downcast with 50% probability
+        Type input_type = f.func.value().type();
+        Type output_type;
+        set_downcast_type(input_type, output_type);
+
+        vector<Expr> coords = make_arguments(f.func.args());
+        Expr exp_pos = fast_exp(2 * cast<float>(f.func(coords)));
+        activation(f.func.args()) = (exp_pos - 1) / (exp_pos + 1);
+        return {activation, f.w, f.h, f.c};
+    }
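+
+    // Note on tanh_layer: it relies on the identity
+    //     tanh(x) = (e^(2x) - 1) / (e^(2x) + 1)
+    // so a single fast_exp suffices instead of calling tanh directly.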
+
+    Stage pool2D_unrolled(Stage f, int kernel_min, int kernel_max) {
+        vector<Var> args = f.func.args();
+        Func pooled2D("pooled2D" + args[0].name() + args[1].name());
+        int stride = f.random_size_reduce_factor();
+
+        int extent = kernel_max - kernel_min + 1;
+        int scale = extent * extent;
+
+        if (stride > extent) {
+            stride = 1;
+        }
+
+        Expr def = cast(f.func.value().type(), 0);
+
+        // Avoid huge unrolled loops
+        if (extent >= 4) return pool2D_r(f, kernel_min, kernel_max);
+
+        // Assuming input is 3D: w, h, c
+        for (int i = kernel_min; i <= kernel_max; i++) {
+            for (int j = kernel_min; j <= kernel_max; j++) {
+                vector<Expr> pooled_coords = make_arguments(f.func.args());
+                pooled_coords[0] = pooled_coords[0] * stride + i;
+                pooled_coords[1] = pooled_coords[1] * stride + j;
+                if (def.type().is_bool()) {
+                    def = def && f.func(pooled_coords);
+                } else {
+                    def = def + f.func(pooled_coords);
+                }
+            }
+        }
+
+        if (!def.type().is_bool()) {
+            def /= scale;
+        }
+
+        pooled2D(args) = def;
+
+        return {pooled2D, (f.w + stride - 1) / stride, (f.h + stride - 1) / stride, f.c};
+    }
+
+    Stage pool2D_r(Stage f, int kernel_min, int kernel_max) {
+        vector<Var> args = f.func.args();
+        Func pooled2D_r("pool2D_r_" + args[0].name() + args[1].name());
+        int stride = f.random_size_reduce_factor();
+        int extent = kernel_max - kernel_min + 1;
+        int scale = extent * extent;
+
+        if (stride > extent) {
+            stride = 1;
+        }
+
+        RDom r(kernel_min, extent,
+               kernel_min, extent);
+
+        vector<Expr> coords = make_arguments(f.func.args());
+        Type ty = f.func.value().type();
+        coords[0] = coords[0] * stride + r.x;
+        coords[1] = coords[1] * stride + r.y;
+        if (ty.is_bool()) {
+            pooled2D_r(args) = const_true();
+            pooled2D_r(args) = pooled2D_r(args) && f.func(coords);
+        } else {
+            pooled2D_r(args) += f.func(coords) / scale;
+        }
+
+        return {pooled2D_r, (f.w + stride - 1) / stride, (f.h + stride - 1) / stride, f.c};
+    }
+
+    Stage pool2D_w(Stage f, int kernel_min, int kernel_max) {
+        vector<Var> args = f.func.args();
+        Func pooled2D_w("pooled2D_w_" + args[0].name() + args[1].name());
+        int stride = f.random_size_reduce_factor();
+        int extent = kernel_max - kernel_min + 1;
+        int scale = extent * extent;
+
+        if (stride > extent) {
+            stride = 1;
+        }
+
+        RDom r(kernel_min, extent,
+               kernel_min, extent);
+
+        vector<Expr> coords = make_arguments(f.func.args());
+        coords[0] = coords[0] * stride + r.x;
+        coords[1] = coords[1] * stride + r.y;
+        pooled2D_w(args) = sum(cast<float>(f.func(coords))) / scale;
+
+        return {pooled2D_w, (f.w + stride - 1) / stride, (f.h + stride - 1) / stride, f.c};
+    }
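+
+    // All three pooling variants shrink width and height by `stride`,
+    // rounding up: a 2000x2000 stage pooled at stride 3 becomes 667x667,
+    // since (2000 + 3 - 1) / 3 == 667.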
+
+    // Convolution in the deep learning sense of the word.
+    Stage convolve2D_unrolled(Stage f, int kernel_min, int kernel_max) {
+        vector<Var> args = f.func.args();
+        // Avoid huge unrolled loops
+        if (f.c >= 4) return convolve2D_r(f, kernel_min, kernel_max);
+
+        vector<Expr> inputs;
+        for (int c = 0; c < f.c; c++) {
+            for (int i = kernel_min; i <= kernel_max; i++) {
+                for (int j = kernel_min; j <= kernel_max; j++) {
+                    vector<Expr> coords = make_arguments(f.func.args());
+                    coords[0] += i;
+                    coords[1] += j;
+                    coords[2] = c;
+                    inputs.push_back(f.func(coords));
+                }
+            }
+        }
+
+        int out_channels = f.random_out_channels();
+        int kernel_width = kernel_max - kernel_min + 1;
+        int min_depth = std::floor(std::log(kernel_width * kernel_width * f.c));
+        int max_depth = min_depth + 1;
+        int func_size = f.w * f.h * out_channels;
+
+        Expr def = random_expr(inputs, rand_int(min_depth, max_depth), func_size);
+
+        Func conv("conv2D_" + args[0].name() + args[1].name());
+        conv(args) = def;
+
+        return {conv, f.w, f.h, out_channels};
+    }
+
+    // Generate a random 2D convolution of f using a reduction.
+    Stage convolve2D_r(Stage f, int kernel_min, int kernel_max) {
+        vector<Var> args = f.func.args();
+        Func conv("conv2D_r_" + args[0].name() + args[1].name());
+        // If the input type is int, upcast with 50% probability
+        Type mult_type, sum_type;
+        Type input_type = f.func.value().type();
+        Func weights = get_conv_weights(input_type);
+        set_upcast_types(input_type, mult_type, sum_type);
+
+        int stride = f.random_size_reduce_factor();
+        int extent = kernel_max - kernel_min + 1;
+        if (stride > extent) {
+            stride = 1;
+        }
+
+        RDom r(kernel_min, extent,
+               kernel_min, extent,
+               0, f.c);
+
+        vector<Expr> coords = make_arguments(f.func.args());
+        coords[0] = coords[0] * stride + r.x;  // only stride in w and h
+        coords[1] = coords[1] * stride + r.y;
+        coords[2] = r.z;
+        conv(args) += cast(sum_type, cast(mult_type, weights(r.z, r.x, r.y, args[2]) * f.func(coords)));
+
+        Stage out{conv, f.w, f.h, f.random_out_channels()};
+        out.w = (out.w + stride - 1) / stride;
+        out.h = (out.h + stride - 1) / stride;
+        return out;
+    }
+
+    // Generate a random 2D convolution of f using a reduction with a wrapper
+    Stage convolve2D_w(Stage f, int kernel_min, int kernel_max) {
+        vector<Var> args = f.func.args();
+        Func conv("conv2D_w_" + args[0].name() + args[1].name());
+        // If the input type is int, upcast with 50% probability
+        Type mult_type, sum_type;
+        Type input_type = f.func.value().type();
+        Func weights = get_conv_weights(input_type);
+        set_upcast_types(input_type, mult_type, sum_type);
+
+        int stride = f.random_size_reduce_factor();
+        int extent = kernel_max - kernel_min + 1;
+
+        if (stride > extent) {
+            stride = 1;
+        }
+
+        RDom r(kernel_min, extent,
+               kernel_min, extent,
+               0, f.c);
+        vector<Expr> coords = make_arguments(f.func.args());
+        coords[0] = coords[0] * stride + r.x;
+        coords[1] = coords[1] * stride + r.y;
+        coords[2] = r.z;
+        // sum() captures free vars in the order found, and the new
+        // autoscheduler isn't clever enough to do storage reordering
+        // yet, so make sure to put the term that depends on the
+        // output channel last.
+        conv(args) = sum(cast(sum_type, cast(mult_type, weights(r.z, r.x, r.y, args[2]) * f.func(coords))));
+
+        // Choose a random number of output channels
+        Stage out{conv, f.w, f.h, f.random_out_channels()};
+        out.w = (out.w + stride - 1) / stride;
+        out.h = (out.h + stride - 1) / stride;
+        return out;
+    }
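+
+    // The 4D weight buffers are indexed as weights(input_channel, dx, dy,
+    // output_channel) in the two reductions above; accordingly, generate()
+    // estimates dimensions 0 and 3 with channel extents of 512 and
+    // dimensions 1 and 2 with small kernel offsets starting at -5.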
+
+    // Generate an upsampling or downsampling of dimension dim by factor.
+    Stage upsample(Stage f, int dim, int factor = 0) {
+        if (factor == 0) factor = f.random_size_increase_factor();
+
+        Func resampled;
+
+        if (rand_bool()) {
+            // Nearest neighbour
+            resampled = Func("upsampled_nn_" + f.func.args()[dim].name());
+            vector<Expr> resampled_coords = make_arguments(f.func.args());
+            resampled_coords[dim] = resampled_coords[dim] / factor;
+            resampled(f.func.args()) = f.func(resampled_coords);
+        } else {
+            // Linear interpolation
+            resampled = Func("upsampled_linear_" + f.func.args()[dim].name());
+            vector<Expr> resampled_coords = make_arguments(f.func.args());
+            Expr x = resampled_coords[dim];
+            resampled_coords[dim] = x / factor;
+            Expr s1 = f.func(resampled_coords);
+            resampled_coords[dim] += 1;
+            Expr s2 = f.func(resampled_coords);
+            x = x % factor;
+
+            Type mult_type, sum_type;
+            Type input_type = f.func.value().type();
+            set_upcast_types(input_type, mult_type, sum_type);
+            s1 = cast(sum_type, s1);
+            s2 = cast(sum_type, s2);
+
+            resampled(f.func.args()) = cast(input_type, ((factor - x) * s1 + x * s2) / (2 * factor));
+        }
+
+        Stage s{resampled, f.w, f.h, f.c};
+        if (dim == 0) {
+            s.w *= factor;
+        } else if (dim == 1) {
+            s.h *= factor;
+        } else {
+            assert(false);
+        }
+        return s;
+    }
+
+    Stage downsample(Stage f, int dim, int factor = 0) {
+        if (factor == 0) factor = f.random_size_reduce_factor();
+
+        Func resampled;
+        if (rand_bool()) {
+            // Nearest neighbour
+            resampled = Func("downsampled_nn_" + f.func.args()[dim].name());
+            vector<Expr> resampled_coords = make_arguments(f.func.args());
+            resampled_coords[dim] = resampled_coords[dim] * factor;
+            resampled(f.func.args()) = f.func(resampled_coords);
+        } else {
+            // Averaging down
+            resampled = Func("downsampled_box_" + f.func.args()[dim].name());
+            vector<Expr> resampled_coords = make_arguments(f.func.args());
+            resampled_coords[dim] = resampled_coords[dim] * factor;
+            Expr e = cast(f.func.value().type(), 0);
+            for (int i = 0; i < factor; i++) {
+                resampled_coords[dim] += 1;
+                e += f.func(resampled_coords);
+            }
+            resampled(f.func.args()) = e;
+        }
+
+        Stage s{resampled, f.w, f.h, f.c};
+        if (dim == 0) {
+            s.w = (s.w + factor - 1) / factor;
+        } else if (dim == 1) {
+            s.h = (s.h + factor - 1) / factor;
+        } else {
+            assert(false);
+        }
+        return s;
+    }
+
+    // A random pointwise combination of two stages.
+    Stage binary_op(Stage f, Stage g) {
+        // The stages are first resized to match scales.
+        if (f.w != g.w || f.h != g.h || f.c != g.c) {
+            if (f.size() < g.size()) {
+                f = resample_to(f, g.w, g.h, g.c);
+            } else {
+                g = resample_to(g, f.w, f.h, f.c);
+            }
+        }
+
+        Func binary("binary_op");
+
+        vector<Expr> inputs = {f.func(f.func.args()), g.func(f.func.args())};
+        int min_depth = 1;
+        int max_depth = 3;
+        int func_size = f.w * f.h * std::min(f.c, g.c);
+        Expr def = random_expr(inputs, rand_int(min_depth, max_depth), func_size);
+        std::cerr << def << "\n";
+        binary(f.func.args()) = def;
+        return {binary, f.w, f.h, std::min(f.c, g.c)};
+    }
+
+    Stage unary_op(Stage f) {
+        Func unary("unary_op");
+        vector<Expr> coords = make_arguments(f.func.args());
+        int op_type = rand_int(0, 2);  // exp, log, sqrt
+
+        if (op_type == 0) {
+            unary(f.func.args()) = fast_exp(cast<float>(f.func(coords)));
+        } else if (op_type == 1) {
+            unary(f.func.args()) = fast_log(cast<float>(f.func(coords)));
+        } else {
+            unary(f.func.args()) = sqrt(cast<float>(f.func(coords)));
+        }
+        return {unary, f.w, f.h, f.c};
+    }
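+
+    // When binary_op combines stages of different shapes, the smaller
+    // stage (by size()) is resampled to match the larger one before the
+    // pointwise expression is generated: e.g. a 1000x1000x3 stage meeting
+    // a 2000x2000x3 stage is first upsampled by 2 in both w and h.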
+
+    // Generate an all-to-all communication in dimension dim, statically
+    // unrolled. Currently only ever applied over the channels dimension.
+    Stage all_to_all(Stage f, int dim) {
+        if (f.c > 16) return all_to_all_r(f, dim);
+
+        vector<Expr> reduction_coords = make_arguments(f.func.args());
+        Expr e = 0.f;
+        for (int i = 0; i < f.c; i++) {
+            reduction_coords[dim] = i;
+            e += f.func(reduction_coords) * ((i + 1) * f.c + (f.func.args()[dim] + 1));
+        }
+
+        Func all("all");
+        all(f.func.args()) = e;
+
+        return {all, f.w, f.h, f.random_out_channels()};
+    }
+
+    // Generate an all-to-all communication in dimension dim using an RDom
+    Stage all_to_all_r(Stage f, int dim) {
+        vector<Expr> reduction_coords = make_arguments(f.func.args());
+        RDom r(0, f.c);
+        reduction_coords[dim] = r;
+        Func all("all_r");
+        all(f.func.args()) += f.func(reduction_coords) * ((r + 1) * f.c + (f.func.args()[dim] + 1));
+
+        return {all, f.w, f.h, f.random_out_channels()};
+    }
+
+    // Generate an all-to-all communication in dimension dim using an RDom
+    // with a wrapper func
+    Stage all_to_all_w(Stage f, int dim) {
+        vector<Expr> reduction_coords = make_arguments(f.func.args());
+        RDom r(0, f.c);
+        reduction_coords[dim] = r;
+        Func all("all_w");
+        all(f.func.args()) = sum(f.func(reduction_coords) * ((r + 1) * f.c + (f.func.args()[dim] + 1)));
+
+        return {all, f.w, f.h, f.random_out_channels()};
+    }
+
+    // Generate a forwards-then-backwards scan along a dimension
+    Stage scan(Stage f, int dim) {
+        int extent = dim == 0 ? f.w : (dim == 1 ? f.h : 3);
+        RDom r(1, extent - 1);
+        Func scan("scan_" + f.func.args()[dim].name());
+        vector<Expr> coords = make_arguments(f.func.args());
+        scan(coords) = f.func(coords);
+        coords[dim] = r;
+        vector<Expr> prev_coords = coords;
+        prev_coords[dim] = r - 1;
+        scan(coords) += scan(prev_coords);
+        // Now in reverse
+        coords[dim] = extent - r - 1;
+        prev_coords[dim] = extent - r;
+        scan(coords) += scan(prev_coords);
+        return {scan, f.w, f.h, f.c};
+    }
+
+    // Do a data-dependent lookup into one stage using another as the
+    // index.
+    Stage slice(Stage f, Stage g) {
+        if (f.c > g.c) {
+            std::swap(f, g);
+        }
+
+        // Index g's channels using f
+        f = resample_to(f, g.w, g.h, 1);
+
+        Func sliced("sliced");
+        vector<Expr> coords = make_arguments(f.func.args());
+        coords.back() = clamp(cast<int>(f.func(f.func.args())), 0, g.c - 1);
+        sliced(f.func.args()) = g.func(coords);
+
+        return {sliced, f.w, f.h, f.c};
+    }
+
+    // Construct a tiled histogram of regions of a stage.
+    Stage tiled_histogram(Stage f) {
+        int old_c = f.c;
+        f = resample_to(f, f.w, f.h, 1);
+
+        int box_size = 1 << rand_int(1, 3);
+        int histogram_buckets = box_size * box_size * old_c;
+
+        RDom r(0, box_size, 0, box_size);
+        vector<Expr> from_coords = make_arguments(f.func.args());
+        vector<Expr> to_coords = from_coords;
+
+        Func hist("hist");
+        hist(f.func.args()) = 0.0f;
+        from_coords[0] = to_coords[0] * box_size + r.x;
+        from_coords[1] = to_coords[1] * box_size + r.y;
+        from_coords[2] = 0;
+        to_coords[2] = clamp(cast<int>(f.func(from_coords) * histogram_buckets), 0, histogram_buckets - 1);
+        hist(to_coords) += 1;
+
+        return {hist, f.w / box_size, f.h / box_size, histogram_buckets};
+    }
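+
+    // Example of tiled_histogram sizes: with box_size == 4 and old_c == 3,
+    // histogram_buckets == 4 * 4 * 3 == 48, so a 2000x2000x3 stage yields
+    // a 500x500x48 histogram.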
+
+    // Resample a stage to a different size.
+    Stage resample_to(Stage f, int w, int h, int c) {
+        Stage out = f;
+        // First decrease any sizes that need decreasing
+        if (out.w > w) {
+            int factor = (out.w + w / 2) / w;
+            if (factor != 1) {
+                out = downsample(out, 0, factor);
+            }
+        }
+        if (out.h > h) {
+            int factor = (out.h + h / 2) / h;
+            if (factor != 1) {
+                out = downsample(out, 1, factor);
+            }
+        }
+        // Adapt channel count with an all-to-all
+        if (out.c != c) {
+            out = all_to_all_r(out, 2);
+            out.c = c;
+        }
+        // Increase any sizes that need increasing
+        if (out.w < w) {
+            int factor = (w + out.w / 2) / out.w;
+            if (factor != 1) {
+                out = upsample(out, 0, factor);
+            }
+        }
+        if (out.h < h) {
+            int factor = (h + out.h / 2) / out.h;
+            if (factor != 1) {
+                out = upsample(out, 1, factor);
+            }
+        }
+
+        return out;
+    }
+
+    Stage cast_stage(Type t, Stage f) {
+        Func casted("casted");
+        casted(f.func.args()) = cast(t, f.func(f.func.args()));
+        return {casted, f.w, f.h, f.c};
+    }
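+
+    // The stage_type values drawn in random_stage below map to:
+    //   0: convolve (unrolled)   1: convolve_r       2: convolve_w
+    //   3: convolve2D            4: pool2D           5: activation (relu)
+    //   6: padding               7: upsample         8: downsample
+    //   9: all_to_all           10: all_to_all_r    11: all_to_all_w
+    //  12: scan                 13: unary_op        14: tiled_histogram
+    //  15: slice                16-17: binary_op (when i1 != i2)
+    // A choice whose size guard fails falls through to binary_op or,
+    // failing that, to a recursive retry.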
+
+    // Add a random new stage onto the end of the pipeline.
+    Stage random_stage(const vector<Stage> &s) {
+        int m = (int)s.size() - 1;
+        int i2 = m > 0 ? rand_int(0, m - 1) : 0;
+        int i1 = m > 0 ? rand_int(i2 + 1, m) : 0;
+        Stage f = s[i1], g = s[i2];
+
+        int stage_type = rand_int(0, 17);
+
+        if (stage_type == 0) {
+            int dim = rand_int(0, 1);
+            int kernel_min = rand_int(-3, 0);
+            int kernel_max = rand_int(0, 3);
+            return convolve(f, dim, kernel_min, kernel_max);
+        } else if (stage_type == 1) {
+            int dim = rand_int(0, 1);
+            int kernel_min = rand_int(-10, 0);
+            int kernel_max = rand_int(0, 10);
+            return convolve_r(f, dim, kernel_min, kernel_max);
+        } else if (stage_type == 2) {
+            int dim = rand_int(0, 1);
+            int kernel_min = rand_int(-10, 0);
+            int kernel_max = rand_int(0, 10);
+            return convolve_w(f, dim, kernel_min, kernel_max);
+        } else if (stage_type == 3) {
+            int kernel_min = rand_int(-5, 0);
+            int kernel_max = rand_int(0, 5);
+            return convolve2D(f, kernel_min, kernel_max);
+        } else if (stage_type == 4 && f.may_reduce_size() && f.w >= 32 && f.h >= 32) {
+            int kernel_min = rand_int(-5, 0);
+            int kernel_max = rand_int(0, 5);
+            return pool2D(f, kernel_min, kernel_max);
+        } else if (stage_type == 5) {
+            return activation(f);
+        } else if (stage_type == 6) {
+            return padding(f);
+        } else if (stage_type == 7 && f.may_increase_size()) {
+            // For now, only upsample dimensions 0 or 1.
+            return upsample(f, rand_int(0, 1));
+        } else if (stage_type == 8 && f.may_reduce_size()) {
+            // For now, only downsample dimensions 0 or 1.
+            return downsample(f, rand_int(0, 1));
+        } else if (stage_type == 9) {
+            int dim = 2;
+            return all_to_all(f, dim);
+        } else if (stage_type == 10) {
+            int dim = 2;
+            return all_to_all_r(f, dim);
+        } else if (stage_type == 11) {
+            int dim = 2;
+            return all_to_all_w(f, dim);
+        } else if (stage_type == 12) {
+            int dim = rand_int(0, 2);
+            return scan(f, dim);
+        } else if (stage_type == 13 && f.size() < 10000) {
+            return unary_op(f);
+        } else if (stage_type == 14 && f.w > 32 && f.h > 32) {
+            return tiled_histogram(f);
+        } else if (stage_type == 15) {
+            return slice(f, g);
+        } else if (i1 != i2) {
+            return binary_op(f, g);
+        } else {
+            // Try again
+            return random_stage(s);
+        }
+    }
+
+    void generate() {
+        rng.seed((int)seed);
+
+        Var x("x"), y("y"), c("c");
+
+        Func first;
+        first(x, y, c) = input(x, y, c);
+
+        vector<Stage> stages;
+        // Assume the input starts at ~2000x2000
+        stages.emplace_back(Stage{first, 2000, 2000, 3});
+
+        for (int i = 0; i < max_stages - 2; i++) {
+            Stage next = random_stage(stages);
+            stages.push_back(next);
+            if (!using_autoscheduler()) {
+                stages.back().func.compute_root().reorder(x, c, y).vectorize(x, 8).parallel(y, 8);
+            }
+        }
+
+        Stage tail = stages.back();
+
+        // Resample back to the correct resolution
+        tail = resample_to(tail, 2000, 2000, 3);
+        Stage casted = cast_stage(output.type(), tail);
+        output = casted.func;
+
+        if (!using_autoscheduler()) {
+            output.compute_root().reorder(x, c, y).vectorize(x, 8).parallel(y);
+        }
+
+        if (using_autoscheduler()) {
+            input.dim(0).set_estimate(0, 2000).dim(1).set_estimate(0, 2000).dim(2).set_estimate(0, 3);
+            uint8_weights.dim(0).set_estimate(0, 512).dim(1).set_estimate(-5, 5).dim(2).set_estimate(-5, 5).dim(3).set_estimate(0, 512);
+            uint16_weights.dim(0).set_estimate(0, 512).dim(1).set_estimate(-5, 5).dim(2).set_estimate(-5, 5).dim(3).set_estimate(0, 512);
+            uint32_weights.dim(0).set_estimate(0, 512).dim(1).set_estimate(-5, 5).dim(2).set_estimate(-5, 5).dim(3).set_estimate(0, 512);
+            int8_weights.dim(0).set_estimate(0, 512).dim(1).set_estimate(-5, 5).dim(2).set_estimate(-5, 5).dim(3).set_estimate(0, 512);
+            int16_weights.dim(0).set_estimate(0, 512).dim(1).set_estimate(-5, 5).dim(2).set_estimate(-5, 5).dim(3).set_estimate(0, 512);
+            int32_weights.dim(0).set_estimate(0, 512).dim(1).set_estimate(-5, 5).dim(2).set_estimate(-5, 5).dim(3).set_estimate(0, 512);
+            float32_weights.dim(0).set_estimate(0, 512).dim(1).set_estimate(-5, 5).dim(2).set_estimate(-5, 5).dim(3).set_estimate(0, 512);
+
+            output.set_estimate(output.args()[0], 0, 2000);
+            output.set_estimate(output.args()[1], 0, 2000);
+            output.set_estimate(output.args()[2], 0, 3);
+
+            output.dim(0).set_estimate(0, 2000);
+            output.dim(1).set_estimate(0, 2000);
+            output.dim(2).set_estimate(0, 3);
+        }
+    }
+};
+
+HALIDE_REGISTER_GENERATOR(RandomPipeline, random_pipeline)
\ No newline at end of file