From 9c26a78d6af6dbdec3041e59205c60cb8a71cb45 Mon Sep 17 00:00:00 2001 From: FilippoCordella Date: Sun, 22 Feb 2026 13:56:33 +0000 Subject: [PATCH 01/11] SILU(s8->s32) and RQSILU(s8->s8) support for Generic & Siracusa --- .gitignore | 2 + Deeploy/Targets/GAP9/Bindings.py | 15 ++- Deeploy/Targets/GAP9/Platform.py | 13 ++- Deeploy/Targets/GAP9/Tiler.py | 9 +- Deeploy/Targets/Generic/Bindings.py | 14 ++- Deeploy/Targets/Generic/Layers.py | 17 ++++ Deeploy/Targets/Generic/Parsers.py | 24 +++++ Deeploy/Targets/Generic/Platform.py | 13 ++- .../Generic/Templates/RQSILUTemplate.py | 33 +++++++ .../Targets/Generic/Templates/SILUTemplate.py | 33 +++++++ .../TileConstraints/SILUTileConstraint.py | 86 ++++++++++++++++++ Deeploy/Targets/Generic/TypeCheckers.py | 16 ++++ Deeploy/Targets/PULPOpen/Bindings.py | 14 ++- Deeploy/Targets/PULPOpen/Platform.py | 14 ++- Deeploy/Targets/PULPOpen/Tiler.py | 9 +- Deeploy/TilingExtension/TilerExtension.py | 1 + .../Tests/Kernels/Integer/RQSILU/inputs.npz | Bin 0 -> 3340 bytes .../Tests/Kernels/Integer/RQSILU/network.onnx | Bin 0 -> 195 bytes .../Tests/Kernels/Integer/RQSILU/outputs.npz | Bin 0 -> 3342 bytes .../Tests/Kernels/Integer/SILU/inputs.npz | Bin 0 -> 3340 bytes .../Tests/Kernels/Integer/SILU/network.onnx | Bin 0 -> 151 bytes .../Tests/Kernels/Integer/SILU/outputs.npz | Bin 0 -> 12558 bytes DeeployTest/test_gap9_config.py | 2 +- DeeployTest/test_generic_config.py | 1 + DeeployTest/test_siracusa_config.py | 1 + .../Generic/inc/DeeployBasicMath.h | 2 + TargetLibraries/Generic/inc/kernel/RQSILU.h | 34 +++++++ TargetLibraries/Generic/inc/kernel/SILU.h | 34 +++++++ TargetLibraries/Generic/inc/macros.h | 7 ++ TargetLibraries/Generic/src/RQ_SILU.c | 15 +++ TargetLibraries/Generic/src/SILU_s8.c | 42 +++++++++ 31 files changed, 431 insertions(+), 20 deletions(-) create mode 100644 Deeploy/Targets/Generic/Templates/RQSILUTemplate.py create mode 100644 Deeploy/Targets/Generic/Templates/SILUTemplate.py create mode 100644 Deeploy/Targets/Generic/TileConstraints/SILUTileConstraint.py create mode 100644 DeeployTest/Tests/Kernels/Integer/RQSILU/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/Integer/RQSILU/network.onnx create mode 100644 DeeployTest/Tests/Kernels/Integer/RQSILU/outputs.npz create mode 100644 DeeployTest/Tests/Kernels/Integer/SILU/inputs.npz create mode 100644 DeeployTest/Tests/Kernels/Integer/SILU/network.onnx create mode 100644 DeeployTest/Tests/Kernels/Integer/SILU/outputs.npz create mode 100644 TargetLibraries/Generic/inc/kernel/RQSILU.h create mode 100644 TargetLibraries/Generic/inc/kernel/SILU.h create mode 100644 TargetLibraries/Generic/src/RQ_SILU.c create mode 100644 TargetLibraries/Generic/src/SILU_s8.c diff --git a/.gitignore b/.gitignore index dc93328e4a..7f5a70a202 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,5 @@ DeeployTest/Tests/**/generateTest.py DeeployTest/out.txt CHANGELOG_GEN.md +TargetLibraries/PULPOpen/third_party/pulp-nn-mixed/ +TargetLibraries/PULPOpen/third_party/pulp-nnx/ diff --git a/Deeploy/Targets/GAP9/Bindings.py b/Deeploy/Targets/GAP9/Bindings.py index 2bda98af8f..1a50fed73a 100644 --- a/Deeploy/Targets/GAP9/Bindings.py +++ b/Deeploy/Targets/GAP9/Bindings.py @@ -22,11 +22,12 @@ from Deeploy.Targets.GAP9.DMA.MchanDma import GAP9MchanDma # Import templates from PULPOpen and Generic from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, DequantTemplate, FloatReduceMeanTemplate, \ - FloatReduceSumTemplate, GatherTemplate, QuantTemplate, RQSiGELUTemplate, SliceTemplate, iHardswishTemplate + FloatReduceSumTemplate, GatherTemplate, QuantTemplate, RQSiGELUTemplate, SliceTemplate, iHardswishTemplate, \ + SILUTemplate, RQSILUTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DequantChecker, \ GatherChecker, GELUChecker, GEMMChecker, HardswishChecker, LayerNormChecker, MatMulChecker, MulChecker, \ QuantChecker, ReduceMeanChecker, ReluChecker, ReshapeChecker, RQAddChecker, RQHardswishChecker, SGDChecker, \ - SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker + SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker, SILUChecker from Deeploy.Targets.PULPOpen.Bindings import ForkClosure, L3MemoryAwareFunctionCallClosure, \ MemoryAwareForkTransformer, MemoryAwareFunctionCallClosure, TilingCallClosure from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass @@ -380,6 +381,16 @@ GELUChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]), FloatGELUTemplate.referenceTemplate, GAP9Transformer) +GAP9SILUBindings = [ + NodeBinding(SILUChecker([PointerClass(int8_t)], [PointerClass(int32_t)]), + SILUTemplate.referenceTemplate, GAP9Transformer) +] + +GAP9RQSILUBindings = [ + NodeBinding(SILUChecker([PointerClass(int8_t)], [PointerClass(int8_t)]), + RQSILUTemplate.referenceTemplate, GAP9Transformer) +] + GAP9GatherBindings = [ NodeBinding(GatherChecker([PointerClass(float32_t), PointerClass(type)], [PointerClass(float32_t)]), GatherTemplate.referenceTemplate, GAP9Transformer) for type in IntegerDataTypes diff --git a/Deeploy/Targets/GAP9/Platform.py b/Deeploy/Targets/GAP9/Platform.py index bad6f8d859..17392fe06a 100644 --- a/Deeploy/Targets/GAP9/Platform.py +++ b/Deeploy/Targets/GAP9/Platform.py @@ -22,20 +22,21 @@ GAP9RQSTallGEMMTilingReadyBindings, GAP9RQSTilingReadyBindings, GAP9SGDTilingReadyBindings, \ GAP9SoftmaxCrossEntropyGradTilingReadyBindings, GAP9SoftmaxCrossEntropyTilingReadyBindings, \ GAP9SoftmaxGradTilingReadyBindings, GAP9SoftmaxTilingReadyBindings, GAP9TransposeTilingReadyBindings, \ - GAP9UniformRQSTilingReadyBindings + GAP9UniformRQSTilingReadyBindings, GAP9SILUTilingReadyBindings, GAP9RQSILUTilingReadyBindings from Deeploy.Targets.Generic.Bindings import BasicGEMMBindings, BasicPad1DBindings, BasicPad2DBindings, \ BasicRQIntegerDivBinding from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, ConvLayer, GatherLayer, GELULayer, GEMMLayer, \ LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, QuantLayer, ReduceMeanLayer, ReduceSumLayer, \ ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, RQSiHardswishLayer, SGDLayer, \ SliceLayer, SoftmaxCrossEntropyLossGradLayer, SoftmaxCrossEntropyLossLayer, SoftmaxGradLayer, SoftmaxLayer, \ - TransposeLayer, iHardswishLayer, iRMSNormLayer + TransposeLayer, iHardswishLayer, iRMSNormLayer, SILULayer, RQSILULayer from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, DequantParser, FlattenParser, GatherParser, \ GELUParser, GEMMParser, LayerNormParser, MatMulParser, MaxPool2DParser, MulParser, Pad1DParser, Pad2DParser, \ QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQAddParser, \ RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, SGDParser, SliceParser, \ SoftmaxCrossEntropyLossGradParser, SoftmaxCrossEntropyLossParser, SoftmaxGradParser, SoftmaxParser, \ - TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, iRMSNormParser, iSoftmaxParser + TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, iRMSNormParser, iSoftmaxParser, \ + SILUParser from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate from Deeploy.Targets.PULPOpen.Bindings import BasicDequantBindings, BasicQuantBindings, PULPDMASliceBindings, \ PULPDWConv1DBinding, PULPReduceMeanBindings, PULPRQSConv1DBindings, PULPSliceBindings @@ -49,6 +50,8 @@ GAP9_AddMapper = NodeMapper(AddParser(), GAP9AddTilingReadyBindings) GAP9_FlattenMapper = NodeMapper(FlattenParser(), GAP9FlattenTilingReadyBindings) GAP9_GELUMapper = NodeMapper(GELUParser(), GAP9FPGELUTilingReadyBindings) +GAP9_SILUMapper = NodeMapper(SILUParser(), GAP9SILUTilingReadyBindings) +GAP9_RQSILUMapper = NodeMapper(SILUParser(), GAP9RQSILUTilingReadyBindings) GAP9_GatherMapper = NodeMapper(GatherParser(), GAP9GatherTilingReadyBindings) GAP9_MulMapper = NodeMapper(MulParser(), GAP9MulTilingReadyBindings) GAP9_Pad1DMapper = NodeMapper(Pad1DParser(), BasicPad1DBindings) @@ -106,6 +109,10 @@ GEMMLayer([GAP9_FloatGEMMMapper, GAP9_GEMMDequantMapper]), 'Gelu': GELULayer([GAP9_GELUMapper]), + 'SILU': + SILULayer([GAP9_SILUMapper]), + 'RQSILU': + RQSILULayer([GAP9_RQSILUMapper]), 'LayerNormalization': LayerNormLayer([GAP9_LayerNormMapper]), 'MaxPool': diff --git a/Deeploy/Targets/GAP9/Tiler.py b/Deeploy/Targets/GAP9/Tiler.py index fefe12b6d7..1f3004d889 100644 --- a/Deeploy/Targets/GAP9/Tiler.py +++ b/Deeploy/Targets/GAP9/Tiler.py @@ -17,7 +17,7 @@ GAP9RQAddBindings, GAP9RQSBindings, GAP9RQSConv2DBindings, GAP9RQSDWConv2DBindings, GAP9RQSGEMMBindings, \ GAP9RQSiHardswishBindings, GAP9RQSMatrixVecBindings, GAP9RQSTallGEMMBindings, GAP9SGDBindings, \ GAP9SoftmaxBindings, GAP9SoftmaxCrossEntropyLossBindings, GAP9SoftmaxCrossEntropyLossGradBindings, \ - GAP9SoftmaxGradBindings, GAP9TransposeBindings, GAP9UniformRQSBindings + GAP9SoftmaxGradBindings, GAP9TransposeBindings, GAP9UniformRQSBindings, GAP9SILUBindings, GAP9RQSILUBindings from Deeploy.Targets.Generic.TileConstraints.AddTileConstraint import AddTileConstraint from Deeploy.Targets.Generic.TileConstraints.ConcatTileConstraint import ConcatTileConstraint from Deeploy.Targets.Generic.TileConstraints.iHardswishTileConstraint import iHardswishTileConstraint @@ -29,6 +29,7 @@ from Deeploy.Targets.Generic.TileConstraints.TransposeTileConstraint import TransposeTileConstraint from Deeploy.Targets.Generic.TileConstraints.UnaryTileConstraint import UnaryTileConstraint from Deeploy.Targets.Generic.TileConstraints.UntiledTileConstraint import UntiledTileConstraint +from Deeploy.Targets.Generic.TileConstraints.SILUTileConstraint import SILUTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.ConvTileConstraint import Conv2DTileConstraint, RQConv2DTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.DWConvTileConstraint import DWConv2DTileConstraint, \ RQDWConv2DTileConstraint @@ -142,3 +143,9 @@ GAP9SGDTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = GAP9SGDBindings, tileConstraint = SGDTileConstraint()) + +GAP9SILUTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = GAP9SILUBindings, + tileConstraint = SILUTileConstraint()) + +GAP9RQSILUTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = GAP9RQSILUBindings, + tileConstraint = SILUTileConstraint()) \ No newline at end of file diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 308b179aef..0c20a34b6f 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -19,12 +19,12 @@ GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, \ MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \ RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, TransposeTemplate, \ - iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate + iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate, SILUTemplate, RQSILUTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \ DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, \ LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, \ ReduceSumChecker, ReluChecker, RequantShiftChecker, ReshapeChecker, RQIntegerDivChecker, SliceChecker, \ - SoftmaxChecker, TransposeChecker + SoftmaxChecker, TransposeChecker, SILUChecker BasicTransformer = CodeTransformation([ArgumentStructGeneration(), MemoryManagementGeneration(), FutureGeneration()]) @@ -327,3 +327,13 @@ ConvTransposeTemplate.referenceTemplate, BasicTransformer) for type in FloatDataTypes ] + +BasicSILUBindings = [ + NodeBinding(SILUChecker([PointerClass(int8_t)], [PointerClass(int32_t)]), SILUTemplate.referenceTemplate, + BasicTransformer) +] + +BasicRQSILUBindings = [ + NodeBinding(SILUChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int8_t)]), RQSILUTemplate.referenceTemplate, + BasicTransformer) +] \ No newline at end of file diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index cc733937cc..4a9347202d 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -709,3 +709,20 @@ def computeOps(self): numPx = opRep['dim_im_out_x'] return numPx * opsPerPx + +class SILULayer(ONNXLayer): + + def __init__(self, maps: List[NodeMapper]): + super().__init__(maps) + + def computeOps(self): + # LUT-based SiLU: implemented via a 256-entry table lookup per element. + # Arithmetic operations per element = 0 (only a memory load + store). + # To count memory accesses instead, return: size * 2 + return 0 + + +class RQSILULayer(SILULayer): + + def __init__(self, maps: List[NodeMapper]): + super().__init__(maps) \ No newline at end of file diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index ad787d9e4b..55af3ba394 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2886,3 +2886,27 @@ def parseNodeCtxt(self, self.operatorRepresentation['size'] = int(np.prod(data_in.shape)) return ctxt, True + +class SILUParser(NodeParser): + + def __init__(self): + super().__init__() + + def parseNode(self, node: gs.Node) -> bool: + + ret = all([len(node.inputs) >= 1, len(node.outputs) == 1]) + + return ret + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + + data_in = ctxt.lookup(node.inputs[0].name) + data_out = ctxt.lookup(node.outputs[0].name) + self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['data_out'] = data_out.name + self.operatorRepresentation['size'] = np.prod(data_in.shape) + + return ctxt, True \ No newline at end of file diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index e05e897270..fc91d1e20d 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -14,19 +14,19 @@ BasicPad1DBindings, BasicPad2DBindings, BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, \ BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, \ BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, BasicTransposeBindings, \ - DummyBinding + BasicSILUBindings, BasicRQSILUBindings, DummyBinding from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, ConcatLayer, ConvLayer, \ ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, GatherLayer, GELULayer, GEMMLayer, ITAMaxLayer, \ LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, QuantLayer, ReduceMeanLayer, \ ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, \ - SoftmaxLayer, SqrtLayer, TransposeLayer + SoftmaxLayer, SqrtLayer, TransposeLayer, SILULayer, RQSILULayer from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, ConcatParser, ConvTranspose1DParser, \ DebugParser, DequantParser, DivParser, DummyParser, FlattenParser, GatherParser, GELUParser, GenericConv1DParser, \ GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, GenericGEMMParser, GenericMaxPool2DParser, \ IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, MatMulParser, MaxPool1DParser, MulParser, \ Pad1DParser, Pad2DParser, PowParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, \ RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, SoftmaxParser, SqrtParser, \ - TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser + TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser, SILUParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, QuantPatternPass, \ @@ -73,6 +73,8 @@ BatchNormalizationMapper = NodeMapper(BatchNormParser(), BasicBatchNormBindings) ConvTransposeMapper = NodeMapper(ConvTranspose1DParser(), BasicConvTransposeBindings) SliceMapper = NodeMapper(SliceParser(), BasicSliceBindings) +SILUMapper = NodeMapper(SILUParser(), BasicSILUBindings) +RQSILUMapper = NodeMapper(SILUParser(), BasicRQSILUBindings) # Dummy nodes are intended for development purposes only! # They should always generate compiler errors to not accidentally end up in production code @@ -118,7 +120,10 @@ 'Quant': QuantLayer([QuantMapper]), 'Dequant': DequantLayer([DequantMapper]), 'BatchNormalization': BatchNormalizationLayer([BatchNormalizationMapper]), - 'ConvTranspose': ConvTransposeLayer([ConvTransposeMapper]) + 'ConvTranspose': ConvTransposeLayer([ConvTransposeMapper]), + 'SILU': SILULayer([SILUMapper]), + 'RQSILU': RQSILULayer([RQSILUMapper]), + # # For example, you can use the DummpyMapper, in case you want to test # # deployment or optimizations with GlobalAveragePool nodes but did not yet # # implement the corresponding kernel diff --git a/Deeploy/Targets/Generic/Templates/RQSILUTemplate.py b/Deeploy/Targets/Generic/Templates/RQSILUTemplate.py new file mode 100644 index 0000000000..99f0e3beec --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/RQSILUTemplate.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: 2024 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from typing import Dict, List, Tuple + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _SILUTemplate(NodeTemplate): + + def __init__(self, templateStr): + super().__init__(templateStr) + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + data_out = ctxt.lookup(operatorRepresentation['data_out']) + operatorRepresentation['input_offset'] = 0 + if hasattr(data_in, "_signed") and hasattr(data_in, "nLevels"): + operatorRepresentation['input_offset'] = (data_in._signed == 0) * int(data_in.nLevels / 2) + operatorRepresentation['output_offset'] = 0 + if hasattr(data_out, "_signed") and hasattr(data_out, "nLevels"): + operatorRepresentation['output_offset'] = -(data_out._signed == 0) * int(data_out.nLevels / 2) + + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _SILUTemplate(""" +// RQSILU (Name: ${nodeName}, Op: ${nodeOp}) +SINGLE_CORE RQSILU_s${data_in_type.referencedType.typeWidth}_s${data_out_type.referencedType.typeWidth}(${data_in}, ${data_out}, ${size}, ${input_offset}); +""") diff --git a/Deeploy/Targets/Generic/Templates/SILUTemplate.py b/Deeploy/Targets/Generic/Templates/SILUTemplate.py new file mode 100644 index 0000000000..eb84a268d9 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/SILUTemplate.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: 2024 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from typing import Dict, List, Tuple + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _SILUTemplate(NodeTemplate): + + def __init__(self, templateStr): + super().__init__(templateStr) + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + data_out = ctxt.lookup(operatorRepresentation['data_out']) + operatorRepresentation['input_offset'] = 0 + if hasattr(data_in, "_signed") and hasattr(data_in, "nLevels"): + operatorRepresentation['input_offset'] = (data_in._signed == 0) * int(data_in.nLevels / 2) + operatorRepresentation['output_offset'] = 0 + if hasattr(data_out, "_signed") and hasattr(data_out, "nLevels"): + operatorRepresentation['output_offset'] = -(data_out._signed == 0) * int(data_out.nLevels / 2) + + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _SILUTemplate(""" +// SILU (Name: ${nodeName}, Op: ${nodeOp}) +SINGLE_CORE SILU_s${data_in_type.referencedType.typeWidth}_s${data_out_type.referencedType.typeWidth}(${data_in}, ${data_out}, ${size}, ${input_offset}); +""") diff --git a/Deeploy/Targets/Generic/TileConstraints/SILUTileConstraint.py b/Deeploy/Targets/Generic/TileConstraints/SILUTileConstraint.py new file mode 100644 index 0000000000..cd14b07045 --- /dev/null +++ b/Deeploy/Targets/Generic/TileConstraints/SILUTileConstraint.py @@ -0,0 +1,86 @@ +# SPDX-FileCopyrightText: 2024 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +""" from typing import Dict, List, Tuple + +import numpy as np + +from Deeploy.AbstractDataTypes import PointerClass +from Deeploy.CommonExtensions.DataTypes import uint16_t +from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation +from Deeploy.TilingExtension.MemoryConstraints import NodeMemoryConstraint +from Deeploy.TilingExtension.TileConstraint import TileConstraint +from Deeploy.TilingExtension.TilerModel import TilerModel +from Deeploy.TilingExtension.TilingCodegen import AbsoluteHyperRectangle, TilingSchedule, VariableReplacementScheme +from Deeploy.TilingExtension.TilingCodegen import HyperRectangle + + +class SILUTileConstraint(TileConstraint): + + @staticmethod + def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel: + + inputBuffer1Name = parseDict['data_in'] + outputBufferName = parseDict['data_out'] + siluLUTBufferName = parseDict['silu_lut'] + + for bufferName in [inputBuffer1Name, outputBufferName, siluLUTBufferName]: + tilerModel.addTensorDimToModel(ctxt, bufferName) + + input1Shape = ctxt.lookup(inputBuffer1Name).shape + + for dim in range(len(input1Shape)): + inputDim1Var = tilerModel.getTensorDimVar(tensorName = inputBuffer1Name, dimIdx = dim) + outputDimVar = tilerModel.getTensorDimVar(tensorName = outputBufferName, dimIdx = dim) + + tilerModel.addConstraint(inputDim1Var == outputDimVar) + + return tilerModel + + @classmethod + def serializeTilingSolution( + cls, tilingSolution: NodeMemoryConstraint, absoluteOutputCubes: List[AbsoluteHyperRectangle], + targetMemLevel: str, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> Tuple[VariableReplacementScheme, TilingSchedule]: + outputCubes = [cube.rectangle for cube in absoluteOutputCubes] + + addrNames = ['data_in', 'data_out', 'silu_lut'] + inputBaseOffsets, outputBaseOffsets = cls.extractBaseAddr(tilingSolution, targetMemLevel, + operatorRepresentation, addrNames) + + replacements = {"size": []} + replacementTypes = {"size": PointerClass(uint16_t)} + + for cube in outputCubes: + newSize = np.prod(cube.dims) + replacements["size"].append(newSize) + + inputLoadSchedule = [] + outputLoadSchedule = [] + + # LUT is loaded once and reused for all tiles (stays at same location in L1) + lutName = operatorRepresentation['silu_lut'] + lutBuffer = ctxt.lookup(lutName) + lutShape = lutBuffer.shape + lutCube = HyperRectangle(tuple([0] * len(lutShape)), tuple(lutShape)) + + # Each step includes both LUT (same offset) and data_in (different tile) + # The LUT is loaded once and stays in place, data_in tiles are loaded per iteration + for cube in outputCubes: + inputLoadSchedule.append({"data_in": cube, "silu_lut": lutCube}) + + for out in outputCubes: + outputLoadSchedule.append({"data_out": out}) + + tilingSchedule = TilingSchedule(inputBaseOffsets, outputBaseOffsets, inputLoadSchedule, outputLoadSchedule) + variableReplacementSchedule = VariableReplacementScheme(replacements, replacementTypes) + + return variableReplacementSchedule, tilingSchedule + """ + +from Deeploy.Targets.Generic.TileConstraints.UnaryTileConstraint import UnaryTileConstraint + + +class SILUTileConstraint(UnaryTileConstraint): + pass \ No newline at end of file diff --git a/Deeploy/Targets/Generic/TypeCheckers.py b/Deeploy/Targets/Generic/TypeCheckers.py index c2c8d436f8..a9e41b4137 100644 --- a/Deeploy/Targets/Generic/TypeCheckers.py +++ b/Deeploy/Targets/Generic/TypeCheckers.py @@ -610,3 +610,19 @@ def _inferNumLevels(self, inputs: List[VariableBuffer], def _inferSignedness(self, inputs: List[VariableBuffer], operatorRepresentation: OperatorRepresentation) -> List[bool]: return [True] + +class SILUChecker(SignPropTypeChecker): #same as GELU due to the same input-output quantization parameters + + def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): + super().__init__(input_types, output_types) + + def _inferNumLevels(self, inputs: List[VariableBuffer], + operatorRepresentation: OperatorRepresentation) -> List[int]: + return [2**(self.input_types[0].referencedType.typeWidth)] + + def _inferSignedness(self, inputs: List[VariableBuffer], + operatorRepresentation: OperatorRepresentation) -> List[bool]: + if inputs[0]._signed: + return [True] + else: + return [False] \ No newline at end of file diff --git a/Deeploy/Targets/PULPOpen/Bindings.py b/Deeploy/Targets/PULPOpen/Bindings.py index 84ee2420e3..24a4fa2ee0 100644 --- a/Deeploy/Targets/PULPOpen/Bindings.py +++ b/Deeploy/Targets/PULPOpen/Bindings.py @@ -15,11 +15,11 @@ from Deeploy.FutureExtension.Bindings.AutoFutureBinding import AutoFutureBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, DequantTemplate, FloatReduceSumTemplate, \ - GatherTemplate, QuantTemplate, RQSiGELUTemplate, SliceTemplate, iHardswishTemplate + GatherTemplate, QuantTemplate, RQSiGELUTemplate, SliceTemplate, iHardswishTemplate, SILUTemplate, RQSILUTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DequantChecker, \ GatherChecker, GELUChecker, GEMMChecker, HardswishChecker, LayerNormChecker, MatMulChecker, MulChecker, \ QuantChecker, ReduceMeanChecker, ReluChecker, ReshapeChecker, RQAddChecker, RQHardswishChecker, SGDChecker, \ - SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker + SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker, SILUChecker from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling @@ -446,6 +446,16 @@ GatherTemplate.referenceTemplate, ForkTransformer) for type in IntegerDataTypes ] +PULPSILUBindings = [ + NodeBinding(SILUChecker([PointerClass(int8_t), PointerClass(int32_t)], [PointerClass(int32_t)]), SILUTemplate.referenceTemplate, + ForkTransformer) #See with ClusterTransformer also +] + +PULPRQSILUBindings = [ + NodeBinding(SILUChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int8_t)]), RQSILUTemplate.referenceTemplate, + ForkTransformer) +] + BasicQuantBindings = [ NodeBinding(QuantChecker([PointerClass(float32_t)], [PointerClass(int8_t)]), QuantTemplate.referenceTemplate, ForkTransformer), diff --git a/Deeploy/Targets/PULPOpen/Platform.py b/Deeploy/Targets/PULPOpen/Platform.py index 7456dd9e1b..0f905c71ca 100644 --- a/Deeploy/Targets/PULPOpen/Platform.py +++ b/Deeploy/Targets/PULPOpen/Platform.py @@ -17,14 +17,14 @@ GEMMLayer, LayerNormGradLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, QuantLayer, \ ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, \ RQSiHardswishLayer, SGDLayer, SliceLayer, SoftmaxCrossEntropyLossGradLayer, SoftmaxCrossEntropyLossLayer, \ - SoftmaxGradLayer, SoftmaxLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer + SoftmaxGradLayer, SoftmaxLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer, SILULayer, RQSILULayer from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, DequantParser, FlattenParser, GatherParser, \ GELUGradParser, GELUParser, GEMMParser, LayerNormGradParser, LayerNormParser, MatMulParser, MaxPool1DParser, \ MaxPool2DParser, MulParser, Pad1DParser, Pad2DParser, QuantParser, ReduceSumParser, ReluParser, \ RequantShiftParser, ReshapeParser, RQAddParser, RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, \ SGDParser, SliceParser, SoftmaxCrossEntropyLossGradParser, SoftmaxCrossEntropyLossParser, SoftmaxGradParser, \ SoftmaxParser, TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, iRMSNormParser, \ - iSoftmaxParser + iSoftmaxParser, SILUParser from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, IntegerDivRequantMergePass, \ MergeConstAddAndRequantPass, MergeTrueIntegerDivRequantShiftPass, QuantPatternPass, RQSSplitPass, \ @@ -41,7 +41,7 @@ PULPFPGELUGradTilingReadyBindings, PULPFPGELUTilingReadyBindings, PULPFPGEMMTilingReadyBindings, \ PULPGatherTilingReadyBindings, PULPiHardswishTilingReadyBindings, PULPiRMSNormTilingReadyBindings, \ PULPiRQSGELUTilingReadyBindings, PULPLayernormGradTilingReadyBindings, PULPLayernormTilingReadyBindings, \ - PULPMatMulTilingReadyBindings, PULPMaxPool1DTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, \ + PULPMatMulTilingReadyBindings, PULPMaxPool1DTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, \ PULPMulTilingReadyBindings, PULPReduceMeanTilingReadyBindings, PULPReduceSumTilingReadyBindings, \ PULPReluTilingReadyBindings, PULPRQAddTilingReadyBindings, PULPRQSConv1DTilingReadyBindings, \ PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, PULPRQSGEMMTilingReadyBindings, \ @@ -49,7 +49,7 @@ PULPRQSTilingReadyBindings, PULPSGDTilingReadyBindings, PULPSliceTilingReadyBindings, \ PULPSoftmaxCrossEntropyGradTilingReadyBindings, PULPSoftmaxCrossEntropyTilingReadyBindings, \ PULPSoftmaxGradTilingReadyBindings, PULPSoftmaxTilingReadyBindings, PULPTransposeTilingReadyBindings, \ - PULPUniformRQSTilingReadyBindings + PULPUniformRQSTilingReadyBindings, PULPSILUTilingReadyBindings, PULPRQSILUTilingReadyBindings from Deeploy.Targets.PULPOpen.TopologyOptimizationPasses.Passes import PULPAddRequantMergePass, \ PULPConvRequantMergePass, PULPGEMMRequantMergePass, PULPMatMulRequantMergePass @@ -58,6 +58,8 @@ FlattenMapper = NodeMapper(FlattenParser(), PULPFlattenTilingReadyBindings) GELUMapper = NodeMapper(GELUParser(), PULPFPGELUTilingReadyBindings) GELUGradMapper = NodeMapper(GELUGradParser(), PULPFPGELUGradTilingReadyBindings) +SILUMapper = NodeMapper(SILUParser(), PULPSILUTilingReadyBindings) +RQSILUMapper = NodeMapper(SILUParser(), PULPRQSILUTilingReadyBindings) GatherMapper = NodeMapper(GatherParser(), PULPGatherTilingReadyBindings) MulMapper = NodeMapper(MulParser(), PULPMulTilingReadyBindings) Pad1DMapper = NodeMapper(Pad1DParser(), BasicPad1DBindings) @@ -74,6 +76,8 @@ MatMulMapper = NodeMapper(MatMulParser(), PULPMatMulTilingReadyBindings) RQIntegerDivMapper = NodeMapper(RQIntegerDivParser(), [BasicRQIntegerDivBinding]) RQGELU_int8_Mapper = NodeMapper(RQSiGELUParser(), PULPiRQSGELUTilingReadyBindings) +SILUMapper = NodeMapper(SILUParser(), PULPSILUTilingReadyBindings) +RQSILUMapper = NodeMapper(SILUParser(), PULPRQSILUTilingReadyBindings) Conv1DMapper = NodeMapper(PULPConv1DParser(), PULPRQSConv1DTilingReadyBindings) DWConv1DMapper = NodeMapper(PULPDWConv1DParser(), [PULPDWConv1DBinding]) @@ -118,6 +122,8 @@ 'Gemm': GEMMLayer([FloatGEMMMapper, GEMMDequantMapper]), 'Gelu': GELULayer([GELUMapper]), 'GeluGrad': GELUGradLayer([GELUGradMapper]), + 'SILU': SILULayer([SILUMapper]), + 'RQSILU': RQSILULayer([RQSILUMapper]), 'LayerNormalization': LayerNormLayer([LayerNormMapper]), 'LayerNormalizationGrad': LayerNormGradLayer([LayerNormGradMapper]), 'MaxPool': MaxPoolLayer([MaxPool1DMapper, MaxPool2DMapper]), diff --git a/Deeploy/Targets/PULPOpen/Tiler.py b/Deeploy/Targets/PULPOpen/Tiler.py index 901106459e..0fd3413fda 100644 --- a/Deeploy/Targets/PULPOpen/Tiler.py +++ b/Deeploy/Targets/PULPOpen/Tiler.py @@ -13,6 +13,7 @@ from Deeploy.Targets.Generic.TileConstraints.RQSiGELUTileConstraint import RQSiGELUTileConstraint from Deeploy.Targets.Generic.TileConstraints.RQSiHardswishTileConstraint import RQSiHardswishTileConstraint from Deeploy.Targets.Generic.TileConstraints.TransposeTileConstraint import TransposeTileConstraint +from Deeploy.Targets.Generic.TileConstraints.SILUTileConstraint import SILUTileConstraint from Deeploy.Targets.Generic.TileConstraints.UnaryTileConstraint import UnaryTileConstraint from Deeploy.Targets.PULPOpen.Bindings import PULPAddBindings, PULPConcatBindings, PULPFloatConv2DBindings, \ PULPFloatDWConv2DBindings, PULPFloatGELUBinding, PULPFloatGELUGradBinding, PULPFloatGEMMBindings, \ @@ -22,7 +23,7 @@ PULPRQSBindings, PULPRQSConv1DBindings, PULPRQSConv2DBindings, PULPRQSDWConv2DBindings, PULPRQSGEMMBindings, \ PULPRQSiHardswishBindings, PULPRQSMatrixVecBindings, PULPRQSTallGEMMBindings, PULPSGDBindings, PULPSliceBindings, \ PULPSoftmaxBindings, PULPSoftmaxCrossEntropyLossBindings, PULPSoftmaxCrossEntropyLossGradBindings, \ - PULPSoftmaxGradBindings, PULPTransposeBindings, PULPUniformRQSBindings + PULPSoftmaxGradBindings, PULPTransposeBindings, PULPUniformRQSBindings, PULPSILUBindings, PULPRQSILUBindings from Deeploy.Targets.PULPOpen.TileConstraints.ConvTileConstraint import Conv2DTileConstraint, RQConv1DTileConstraint, \ RQConv2DTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.DWConvTileConstraint import DWConv2DTileConstraint, \ @@ -137,6 +138,12 @@ PULPFPGELUGradTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = [PULPFloatGELUGradBinding], tileConstraint = GeluGradTileConstraint()) +PULPSILUTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPSILUBindings, + tileConstraint = SILUTileConstraint()) + +PULPRQSILUTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPRQSILUBindings, + tileConstraint = SILUTileConstraint()) + PULPGatherTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPGatherBindings, tileConstraint = GatherTileConstraint()) diff --git a/Deeploy/TilingExtension/TilerExtension.py b/Deeploy/TilingExtension/TilerExtension.py index 9b48d9456c..e28c2e7101 100644 --- a/Deeploy/TilingExtension/TilerExtension.py +++ b/Deeploy/TilingExtension/TilerExtension.py @@ -320,6 +320,7 @@ def computeTilingSchedule(self, ctxt: NetworkContext) -> TilingSolution: log.debug(" - Extract Memory Allocation") self.innerMemoryScheduler.annotateSolution(ctxt, self.tilerModel) self.outerMemoryScheduler.annotateSolution(ctxt, self.tilerModel) + print(tilingSolution) return tilingSolution def computeMemoryMap(self, ctxt: NetworkContext, tilingSolution: TilingSolution) -> MemoryMap: diff --git a/DeeployTest/Tests/Kernels/Integer/RQSILU/inputs.npz b/DeeployTest/Tests/Kernels/Integer/RQSILU/inputs.npz new file mode 100644 index 0000000000000000000000000000000000000000..cd5012a292ecafdae922b5aba19fcbf0fb41ff54 GIT binary patch literal 3340 zcmb7{^;Z*)8pfG)3IftFKq(19kQ!Zs(cR$)alnu+De00PouhMte5IwknIbR{kS`%E zjj*dX&bjx;`@Hd-_c`xB@X>|f;tAmVOYAsoM8k#He~T1{2FKpc%g)BxP0-EVAAm!N z6H4~4$NzYmwvm>u834xzCy?9T!4vAiEd}Hbau(+12XZ^YJ-j^Z+-%?;_6~n?RXZ0? zhu^v9OFMUm-#3piKk(V_7Wp&0{J=NB|63$P;bw&T<{zXtjqPgNHI(~7^d+q9Jdh58ENt#6mGXBO@ zcjG^+k`XwhtS&1;52Wk=zWn4GrJW8&Ef_v}4vg5E008w%yVyxJ=x~qQW@dgC%+$7( ze0+#d>h~uXk{gFY=RPMN1V~u8bJ!TvN}3HOD~hsv{~WJYicM84yh1;dZBbk#Zr(`Q z=fnDSP-mU^(sJ5A%_b9ghwN8NRv}V+WMY6qcvNB#(Ki>Z{fF32$(DFv0F1N*4_i_5 zVnWtn%u>)0TW{FV}7xXQ;t=rid-I*&v={W&Tj8_WVNYe&%nlE()g(0s{ zfJUDI-1AD}-s09+8KH4awW|+ug!5I>nQh$3xRfbwmrt%8#oenLoe#q-ORBs!tBu6l zKeF3A=zrF23e95a^_}|ggG%`{Ajv3hdH3)%Hd1@E;L79-jM~Udb#}0|))^7znd`ib zk`NTu59egmQ!O3ns5e41)HMxRCu(opy$D0uzJc`E{#2LtQc>IIYvMCulen}I#}~yM zzaFHsuu{vg>!LsaZq7z_4gvO&3{)18B(aAe^->CRlP<8yOui#7$&uA*-jgp}MBLhn z43|Opj^^Fd&nP{I2Gdi0jg{1*vZ)sxe{pP0f+*|O#K~`#8xV6%mG}9+V<9K}Iui#> zbBq@P(CAt*?*%_G&BIAqn!v3{cuS@YWN;i!yA41W1`D%YnqxTaVn)uO?NU8o^`E-B zBuRHATA)>9h9f`0UO~Mp$1K+y4!+Z78TyZW3Px>Bh2Sy_;5wM-!mdg)9zGOnsNcjsFo0_O}*#hk=NBICiTNYca3 zbFwlj@ypzis%zwZ$KA_*9K8~xd%WwL^1P@S?B~Q@kg6F!3}jTBx~ck^O1wG1H7Z+p_YGpf|AfD044b^vgYt4+Y1ODxaY)T|QSQN*Ekj z`j@wCpgW1MqGx_n*=H(=$^A?ukCgUa=lb2loc9%dTvlsNHSZQ6Uf3TJ33kMOm@Q9@)Y$c2%LHzw~N|kXg4*QOG~WO&P_&qkI>PR z9-=wU4=1Y%{8UHBOLK0MI%d&M5E2oLu2;+uD7)L0te1#&)Z*>sVmTPsjhKQ>(E)8h z4y78$gtSx6ompB7%UriVy@4!9){JIy=(uIgV9aQpK zS?c^crI5XE_!z8wMo8tXO`AyTBl@;+=9QB@x{-Z}DadO+(O(h)98RW`vMrMwHGE~FYNL(U)|9F&!M3C^0lPxGKjoHue!0 z@p>d9@A-FhhDKM9r+nq_LZG1$YOz*PvsNtXen?tdk}{+q@OKUj4fb@cpZBq02W62b?r75Hb^^Cw;;)gE9&;^C zNq;7Ho;LO)Sd=de>%Zs=jk&Gn?RrSh_w2O3klEZV)hS#?pZ=AWv^w)a#w^!`#{D%8 zhaN|;K9_+hwI^l7PNfa+wxxUSkb~Gr^dZ`imA&}WR&x-=&lI&Eh0f3C z@U-U=blvw6wMw(Lu&El1dHDWRTg08vZs(-?Bm~?~>qOfzV8 zh9OejZSobVgfeWSxt-x+=SHFpx1iutNcABGs%KEa`sm> z(K(zipoj(%j8XE*T<+QGMB)_d$(GikqKn@C%~RUz)$I9?lg71#UM5N>0h-E#=}v2p zw>uq>Dggr(*|P^!Gg^HeGc+kCq-qhW5Y@5iUa;nQxXEu|-JVz7Z_q`rNli2F%?`yM9Z;BUJW7iqlBjqnpFMr9bco z*E@YnwNeOs*kO0|=WEpBKy^^B$;7p(yYYI5OU--y~OYm`-? zQ5>dCOHXJWe3t7g&@7c_-V972iL7Kn?_Kcu*4}2X3xumHK?)gR={Y7CC@ijNq9lx-s6;cMC?4+77OuPC)f zFK81S1RCB$@KaKs_Gd}f7sSH zdO|!MAq^VdL8GODK+InQ9W4K@*CU?%phUTMz)`B!Gf*c@4JmutjtNifBDyHWPA5W( zr};a+R)QH3KKF}_$`q3_GI#a4`7@;dhMGSX3iz}C3ubg7 S_ym6gNx~NXAwz zK%1hbcEGLu3fLkP9?VRa5y>9dm!9`06GG3gzSqoX0V z487Q~vce}=C;16EdymK&t@v_FA?lUh-PTESI#D!6^qXrBk3%?@a@bfyAX^Icm4|SG z{3|Fw*p=`u!Z~lz05eI4tgsk8+!fae*Gx!*8H3!sj4yL3`i-`~2-oPg~G|N_n zGuO$ym2I)$)q||TOm}@bM%Y$eHW2n7;d7qQYiN$}M{8$3Dc(Vw(;^)32QY&gowjMg zN(*KEvo9SuZzfSiBls$&6qOT3mtlOlP`n`q+~f9L2WqAvBW%vqk|?IjK#fjP#0sT_ zhk!v5(>^5B$aAe`k-Ygf3tJd7Hn)NTOD1_EPlkeoK;18C^~)qM`iDVF3b;mRXOMp5 zkIhU$2wWUlVvVwreBY>?(iB(SJ2#!VdNI>ufpxMQQ3c8kj-8uJDsq|?E@C+M zH9v&IJuaqVD>A}~V+$uG*j=SGO*}b;E^*eXIpm$PafxwHOGG1;UR9rdICZh?2dgy6 zYi6T+-IJT{XH27;??17;s4QSy)hO>wvuh656zX`{=d8S$VktSPM&-Pwas%ZtE;u=u z(`p-@2W@{3FOsA3YTX6b)0ongyD#Kbek-l)ZmKo9nzW9G5A4WqUN+r2ge3BgvNp|{ z;K~n}XD(Kr<4Wh6BE`nOYMOG%uqN$EQYGpZEqBf;w)gOg6f)XWGLxy{n<1=RY}wNa zy5Cc0Y`6!e(Y-_)zsy}b^{7cLdbet8>gVt?;K)_^n;S6OtL$8C5=lo5YTn{m<7jyx z84{N!K;eozT`#^C+*avKp;N6eX5mOdb%4Hr>IFP0oIFkG=$MnGpEgv%iJ#}>Q`I4a zWAJhF${)Dmswhf{q;f#<@dC-hRKjI%M%=$DRcbk|rh$6=1`mXXHEz=JzAJ7|wAds< zx4ZBS4_;#N6pJDXT;dcfTL=9H@X7Gbl^vi$~Ydx<LEvXftyxf@Y|P^PI;Z`NstDo?%L zLz|Ub@wD5lUMr%ee0-}vMy=Y=$5=1$VxE)gO^mAj(O$T?l#7w;II_g_#(pq@T z>KAgS*7JlYv9AkZjlGE4|3*Vdqk{f>;>4$?#W>dNb zI(}*;owTDhQ6j0Nv=BvU;U-2rH7Y6^~2$PLjsbtHl_q%gks0-7b%bj@uLtT?FvWj7ou(FO{OS7eGM2Zk@DfM+U|4#VVk5QT8M6cd^TYk zv{>8ap!Lcv$XV&2K9?EwHMaVDmCUb)HuCp%gb{|_>5@l0`jOcmYSyiBfsfg;%2I&EjND%@&J;@b8Nt9-U8`m1C z*dCpGmyC7-Z%*ooS}gs%L)`4~kq2LOhY?f34vSm23t9$wWtyh$pQd{H{7Uz%_H4#; zE6cHjrYgSESHkf5b^mFm-~3IckUk>BN?3vw+HMh2O#7-M&`pX)Q>@-IlHh*LSGWOB zUv#UupmBD?lMNe`x2Tc~%9>xHLG5N1v);AoRx1&?iZa({{vmkc$tuLmcw#;zyw^1O vG{Lbon|%=f`Tk|f;tAmVOYAsoM8k#He~T1{2FKpc%g)BxP0-EVAAm!N z6H4~4$NzYmwvm>u834xzCy?9T!4vAiEd}Hbau(+12XZ^YJ-j^Z+-%?;_6~n?RXZ0? zhu^v9OFMUm-#3piKk(V_7Wp&0{J=NB|63$P;bw&T<{zXtjqPgNHI(~7^d+q9Jdh58ENt#6mGXBO@ zcjG^+k`XwhtS&1;52Wk=zWn4GrJW8&Ef_v}4vg5E008w%yVyxJ=x~qQW@dgC%+$7( ze0+#d>h~uXk{gFY=RPMN1V~u8bJ!TvN}3HOD~hsv{~WJYicM84yh1;dZBbk#Zr(`Q z=fnDSP-mU^(sJ5A%_b9ghwN8NRv}V+WMY6qcvNB#(Ki>Z{fF32$(DFv0F1N*4_i_5 zVnWtn%u>)0TW{FV}7xXQ;t=rid-I*&v={W&Tj8_WVNYe&%nlE()g(0s{ zfJUDI-1AD}-s09+8KH4awW|+ug!5I>nQh$3xRfbwmrt%8#oenLoe#q-ORBs!tBu6l zKeF3A=zrF23e95a^_}|ggG%`{Ajv3hdH3)%Hd1@E;L79-jM~Udb#}0|))^7znd`ib zk`NTu59egmQ!O3ns5e41)HMxRCu(opy$D0uzJc`E{#2LtQc>IIYvMCulen}I#}~yM zzaFHsuu{vg>!LsaZq7z_4gvO&3{)18B(aAe^->CRlP<8yOui#7$&uA*-jgp}MBLhn z43|Opj^^Fd&nP{I2Gdi0jg{1*vZ)sxe{pP0f+*|O#K~`#8xV6%mG}9+V<9K}Iui#> zbBq@P(CAt*?*%_G&BIAqn!v3{cuS@YWN;i!yA41W1`D%YnqxTaVn)uO?NU8o^`E-B zBuRHATA)>9h9f`0UO~Mp$1K+y4!+Z78TyZW3Px>Bh2Sy_;5wM-!mdg)9zGOnsNcjsFo0_O}*#hk=NBICiTNYca3 zbFwlj@ypzis%zwZ$KA_*9K8~xd%WwL^1P@S?B~Q@kg6F!3}jTBx~ck^O1wG1H7Z+p_YGpf|AfD044b^vgYt4+Y1ODxaY)T|QSQN*Ekj z`j@wCpgW1MqGx_n*=H(=$^A?ukCgUa=lb2loc9%dTvlsNHSZQ6Uf3TJ33kMOm@Q9@)Y$c2%LHzw~N|kXg4*QOG~WO&P_&qkI>PR z9-=wU4=1Y%{8UHBOLK0MI%d&M5E2oLu2;+uD7)L0te1#&)Z*>sVmTPsjhKQ>(E)8h z4y78$gtSx6ompB7%UriVy@4!9){JIy=(uIgV9aQpK zS?c^crI5XE_!z8wMo8tXO`AyTBl@;+=9QB@x{-Z}DadO+(O(h)98RW`vMrMwHGE~FYNL(U)|9F&!M3C^0lPxGKjoHue!0 z@p>d9@A-FhhDKM9r+nq_LZG1$YOz*PvsNtXen?tdk}{+q@OKUj4fb@cpZBq02W62b?r75Hb^^Cw;;)gE9&;^C zNq;7Ho;LO)Sd=de>%Zs=jk&Gn?RrSh_w2O3klEZV)hS#?pZ=AWv^w)a#w^!`#{D%8 zhaN|;K9_+hwI^l7PNfa+wxxUSkb~Gr^dZ`imA&}WR&x-=&lI&Eh0f3C z@U-U=blvw6wMw(Lu&El1dHDWRTg08vZs(-?Bm~?~>qOfzV8 zh9OejZSobVgfeWSxt-x+=SHFpx1iutNcABGs%KEa`sm> z(K(zipoj(%j8XE*T<+QGMB)_d$(GikqKn@C%~RUz)$I9?lg71#UM5N>0h-E#=}v2p zw>uq>Dggr(*|P^!Gg^HeGc+kCq-qhW5Y@5iUa;nQxXEu|-JVz7Z_q`rNli2F%?`yM9Z;BUJW7iqlBjqnpFMr9bco z*E@YnwNeOs*kO0|=WEpBKy^^B$;7p(yYYI5OU--y~OYm`-? zQ5>dCOHXJWe3t7g&@7c_-V972iL7Kn?_Kcu*4}2X3xumHK?)gR={Y7CC@ijNq9lx-s6;cMC?4+77OuPC)f zFK81S1RCB$@KaKs_Gd}f7sSH zdO|!MAq^VdL8GODK+InQ9W4K@*CU?%phUTMz)`B!Gf*c@4JmutjtNifBDyHWPA5W( zr};a+R)QH3KKF}_$`q3_GI#a4`7@;dhMGSX3iz}C3ubg7 S_ym6&&(6z05kGSOQbl#YV-0_ zQk7VOJ$*v07=$1~Anj3#2rXhAUvI@n&91qW!3Qcvxi5QjSSD7J^a;`tJbVqvf|p6tF9aO`6Wwk zSUsMfufBfCjpOo28T{Kry;A1Gz<;Zof9Ce0wP) z(>rm-k!GElzv#yC8zbzoC^2!}ZTa2ui`%j9+=#_T24e4)!MNzCh__#gIPTq27A(m9 z^4eE&|8;r&(pw|W{#z;Azuc+!%>46U%-hlIQ#SNs?PMu$zTCv~=Fi*l%%O}g_YjX* z?s%<~u~S>I?T&%Cb8$OvJ=DsLB9^{c%7JfpVrE|ZOs`wlyx^p$7>M!qUw2|8W02GS z``cl!%~yA0>b+7T?>+eUQjX6!ydTT!@6K3NcGgW1^ES2O@PmW#?xTs(u7OzoOvJ%E z+i~GkDH{{BEsvKXF1bw0T-X1#6NmHut5$a+GXB;1{rl&3W5&^WZ(=p|NGs;#bzAQ* zW%Ki;z{_VBp4g3ziT_A4Zq-w*y7#(?PRLg-u$gf%V|cdt5${CiT+;)JH)(WVeXT0S$}h4Y5x~;T^w@Vc*Z;ZsZvhMpUuczGL`B`!cmxl717gqG*;iFoW z2d|6{#>Ae17|ZV}&we%#YjO>oh5^pWap`9|mBZpulc{A*ZfXUy+dkfl&!vWL{ay3E zfo|M;-$39A-oOs$hJM^|ZS|D{6)Su6uNNPD>A=0Pu-^s$E@e~3HkP&Yv-}^; zt2+B$o-{J{wMMIOqy_v|A3PE}EggI*uixLPHNo?hhdOnysaFT$n4flH=Z^*}$8mUN zuKUDTFWiH#=ls4DII8K>ujD$pHvI5`25}KjXj5(Q;+U*Geop^v#I~m!J<{6Ftc$pK zmKs`r_<;x@rj@B{#SA+_v6Xp6%+r& zY1rbHnC3s;tNhshYV)2a@MpDzISs0LU*FcIb2+d7Rjc}sn!+Ey&pJJIkRF5cd~nUN zy~cmi8~N16$GUaBy&3QLw8SNIrycR*CjG(^Ml@0No1gOx*l+!OtM)`|d4JCJQGLsG z`jFV+V@%IUgdt?ub+xbjrBkLJo<_3ZF2{z|J44Q81f{c-7<5*9WV|;y+H<8wUJCFVO?u%DvUvdpEe_XK_?O1DaUlXfY zfdBKx+G8_U`1O6;yaQkP;sd0z%^UQnJ=9y||UHZA>wRXizPf`Q%!_RwlaW9X80les(_rr(AVRi5BrdIY;GhQoR z*sC9G?%CDg>t1!_e|%S-CuA*YaVqs8FMO*rSJp$%5Ja=e{+-A z^*43l-#)`HzG#-g(F55}Xoc4Kq?*A?{`4Y8I&>YcfuWq-!{_mZzVOlgw4{GppFZha zzw*5vtH#z9r&`khp6N;U)C*|PyyALZMO%}JhqZYvZknz6hQ2(JF_=k38ThEFd`5C@{S_DWQT|1jb)Sp}f7PgumATCx z;?)b(*uCPlFD~rQy{xO3%bhpEi)Z4MnyCZdc7Eyi8NB7g>fr1ITRobN=+3@8Tn>Cy zK72;+q%XMQ8-M0O<|z7Hm+#?$pYVp0nTw~(&pak}{gdX!E(aQRzdqtxHK0E=!ZVuC zi}Ap$BR4(;TkF%BxOnTF#F~G5<=Ob|T0MnktqqS+HBK$jDhzm9ohh^5@+sHCm>0v7 zxA8-9!IFR2L;mnIPvE{-Ce0cp0JR6#S$KmHu%xITB;FWx1M>}>?wCOf{(Mf`5NBhjNe&Of3Xf;zz=465g)~6I*43fkHum2H@i5y;1(|F*J{E0^nd4;evgDN z-{+a;7aHY3IKxZ$ynHHV`JT?Lrw(|b4zSyDbF0oZ=FxiA%-H#v_-O&>_Ni&T{z6o2@CvI#(puXcj(jQcL{k z;{bCpi&g)`JzfDz>(DDq?d@!iUurgzxQkU>xWXHqPps`T{-@qNjW&5Tj})`M=o)7? z8qrhC+BoWb3Mbs<8$4MI{6?>p7k`zjdgx8=t!K0kdT8Jw~u|*O5gJt{Ak2? zo)PHNJkNFV(bxC{{_#@$#d)vl2ZuTNimPJ773U55bymhf?_n^S*O|R(oR84Fd+=8; zl0X0OnYin-mGo8|$Is2rH~gL0lXbtWdA4O1#Sc94IgN=)jrAD!;WY0uPw9K+198%U zddZzvO=Pa>fde?hW7WY7;(Pb@pB2zK&sP)thrL=^6X(qx-qVNc%wE=}dmJ=_>g_m( z+q}&+@TNz67QeW8wD}fC@sc*pGCa+BOrOMAyi`Lt$-((sUo;n}7w)QqdYD6bx4DZi z;wC>5v)p)}`L*_&)F2(eNxb^0eDMIjyjgs_+FD~7x43BQo|Bq6SFU)97xt7R?Bv6f zyy5_^&(E6hQaHjMeznJBo#C%0W(d8Nr{FS8@G3Jg&!ionl_)@(lebKoO z$M_8W;T#Xc4?WSj!W^xq;}d^$E--`WT`-m>pTJvvL0j6D@lV*pkvBLit2Zv;Jtz0o7v<@_-tT*Jls;mP)#G@JTG76m&^TS0<9N2ZnIFtd z^vb_*if-Y+vv{Uj@=3i2AI+k!_3TK`gpYe*sW0*%KBpeeMmWRE;DiS?mwe_SUiDBb znp0<7uFs6JZZzf`tUthqXNVtudI}!nh&tF?er6JN<0W)UbIxJ-p(o)yPx72n+|4tB zoQG34c&~nZQrvW;PsxK9$hXcR&!(pHbL;B8G)P}Cr#raGacAaKXYE|u*$^k_2p8p} zUx~-Ha+DvcHTfEk@=Ru0zK4_F)d|;FJn(n8k z{&QjThkJP@-1QLk;<>QoQGCeScx+ERvk!giVf+#VR`naoR&ftE> zRdcNVZq~Q2dW+fhFo6fpRCn0>(d*sQpT#TYc`uCR#~<9Uubcb$K24d4c&h!y=-C6V z`#TnV;2C^E{dp|>)Ih)T_X6q%1NCtph7X?MFK<#aXFBWJ7anRVW@lA>R}X{v@m9ryew^&+2c^^!llK$VGpz`c1zw zo0wJfJNMJL+?)!k&pYVt=>3=6a3k^hj*Tl_u~I(E{hlR={~a){)!iN z?lI%xD4#LY@w9p#O;1!Gc<_6i(Bo*=dalJ?yx=|N63=Sso%iY8bSHLP^}G+>@Z%ZQ zq%rl@fAL(eM?XL4nD%uVXJqW_te(hDF3=h>cS`7=UQ_SuJHpp6~DdJ#$09& z!L@p>&UkT(c04CkFL-%=BW`D5y6~!R;Q}q{7qIfmYxt3S@C(;@qTJNUnXW&>@m>0o zzqt8=Ug~=@5Ko|g{N(#`2%K&+t$8tF?US8NbZLw1xM286D81*u`gtp>4m@ zCO?3gSxT?74i0*SHO}LP{%Yp7H$1HY7yX@&^LF~-SNfIwaT|wd53igtL0ZCjnB$B*`LH^mIbYy=uqz^vh zcb+nmIPd|UcLTL!4Et3A@NW z^)5X@U#a{{4h-dcd5Oz)&ZT&yFIiK4@Qz2&A8z|GcU!}c{NP5v{6){jA#*d`s*iik zcD%~(H0@e`rf&FwXJ!}NhPOJ5W_)tBKiuiCf2NiT&GIsQ<8S%_{M^T<%>VkSo>|W( zSqHe#DNn;!y%uMzWsdN7xpJpF^CF(&m6^f3LW}sW-^|Lr>?=q7f)y`zjhf@g$@xmx z^jdY3d74+9@!(^Iu!cF;oB}_bgFS5cq?*E5t*y=P%szO>XXOV|*uer0&O>^kdYYTu z!#{DzUU;QPoAYo}tZ-K!@6EILY+pWN#+#E+=}$aG?f4`YTK+IJ$C#z{ zWNQxPKJwKE>4p}>YM$gB_Q4@>(E$D7wfy;$_-P4OX~Ldxo|)H)p9k6#2XPZN_@=h} zMy>dodchMn`7uu8p*pDHqKwlFO+S3kxyUO%z{B&5zMsfnY%owibyPoIN{80v3Fb1s z=sp~xNinz=&NQPoa91nZbSA-5{y_WYWB8d1VGIv*Q1y_7`QO&0Z>bOL@zC1*R$Xz) z9{Q2>#Y#^&LyK^r3(whjhtK-;(0l81v#h0OO>*`;44!(4I?^DmigjAzXI;J1y6zP- zKJs4IngeiD-{u{BmB!3Ta1y)t)Sr&bLOjCE;XGtrI=4Se>6C}_e3;-JUhA{9hdbjZ zZ*fn*f;EhB3qG_-PjFIqF{rgzd6JpUY-Juc_wyV57~W=3`Qn9IS|5+_)C|~veyJvU z6^@9-S%{X^NN(_PFHMS*-qcclW=mYIb9(xY*&Xk3+n&~Q4V}W$oUb?Vb8`(`@!EI% z(|i4j?rOiu`{3$Y^;3UZle5{re@^Gk;)JVOi=U5~6Zwq31UEVAue_gcsi9e`&T6T@ zs`2zNbL;Z$A3Ph(zC^=y{>q#u_B3Y))n|U@i8t1T9l!B>hYzT)o;Euy)Wj-Qqp6HxtKENgUiWe^GWgbum7*t=$b@XogTbtj> z9p7BbC-if#=3QDoob#u>VTj{&;2t&8XZaYOnhl%-^geThobU|aY{8-?zWhoF(Ya{bq+9`8^J)CvC}1ZSaX_?95mCmy_%H5+1?9d{5VWk)HUM z&z-S6i!-d6{_NhlgrU1GR^yPw;yl?7KNxTfF31Jjnhy=EuHt z>^oYbId!kIQexrn2=Lur)-%7@Oa1Ucj`D&5jP-6kfH%{!edUJ}bY{-u88khbanU1; z)EK9%0b_Cdj)rm9dwois#mHCTkI%HHZ+O1!T0Utn{mnUvSE&cg@fD6RbwBR-USGx& z{mq<4EBc>UaS=XdB3^6{^A8_1oADvDg;yMOJzVuFd=>*Ntc#y;({t#D@9;Vv3RAtI zAD?}2(%-}QG3Tkb84%9upeNz3+TkD^JO_}+qRU$Y^Www*e!;&@@G~u;>i@YV9|6UO X{x!p*%ex7CL@WPye!f1GcSQUTx|F-# literal 0 HcmV?d00001 diff --git a/DeeployTest/test_gap9_config.py b/DeeployTest/test_gap9_config.py index 69b940f0c3..f5cd3d46e0 100644 --- a/DeeployTest/test_gap9_config.py +++ b/DeeployTest/test_gap9_config.py @@ -9,7 +9,7 @@ "Kernels/Integer/Add/Regular", "Kernels/Integer/Add/MultIO", "Kernels/Integer/Pad/Regular_1D", "Kernels/Integer/Pad/Regular_2D", "Kernels/Integer/MatMul/Regular", "Kernels/Integer/MatMul/Add", "Kernels/Integer/Conv/DW_2D_RQ", "Kernels/Integer/Conv/Regular_2D_RQ", "Kernels/Integer/Softmax/Regular", - "Kernels/Integer/Concat", "Kernels/Integer/Hardswish/Regular", "Others/Backtracking", "Kernels/FP32/Add/Regular", + "Kernels/Integer/Concat", "Kernels/Integer/Hardswish/Regular", "Kernels/Integer/SILU", "Others/Backtracking", "Kernels/FP32/Add/Regular", "Kernels/FP32/GEMM/Regular", "Kernels/FP32/Conv/Regular_2D_Bias", "Kernels/FP32/Conv/Regular_2D_NoBias", "Kernels/FP32/Conv/Regular_2D_ZeroValuedBias", "Kernels/FP32/Conv/DW_2D_Bias", "Kernels/FP32/Conv/DW_2D_NoBias", "Kernels/FP32/Conv/DW_2D_ZeroValuedBias", "Kernels/FP32/LayerNorm", "Kernels/FP32/ReLU", diff --git a/DeeployTest/test_generic_config.py b/DeeployTest/test_generic_config.py index b0d8c659ca..311b932a23 100644 --- a/DeeployTest/test_generic_config.py +++ b/DeeployTest/test_generic_config.py @@ -63,6 +63,7 @@ "Kernels/Integer/ReduceMean", "Kernels/Integer/ReduceSum", "Kernels/Integer/Slice", + "Kernels/Integer/SILU", # Special test from TinyViT model layers "Models/TinyViT/5M/Layers/FP32/ReduceMean", # Mixed Precision / Quantization diff --git a/DeeployTest/test_siracusa_config.py b/DeeployTest/test_siracusa_config.py index 8fa105d9f4..18cba35918 100644 --- a/DeeployTest/test_siracusa_config.py +++ b/DeeployTest/test_siracusa_config.py @@ -55,6 +55,7 @@ "Kernels/Integer/Pad/Regular_1D", "Kernels/Integer/Pad/Regular_2D", "Kernels/Integer/RMSNorm", + "Kernels/Integer/SILU", "Models/TinyViT/5M/Layers/FP32/ReduceMean", "Others/Backtracking", "Kernels/Mixed/Dequant", diff --git a/TargetLibraries/Generic/inc/DeeployBasicMath.h b/TargetLibraries/Generic/inc/DeeployBasicMath.h index 22081701a3..bba6a556ac 100644 --- a/TargetLibraries/Generic/inc/DeeployBasicMath.h +++ b/TargetLibraries/Generic/inc/DeeployBasicMath.h @@ -50,6 +50,8 @@ #include "kernel/RQHardswish.h" #include "kernel/Relu.h" #include "kernel/RequantShift.h" +#include "kernel/SILU.h" +#include "kernel/RQSILU.h" #include "kernel/Softmax.h" #include "kernel/Sqrt.h" diff --git a/TargetLibraries/Generic/inc/kernel/RQSILU.h b/TargetLibraries/Generic/inc/kernel/RQSILU.h new file mode 100644 index 0000000000..1b9e74a19f --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/RQSILU.h @@ -0,0 +1,34 @@ +/* + * SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_RQSILU_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_RQSILU_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +static PI_L1 int8_t RQSILU_lut_s8_s8[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, + -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -2, -2, -2, -1, -1, 0, + 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 58, 59, 60, 61, 62, + 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, + 79, 80, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +}; + +void RQSILU_s8_s8(int8_t *data_in, int8_t *data_out, int32_t dataSize, + int32_t input_offset); + +#endif //__DEEPLOY_BASIC_MATH_RQSILU_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/SILU.h b/TargetLibraries/Generic/inc/kernel/SILU.h new file mode 100644 index 0000000000..f8cf1fa360 --- /dev/null +++ b/TargetLibraries/Generic/inc/kernel/SILU.h @@ -0,0 +1,34 @@ +/* + * SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_SILU_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_SILU_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +static PI_L1 int32_t SILU_lut_s8_s32[256] = { + -176, -186, -196, -207, -219, -231, -244, -257, -272, -287, -303, -320, -337, -356, -375, -396, + -418, -441, -465, -491, -517, -546, -575, -607, -639, -674, -711, -749, -789, -832, -876, -923, + -972, -1024, -1078, -1136, -1196, -1259, -1325, -1394, -1467, -1544, -1624, -1708, -1796, -1888, -1985, -2087, + -2193, -2304, -2421, -2543, -2670, -2803, -2943, -3088, -3240, -3399, -3564, -3737, -3917, -4105, -4300, -4503, + -4715, -4935, -5163, -5401, -5647, -5902, -6167, -6440, -6724, -7016, -7318, -7630, -7950, -8280, -8620, -8968, + -9324, -9689, -10062, -10442, -10829, -11222, -11620, -12023, -12429, -12837, -13245, -13654, -14060, -14462, -14858, -15246, + -15624, -15989, -16339, -16670, -16979, -17264, -17520, -17745, -17933, -18082, -18186, -18241, -18244, -18188, -18070, -17884, + -17625, -17290, -16871, -16366, -15769, -15075, -14281, -13380, -12371, -11249, -10011, -8653, -7173, -5570, -3840, -1984, + 0, 2112, 4352, 6718, 9211, 11827, 14565, 17423, 20397, 23484, 26679, 29981, 33383, 36882, 40473, 44150, + 47911, 51748, 55658, 59636, 63676, 67775, 71926, 76126, 80371, 84655, 88976, 93328, 97709, 102114, 106541, 110987, + 115448, 119922, 124406, 128898, 133396, 137898, 142403, 146907, 151411, 155913, 160412, 164906, 169395, 173878, 178354, 182823, + 187284, 191736, 196180, 200616, 205042, 209458, 213866, 218264, 222652, 227032, 231401, 235762, 240113, 244455, 248789, 253113, + 257429, 261737, 266036, 270327, 274611, 278887, 283156, 287417, 291672, 295920, 300161, 304397, 308626, 312849, 317067, 321280, + 325487, 329689, 333887, 338080, 342268, 346452, 350632, 354808, 358981, 363150, 367315, 371477, 375636, 379792, 383946, 388096, + 392244, 396389, 400532, 404672, 408811, 412947, 417081, 421214, 425345, 429473, 433601, 437726, 441851, 445973, 450095, 454215, + 458334, 462452, 466569, 470684, 474799, 478912, 483025, 487137, 491248, 495359, 499468, 503577, 507685, 511793, 515900, 520006, +}; + +void SILU_s8_s32(int8_t *data_in, int32_t *data_out, int32_t dataSize, + int32_t input_offset); + +#endif //__DEEPLOY_BASIC_MATH_SILU_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/macros.h b/TargetLibraries/Generic/inc/macros.h index d97cfecb7c..a488043b12 100644 --- a/TargetLibraries/Generic/inc/macros.h +++ b/TargetLibraries/Generic/inc/macros.h @@ -7,6 +7,13 @@ #ifndef __DEEPLOY_BASIC_MATH_MACROS_HEADER_ #define __DEEPLOY_BASIC_MATH_MACROS_HEADER_ +// Provide target-neutral defaults for section placement macros used by +// PULP-targeted code. For Generic builds these should be empty so that +// declarations like `PI_L1 int32_t foo[...]` compile as plain globals. +#ifndef PI_L1 +#define PI_L1 +#endif + #define MAX(a, b) \ ({ \ __typeof__(a) _a = (a); \ diff --git a/TargetLibraries/Generic/src/RQ_SILU.c b/TargetLibraries/Generic/src/RQ_SILU.c new file mode 100644 index 0000000000..c26cdbd4b9 --- /dev/null +++ b/TargetLibraries/Generic/src/RQ_SILU.c @@ -0,0 +1,15 @@ +/* + * SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "DeeployBasicMath.h" + +void RQSILU_s8_s8(int8_t *data_in, int8_t *data_out, int32_t dataSize, + int32_t input_offset) { + for (int i = 0; i < dataSize; i++) { + int32_t x = data_in[i] + 128 - input_offset; + data_out[i] = RQSILU_lut_s8_s8[x]; + } +} diff --git a/TargetLibraries/Generic/src/SILU_s8.c b/TargetLibraries/Generic/src/SILU_s8.c new file mode 100644 index 0000000000..6d99bfff29 --- /dev/null +++ b/TargetLibraries/Generic/src/SILU_s8.c @@ -0,0 +1,42 @@ +/* + * SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "DeeployBasicMath.h" +// #include "perf_utils.h" + +void SILU_s8_s32(int8_t *data_in, int32_t *data_out, int32_t dataSize, + int32_t input_offset) { + + // int8_t core_id = pi_core_id(); + // int8_t log2Core = LOG2(NUM_CORES); + + //RW: Performance monitoring is currently disabled + // perf_stats_t perf_start, perf_end, perf_total; + + // Initialize and start performance counters (only core 0) + // if (core_id == 0) { + // perf_bench_init(); + // perf_bench_start(); + //perf_bench_read(&perf_start); + // } + + for (int i = 0; i < dataSize; i++) { + int32_t x = data_in[i] + 128 - input_offset; + data_out[i] = SILU_lut_s8_s32[x]; + } + + // RW: Stop performance counters and print results (only core 0) + // if (core_id == 0) { + // perf_bench_stop(); + // perf_bench_read(&perf_end); + // perf_bench_diff(&perf_total, &perf_end, &perf_start); + + // char label[100]; + // snprintf(label, sizeof(label), "GEMM M=%u N=%u O=%u transA=%u transB=%u", + // M, N, O, transA, transB); + // perf_bench_print(label, &perf_total); + // } +} \ No newline at end of file From e5065724201b3d775949f8a8084ba4486f09516c Mon Sep 17 00:00:00 2001 From: FilippoCordella Date: Mon, 23 Feb 2026 16:23:53 +0000 Subject: [PATCH 02/11] Generic/Siracusa/GAP9 Kernels fix --- .../TileConstraints/SILUTileConstraint.py | 78 ------------------- TargetLibraries/GAP9/inc/DeeployGAP9Math.h | 2 +- TargetLibraries/Generic/inc/kernel/RQSILU.h | 2 +- TargetLibraries/Generic/inc/kernel/SILU.h | 2 +- TargetLibraries/Generic/inc/macros.h | 7 -- .../Generic/src/{RQ_SILU.c => RQSILU_s8.c} | 0 TargetLibraries/Generic/src/SILU_s8.c | 27 ------- .../PULPOpen/inc/DeeployPULPMath.h | 3 +- TargetLibraries/PULPOpen/inc/kernel/RQSILU.h | 34 ++++++++ TargetLibraries/PULPOpen/inc/kernel/SILU.h | 34 ++++++++ TargetLibraries/PULPOpen/src/RQSILU_s8.c | 15 ++++ TargetLibraries/PULPOpen/src/SILU_s8.c | 15 ++++ 12 files changed, 103 insertions(+), 116 deletions(-) rename TargetLibraries/Generic/src/{RQ_SILU.c => RQSILU_s8.c} (100%) create mode 100644 TargetLibraries/PULPOpen/inc/kernel/RQSILU.h create mode 100644 TargetLibraries/PULPOpen/inc/kernel/SILU.h create mode 100644 TargetLibraries/PULPOpen/src/RQSILU_s8.c create mode 100644 TargetLibraries/PULPOpen/src/SILU_s8.c diff --git a/Deeploy/Targets/Generic/TileConstraints/SILUTileConstraint.py b/Deeploy/Targets/Generic/TileConstraints/SILUTileConstraint.py index cd14b07045..c22acec5e7 100644 --- a/Deeploy/Targets/Generic/TileConstraints/SILUTileConstraint.py +++ b/Deeploy/Targets/Generic/TileConstraints/SILUTileConstraint.py @@ -2,85 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -""" from typing import Dict, List, Tuple - -import numpy as np - -from Deeploy.AbstractDataTypes import PointerClass -from Deeploy.CommonExtensions.DataTypes import uint16_t -from Deeploy.DeeployTypes import NetworkContext, OperatorRepresentation -from Deeploy.TilingExtension.MemoryConstraints import NodeMemoryConstraint -from Deeploy.TilingExtension.TileConstraint import TileConstraint -from Deeploy.TilingExtension.TilerModel import TilerModel -from Deeploy.TilingExtension.TilingCodegen import AbsoluteHyperRectangle, TilingSchedule, VariableReplacementScheme -from Deeploy.TilingExtension.TilingCodegen import HyperRectangle - - -class SILUTileConstraint(TileConstraint): - - @staticmethod - def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel: - - inputBuffer1Name = parseDict['data_in'] - outputBufferName = parseDict['data_out'] - siluLUTBufferName = parseDict['silu_lut'] - - for bufferName in [inputBuffer1Name, outputBufferName, siluLUTBufferName]: - tilerModel.addTensorDimToModel(ctxt, bufferName) - - input1Shape = ctxt.lookup(inputBuffer1Name).shape - - for dim in range(len(input1Shape)): - inputDim1Var = tilerModel.getTensorDimVar(tensorName = inputBuffer1Name, dimIdx = dim) - outputDimVar = tilerModel.getTensorDimVar(tensorName = outputBufferName, dimIdx = dim) - - tilerModel.addConstraint(inputDim1Var == outputDimVar) - - return tilerModel - - @classmethod - def serializeTilingSolution( - cls, tilingSolution: NodeMemoryConstraint, absoluteOutputCubes: List[AbsoluteHyperRectangle], - targetMemLevel: str, ctxt: NetworkContext, - operatorRepresentation: OperatorRepresentation) -> Tuple[VariableReplacementScheme, TilingSchedule]: - outputCubes = [cube.rectangle for cube in absoluteOutputCubes] - - addrNames = ['data_in', 'data_out', 'silu_lut'] - inputBaseOffsets, outputBaseOffsets = cls.extractBaseAddr(tilingSolution, targetMemLevel, - operatorRepresentation, addrNames) - - replacements = {"size": []} - replacementTypes = {"size": PointerClass(uint16_t)} - - for cube in outputCubes: - newSize = np.prod(cube.dims) - replacements["size"].append(newSize) - - inputLoadSchedule = [] - outputLoadSchedule = [] - - # LUT is loaded once and reused for all tiles (stays at same location in L1) - lutName = operatorRepresentation['silu_lut'] - lutBuffer = ctxt.lookup(lutName) - lutShape = lutBuffer.shape - lutCube = HyperRectangle(tuple([0] * len(lutShape)), tuple(lutShape)) - - # Each step includes both LUT (same offset) and data_in (different tile) - # The LUT is loaded once and stays in place, data_in tiles are loaded per iteration - for cube in outputCubes: - inputLoadSchedule.append({"data_in": cube, "silu_lut": lutCube}) - - for out in outputCubes: - outputLoadSchedule.append({"data_out": out}) - - tilingSchedule = TilingSchedule(inputBaseOffsets, outputBaseOffsets, inputLoadSchedule, outputLoadSchedule) - variableReplacementSchedule = VariableReplacementScheme(replacements, replacementTypes) - - return variableReplacementSchedule, tilingSchedule - """ - from Deeploy.Targets.Generic.TileConstraints.UnaryTileConstraint import UnaryTileConstraint - class SILUTileConstraint(UnaryTileConstraint): pass \ No newline at end of file diff --git a/TargetLibraries/GAP9/inc/DeeployGAP9Math.h b/TargetLibraries/GAP9/inc/DeeployGAP9Math.h index 0efa74c72e..e4aa3ec134 100644 --- a/TargetLibraries/GAP9/inc/DeeployGAP9Math.h +++ b/TargetLibraries/GAP9/inc/DeeployGAP9Math.h @@ -24,4 +24,4 @@ #include "pmsis.h" -#endif // __DEEPLOY_MATH_HEADER_ +#endif // __DEEPLOY_MATH_HEADER_ \ No newline at end of file diff --git a/TargetLibraries/Generic/inc/kernel/RQSILU.h b/TargetLibraries/Generic/inc/kernel/RQSILU.h index 1b9e74a19f..031d59d1b3 100644 --- a/TargetLibraries/Generic/inc/kernel/RQSILU.h +++ b/TargetLibraries/Generic/inc/kernel/RQSILU.h @@ -9,7 +9,7 @@ #include "DeeployBasicMath.h" -static PI_L1 int8_t RQSILU_lut_s8_s8[256] = { +static int8_t RQSILU_lut_s8_s8[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, diff --git a/TargetLibraries/Generic/inc/kernel/SILU.h b/TargetLibraries/Generic/inc/kernel/SILU.h index f8cf1fa360..5d81f2ab4b 100644 --- a/TargetLibraries/Generic/inc/kernel/SILU.h +++ b/TargetLibraries/Generic/inc/kernel/SILU.h @@ -9,7 +9,7 @@ #include "DeeployBasicMath.h" -static PI_L1 int32_t SILU_lut_s8_s32[256] = { +static int32_t SILU_lut_s8_s32[256] = { -176, -186, -196, -207, -219, -231, -244, -257, -272, -287, -303, -320, -337, -356, -375, -396, -418, -441, -465, -491, -517, -546, -575, -607, -639, -674, -711, -749, -789, -832, -876, -923, -972, -1024, -1078, -1136, -1196, -1259, -1325, -1394, -1467, -1544, -1624, -1708, -1796, -1888, -1985, -2087, diff --git a/TargetLibraries/Generic/inc/macros.h b/TargetLibraries/Generic/inc/macros.h index a488043b12..d97cfecb7c 100644 --- a/TargetLibraries/Generic/inc/macros.h +++ b/TargetLibraries/Generic/inc/macros.h @@ -7,13 +7,6 @@ #ifndef __DEEPLOY_BASIC_MATH_MACROS_HEADER_ #define __DEEPLOY_BASIC_MATH_MACROS_HEADER_ -// Provide target-neutral defaults for section placement macros used by -// PULP-targeted code. For Generic builds these should be empty so that -// declarations like `PI_L1 int32_t foo[...]` compile as plain globals. -#ifndef PI_L1 -#define PI_L1 -#endif - #define MAX(a, b) \ ({ \ __typeof__(a) _a = (a); \ diff --git a/TargetLibraries/Generic/src/RQ_SILU.c b/TargetLibraries/Generic/src/RQSILU_s8.c similarity index 100% rename from TargetLibraries/Generic/src/RQ_SILU.c rename to TargetLibraries/Generic/src/RQSILU_s8.c diff --git a/TargetLibraries/Generic/src/SILU_s8.c b/TargetLibraries/Generic/src/SILU_s8.c index 6d99bfff29..c33ce26da9 100644 --- a/TargetLibraries/Generic/src/SILU_s8.c +++ b/TargetLibraries/Generic/src/SILU_s8.c @@ -5,38 +5,11 @@ */ #include "DeeployBasicMath.h" -// #include "perf_utils.h" void SILU_s8_s32(int8_t *data_in, int32_t *data_out, int32_t dataSize, int32_t input_offset) { - - // int8_t core_id = pi_core_id(); - // int8_t log2Core = LOG2(NUM_CORES); - - //RW: Performance monitoring is currently disabled - // perf_stats_t perf_start, perf_end, perf_total; - - // Initialize and start performance counters (only core 0) - // if (core_id == 0) { - // perf_bench_init(); - // perf_bench_start(); - //perf_bench_read(&perf_start); - // } - for (int i = 0; i < dataSize; i++) { int32_t x = data_in[i] + 128 - input_offset; data_out[i] = SILU_lut_s8_s32[x]; } - - // RW: Stop performance counters and print results (only core 0) - // if (core_id == 0) { - // perf_bench_stop(); - // perf_bench_read(&perf_end); - // perf_bench_diff(&perf_total, &perf_end, &perf_start); - - // char label[100]; - // snprintf(label, sizeof(label), "GEMM M=%u N=%u O=%u transA=%u transB=%u", - // M, N, O, transA, transB); - // perf_bench_print(label, &perf_total); - // } } \ No newline at end of file diff --git a/TargetLibraries/PULPOpen/inc/DeeployPULPMath.h b/TargetLibraries/PULPOpen/inc/DeeployPULPMath.h index f6e8308c97..6d1a9830f0 100644 --- a/TargetLibraries/PULPOpen/inc/DeeployPULPMath.h +++ b/TargetLibraries/PULPOpen/inc/DeeployPULPMath.h @@ -34,7 +34,8 @@ #include "kernel/UniformRequantShift.h" #include "kernel/gemv.h" #include "kernel/iRMSnorm.h" +#include "kernel/silu.h" #define LOG2(x) (__builtin_pulp_fl1(x)) -#endif // __DEEPLOY_MATH_HEADER_ +#endif // __DEEPLOY_MATH_HEADER_ \ No newline at end of file diff --git a/TargetLibraries/PULPOpen/inc/kernel/RQSILU.h b/TargetLibraries/PULPOpen/inc/kernel/RQSILU.h new file mode 100644 index 0000000000..eb710cb2fc --- /dev/null +++ b/TargetLibraries/PULPOpen/inc/kernel/RQSILU.h @@ -0,0 +1,34 @@ +/* + * SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_RQSILU_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_RQSILU_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +static PI_L1 int8_t RQSILU_lut_s8_s8[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, + -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -2, -2, -2, -1, -1, 0, + 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 58, 59, 60, 61, 62, + 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, + 79, 80, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +}; + +void RQSILU_s8_s8(int8_t *data_in, int8_t *data_out, int32_t dataSize, + int32_t input_offset); + +#endif //__DEEPLOY_BASIC_MATH_RQSILU_KERNEL_HEADER_ \ No newline at end of file diff --git a/TargetLibraries/PULPOpen/inc/kernel/SILU.h b/TargetLibraries/PULPOpen/inc/kernel/SILU.h new file mode 100644 index 0000000000..8a5d45e0b6 --- /dev/null +++ b/TargetLibraries/PULPOpen/inc/kernel/SILU.h @@ -0,0 +1,34 @@ +/* + * SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef __DEEPLOY_BASIC_MATH_SILU_KERNEL_HEADER_ +#define __DEEPLOY_BASIC_MATH_SILU_KERNEL_HEADER_ + +#include "DeeployBasicMath.h" + +static PI_L1 int32_t SILU_lut_s8_s32[256] = { + -176, -186, -196, -207, -219, -231, -244, -257, -272, -287, -303, -320, -337, -356, -375, -396, + -418, -441, -465, -491, -517, -546, -575, -607, -639, -674, -711, -749, -789, -832, -876, -923, + -972, -1024, -1078, -1136, -1196, -1259, -1325, -1394, -1467, -1544, -1624, -1708, -1796, -1888, -1985, -2087, + -2193, -2304, -2421, -2543, -2670, -2803, -2943, -3088, -3240, -3399, -3564, -3737, -3917, -4105, -4300, -4503, + -4715, -4935, -5163, -5401, -5647, -5902, -6167, -6440, -6724, -7016, -7318, -7630, -7950, -8280, -8620, -8968, + -9324, -9689, -10062, -10442, -10829, -11222, -11620, -12023, -12429, -12837, -13245, -13654, -14060, -14462, -14858, -15246, + -15624, -15989, -16339, -16670, -16979, -17264, -17520, -17745, -17933, -18082, -18186, -18241, -18244, -18188, -18070, -17884, + -17625, -17290, -16871, -16366, -15769, -15075, -14281, -13380, -12371, -11249, -10011, -8653, -7173, -5570, -3840, -1984, + 0, 2112, 4352, 6718, 9211, 11827, 14565, 17423, 20397, 23484, 26679, 29981, 33383, 36882, 40473, 44150, + 47911, 51748, 55658, 59636, 63676, 67775, 71926, 76126, 80371, 84655, 88976, 93328, 97709, 102114, 106541, 110987, + 115448, 119922, 124406, 128898, 133396, 137898, 142403, 146907, 151411, 155913, 160412, 164906, 169395, 173878, 178354, 182823, + 187284, 191736, 196180, 200616, 205042, 209458, 213866, 218264, 222652, 227032, 231401, 235762, 240113, 244455, 248789, 253113, + 257429, 261737, 266036, 270327, 274611, 278887, 283156, 287417, 291672, 295920, 300161, 304397, 308626, 312849, 317067, 321280, + 325487, 329689, 333887, 338080, 342268, 346452, 350632, 354808, 358981, 363150, 367315, 371477, 375636, 379792, 383946, 388096, + 392244, 396389, 400532, 404672, 408811, 412947, 417081, 421214, 425345, 429473, 433601, 437726, 441851, 445973, 450095, 454215, + 458334, 462452, 466569, 470684, 474799, 478912, 483025, 487137, 491248, 495359, 499468, 503577, 507685, 511793, 515900, 520006, +}; + +void SILU_s8_s32(int8_t *data_in, int32_t *data_out, int32_t dataSize, + int32_t input_offset); + +#endif //__DEEPLOY_BASIC_MATH_SILU_KERNEL_HEADER_ \ No newline at end of file diff --git a/TargetLibraries/PULPOpen/src/RQSILU_s8.c b/TargetLibraries/PULPOpen/src/RQSILU_s8.c new file mode 100644 index 0000000000..c26cdbd4b9 --- /dev/null +++ b/TargetLibraries/PULPOpen/src/RQSILU_s8.c @@ -0,0 +1,15 @@ +/* + * SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "DeeployBasicMath.h" + +void RQSILU_s8_s8(int8_t *data_in, int8_t *data_out, int32_t dataSize, + int32_t input_offset) { + for (int i = 0; i < dataSize; i++) { + int32_t x = data_in[i] + 128 - input_offset; + data_out[i] = RQSILU_lut_s8_s8[x]; + } +} diff --git a/TargetLibraries/PULPOpen/src/SILU_s8.c b/TargetLibraries/PULPOpen/src/SILU_s8.c new file mode 100644 index 0000000000..c33ce26da9 --- /dev/null +++ b/TargetLibraries/PULPOpen/src/SILU_s8.c @@ -0,0 +1,15 @@ +/* + * SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "DeeployBasicMath.h" + +void SILU_s8_s32(int8_t *data_in, int32_t *data_out, int32_t dataSize, + int32_t input_offset) { + for (int i = 0; i < dataSize; i++) { + int32_t x = data_in[i] + 128 - input_offset; + data_out[i] = SILU_lut_s8_s32[x]; + } +} \ No newline at end of file From 771a86c1fc1634bfdc33a489b030644c48548dec Mon Sep 17 00:00:00 2001 From: FilippoCordella Date: Mon, 23 Feb 2026 20:46:42 +0000 Subject: [PATCH 03/11] Fix silu.h typo --- TargetLibraries/PULPOpen/inc/DeeployPULPMath.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/TargetLibraries/PULPOpen/inc/DeeployPULPMath.h b/TargetLibraries/PULPOpen/inc/DeeployPULPMath.h index 6d1a9830f0..6ad9fdd05d 100644 --- a/TargetLibraries/PULPOpen/inc/DeeployPULPMath.h +++ b/TargetLibraries/PULPOpen/inc/DeeployPULPMath.h @@ -34,7 +34,8 @@ #include "kernel/UniformRequantShift.h" #include "kernel/gemv.h" #include "kernel/iRMSnorm.h" -#include "kernel/silu.h" +#include "kernel/SILU.h" +#include "kernel/RQSILU.h" #define LOG2(x) (__builtin_pulp_fl1(x)) From eb2ae87715449f03be5cb84353ddaa6a6718ca17 Mon Sep 17 00:00:00 2001 From: FilippoCordella Date: Mon, 23 Feb 2026 21:59:18 +0000 Subject: [PATCH 04/11] Pre-commit run --- Deeploy/Targets/GAP9/Bindings.py | 14 +++--- Deeploy/Targets/GAP9/Platform.py | 21 ++++----- Deeploy/Targets/GAP9/Tiler.py | 8 ++-- Deeploy/Targets/Generic/Bindings.py | 16 +++---- Deeploy/Targets/Generic/Layers.py | 5 +- Deeploy/Targets/Generic/Parsers.py | 5 +- Deeploy/Targets/Generic/Platform.py | 14 +++--- .../TileConstraints/SILUTileConstraint.py | 3 +- Deeploy/Targets/Generic/TypeCheckers.py | 3 +- Deeploy/Targets/PULPOpen/Bindings.py | 12 ++--- Deeploy/Targets/PULPOpen/Platform.py | 16 ++++++- Deeploy/Targets/PULPOpen/Tiler.py | 6 +-- .../Generic/inc/DeeployBasicMath.h | 2 +- TargetLibraries/Generic/inc/kernel/RQSILU.h | 34 +++++++------- TargetLibraries/Generic/inc/kernel/SILU.h | 47 ++++++++++++------- TargetLibraries/Generic/src/SILU_s8.c | 8 ++-- .../PULPOpen/inc/DeeployPULPMath.h | 4 +- TargetLibraries/PULPOpen/inc/kernel/RQSILU.h | 34 +++++++------- TargetLibraries/PULPOpen/inc/kernel/SILU.h | 47 ++++++++++++------- TargetLibraries/PULPOpen/src/SILU_s8.c | 8 ++-- 20 files changed, 176 insertions(+), 131 deletions(-) diff --git a/Deeploy/Targets/GAP9/Bindings.py b/Deeploy/Targets/GAP9/Bindings.py index 1a50fed73a..d90b56b6e6 100644 --- a/Deeploy/Targets/GAP9/Bindings.py +++ b/Deeploy/Targets/GAP9/Bindings.py @@ -22,12 +22,12 @@ from Deeploy.Targets.GAP9.DMA.MchanDma import GAP9MchanDma # Import templates from PULPOpen and Generic from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, DequantTemplate, FloatReduceMeanTemplate, \ - FloatReduceSumTemplate, GatherTemplate, QuantTemplate, RQSiGELUTemplate, SliceTemplate, iHardswishTemplate, \ - SILUTemplate, RQSILUTemplate + FloatReduceSumTemplate, GatherTemplate, QuantTemplate, RQSiGELUTemplate, RQSILUTemplate, SILUTemplate, \ + SliceTemplate, iHardswishTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DequantChecker, \ GatherChecker, GELUChecker, GEMMChecker, HardswishChecker, LayerNormChecker, MatMulChecker, MulChecker, \ QuantChecker, ReduceMeanChecker, ReluChecker, ReshapeChecker, RQAddChecker, RQHardswishChecker, SGDChecker, \ - SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker, SILUChecker + SILUChecker, SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker from Deeploy.Targets.PULPOpen.Bindings import ForkClosure, L3MemoryAwareFunctionCallClosure, \ MemoryAwareForkTransformer, MemoryAwareFunctionCallClosure, TilingCallClosure from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass @@ -382,13 +382,13 @@ FloatGELUTemplate.referenceTemplate, GAP9Transformer) GAP9SILUBindings = [ - NodeBinding(SILUChecker([PointerClass(int8_t)], [PointerClass(int32_t)]), - SILUTemplate.referenceTemplate, GAP9Transformer) + NodeBinding(SILUChecker([PointerClass(int8_t)], [PointerClass(int32_t)]), SILUTemplate.referenceTemplate, + GAP9Transformer) ] GAP9RQSILUBindings = [ - NodeBinding(SILUChecker([PointerClass(int8_t)], [PointerClass(int8_t)]), - RQSILUTemplate.referenceTemplate, GAP9Transformer) + NodeBinding(SILUChecker([PointerClass(int8_t)], [PointerClass(int8_t)]), RQSILUTemplate.referenceTemplate, + GAP9Transformer) ] GAP9GatherBindings = [ diff --git a/Deeploy/Targets/GAP9/Platform.py b/Deeploy/Targets/GAP9/Platform.py index 17392fe06a..64214d8655 100644 --- a/Deeploy/Targets/GAP9/Platform.py +++ b/Deeploy/Targets/GAP9/Platform.py @@ -18,25 +18,24 @@ GAP9LayernormTilingReadyBindings, GAP9MatMulTilingReadyBindings, GAP9MaxPool2DTilingReadyBindings, \ GAP9MulTilingReadyBindings, GAP9ReduceSumTilingReadyBindings, GAP9ReluTilingReadyBindings, \ GAP9RQAddTilingReadyBindings, GAP9RQSConv2DTilingReadyBindings, GAP9RQSDWConv2DTilingReadyBindings, \ - GAP9RQSGEMMTilingReadyBindings, GAP9RQSiHardswishTilingReadyBindings, GAP9RQSMatrixVecTilingReadyBindings, \ - GAP9RQSTallGEMMTilingReadyBindings, GAP9RQSTilingReadyBindings, GAP9SGDTilingReadyBindings, \ - GAP9SoftmaxCrossEntropyGradTilingReadyBindings, GAP9SoftmaxCrossEntropyTilingReadyBindings, \ - GAP9SoftmaxGradTilingReadyBindings, GAP9SoftmaxTilingReadyBindings, GAP9TransposeTilingReadyBindings, \ - GAP9UniformRQSTilingReadyBindings, GAP9SILUTilingReadyBindings, GAP9RQSILUTilingReadyBindings + GAP9RQSGEMMTilingReadyBindings, GAP9RQSiHardswishTilingReadyBindings, GAP9RQSILUTilingReadyBindings, \ + GAP9RQSMatrixVecTilingReadyBindings, GAP9RQSTallGEMMTilingReadyBindings, GAP9RQSTilingReadyBindings, \ + GAP9SGDTilingReadyBindings, GAP9SILUTilingReadyBindings, GAP9SoftmaxCrossEntropyGradTilingReadyBindings, \ + GAP9SoftmaxCrossEntropyTilingReadyBindings, GAP9SoftmaxGradTilingReadyBindings, GAP9SoftmaxTilingReadyBindings, \ + GAP9TransposeTilingReadyBindings, GAP9UniformRQSTilingReadyBindings from Deeploy.Targets.Generic.Bindings import BasicGEMMBindings, BasicPad1DBindings, BasicPad2DBindings, \ BasicRQIntegerDivBinding from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, ConvLayer, GatherLayer, GELULayer, GEMMLayer, \ LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, QuantLayer, ReduceMeanLayer, ReduceSumLayer, \ - ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, RQSiHardswishLayer, SGDLayer, \ - SliceLayer, SoftmaxCrossEntropyLossGradLayer, SoftmaxCrossEntropyLossLayer, SoftmaxGradLayer, SoftmaxLayer, \ - TransposeLayer, iHardswishLayer, iRMSNormLayer, SILULayer, RQSILULayer + ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, RQSiHardswishLayer, RQSILULayer, \ + SGDLayer, SILULayer, SliceLayer, SoftmaxCrossEntropyLossGradLayer, SoftmaxCrossEntropyLossLayer, SoftmaxGradLayer, \ + SoftmaxLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, DequantParser, FlattenParser, GatherParser, \ GELUParser, GEMMParser, LayerNormParser, MatMulParser, MaxPool2DParser, MulParser, Pad1DParser, Pad2DParser, \ QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQAddParser, \ - RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, SGDParser, SliceParser, \ + RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, SGDParser, SILUParser, SliceParser, \ SoftmaxCrossEntropyLossGradParser, SoftmaxCrossEntropyLossParser, SoftmaxGradParser, SoftmaxParser, \ - TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, iRMSNormParser, iSoftmaxParser, \ - SILUParser + TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, iRMSNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate from Deeploy.Targets.PULPOpen.Bindings import BasicDequantBindings, BasicQuantBindings, PULPDMASliceBindings, \ PULPDWConv1DBinding, PULPReduceMeanBindings, PULPRQSConv1DBindings, PULPSliceBindings diff --git a/Deeploy/Targets/GAP9/Tiler.py b/Deeploy/Targets/GAP9/Tiler.py index 1f3004d889..6c54da8854 100644 --- a/Deeploy/Targets/GAP9/Tiler.py +++ b/Deeploy/Targets/GAP9/Tiler.py @@ -15,9 +15,9 @@ GAP9iHardswishBindings, GAP9iRMSNormBindings, GAP9iRQSGELUBindings, GAP9LayernormBinding, GAP9MatMulBindings, \ GAP9MaxPool2DBindings, GAP9MulBindings, GAP9ReduceSumBindings, GAP9ReluBinding, GAP9ReshapeBindings, \ GAP9RQAddBindings, GAP9RQSBindings, GAP9RQSConv2DBindings, GAP9RQSDWConv2DBindings, GAP9RQSGEMMBindings, \ - GAP9RQSiHardswishBindings, GAP9RQSMatrixVecBindings, GAP9RQSTallGEMMBindings, GAP9SGDBindings, \ - GAP9SoftmaxBindings, GAP9SoftmaxCrossEntropyLossBindings, GAP9SoftmaxCrossEntropyLossGradBindings, \ - GAP9SoftmaxGradBindings, GAP9TransposeBindings, GAP9UniformRQSBindings, GAP9SILUBindings, GAP9RQSILUBindings + GAP9RQSiHardswishBindings, GAP9RQSILUBindings, GAP9RQSMatrixVecBindings, GAP9RQSTallGEMMBindings, GAP9SGDBindings, \ + GAP9SILUBindings, GAP9SoftmaxBindings, GAP9SoftmaxCrossEntropyLossBindings, \ + GAP9SoftmaxCrossEntropyLossGradBindings, GAP9SoftmaxGradBindings, GAP9TransposeBindings, GAP9UniformRQSBindings from Deeploy.Targets.Generic.TileConstraints.AddTileConstraint import AddTileConstraint from Deeploy.Targets.Generic.TileConstraints.ConcatTileConstraint import ConcatTileConstraint from Deeploy.Targets.Generic.TileConstraints.iHardswishTileConstraint import iHardswishTileConstraint @@ -26,10 +26,10 @@ from Deeploy.Targets.Generic.TileConstraints.NOPTileConstraint import NOPTileConstraint from Deeploy.Targets.Generic.TileConstraints.RQSiGELUTileConstraint import RQSiGELUTileConstraint from Deeploy.Targets.Generic.TileConstraints.RQSiHardswishTileConstraint import RQSiHardswishTileConstraint +from Deeploy.Targets.Generic.TileConstraints.SILUTileConstraint import SILUTileConstraint from Deeploy.Targets.Generic.TileConstraints.TransposeTileConstraint import TransposeTileConstraint from Deeploy.Targets.Generic.TileConstraints.UnaryTileConstraint import UnaryTileConstraint from Deeploy.Targets.Generic.TileConstraints.UntiledTileConstraint import UntiledTileConstraint -from Deeploy.Targets.Generic.TileConstraints.SILUTileConstraint import SILUTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.ConvTileConstraint import Conv2DTileConstraint, RQConv2DTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.DWConvTileConstraint import DWConv2DTileConstraint, \ RQDWConv2DTileConstraint diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 0c20a34b6f..43de2667b8 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -18,13 +18,13 @@ FloatPowTemplate, FloatReduceMeanTemplate, FloatReluTemplate, FloatSoftmaxTemplate, FloatSqrtTemplate, \ GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, \ MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, \ - RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, TransposeTemplate, \ - iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate, SILUTemplate, RQSILUTemplate + RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, RQSILUTemplate, SILUTemplate, \ + SliceTemplate, TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, BatchNormChecker, ConcatChecker, ConvChecker, \ DebugPrintChecker, DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, \ LayerNormChecker, MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, \ - ReduceSumChecker, ReluChecker, RequantShiftChecker, ReshapeChecker, RQIntegerDivChecker, SliceChecker, \ - SoftmaxChecker, TransposeChecker, SILUChecker + ReduceSumChecker, ReluChecker, RequantShiftChecker, ReshapeChecker, RQIntegerDivChecker, SILUChecker, \ + SliceChecker, SoftmaxChecker, TransposeChecker BasicTransformer = CodeTransformation([ArgumentStructGeneration(), MemoryManagementGeneration(), FutureGeneration()]) @@ -331,9 +331,9 @@ BasicSILUBindings = [ NodeBinding(SILUChecker([PointerClass(int8_t)], [PointerClass(int32_t)]), SILUTemplate.referenceTemplate, BasicTransformer) -] +] BasicRQSILUBindings = [ - NodeBinding(SILUChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int8_t)]), RQSILUTemplate.referenceTemplate, - BasicTransformer) -] \ No newline at end of file + NodeBinding(SILUChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int8_t)]), + RQSILUTemplate.referenceTemplate, BasicTransformer) +] diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index 4a9347202d..0550763af8 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -710,6 +710,7 @@ def computeOps(self): return numPx * opsPerPx + class SILULayer(ONNXLayer): def __init__(self, maps: List[NodeMapper]): @@ -720,9 +721,9 @@ def computeOps(self): # Arithmetic operations per element = 0 (only a memory load + store). # To count memory accesses instead, return: size * 2 return 0 - + class RQSILULayer(SILULayer): def __init__(self, maps: List[NodeMapper]): - super().__init__(maps) \ No newline at end of file + super().__init__(maps) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 55af3ba394..8b8d466a0c 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -2887,6 +2887,7 @@ def parseNodeCtxt(self, return ctxt, True + class SILUParser(NodeParser): def __init__(self): @@ -2895,7 +2896,7 @@ def __init__(self): def parseNode(self, node: gs.Node) -> bool: ret = all([len(node.inputs) >= 1, len(node.outputs) == 1]) - + return ret def parseNodeCtxt(self, @@ -2909,4 +2910,4 @@ def parseNodeCtxt(self, self.operatorRepresentation['data_out'] = data_out.name self.operatorRepresentation['size'] = np.prod(data_in.shape) - return ctxt, True \ No newline at end of file + return ctxt, True diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index fc91d1e20d..a4ee4456aa 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -13,20 +13,20 @@ BasicLayerNormBindings, BasicMatMulBindings, BasicMaxPool1DBindings, BasicMaxPool2DBindings, BasicMulBindings, \ BasicPad1DBindings, BasicPad2DBindings, BasicPowBindings, BasicQuantBindings, BasicReduceMeanBindings, \ BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, \ - BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, BasicSqrtBindings, BasicTransposeBindings, \ - BasicSILUBindings, BasicRQSILUBindings, DummyBinding + BasicRQSGELUBinding, BasicRQSILUBindings, BasicSILUBindings, BasicSliceBindings, BasicSoftmaxBindings, \ + BasicSqrtBindings, BasicTransposeBindings, DummyBinding from Deeploy.Targets.Generic.Layers import AddLayer, BatchNormalizationLayer, ConcatLayer, ConvLayer, \ ConvTransposeLayer, DebugPrintLayer, DequantLayer, DivLayer, GatherLayer, GELULayer, GEMMLayer, ITAMaxLayer, \ LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, PowLayer, QuantLayer, ReduceMeanLayer, \ - ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, \ - SoftmaxLayer, SqrtLayer, TransposeLayer, SILULayer, RQSILULayer + ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, RQSILULayer, \ + SILULayer, SliceLayer, SoftmaxLayer, SqrtLayer, TransposeLayer from Deeploy.Targets.Generic.Parsers import AddParser, BatchNormParser, ConcatParser, ConvTranspose1DParser, \ DebugParser, DequantParser, DivParser, DummyParser, FlattenParser, GatherParser, GELUParser, GenericConv1DParser, \ GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, GenericGEMMParser, GenericMaxPool2DParser, \ IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, MatMulParser, MaxPool1DParser, MulParser, \ Pad1DParser, Pad2DParser, PowParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, \ - RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, SoftmaxParser, SqrtParser, \ - TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser, SILUParser + RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SILUParser, SliceParser, SoftmaxParser, \ + SqrtParser, TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, QuantPatternPass, \ @@ -123,7 +123,7 @@ 'ConvTranspose': ConvTransposeLayer([ConvTransposeMapper]), 'SILU': SILULayer([SILUMapper]), 'RQSILU': RQSILULayer([RQSILUMapper]), - + # # For example, you can use the DummpyMapper, in case you want to test # # deployment or optimizations with GlobalAveragePool nodes but did not yet # # implement the corresponding kernel diff --git a/Deeploy/Targets/Generic/TileConstraints/SILUTileConstraint.py b/Deeploy/Targets/Generic/TileConstraints/SILUTileConstraint.py index c22acec5e7..7d5cb4af15 100644 --- a/Deeploy/Targets/Generic/TileConstraints/SILUTileConstraint.py +++ b/Deeploy/Targets/Generic/TileConstraints/SILUTileConstraint.py @@ -4,5 +4,6 @@ from Deeploy.Targets.Generic.TileConstraints.UnaryTileConstraint import UnaryTileConstraint + class SILUTileConstraint(UnaryTileConstraint): - pass \ No newline at end of file + pass diff --git a/Deeploy/Targets/Generic/TypeCheckers.py b/Deeploy/Targets/Generic/TypeCheckers.py index a9e41b4137..8a0d4ea19e 100644 --- a/Deeploy/Targets/Generic/TypeCheckers.py +++ b/Deeploy/Targets/Generic/TypeCheckers.py @@ -611,6 +611,7 @@ def _inferSignedness(self, inputs: List[VariableBuffer], operatorRepresentation: OperatorRepresentation) -> List[bool]: return [True] + class SILUChecker(SignPropTypeChecker): #same as GELU due to the same input-output quantization parameters def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): @@ -625,4 +626,4 @@ def _inferSignedness(self, inputs: List[VariableBuffer], if inputs[0]._signed: return [True] else: - return [False] \ No newline at end of file + return [False] diff --git a/Deeploy/Targets/PULPOpen/Bindings.py b/Deeploy/Targets/PULPOpen/Bindings.py index 24a4fa2ee0..ae877af781 100644 --- a/Deeploy/Targets/PULPOpen/Bindings.py +++ b/Deeploy/Targets/PULPOpen/Bindings.py @@ -15,11 +15,11 @@ from Deeploy.FutureExtension.Bindings.AutoFutureBinding import AutoFutureBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, DequantTemplate, FloatReduceSumTemplate, \ - GatherTemplate, QuantTemplate, RQSiGELUTemplate, SliceTemplate, iHardswishTemplate, SILUTemplate, RQSILUTemplate + GatherTemplate, QuantTemplate, RQSiGELUTemplate, RQSILUTemplate, SILUTemplate, SliceTemplate, iHardswishTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DequantChecker, \ GatherChecker, GELUChecker, GEMMChecker, HardswishChecker, LayerNormChecker, MatMulChecker, MulChecker, \ QuantChecker, ReduceMeanChecker, ReluChecker, ReshapeChecker, RQAddChecker, RQHardswishChecker, SGDChecker, \ - SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker, SILUChecker + SILUChecker, SliceChecker, SoftmaxChecker, SoftmaxCrossEntropyLossChecker, TransposeChecker from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling @@ -447,13 +447,13 @@ ] PULPSILUBindings = [ - NodeBinding(SILUChecker([PointerClass(int8_t), PointerClass(int32_t)], [PointerClass(int32_t)]), SILUTemplate.referenceTemplate, - ForkTransformer) #See with ClusterTransformer also + NodeBinding(SILUChecker([PointerClass(int8_t), PointerClass(int32_t)], [PointerClass(int32_t)]), + SILUTemplate.referenceTemplate, ForkTransformer) #See with ClusterTransformer also ] PULPRQSILUBindings = [ - NodeBinding(SILUChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int8_t)]), RQSILUTemplate.referenceTemplate, - ForkTransformer) + NodeBinding(SILUChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int8_t)]), + RQSILUTemplate.referenceTemplate, ForkTransformer) ] BasicQuantBindings = [ diff --git a/Deeploy/Targets/PULPOpen/Platform.py b/Deeploy/Targets/PULPOpen/Platform.py index 0f905c71ca..10f5817846 100644 --- a/Deeploy/Targets/PULPOpen/Platform.py +++ b/Deeploy/Targets/PULPOpen/Platform.py @@ -16,8 +16,8 @@ from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, ConvLayer, GatherLayer, GELUGradLayer, GELULayer, \ GEMMLayer, LayerNormGradLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, QuantLayer, \ ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, \ - RQSiHardswishLayer, SGDLayer, SliceLayer, SoftmaxCrossEntropyLossGradLayer, SoftmaxCrossEntropyLossLayer, \ - SoftmaxGradLayer, SoftmaxLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer, SILULayer, RQSILULayer + RQSiHardswishLayer, RQSILULayer, SGDLayer, SILULayer, SliceLayer, SoftmaxCrossEntropyLossGradLayer, \ + SoftmaxCrossEntropyLossLayer, SoftmaxGradLayer, SoftmaxLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, DequantParser, FlattenParser, GatherParser, \ GELUGradParser, GELUParser, GEMMParser, LayerNormGradParser, LayerNormParser, MatMulParser, MaxPool1DParser, \ MaxPool2DParser, MulParser, Pad1DParser, Pad2DParser, QuantParser, ReduceSumParser, ReluParser, \ @@ -41,6 +41,7 @@ PULPFPGELUGradTilingReadyBindings, PULPFPGELUTilingReadyBindings, PULPFPGEMMTilingReadyBindings, \ PULPGatherTilingReadyBindings, PULPiHardswishTilingReadyBindings, PULPiRMSNormTilingReadyBindings, \ PULPiRQSGELUTilingReadyBindings, PULPLayernormGradTilingReadyBindings, PULPLayernormTilingReadyBindings, \ +<<<<<<< HEAD PULPMatMulTilingReadyBindings, PULPMaxPool1DTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, \ PULPMulTilingReadyBindings, PULPReduceMeanTilingReadyBindings, PULPReduceSumTilingReadyBindings, \ PULPReluTilingReadyBindings, PULPRQAddTilingReadyBindings, PULPRQSConv1DTilingReadyBindings, \ @@ -50,6 +51,17 @@ PULPSoftmaxCrossEntropyGradTilingReadyBindings, PULPSoftmaxCrossEntropyTilingReadyBindings, \ PULPSoftmaxGradTilingReadyBindings, PULPSoftmaxTilingReadyBindings, PULPTransposeTilingReadyBindings, \ PULPUniformRQSTilingReadyBindings, PULPSILUTilingReadyBindings, PULPRQSILUTilingReadyBindings +======= + PULPMatMulTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, PULPMulTilingReadyBindings, \ + PULPReduceMeanTilingReadyBindings, PULPReduceSumTilingReadyBindings, PULPReluTilingReadyBindings, \ + PULPRQAddTilingReadyBindings, PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, \ + PULPRQSGEMMTilingReadyBindings, PULPRQSiHardswishTilingReadyBindings, PULPRQSILUTilingReadyBindings, \ + PULPRQSMatrixVecTilingReadyBindings, PULPRQSTallGEMMTilingReadyBindings, PULPRQSTilingReadyBindings, \ + PULPSGDTilingReadyBindings, PULPSILUTilingReadyBindings, PULPSliceTilingReadyBindings, \ + PULPSoftmaxCrossEntropyGradTilingReadyBindings, PULPSoftmaxCrossEntropyTilingReadyBindings, \ + PULPSoftmaxGradTilingReadyBindings, PULPSoftmaxTilingReadyBindings, PULPTransposeTilingReadyBindings, \ + PULPUniformRQSTilingReadyBindings +>>>>>>> 8e517d6 (Pre-commit run) from Deeploy.Targets.PULPOpen.TopologyOptimizationPasses.Passes import PULPAddRequantMergePass, \ PULPConvRequantMergePass, PULPGEMMRequantMergePass, PULPMatMulRequantMergePass diff --git a/Deeploy/Targets/PULPOpen/Tiler.py b/Deeploy/Targets/PULPOpen/Tiler.py index 0fd3413fda..c898ace08a 100644 --- a/Deeploy/Targets/PULPOpen/Tiler.py +++ b/Deeploy/Targets/PULPOpen/Tiler.py @@ -12,8 +12,8 @@ from Deeploy.Targets.Generic.TileConstraints.NOPTileConstraint import NOPTileConstraint from Deeploy.Targets.Generic.TileConstraints.RQSiGELUTileConstraint import RQSiGELUTileConstraint from Deeploy.Targets.Generic.TileConstraints.RQSiHardswishTileConstraint import RQSiHardswishTileConstraint -from Deeploy.Targets.Generic.TileConstraints.TransposeTileConstraint import TransposeTileConstraint from Deeploy.Targets.Generic.TileConstraints.SILUTileConstraint import SILUTileConstraint +from Deeploy.Targets.Generic.TileConstraints.TransposeTileConstraint import TransposeTileConstraint from Deeploy.Targets.Generic.TileConstraints.UnaryTileConstraint import UnaryTileConstraint from Deeploy.Targets.PULPOpen.Bindings import PULPAddBindings, PULPConcatBindings, PULPFloatConv2DBindings, \ PULPFloatDWConv2DBindings, PULPFloatGELUBinding, PULPFloatGELUGradBinding, PULPFloatGEMMBindings, \ @@ -139,10 +139,10 @@ tileConstraint = GeluGradTileConstraint()) PULPSILUTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPSILUBindings, - tileConstraint = SILUTileConstraint()) + tileConstraint = SILUTileConstraint()) PULPRQSILUTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPRQSILUBindings, - tileConstraint = SILUTileConstraint()) + tileConstraint = SILUTileConstraint()) PULPGatherTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPGatherBindings, tileConstraint = GatherTileConstraint()) diff --git a/TargetLibraries/Generic/inc/DeeployBasicMath.h b/TargetLibraries/Generic/inc/DeeployBasicMath.h index bba6a556ac..e57a15f3db 100644 --- a/TargetLibraries/Generic/inc/DeeployBasicMath.h +++ b/TargetLibraries/Generic/inc/DeeployBasicMath.h @@ -48,10 +48,10 @@ #include "kernel/RQDiv.h" #include "kernel/RQGELU.h" #include "kernel/RQHardswish.h" +#include "kernel/RQSILU.h" #include "kernel/Relu.h" #include "kernel/RequantShift.h" #include "kernel/SILU.h" -#include "kernel/RQSILU.h" #include "kernel/Softmax.h" #include "kernel/Sqrt.h" diff --git a/TargetLibraries/Generic/inc/kernel/RQSILU.h b/TargetLibraries/Generic/inc/kernel/RQSILU.h index 031d59d1b3..9e1956d9d2 100644 --- a/TargetLibraries/Generic/inc/kernel/RQSILU.h +++ b/TargetLibraries/Generic/inc/kernel/RQSILU.h @@ -10,22 +10,24 @@ #include "DeeployBasicMath.h" static int8_t RQSILU_lut_s8_s8[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -4, -4, -4, - -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, - -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -2, -2, -2, -1, -1, 0, - 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, - 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 58, 59, 60, 61, 62, - 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, - 79, 80, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, + -2, -2, -2, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, + -3, -3, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, + -3, -3, -2, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 4, + 4, 5, 6, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65, + 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, + 127, }; void RQSILU_s8_s8(int8_t *data_in, int8_t *data_out, int32_t dataSize, diff --git a/TargetLibraries/Generic/inc/kernel/SILU.h b/TargetLibraries/Generic/inc/kernel/SILU.h index 5d81f2ab4b..a10af2dd74 100644 --- a/TargetLibraries/Generic/inc/kernel/SILU.h +++ b/TargetLibraries/Generic/inc/kernel/SILU.h @@ -10,25 +10,38 @@ #include "DeeployBasicMath.h" static int32_t SILU_lut_s8_s32[256] = { - -176, -186, -196, -207, -219, -231, -244, -257, -272, -287, -303, -320, -337, -356, -375, -396, - -418, -441, -465, -491, -517, -546, -575, -607, -639, -674, -711, -749, -789, -832, -876, -923, - -972, -1024, -1078, -1136, -1196, -1259, -1325, -1394, -1467, -1544, -1624, -1708, -1796, -1888, -1985, -2087, - -2193, -2304, -2421, -2543, -2670, -2803, -2943, -3088, -3240, -3399, -3564, -3737, -3917, -4105, -4300, -4503, - -4715, -4935, -5163, -5401, -5647, -5902, -6167, -6440, -6724, -7016, -7318, -7630, -7950, -8280, -8620, -8968, - -9324, -9689, -10062, -10442, -10829, -11222, -11620, -12023, -12429, -12837, -13245, -13654, -14060, -14462, -14858, -15246, - -15624, -15989, -16339, -16670, -16979, -17264, -17520, -17745, -17933, -18082, -18186, -18241, -18244, -18188, -18070, -17884, - -17625, -17290, -16871, -16366, -15769, -15075, -14281, -13380, -12371, -11249, -10011, -8653, -7173, -5570, -3840, -1984, - 0, 2112, 4352, 6718, 9211, 11827, 14565, 17423, 20397, 23484, 26679, 29981, 33383, 36882, 40473, 44150, - 47911, 51748, 55658, 59636, 63676, 67775, 71926, 76126, 80371, 84655, 88976, 93328, 97709, 102114, 106541, 110987, - 115448, 119922, 124406, 128898, 133396, 137898, 142403, 146907, 151411, 155913, 160412, 164906, 169395, 173878, 178354, 182823, - 187284, 191736, 196180, 200616, 205042, 209458, 213866, 218264, 222652, 227032, 231401, 235762, 240113, 244455, 248789, 253113, - 257429, 261737, 266036, 270327, 274611, 278887, 283156, 287417, 291672, 295920, 300161, 304397, 308626, 312849, 317067, 321280, - 325487, 329689, 333887, 338080, 342268, 346452, 350632, 354808, 358981, 363150, 367315, 371477, 375636, 379792, 383946, 388096, - 392244, 396389, 400532, 404672, 408811, 412947, 417081, 421214, 425345, 429473, 433601, 437726, 441851, 445973, 450095, 454215, - 458334, 462452, 466569, 470684, 474799, 478912, 483025, 487137, 491248, 495359, 499468, 503577, 507685, 511793, 515900, 520006, + -176, -186, -196, -207, -219, -231, -244, -257, -272, + -287, -303, -320, -337, -356, -375, -396, -418, -441, + -465, -491, -517, -546, -575, -607, -639, -674, -711, + -749, -789, -832, -876, -923, -972, -1024, -1078, -1136, + -1196, -1259, -1325, -1394, -1467, -1544, -1624, -1708, -1796, + -1888, -1985, -2087, -2193, -2304, -2421, -2543, -2670, -2803, + -2943, -3088, -3240, -3399, -3564, -3737, -3917, -4105, -4300, + -4503, -4715, -4935, -5163, -5401, -5647, -5902, -6167, -6440, + -6724, -7016, -7318, -7630, -7950, -8280, -8620, -8968, -9324, + -9689, -10062, -10442, -10829, -11222, -11620, -12023, -12429, -12837, + -13245, -13654, -14060, -14462, -14858, -15246, -15624, -15989, -16339, + -16670, -16979, -17264, -17520, -17745, -17933, -18082, -18186, -18241, + -18244, -18188, -18070, -17884, -17625, -17290, -16871, -16366, -15769, + -15075, -14281, -13380, -12371, -11249, -10011, -8653, -7173, -5570, + -3840, -1984, 0, 2112, 4352, 6718, 9211, 11827, 14565, + 17423, 20397, 23484, 26679, 29981, 33383, 36882, 40473, 44150, + 47911, 51748, 55658, 59636, 63676, 67775, 71926, 76126, 80371, + 84655, 88976, 93328, 97709, 102114, 106541, 110987, 115448, 119922, + 124406, 128898, 133396, 137898, 142403, 146907, 151411, 155913, 160412, + 164906, 169395, 173878, 178354, 182823, 187284, 191736, 196180, 200616, + 205042, 209458, 213866, 218264, 222652, 227032, 231401, 235762, 240113, + 244455, 248789, 253113, 257429, 261737, 266036, 270327, 274611, 278887, + 283156, 287417, 291672, 295920, 300161, 304397, 308626, 312849, 317067, + 321280, 325487, 329689, 333887, 338080, 342268, 346452, 350632, 354808, + 358981, 363150, 367315, 371477, 375636, 379792, 383946, 388096, 392244, + 396389, 400532, 404672, 408811, 412947, 417081, 421214, 425345, 429473, + 433601, 437726, 441851, 445973, 450095, 454215, 458334, 462452, 466569, + 470684, 474799, 478912, 483025, 487137, 491248, 495359, 499468, 503577, + 507685, 511793, 515900, 520006, }; void SILU_s8_s32(int8_t *data_in, int32_t *data_out, int32_t dataSize, - int32_t input_offset); + int32_t input_offset); #endif //__DEEPLOY_BASIC_MATH_SILU_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/src/SILU_s8.c b/TargetLibraries/Generic/src/SILU_s8.c index c33ce26da9..4af4d02677 100644 --- a/TargetLibraries/Generic/src/SILU_s8.c +++ b/TargetLibraries/Generic/src/SILU_s8.c @@ -8,8 +8,8 @@ void SILU_s8_s32(int8_t *data_in, int32_t *data_out, int32_t dataSize, int32_t input_offset) { - for (int i = 0; i < dataSize; i++) { - int32_t x = data_in[i] + 128 - input_offset; - data_out[i] = SILU_lut_s8_s32[x]; - } + for (int i = 0; i < dataSize; i++) { + int32_t x = data_in[i] + 128 - input_offset; + data_out[i] = SILU_lut_s8_s32[x]; + } } \ No newline at end of file diff --git a/TargetLibraries/PULPOpen/inc/DeeployPULPMath.h b/TargetLibraries/PULPOpen/inc/DeeployPULPMath.h index 6ad9fdd05d..edacf416c3 100644 --- a/TargetLibraries/PULPOpen/inc/DeeployPULPMath.h +++ b/TargetLibraries/PULPOpen/inc/DeeployPULPMath.h @@ -28,14 +28,14 @@ #include "kernel/Layernorm.h" #include "kernel/Matmul.h" #include "kernel/MaxPool.h" +#include "kernel/RQSILU.h" #include "kernel/RQiHardswish.h" #include "kernel/RequantShift.h" +#include "kernel/SILU.h" #include "kernel/Softmax.h" #include "kernel/UniformRequantShift.h" #include "kernel/gemv.h" #include "kernel/iRMSnorm.h" -#include "kernel/SILU.h" -#include "kernel/RQSILU.h" #define LOG2(x) (__builtin_pulp_fl1(x)) diff --git a/TargetLibraries/PULPOpen/inc/kernel/RQSILU.h b/TargetLibraries/PULPOpen/inc/kernel/RQSILU.h index eb710cb2fc..ffeb68c71a 100644 --- a/TargetLibraries/PULPOpen/inc/kernel/RQSILU.h +++ b/TargetLibraries/PULPOpen/inc/kernel/RQSILU.h @@ -10,22 +10,24 @@ #include "DeeployBasicMath.h" static PI_L1 int8_t RQSILU_lut_s8_s8[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -4, -4, -4, - -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, - -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -2, -2, -2, -1, -1, 0, - 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, - 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 58, 59, 60, 61, 62, - 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, - 79, 80, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -2, -2, -2, + -2, -2, -2, -2, -2, -2, -2, -2, -3, -3, -3, -3, -3, -3, -3, + -3, -3, -3, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, + -3, -3, -2, -2, -2, -1, -1, 0, 0, 1, 1, 2, 2, 3, 4, + 4, 5, 6, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65, + 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, + 127, }; void RQSILU_s8_s8(int8_t *data_in, int8_t *data_out, int32_t dataSize, diff --git a/TargetLibraries/PULPOpen/inc/kernel/SILU.h b/TargetLibraries/PULPOpen/inc/kernel/SILU.h index 8a5d45e0b6..fbd4d48e2c 100644 --- a/TargetLibraries/PULPOpen/inc/kernel/SILU.h +++ b/TargetLibraries/PULPOpen/inc/kernel/SILU.h @@ -10,25 +10,38 @@ #include "DeeployBasicMath.h" static PI_L1 int32_t SILU_lut_s8_s32[256] = { - -176, -186, -196, -207, -219, -231, -244, -257, -272, -287, -303, -320, -337, -356, -375, -396, - -418, -441, -465, -491, -517, -546, -575, -607, -639, -674, -711, -749, -789, -832, -876, -923, - -972, -1024, -1078, -1136, -1196, -1259, -1325, -1394, -1467, -1544, -1624, -1708, -1796, -1888, -1985, -2087, - -2193, -2304, -2421, -2543, -2670, -2803, -2943, -3088, -3240, -3399, -3564, -3737, -3917, -4105, -4300, -4503, - -4715, -4935, -5163, -5401, -5647, -5902, -6167, -6440, -6724, -7016, -7318, -7630, -7950, -8280, -8620, -8968, - -9324, -9689, -10062, -10442, -10829, -11222, -11620, -12023, -12429, -12837, -13245, -13654, -14060, -14462, -14858, -15246, - -15624, -15989, -16339, -16670, -16979, -17264, -17520, -17745, -17933, -18082, -18186, -18241, -18244, -18188, -18070, -17884, - -17625, -17290, -16871, -16366, -15769, -15075, -14281, -13380, -12371, -11249, -10011, -8653, -7173, -5570, -3840, -1984, - 0, 2112, 4352, 6718, 9211, 11827, 14565, 17423, 20397, 23484, 26679, 29981, 33383, 36882, 40473, 44150, - 47911, 51748, 55658, 59636, 63676, 67775, 71926, 76126, 80371, 84655, 88976, 93328, 97709, 102114, 106541, 110987, - 115448, 119922, 124406, 128898, 133396, 137898, 142403, 146907, 151411, 155913, 160412, 164906, 169395, 173878, 178354, 182823, - 187284, 191736, 196180, 200616, 205042, 209458, 213866, 218264, 222652, 227032, 231401, 235762, 240113, 244455, 248789, 253113, - 257429, 261737, 266036, 270327, 274611, 278887, 283156, 287417, 291672, 295920, 300161, 304397, 308626, 312849, 317067, 321280, - 325487, 329689, 333887, 338080, 342268, 346452, 350632, 354808, 358981, 363150, 367315, 371477, 375636, 379792, 383946, 388096, - 392244, 396389, 400532, 404672, 408811, 412947, 417081, 421214, 425345, 429473, 433601, 437726, 441851, 445973, 450095, 454215, - 458334, 462452, 466569, 470684, 474799, 478912, 483025, 487137, 491248, 495359, 499468, 503577, 507685, 511793, 515900, 520006, + -176, -186, -196, -207, -219, -231, -244, -257, -272, + -287, -303, -320, -337, -356, -375, -396, -418, -441, + -465, -491, -517, -546, -575, -607, -639, -674, -711, + -749, -789, -832, -876, -923, -972, -1024, -1078, -1136, + -1196, -1259, -1325, -1394, -1467, -1544, -1624, -1708, -1796, + -1888, -1985, -2087, -2193, -2304, -2421, -2543, -2670, -2803, + -2943, -3088, -3240, -3399, -3564, -3737, -3917, -4105, -4300, + -4503, -4715, -4935, -5163, -5401, -5647, -5902, -6167, -6440, + -6724, -7016, -7318, -7630, -7950, -8280, -8620, -8968, -9324, + -9689, -10062, -10442, -10829, -11222, -11620, -12023, -12429, -12837, + -13245, -13654, -14060, -14462, -14858, -15246, -15624, -15989, -16339, + -16670, -16979, -17264, -17520, -17745, -17933, -18082, -18186, -18241, + -18244, -18188, -18070, -17884, -17625, -17290, -16871, -16366, -15769, + -15075, -14281, -13380, -12371, -11249, -10011, -8653, -7173, -5570, + -3840, -1984, 0, 2112, 4352, 6718, 9211, 11827, 14565, + 17423, 20397, 23484, 26679, 29981, 33383, 36882, 40473, 44150, + 47911, 51748, 55658, 59636, 63676, 67775, 71926, 76126, 80371, + 84655, 88976, 93328, 97709, 102114, 106541, 110987, 115448, 119922, + 124406, 128898, 133396, 137898, 142403, 146907, 151411, 155913, 160412, + 164906, 169395, 173878, 178354, 182823, 187284, 191736, 196180, 200616, + 205042, 209458, 213866, 218264, 222652, 227032, 231401, 235762, 240113, + 244455, 248789, 253113, 257429, 261737, 266036, 270327, 274611, 278887, + 283156, 287417, 291672, 295920, 300161, 304397, 308626, 312849, 317067, + 321280, 325487, 329689, 333887, 338080, 342268, 346452, 350632, 354808, + 358981, 363150, 367315, 371477, 375636, 379792, 383946, 388096, 392244, + 396389, 400532, 404672, 408811, 412947, 417081, 421214, 425345, 429473, + 433601, 437726, 441851, 445973, 450095, 454215, 458334, 462452, 466569, + 470684, 474799, 478912, 483025, 487137, 491248, 495359, 499468, 503577, + 507685, 511793, 515900, 520006, }; void SILU_s8_s32(int8_t *data_in, int32_t *data_out, int32_t dataSize, - int32_t input_offset); + int32_t input_offset); #endif //__DEEPLOY_BASIC_MATH_SILU_KERNEL_HEADER_ \ No newline at end of file diff --git a/TargetLibraries/PULPOpen/src/SILU_s8.c b/TargetLibraries/PULPOpen/src/SILU_s8.c index c33ce26da9..4af4d02677 100644 --- a/TargetLibraries/PULPOpen/src/SILU_s8.c +++ b/TargetLibraries/PULPOpen/src/SILU_s8.c @@ -8,8 +8,8 @@ void SILU_s8_s32(int8_t *data_in, int32_t *data_out, int32_t dataSize, int32_t input_offset) { - for (int i = 0; i < dataSize; i++) { - int32_t x = data_in[i] + 128 - input_offset; - data_out[i] = SILU_lut_s8_s32[x]; - } + for (int i = 0; i < dataSize; i++) { + int32_t x = data_in[i] + 128 - input_offset; + data_out[i] = SILU_lut_s8_s32[x]; + } } \ No newline at end of file From 9c794932873c138e1ca909f15faef0e1e06b11f6 Mon Sep 17 00:00:00 2001 From: FilippoCordella Date: Tue, 24 Feb 2026 16:12:10 +0000 Subject: [PATCH 05/11] Fix GAP9 CI --- .github/workflows/ci-platform-gap9-tiled.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-platform-gap9-tiled.yml b/.github/workflows/ci-platform-gap9-tiled.yml index 0043f8d3e9..6f19f3522c 100644 --- a/.github/workflows/ci-platform-gap9-tiled.yml +++ b/.github/workflows/ci-platform-gap9-tiled.yml @@ -27,7 +27,7 @@ jobs: select-env: uses: ./.github/workflows/_select-env.yml with: - docker_image_deeploy: ${{ github.event.inputs.docker_image_deeploy || github.repository == 'pulp-platform/Deeploy' && 'ghcr.io/pulp-platform/deeploy-gap9:latest'}} + docker_image_deeploy: ${{'ghcr.io/pulp-platform/deeploy-gap9:latest'}} gap9-kernels-tiled-singlebuffer-L2: needs: select-env From 7050d393977876768968236f5683fb9444cf88f0 Mon Sep 17 00:00:00 2001 From: FilippoCordella Date: Wed, 25 Feb 2026 13:05:57 +0000 Subject: [PATCH 06/11] Tiled-GAP CI fix --- .github/workflows/ci-platform-gap9-tiled.yml | 2 +- .github/workflows/ci-platform-gap9.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-platform-gap9-tiled.yml b/.github/workflows/ci-platform-gap9-tiled.yml index 6f19f3522c..a2caff22d8 100644 --- a/.github/workflows/ci-platform-gap9-tiled.yml +++ b/.github/workflows/ci-platform-gap9-tiled.yml @@ -27,7 +27,7 @@ jobs: select-env: uses: ./.github/workflows/_select-env.yml with: - docker_image_deeploy: ${{'ghcr.io/pulp-platform/deeploy-gap9:latest'}} + docker_image_deeploy: ${{ 'ghcr.io/runwangdl/deeploy:gap9' }} gap9-kernels-tiled-singlebuffer-L2: needs: select-env diff --git a/.github/workflows/ci-platform-gap9.yml b/.github/workflows/ci-platform-gap9.yml index 079f13c2a5..874097da6c 100644 --- a/.github/workflows/ci-platform-gap9.yml +++ b/.github/workflows/ci-platform-gap9.yml @@ -28,7 +28,7 @@ jobs: select-env: uses: ./.github/workflows/_select-env.yml with: - docker_image_deeploy: ${{ github.event.inputs.docker_image_deeploy || (github.repository == 'pulp-platform/Deeploy' && 'ghcr.io/pulp-platform/deeploy-gap9:latest') }} + docker_image_deeploy: ${{ 'ghcr.io/runwangdl/deeploy:gap9' }} gap9-kernels: needs: select-env From 302dffae8dce2496d8dc301e7e1716a99a34b997 Mon Sep 17 00:00:00 2001 From: FilippoCordella Date: Wed, 25 Feb 2026 17:17:30 +0000 Subject: [PATCH 07/11] CI fix --- .github/workflows/ci-platform-gap9-tiled.yml | 2 +- .github/workflows/ci-platform-gap9.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-platform-gap9-tiled.yml b/.github/workflows/ci-platform-gap9-tiled.yml index a2caff22d8..a3d4b29923 100644 --- a/.github/workflows/ci-platform-gap9-tiled.yml +++ b/.github/workflows/ci-platform-gap9-tiled.yml @@ -27,7 +27,7 @@ jobs: select-env: uses: ./.github/workflows/_select-env.yml with: - docker_image_deeploy: ${{ 'ghcr.io/runwangdl/deeploy:gap9' }} + docker_image_deeploy: "ghcr.io/FilippoCordella/deeploy:gap9" gap9-kernels-tiled-singlebuffer-L2: needs: select-env diff --git a/.github/workflows/ci-platform-gap9.yml b/.github/workflows/ci-platform-gap9.yml index 874097da6c..78d2cf8df5 100644 --- a/.github/workflows/ci-platform-gap9.yml +++ b/.github/workflows/ci-platform-gap9.yml @@ -28,7 +28,7 @@ jobs: select-env: uses: ./.github/workflows/_select-env.yml with: - docker_image_deeploy: ${{ 'ghcr.io/runwangdl/deeploy:gap9' }} + docker_image_deeploy: "ghcr.io/FilippoCordella/deeploy:gap9" gap9-kernels: needs: select-env From bbbda6950e44575dcf165941c10e1c102b197729 Mon Sep 17 00:00:00 2001 From: FilippoCordella Date: Wed, 25 Feb 2026 19:04:17 +0000 Subject: [PATCH 08/11] Lowercase fix --- .github/workflows/ci-platform-gap9-tiled.yml | 2 +- .github/workflows/ci-platform-gap9.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-platform-gap9-tiled.yml b/.github/workflows/ci-platform-gap9-tiled.yml index a3d4b29923..fd10a4b6e7 100644 --- a/.github/workflows/ci-platform-gap9-tiled.yml +++ b/.github/workflows/ci-platform-gap9-tiled.yml @@ -27,7 +27,7 @@ jobs: select-env: uses: ./.github/workflows/_select-env.yml with: - docker_image_deeploy: "ghcr.io/FilippoCordella/deeploy:gap9" + docker_image_deeploy: "ghcr.io/filippocordella/deeploy:gap9" gap9-kernels-tiled-singlebuffer-L2: needs: select-env diff --git a/.github/workflows/ci-platform-gap9.yml b/.github/workflows/ci-platform-gap9.yml index 78d2cf8df5..2f3cb80fbe 100644 --- a/.github/workflows/ci-platform-gap9.yml +++ b/.github/workflows/ci-platform-gap9.yml @@ -28,7 +28,7 @@ jobs: select-env: uses: ./.github/workflows/_select-env.yml with: - docker_image_deeploy: "ghcr.io/FilippoCordella/deeploy:gap9" + docker_image_deeploy: "ghcr.io/filippocordella/deeploy:gap9" gap9-kernels: needs: select-env From 9c838efe84b84d48c0122caf9359cee5a14b4e82 Mon Sep 17 00:00:00 2001 From: FilippoCordella Date: Wed, 25 Feb 2026 20:58:28 +0000 Subject: [PATCH 09/11] Code clean --- Deeploy/Targets/Generic/Layers.py | 6 ------ Deeploy/TilingExtension/TilerExtension.py | 1 - 2 files changed, 7 deletions(-) diff --git a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index 0550763af8..65e03b59b0 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -716,12 +716,6 @@ class SILULayer(ONNXLayer): def __init__(self, maps: List[NodeMapper]): super().__init__(maps) - def computeOps(self): - # LUT-based SiLU: implemented via a 256-entry table lookup per element. - # Arithmetic operations per element = 0 (only a memory load + store). - # To count memory accesses instead, return: size * 2 - return 0 - class RQSILULayer(SILULayer): diff --git a/Deeploy/TilingExtension/TilerExtension.py b/Deeploy/TilingExtension/TilerExtension.py index e28c2e7101..9b48d9456c 100644 --- a/Deeploy/TilingExtension/TilerExtension.py +++ b/Deeploy/TilingExtension/TilerExtension.py @@ -320,7 +320,6 @@ def computeTilingSchedule(self, ctxt: NetworkContext) -> TilingSolution: log.debug(" - Extract Memory Allocation") self.innerMemoryScheduler.annotateSolution(ctxt, self.tilerModel) self.outerMemoryScheduler.annotateSolution(ctxt, self.tilerModel) - print(tilingSolution) return tilingSolution def computeMemoryMap(self, ctxt: NetworkContext, tilingSolution: TilingSolution) -> MemoryMap: From f6dde3634f5f773cd26b7f952b02f13e7e6b9c97 Mon Sep 17 00:00:00 2001 From: FilippoCordella Date: Thu, 26 Feb 2026 08:13:10 +0000 Subject: [PATCH 10/11] Generate-ccache-gap9 update --- .github/workflows/infra-generate-ccache-gap9.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/infra-generate-ccache-gap9.yml b/.github/workflows/infra-generate-ccache-gap9.yml index b189bfd708..7171197c5e 100644 --- a/.github/workflows/infra-generate-ccache-gap9.yml +++ b/.github/workflows/infra-generate-ccache-gap9.yml @@ -20,7 +20,7 @@ jobs: generate-ccache-gap9: runs-on: ubuntu-latest container: - image: ${{ github.event.inputs.docker_image_deeploy || 'ghcr.io/pulp-platform/deeploy-gap9:latest' }} + image: ${{ github.event.inputs.docker_image_deeploy || 'ghcr.io/filippocordell/deeploy-gap9:latest' }} steps: - name: Checkout Repo uses: actions/checkout@v4 From b0a0e00dd285d386f4385641288706b5abdb96b4 Mon Sep 17 00:00:00 2001 From: FilippoCordella Date: Thu, 26 Feb 2026 23:21:11 +0000 Subject: [PATCH 11/11] Devel rebase --- .../workflows/infra-generate-ccache-gap9.yml | 2 +- Deeploy/Targets/PULPOpen/Platform.py | 30 ++++++------------- Deeploy/Targets/PULPOpen/Tiler.py | 6 ++-- DeeployTest/test_gap9_config.py | 14 ++++----- 4 files changed, 20 insertions(+), 32 deletions(-) diff --git a/.github/workflows/infra-generate-ccache-gap9.yml b/.github/workflows/infra-generate-ccache-gap9.yml index 7171197c5e..109b28bee8 100644 --- a/.github/workflows/infra-generate-ccache-gap9.yml +++ b/.github/workflows/infra-generate-ccache-gap9.yml @@ -20,7 +20,7 @@ jobs: generate-ccache-gap9: runs-on: ubuntu-latest container: - image: ${{ github.event.inputs.docker_image_deeploy || 'ghcr.io/filippocordell/deeploy-gap9:latest' }} + image: ${{ github.event.inputs.docker_image_deeploy || 'ghcr.io/filippocordella/deeploy:gap9' }} steps: - name: Checkout Repo uses: actions/checkout@v4 diff --git a/Deeploy/Targets/PULPOpen/Platform.py b/Deeploy/Targets/PULPOpen/Platform.py index 10f5817846..99045d13a9 100644 --- a/Deeploy/Targets/PULPOpen/Platform.py +++ b/Deeploy/Targets/PULPOpen/Platform.py @@ -22,9 +22,9 @@ GELUGradParser, GELUParser, GEMMParser, LayerNormGradParser, LayerNormParser, MatMulParser, MaxPool1DParser, \ MaxPool2DParser, MulParser, Pad1DParser, Pad2DParser, QuantParser, ReduceSumParser, ReluParser, \ RequantShiftParser, ReshapeParser, RQAddParser, RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, \ - SGDParser, SliceParser, SoftmaxCrossEntropyLossGradParser, SoftmaxCrossEntropyLossParser, SoftmaxGradParser, \ - SoftmaxParser, TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, iRMSNormParser, \ - iSoftmaxParser, SILUParser + SGDParser, SILUParser, SliceParser, SoftmaxCrossEntropyLossGradParser, SoftmaxCrossEntropyLossParser, \ + SoftmaxGradParser, SoftmaxParser, TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, \ + iRMSNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, IntegerDivRequantMergePass, \ MergeConstAddAndRequantPass, MergeTrueIntegerDivRequantShiftPass, QuantPatternPass, RQSSplitPass, \ @@ -41,27 +41,15 @@ PULPFPGELUGradTilingReadyBindings, PULPFPGELUTilingReadyBindings, PULPFPGEMMTilingReadyBindings, \ PULPGatherTilingReadyBindings, PULPiHardswishTilingReadyBindings, PULPiRMSNormTilingReadyBindings, \ PULPiRQSGELUTilingReadyBindings, PULPLayernormGradTilingReadyBindings, PULPLayernormTilingReadyBindings, \ -<<<<<<< HEAD - PULPMatMulTilingReadyBindings, PULPMaxPool1DTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, \ + PULPMatMulTilingReadyBindings, PULPMaxPool1DTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, \ PULPMulTilingReadyBindings, PULPReduceMeanTilingReadyBindings, PULPReduceSumTilingReadyBindings, \ PULPReluTilingReadyBindings, PULPRQAddTilingReadyBindings, PULPRQSConv1DTilingReadyBindings, \ PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, PULPRQSGEMMTilingReadyBindings, \ - PULPRQSiHardswishTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, PULPRQSTallGEMMTilingReadyBindings, \ - PULPRQSTilingReadyBindings, PULPSGDTilingReadyBindings, PULPSliceTilingReadyBindings, \ - PULPSoftmaxCrossEntropyGradTilingReadyBindings, PULPSoftmaxCrossEntropyTilingReadyBindings, \ - PULPSoftmaxGradTilingReadyBindings, PULPSoftmaxTilingReadyBindings, PULPTransposeTilingReadyBindings, \ - PULPUniformRQSTilingReadyBindings, PULPSILUTilingReadyBindings, PULPRQSILUTilingReadyBindings -======= - PULPMatMulTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, PULPMulTilingReadyBindings, \ - PULPReduceMeanTilingReadyBindings, PULPReduceSumTilingReadyBindings, PULPReluTilingReadyBindings, \ - PULPRQAddTilingReadyBindings, PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, \ - PULPRQSGEMMTilingReadyBindings, PULPRQSiHardswishTilingReadyBindings, PULPRQSILUTilingReadyBindings, \ - PULPRQSMatrixVecTilingReadyBindings, PULPRQSTallGEMMTilingReadyBindings, PULPRQSTilingReadyBindings, \ - PULPSGDTilingReadyBindings, PULPSILUTilingReadyBindings, PULPSliceTilingReadyBindings, \ - PULPSoftmaxCrossEntropyGradTilingReadyBindings, PULPSoftmaxCrossEntropyTilingReadyBindings, \ - PULPSoftmaxGradTilingReadyBindings, PULPSoftmaxTilingReadyBindings, PULPTransposeTilingReadyBindings, \ - PULPUniformRQSTilingReadyBindings ->>>>>>> 8e517d6 (Pre-commit run) + PULPRQSiHardswishTilingReadyBindings, PULPRQSILUTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, \ + PULPRQSTallGEMMTilingReadyBindings, PULPRQSTilingReadyBindings, PULPSGDTilingReadyBindings, \ + PULPSILUTilingReadyBindings, PULPSliceTilingReadyBindings, PULPSoftmaxCrossEntropyGradTilingReadyBindings, \ + PULPSoftmaxCrossEntropyTilingReadyBindings, PULPSoftmaxGradTilingReadyBindings, PULPSoftmaxTilingReadyBindings, \ + PULPTransposeTilingReadyBindings, PULPUniformRQSTilingReadyBindings from Deeploy.Targets.PULPOpen.TopologyOptimizationPasses.Passes import PULPAddRequantMergePass, \ PULPConvRequantMergePass, PULPGEMMRequantMergePass, PULPMatMulRequantMergePass diff --git a/Deeploy/Targets/PULPOpen/Tiler.py b/Deeploy/Targets/PULPOpen/Tiler.py index c898ace08a..4f23eef087 100644 --- a/Deeploy/Targets/PULPOpen/Tiler.py +++ b/Deeploy/Targets/PULPOpen/Tiler.py @@ -21,9 +21,9 @@ PULPLayernormGradBinding, PULPMatMulBindings, PULPMaxPool1DBindings, PULPMaxPool2DBindings, PULPMulBindings, \ PULPReduceMeanBindings, PULPReduceSumBindings, PULPReluBinding, PULPReshapeBindings, PULPRQAddBindings, \ PULPRQSBindings, PULPRQSConv1DBindings, PULPRQSConv2DBindings, PULPRQSDWConv2DBindings, PULPRQSGEMMBindings, \ - PULPRQSiHardswishBindings, PULPRQSMatrixVecBindings, PULPRQSTallGEMMBindings, PULPSGDBindings, PULPSliceBindings, \ - PULPSoftmaxBindings, PULPSoftmaxCrossEntropyLossBindings, PULPSoftmaxCrossEntropyLossGradBindings, \ - PULPSoftmaxGradBindings, PULPTransposeBindings, PULPUniformRQSBindings, PULPSILUBindings, PULPRQSILUBindings + PULPRQSiHardswishBindings, PULPRQSILUBindings, PULPRQSMatrixVecBindings, PULPRQSTallGEMMBindings, PULPSGDBindings, \ + PULPSILUBindings, PULPSliceBindings, PULPSoftmaxBindings, PULPSoftmaxCrossEntropyLossBindings, \ + PULPSoftmaxCrossEntropyLossGradBindings, PULPSoftmaxGradBindings, PULPTransposeBindings, PULPUniformRQSBindings from Deeploy.Targets.PULPOpen.TileConstraints.ConvTileConstraint import Conv2DTileConstraint, RQConv1DTileConstraint, \ RQConv2DTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.DWConvTileConstraint import DWConv2DTileConstraint, \ diff --git a/DeeployTest/test_gap9_config.py b/DeeployTest/test_gap9_config.py index f5cd3d46e0..1c7ec75731 100644 --- a/DeeployTest/test_gap9_config.py +++ b/DeeployTest/test_gap9_config.py @@ -9,13 +9,13 @@ "Kernels/Integer/Add/Regular", "Kernels/Integer/Add/MultIO", "Kernels/Integer/Pad/Regular_1D", "Kernels/Integer/Pad/Regular_2D", "Kernels/Integer/MatMul/Regular", "Kernels/Integer/MatMul/Add", "Kernels/Integer/Conv/DW_2D_RQ", "Kernels/Integer/Conv/Regular_2D_RQ", "Kernels/Integer/Softmax/Regular", - "Kernels/Integer/Concat", "Kernels/Integer/Hardswish/Regular", "Kernels/Integer/SILU", "Others/Backtracking", "Kernels/FP32/Add/Regular", - "Kernels/FP32/GEMM/Regular", "Kernels/FP32/Conv/Regular_2D_Bias", "Kernels/FP32/Conv/Regular_2D_NoBias", - "Kernels/FP32/Conv/Regular_2D_ZeroValuedBias", "Kernels/FP32/Conv/DW_2D_Bias", "Kernels/FP32/Conv/DW_2D_NoBias", - "Kernels/FP32/Conv/DW_2D_ZeroValuedBias", "Kernels/FP32/LayerNorm", "Kernels/FP32/ReLU", - "Kernels/FP32/MaxPool/Regular_2D", "Kernels/FP32/MatMul", "Kernels/FP32/Softmax/Regular", "Kernels/FP32/Transpose", - "Kernels/FP32/Mul", "Kernels/Mixed/Dequant", "Kernels/Mixed/Quant", "Kernels/FP32/ReduceSum", - "Kernels/FP32/Reshape/SkipConnection" + "Kernels/Integer/Concat", "Kernels/Integer/Hardswish/Regular", "Kernels/Integer/SILU", "Others/Backtracking", + "Kernels/FP32/Add/Regular", "Kernels/FP32/GEMM/Regular", "Kernels/FP32/Conv/Regular_2D_Bias", + "Kernels/FP32/Conv/Regular_2D_NoBias", "Kernels/FP32/Conv/Regular_2D_ZeroValuedBias", + "Kernels/FP32/Conv/DW_2D_Bias", "Kernels/FP32/Conv/DW_2D_NoBias", "Kernels/FP32/Conv/DW_2D_ZeroValuedBias", + "Kernels/FP32/LayerNorm", "Kernels/FP32/ReLU", "Kernels/FP32/MaxPool/Regular_2D", "Kernels/FP32/MatMul", + "Kernels/FP32/Softmax/Regular", "Kernels/FP32/Transpose", "Kernels/FP32/Mul", "Kernels/Mixed/Dequant", + "Kernels/Mixed/Quant", "Kernels/FP32/ReduceSum", "Kernels/FP32/Reshape/SkipConnection" ] MODEL_TESTS = [