Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ ELSEIF(CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_L
SET(XNNPACK_ENABLE_ARM_SME2 OFF)
ENDIF()
OPTION(XNNPACK_ENABLE_RISCV_VECTOR "Build XNNPACK with RISC-V Vector micro-kernels" ON)
OPTION(XNNPACK_ENABLE_RISCV_FP16_VECTOR "Build XNNPACK with RISC-V FP16 (FP16 data processing) Vector micro-kernels" ON)
OPTION(XNNPACK_ENABLE_VSX "Build XNNPACK with VSX Vector micro-kernels for Power" ON)
OPTION(XNNPACK_ENABLE_AVXVNNI "Build XNNPACK with AVX-VNNI micro-kernels" ON)
IF(CMAKE_C_COMPILER_ID STREQUAL "GNU")
Expand Down Expand Up @@ -315,6 +316,7 @@ ADD_COMPILE_DEFINITIONS("XNN_ENABLE_ARM_I8MM=$<BOOL:${XNNPACK_ENABLE_ARM_I8MM}>"
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_ARM_SME=$<BOOL:${XNNPACK_ENABLE_ARM_SME}>")
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_ARM_SME2=$<BOOL:${XNNPACK_ENABLE_ARM_SME2}>")
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_RISCV_VECTOR=$<BOOL:${XNNPACK_ENABLE_RISCV_VECTOR}>")
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_RISCV_FP16_VECTOR=$<BOOL:${XNNPACK_ENABLE_RISCV_FP16_VECTOR}>")
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_AVXVNNI=$<BOOL:${XNNPACK_ENABLE_AVXVNNI}>")
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_AVXVNNIINT8=$<BOOL:${XNNPACK_ENABLE_AVXVNNIINT8}>")
ADD_COMPILE_DEFINITIONS("XNN_ENABLE_AVX256SKX=$<BOOL:${XNNPACK_ENABLE_AVX256SKX}>")
Expand Down Expand Up @@ -985,7 +987,9 @@ IF(XNNPACK_TARGET_PROCESSOR MATCHES "^ppc64")
ENDIF()
ENDIF()
IF(XNNPACK_TARGET_PROCESSOR MATCHES "^riscv")
IF(XNNPACK_ENABLE_RISCV_VECTOR)
IF(XNNPACK_ENABLE_RISCV_FP16_VECTOR)
SET_PROPERTY(SOURCE ${ALL_RVV_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=rv64gcv_zvfh -mabi=lp64d ")
ELSEIF(XNNPACK_ENABLE_RISCV_VECTOR)
SET_PROPERTY(SOURCE ${ALL_RVV_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=rv64gcv -mabi=lp64d ")
ENDIF()
SET_PROPERTY(SOURCE ${ALL_RVVFP16ARITH_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -march=rv64gc_zvfh -mabi=lp64d ")
Expand Down
2 changes: 1 addition & 1 deletion build_params.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,7 @@ XNNPACK_PARAMS_FOR_ARCH = {
"rvvfp16arith": _create_params(
cond = "//:riscv_fp16_vector_enabled",
copts = [
"-march=rv64gc_zvfh",
"-march=rv64gcv_zvfh",
"-mabi=lp64d",
],
),
Expand Down
8 changes: 8 additions & 0 deletions cmake/gen/rvv_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,10 @@ SET(NON_PROD_RVV_MICROKERNEL_SRCS
src/f32-vclamp/gen/f32-vclamp-rvv-u8v.c
src/f32-vcmul/gen/f32-vcmul-rvv-u1v.c
src/f32-vcmul/gen/f32-vcmul-rvv-u4v.c
src/f32-vexp/gen/f32-vexp-rvv-exp-u1v.c
src/f32-vexp/gen/f32-vexp-rvv-exp-u2v.c
src/f32-vexp/gen/f32-vexp-rvv-exp-u4v.c
src/f32-vexp/gen/f32-vexp-rvv-exp-u8v.c
src/f32-vhswish/gen/f32-vhswish-rvv-u1v.c
src/f32-vhswish/gen/f32-vhswish-rvv-u2v.c
src/f32-vhswish/gen/f32-vhswish-rvv-u8v.c
Expand All @@ -202,6 +206,10 @@ SET(NON_PROD_RVV_MICROKERNEL_SRCS
src/f32-vsqrt/gen/f32-vsqrt-rvv-sqrt-u2v.c
src/f32-vsqrt/gen/f32-vsqrt-rvv-sqrt-u4v.c
src/f32-vsqrt/gen/f32-vsqrt-rvv-sqrt-u8v.c
src/f32-vtanh/gen/f32-vtanh-rvv-tanh-u1v.c
src/f32-vtanh/gen/f32-vtanh-rvv-tanh-u2v.c
src/f32-vtanh/gen/f32-vtanh-rvv-tanh-u4v.c
src/f32-vtanh/gen/f32-vtanh-rvv-tanh-u8v.c
src/f32-vunary/gen/f32-vabs-rvv-u1v.c
src/f32-vunary/gen/f32-vabs-rvv-u2v.c
src/f32-vunary/gen/f32-vabs-rvv-u4v.c
Expand Down
8 changes: 8 additions & 0 deletions cmake/gen/rvvfp16arith_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ SET(NON_PROD_RVVFP16ARITH_MICROKERNEL_SRCS
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u2v.c
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u4v.c
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u8v.c
src/f16-vexp/gen/f16-vexp-rvvfp16arith-exp-u1v.c
src/f16-vexp/gen/f16-vexp-rvvfp16arith-exp-u2v.c
src/f16-vexp/gen/f16-vexp-rvvfp16arith-exp-u4v.c
src/f16-vexp/gen/f16-vexp-rvvfp16arith-exp-u8v.c
src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-tanh-u1v.c
src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-tanh-u2v.c
src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-tanh-u4v.c
src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-tanh-u8v.c
src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u1v.c
src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u2v.c
src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u4v.c
Expand Down
8 changes: 8 additions & 0 deletions gen/rvv_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,10 @@ NON_PROD_RVV_MICROKERNEL_SRCS = [
"src/f32-vclamp/gen/f32-vclamp-rvv-u8v.c",
"src/f32-vcmul/gen/f32-vcmul-rvv-u1v.c",
"src/f32-vcmul/gen/f32-vcmul-rvv-u4v.c",
"src/f32-vexp/gen/f32-vexp-rvv-exp-u1v.c",
"src/f32-vexp/gen/f32-vexp-rvv-exp-u2v.c",
"src/f32-vexp/gen/f32-vexp-rvv-exp-u4v.c",
"src/f32-vexp/gen/f32-vexp-rvv-exp-u8v.c",
"src/f32-vhswish/gen/f32-vhswish-rvv-u1v.c",
"src/f32-vhswish/gen/f32-vhswish-rvv-u2v.c",
"src/f32-vhswish/gen/f32-vhswish-rvv-u8v.c",
Expand All @@ -199,6 +203,10 @@ NON_PROD_RVV_MICROKERNEL_SRCS = [
"src/f32-vsqrt/gen/f32-vsqrt-rvv-sqrt-u2v.c",
"src/f32-vsqrt/gen/f32-vsqrt-rvv-sqrt-u4v.c",
"src/f32-vsqrt/gen/f32-vsqrt-rvv-sqrt-u8v.c",
"src/f32-vtanh/gen/f32-vtanh-rvv-tanh-u1v.c",
"src/f32-vtanh/gen/f32-vtanh-rvv-tanh-u2v.c",
"src/f32-vtanh/gen/f32-vtanh-rvv-tanh-u4v.c",
"src/f32-vtanh/gen/f32-vtanh-rvv-tanh-u8v.c",
"src/f32-vunary/gen/f32-vabs-rvv-u1v.c",
"src/f32-vunary/gen/f32-vabs-rvv-u2v.c",
"src/f32-vunary/gen/f32-vabs-rvv-u4v.c",
Expand Down
8 changes: 8 additions & 0 deletions gen/rvvfp16arith_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ NON_PROD_RVVFP16ARITH_MICROKERNEL_SRCS = [
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u2v.c",
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u4v.c",
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u8v.c",
"src/f16-vexp/gen/f16-vexp-rvvfp16arith-exp-u1v.c",
"src/f16-vexp/gen/f16-vexp-rvvfp16arith-exp-u2v.c",
"src/f16-vexp/gen/f16-vexp-rvvfp16arith-exp-u4v.c",
"src/f16-vexp/gen/f16-vexp-rvvfp16arith-exp-u8v.c",
"src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-tanh-u1v.c",
"src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-tanh-u2v.c",
"src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-tanh-u4v.c",
"src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-tanh-u8v.c",
"src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u1v.c",
"src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u2v.c",
"src/f32-f16-vcvt/gen/f32-f16-vcvt-rvvfp16arith-u4v.c",
Expand Down
5 changes: 5 additions & 0 deletions scripts/generate-f16-vexp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,9 @@
tools/xngen src/f16-vexp/poly-3.c.in -D ARCH=scalar -D BATCH_TILES=1,2,4,8 -o src/f16-vexp/gen/f16-vexp-scalar-poly-3.c &
tools/xngen src/f16-vexp/poly-3.c.in -D ARCH=neonfp16arith -D BATCH_TILES=8,16,32 -o src/f16-vexp/gen/f16-vexp-neonfp16arith-poly-3.c &

tools/xngen src/f16-vexp/rvvfp16arith.c.in -D LMUL=1 -o src/f16-vexp/gen/f16-vexp-rvvfp16arith-exp-u1v.c &
tools/xngen src/f16-vexp/rvvfp16arith.c.in -D LMUL=2 -o src/f16-vexp/gen/f16-vexp-rvvfp16arith-exp-u2v.c &
tools/xngen src/f16-vexp/rvvfp16arith.c.in -D LMUL=4 -o src/f16-vexp/gen/f16-vexp-rvvfp16arith-exp-u4v.c &
tools/xngen src/f16-vexp/rvvfp16arith.c.in -D LMUL=8 -o src/f16-vexp/gen/f16-vexp-rvvfp16arith-exp-u8v.c &

wait
5 changes: 5 additions & 0 deletions scripts/generate-f16-vtanh.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,9 @@ tools/xngen src/f16-vtanh/neonfp16arith-expm1minus.c.in -D P=3 -D H=2 -D PS=0 -D
tools/xngen src/f16-vtanh/neonfp16arith-expm1minus.c.in -D P=3 -D H=2 -D PS=0 -D BATCH_TILE=24 -D SAT=MINMAX -D DIV=RECPEADJ -o src/f16-vtanh/gen/f16-vtanh-neonfp16arith-expm1minus-rr1-p3h2ts-recpeadj-u24.c &
tools/xngen src/f16-vtanh/neonfp16arith-expm1minus.c.in -D P=3 -D H=2 -D PS=0 -D BATCH_TILE=32 -D SAT=MINMAX -D DIV=RECPEADJ -o src/f16-vtanh/gen/f16-vtanh-neonfp16arith-expm1minus-rr1-p3h2ts-recpeadj-u32.c &

tools/xngen src/f16-vtanh/rvvfp16arith.c.in -D LMUL=1 -o src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-tanh-u1v.c &
tools/xngen src/f16-vtanh/rvvfp16arith.c.in -D LMUL=2 -o src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-tanh-u2v.c &
tools/xngen src/f16-vtanh/rvvfp16arith.c.in -D LMUL=4 -o src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-tanh-u4v.c &
tools/xngen src/f16-vtanh/rvvfp16arith.c.in -D LMUL=8 -o src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-tanh-u8v.c &

wait
5 changes: 5 additions & 0 deletions scripts/generate-f32-vexp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,9 @@ tools/xngen src/f32-vexp/rational-3-2.c.in -D ARCH=hvx -D BATCH_TILES=32,64,128

tools/xngen src/f32-vexp/rational-3-2.c.in -D ARCH=avx512f -D BATCH_TILES=16,32,48,64 -D DIV=NR -o src/f32-vexp/gen/f32-vexp-avx512f-rational-3-2-nr.c &

tools/xngen src/f32-vexp/rvv.c.in -D LMUL=1 -o src/f32-vexp/gen/f32-vexp-rvv-exp-u1v.c &
tools/xngen src/f32-vexp/rvv.c.in -D LMUL=2 -o src/f32-vexp/gen/f32-vexp-rvv-exp-u2v.c &
tools/xngen src/f32-vexp/rvv.c.in -D LMUL=4 -o src/f32-vexp/gen/f32-vexp-rvv-exp-u4v.c &
tools/xngen src/f32-vexp/rvv.c.in -D LMUL=8 -o src/f32-vexp/gen/f32-vexp-rvv-exp-u8v.c &

wait
5 changes: 5 additions & 0 deletions scripts/generate-f32-vtanh.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,9 @@ tools/xngen src/f32-vtanh/rational-9-8.c.in -D ARCH=fma3 -D BATCH_TILES=8,16
tools/xngen src/f32-vtanh/rational-9-8.c.in -D ARCH=avx512f -D BATCH_TILES=16,32,48,64 -D DIV=NR -o src/f32-vtanh/gen/f32-vtanh-avx512f-rational-9-8-nr.c &
tools/xngen src/f32-vtanh/rational-9-8.c.in -D ARCH=hvx -D BATCH_TILES=32,64,96,128 -D DIV=NR -o src/f32-vtanh/gen/f32-vtanh-hvx-rational-9-8-nr.c &

tools/xngen src/f32-vtanh/rvv.c.in -D LMUL=1 -o src/f32-vtanh/gen/f32-vtanh-rvv-tanh-u1v.c &
tools/xngen src/f32-vtanh/rvv.c.in -D LMUL=2 -o src/f32-vtanh/gen/f32-vtanh-rvv-tanh-u2v.c &
tools/xngen src/f32-vtanh/rvv.c.in -D LMUL=4 -o src/f32-vtanh/gen/f32-vtanh-rvv-tanh-u4v.c &
tools/xngen src/f32-vtanh/rvv.c.in -D LMUL=8 -o src/f32-vtanh/gen/f32-vtanh-rvv-tanh-u8v.c &

wait
4 changes: 4 additions & 0 deletions src/configs/hardware-config.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,12 @@ static void init_hardware_config(void) {
const bool use_riscv_vector = (hwcap & COMPAT_HWCAP_ISA_V) != 0;
set_arch_flag(xnn_arch_riscv_vector, use_riscv_vector);

#if XNN_ENABLE_RISCV_FP16_VECTOR
set_arch_flag(xnn_arch_riscv_vector_fp16_arith, true);
#else
/* There is no HWCAP for fp16 so disable by default */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On operating systems that provide /proc/cpuinfo, fp16 support is detectable;
for example, see libyuv:
https://chromium.googlesource.com/libyuv/libyuv/+/refs/heads/main/source/cpu_id.cc#335

However, the emulator I use, which is supposed to model the SiFive X280 (a core that has fp16), does not support fp16.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The standard way to detect FP16 support:
https://github.com/google/XNNPACK/blob/master/src/xnnpack/math.h#L504-L506

When hardware-config.c is built, the __riscv_zvfh macro is not defined.
Apparently, this file is compiled separately from the microkernels, so adding
https://github.com/google/XNNPACK/pull/8740/files#diff-1e7de1ae2d059d21e1dd75d5812d5a34b0222cef273b7c3a2af62eb747f9d20aR990-R992
is not enough.
Could you tell me where I should change the build configuration?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A dynamic ISA check for the presence of FP16 was added in commit
3bd1297.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fbarchard Could you please take a look at the changes? Do you have any other comments?

set_arch_flag(xnn_arch_riscv_vector_fp16_arith, false);
#endif

if (use_riscv_vector) {
register uint32_t vlenb __asm__ ("t0");
Expand Down
19 changes: 14 additions & 5 deletions src/f16-vexp/f16-vexp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,22 @@
#include "src/xnnpack/common.h"
#include "src/xnnpack/math.h"

XNN_UKERNEL(xnn_arch_none, xnn_f16_vexp_ukernel__scalar_poly_3_u1, 1, false, xnn_float16, struct xnn_f16_default_params, NULL)
XNN_UKERNEL(xnn_arch_none, xnn_f16_vexp_ukernel__scalar_poly_3_u2, 2, false, xnn_float16, struct xnn_f16_default_params, NULL)
XNN_UKERNEL(xnn_arch_none, xnn_f16_vexp_ukernel__scalar_poly_3_u4, 4, false, xnn_float16, struct xnn_f16_default_params, NULL)
XNN_UKERNEL(xnn_arch_none, xnn_f16_vexp_ukernel__scalar_poly_3_u8, 8, false, xnn_float16, struct xnn_f16_default_params, NULL)

#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
XNN_UKERNEL(xnn_arch_none, xnn_f16_vexp_ukernel__neonfp16arith_poly_3_u8, 8, false, xnn_float16, struct xnn_f16_default_params, NULL)
XNN_UKERNEL(xnn_arch_none, xnn_f16_vexp_ukernel__neonfp16arith_poly_3_u16, 16, false, xnn_float16, struct xnn_f16_default_params, NULL)
XNN_UKERNEL(xnn_arch_none, xnn_f16_vexp_ukernel__neonfp16arith_poly_3_u32, 32, false, xnn_float16, struct xnn_f16_default_params, NULL)
#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)

// rvvfp16arith exp micro-kernel entries (u1v/u2v/u4v/u8v variants); listed only
// when XNN_ENABLE_RISCV_FP16_VECTOR is set and the target is XNN_ARCH_RISCV.
#if XNN_ENABLE_RISCV_FP16_VECTOR && (XNN_ARCH_RISCV)
XNN_UKERNEL(xnn_arch_riscv_vector_fp16_arith, xnn_f16_vexp_ukernel__rvvfp16arith_exp_u1v, 1, true, xnn_float16, struct xnn_f16_default_params, NULL)
XNN_UKERNEL(xnn_arch_riscv_vector_fp16_arith, xnn_f16_vexp_ukernel__rvvfp16arith_exp_u2v, 2, true, xnn_float16, struct xnn_f16_default_params, NULL)
XNN_UKERNEL(xnn_arch_riscv_vector_fp16_arith, xnn_f16_vexp_ukernel__rvvfp16arith_exp_u4v, 4, true, xnn_float16, struct xnn_f16_default_params, NULL)
XNN_UKERNEL(xnn_arch_riscv_vector_fp16_arith, xnn_f16_vexp_ukernel__rvvfp16arith_exp_u8v, 8, true, xnn_float16, struct xnn_f16_default_params, NULL)
#endif // XNN_ENABLE_RISCV_FP16_VECTOR && (XNN_ARCH_RISCV)

//#if XNN_ARCH_ARM64
XNN_UKERNEL(xnn_arch_none, xnn_f16_vexp_ukernel__scalar_poly_3_u1, 1, false, xnn_float16, struct xnn_f16_default_params, NULL)
XNN_UKERNEL(xnn_arch_none, xnn_f16_vexp_ukernel__scalar_poly_3_u2, 2, false, xnn_float16, struct xnn_f16_default_params, NULL)
XNN_UKERNEL(xnn_arch_none, xnn_f16_vexp_ukernel__scalar_poly_3_u4, 4, false, xnn_float16, struct xnn_f16_default_params, NULL)
XNN_UKERNEL(xnn_arch_none, xnn_f16_vexp_ukernel__scalar_poly_3_u8, 8, false, xnn_float16, struct xnn_f16_default_params, NULL)
//#endif
Loading