@@ -110,36 +110,7 @@ __ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
110110}
111111#endif // __SYCL_DEVICE_ONLY__
112112
113- #ifdef __SYCL_DEVICE_ONLY__
114-
115- // TODO: __esimd_dpas* should have single declaration for host and device:
116- // Ret __esimd_dpas*(...)
117- template <typename T, typename T0, typename T1, typename T2, int N, int N1,
118- int N2>
119- SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __ESIMD_DNS::vector_type_t <T, N>
120- __esimd_dpas (__ESIMD_DNS::vector_type_t <T0, N> src0,
121- __ESIMD_DNS::vector_type_t <T1, N1> src1,
122- __ESIMD_DNS::vector_type_t <T2, N2> src2, int src1_precision,
123- int src2_precision, int depth, int repeat, int sign_res,
124- int sign_acc);
125-
126- template <typename T, typename T1, typename T2, int N, int N1, int N2>
127- SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __ESIMD_DNS::vector_type_t <T, N>
128- __esimd_dpas2 (__ESIMD_DNS::vector_type_t <T1, N1> src1,
129- __ESIMD_DNS::vector_type_t <T2, N2> src2, int dpas_info);
130-
131- template <typename T, typename T1, typename T2, int N, int N1, int N2>
132- SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __ESIMD_DNS::vector_type_t <T, N>
133- __esimd_dpasw (__ESIMD_DNS::vector_type_t <T, N> src0,
134- __ESIMD_DNS::vector_type_t <T1, N1> src1,
135- __ESIMD_DNS::vector_type_t <T2, N2> src2, int dpas_info);
136-
137- template <typename T, typename T1, typename T2, int N, int N1, int N2>
138- SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __ESIMD_DNS::vector_type_t <T, N>
139- __esimd_dpasw2 (__ESIMD_DNS::vector_type_t <T1, N1> src1,
140- __ESIMD_DNS::vector_type_t <T2, N2> src2, int dpas_info);
141-
142- #else // __SYCL_DEVICE_ONLY__
113+ #ifndef __SYCL_DEVICE_ONLY__
143114
144115template <typename T0, typename T1, int SZ>
145116__ESIMD_INTRIN __ESIMD_raw_vec_t (T0, SZ)
@@ -659,56 +630,73 @@ __esimd_dpas_inner(const __ESIMD_DNS::vector_type_t<T0, SZ> *src0,
659630
660631 return retv;
661632}
633+ #endif // #ifndef __SYCL_DEVICE_ONLY__
662634
663635template <__ESIMD_ENS::argument_type src1_precision,
664636 __ESIMD_ENS::argument_type src2_precision, int systolic_depth,
665637 int repeat_count, typename T, typename T0, typename T1, typename T2,
666- int N, int N1, int N2>
667- inline __ESIMD_DNS::vector_type_t <T, N>
668- __esimd_dpas (__ESIMD_DNS::vector_type_t <T0, N> src0,
669- __ESIMD_DNS::vector_type_t <T1, N1> src1,
670- __ESIMD_DNS::vector_type_t <T2, N2> src2) {
638+ int N, int N1, int N2, int res_sign = std::is_signed_v<T>,
639+ int acc_sign = std::is_signed_v<T0>>
640+ __ESIMD_INTRIN __ESIMD_DNS::vector_type_t <T, N>
641+ __esimd_dpas2 (__ESIMD_DNS::vector_type_t <T0, N> src0,
642+ __ESIMD_DNS::vector_type_t <T1, N1> src1,
643+ __ESIMD_DNS::vector_type_t <T2, N2> src2)
644+ #ifdef __SYCL_DEVICE_ONLY__
645+ ;
646+ #else // !__SYCL_DEVICE_ONLY__
647+ {
671648 return __esimd_dpas_inner<src1_precision, src2_precision, systolic_depth,
672649 repeat_count, T, T0, T1, T2, N, N1, N2>(
673650 std::addressof (src0), src1, src2);
674651}
652+ #endif // !__SYCL_DEVICE_ONLY__
675653
676- template <__ESIMD_ENS::argument_type src1_precision,
677- __ESIMD_ENS::argument_type src2_precision, int systolic_depth,
678- int repeat_count, typename T, typename T1, typename T2, int N, int N1,
679- int N2>
680- inline __ESIMD_DNS::vector_type_t <T, N>
681- __esimd_dpas2 (__ESIMD_DNS::vector_type_t <T1, N1> src1,
682- __ESIMD_DNS::vector_type_t <T2, N2> src2) {
654+ template <int Info, typename T, typename T1, typename T2, int N, int N1, int N2>
655+ __ESIMD_INTRIN __ESIMD_DNS::vector_type_t <T, N>
656+ __esimd_dpas_nosrc0 (__ESIMD_DNS::vector_type_t <T1, N1> src1,
657+ __ESIMD_DNS::vector_type_t <T2, N2> src2)
658+ #ifdef __SYCL_DEVICE_ONLY__
659+ ;
660+ #else // !__SYCL_DEVICE_ONLY__
661+ {
662+ constexpr __ESIMD_ENS::argument_type src1_precision =
663+ static_cast <__ESIMD_ENS::argument_type>(Info & 0xff );
664+ constexpr __ESIMD_ENS::argument_type src2_precision =
665+ static_cast <__ESIMD_ENS::argument_type>((Info >> 8 ) & 0xff );
666+ constexpr int systolic_depth = (Info >> 16 ) & 0xff ;
667+ constexpr int repeat_count = (Info >> 24 ) & 0xff ;
683668 return __esimd_dpas_inner<src1_precision, src2_precision, systolic_depth,
684- repeat_count, T, T, T1, T2, N, N1, N2>(nullptr , src1,
685- src2);
669+ repeat_count, T, T, T1, T2, N, N1, N2>(nullptr ,
670+ src1, src2);
686671}
672+ #endif // !__SYCL_DEVICE_ONLY__
687673
688- template <__ESIMD_ENS::argument_type src1_precision,
689- __ESIMD_ENS::argument_type src2_precision, int systolic_depth,
690- int repeat_count, typename T, typename T1, typename T2, int N, int N1,
691- int N2>
692- inline __ESIMD_DNS::vector_type_t <T, N>
674+ template <int Info, typename T, typename T1, typename T2, int N, int N1, int N2>
675+ __ESIMD_INTRIN __ESIMD_DNS::vector_type_t <T, N>
693676__esimd_dpasw (__ESIMD_DNS::vector_type_t <T, N> src0,
694677 __ESIMD_DNS::vector_type_t <T1, N1> src1,
695- __ESIMD_DNS::vector_type_t <T2, N2> src2) {
678+ __ESIMD_DNS::vector_type_t <T2, N2> src2)
679+ #ifdef __SYCL_DEVICE_ONLY__
680+ ;
681+ #else // !__SYCL_DEVICE_ONLY__
682+ {
696683 __ESIMD_UNSUPPORTED_ON_HOST;
697684 return __ESIMD_DNS::vector_type_t <T, N>();
698685}
686+ #endif // !__SYCL_DEVICE_ONLY__
699687
700- template <__ESIMD_ENS::argument_type src1_precision,
701- __ESIMD_ENS::argument_type src2_precision, int systolic_depth,
702- int repeat_count, typename T, typename T1, typename T2, int N, int N1,
703- int N2>
704- inline __ESIMD_DNS::vector_type_t <T, N>
705- __esimd_dpasw2 (__ESIMD_DNS::vector_type_t <T1, N1> src1,
706- __ESIMD_DNS::vector_type_t <T2, N2> src2) {
688+ template <int Info, typename T, typename T1, typename T2, int N, int N1, int N2>
689+ __ESIMD_INTRIN __ESIMD_DNS::vector_type_t <T, N>
690+ __esimd_dpasw_nosrc0 (__ESIMD_DNS::vector_type_t <T1, N1> src1,
691+ __ESIMD_DNS::vector_type_t <T2, N2> src2)
692+ #ifdef __SYCL_DEVICE_ONLY__
693+ ;
694+ #else // !__SYCL_DEVICE_ONLY__
695+ {
707696 __ESIMD_UNSUPPORTED_ON_HOST;
708697 return __ESIMD_DNS::vector_type_t <T, N>();
709698}
710-
711- #endif // #ifdef __SYCL_DEVICE_ONLY__
699+ #endif // !__SYCL_DEVICE_ONLY__
712700
713701#undef __ESIMD_raw_vec_t
714702#undef __ESIMD_cpp_vec_t
0 commit comments