@@ -1512,14 +1512,15 @@ static void addmm_impl_cpu_(
   TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!c.is_conj());

   bool dispatched = false;
-#if defined(__aarch64__) && AT_MKLDNN_ACL_ENABLED()
+// #if defined(__aarch64__) && AT_MKLDNN_ACL_ENABLED()
+#if defined(__aarch64__)
   // On AArch64 if LHS matrix in BLAS routine is transposed but RHS is not then
   // it is faster to call oneDNN matrix multiplication primitive with RHS*LHS
   // that will call then into Arm® Compute Library (ACL) GEMM kernel and also
   // additionally have support for running kernel with BF16 instructions
   if (transpose_c) {
     bool apply_heur = apply_mkldnn_matmul_heur(b.sizes()[0], b.sizes()[1], a.sizes()[1]);
-    if (apply_heur && transpose_a && !transpose_b && result.scalar_type() == at::ScalarType::Float) {
+    // if (apply_heur && transpose_a && !transpose_b && result.scalar_type() == at::ScalarType::Float) {
       try {
         mkldnn_matmul(b, a, c, beta.to<float>(), alpha.to<float>());
         // We have dispatched to ACL GEMM for single precision float
@@ -1529,7 +1530,7 @@ static void addmm_impl_cpu_(
         TORCH_WARN("mkldnn_matmul failed, switching to BLAS gemm:", e.what());
         at::globalContext().setUserEnabledMkldnn(false);
       }
-    }
+    // }
   }
 #endif

@@ -1776,7 +1777,8 @@ static inline void bmm_out_or_baddbmm_(const Tensor& self_or_result_, const Tens
   };

   bool apply_heur = apply_mkldnn_matmul_heur(batch1.sizes()[1], batch1.sizes()[2], batch2.sizes()[2]);
-  if (apply_heur && use_mkldnn_matmul(batch1, batch2, self_or_result)) {
+  // if (apply_heur && use_mkldnn_matmul(batch1, batch2, self_or_result)) {
+  if (apply_heur) {
     try {
       mkldnn_matmul(batch1, batch2, self_or_result, beta.to<float>(), alpha.to<float>());
       return;
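The hunks above comment out the ACL-specific guards so mkldnn_matmul is attempted whenever the size heuristic passes on AArch64, relying on the existing try/catch to fall back to BLAS gemm. The snippet below is a minimal, hypothetical sketch (not part of this commit) of exercising the patched addmm path through the public ATen C++ API; at::addmm, at::randn, Tensor::t() and setUserEnabledMkldnn() are existing calls, while the shapes and the driver program itself are illustrative assumptions.

// Hypothetical driver, not from the commit: builds a float addmm with a
// transposed LHS and a contiguous RHS, the case the commented-out heuristic
// used to gate, so that an AArch64 build routes it through addmm_impl_cpu_.
#include <ATen/ATen.h>
#include <iostream>

int main() {
  // Re-enable oneDNN in case an earlier mkldnn_matmul failure disabled it
  // (the catch block in the diff calls setUserEnabledMkldnn(false)).
  at::globalContext().setUserEnabledMkldnn(true);

  auto bias = at::zeros({128, 256}, at::kFloat);     // m x n
  auto lhs  = at::randn({64, 128}, at::kFloat).t();  // m x k, transposed view
  auto rhs  = at::randn({64, 256}, at::kFloat);      // k x n, row-major

  // Dispatches into addmm_impl_cpu_; with this patch, mkldnn_matmul is tried
  // whenever apply_mkldnn_matmul_heur passes, with BLAS gemm as the fallback.
  auto out = at::addmm(bias, lhs, rhs);
  std::cout << out.sizes() << std::endl;  // expected: [128, 256]
  return 0;
}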