@@ -713,16 +713,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
713713 vdivps (x, op1, op2);
714714 }
715715
716- void uni_vaddps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
716+ void uni_vaddps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
717717 const Xbyak::Operand &op2) {
718718 if (is_valid_isa (avx))
719719 vaddps (x, op1, op2);
720720 else {
721- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
722- addps (x, op2);
721+ if (x.getIdx () == op1.getIdx ()) {
722+ addps (x, op2);
723+ } else if (x.isEqualIfNotInherited (op2)) {
724+ addps (x, op1);
725+ } else {
726+ movups (x, op1);
727+ addps (x, op2);
728+ }
723729 }
724730 }
725- void uni_vaddps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
731+ void uni_vaddps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
726732 const Xbyak::Operand &op2) {
727733 vaddps (x, op1, op2);
728734 }
@@ -831,16 +837,25 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
831837 vsubss (x, Xbyak::Xmm (op1.getIdx ()), Xbyak::Xmm (op2.getIdx ()));
832838 }
833839
834- void uni_vsubps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
840+ void uni_vsubps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
835841 const Xbyak::Operand &op2) {
836842 if (is_valid_isa (avx))
837843 vsubps (x, op1, op2);
838844 else {
839- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
840- subps (x, op2);
845+ if (x.getIdx () == op1.getIdx ()) {
846+ subps (x, op2);
847+ } else if (x.isEqualIfNotInherited (op2)) {
848+ push (op1);
849+ subps (op1, op2);
850+ movups (x, op1);
851+ pop (op1);
852+ } else {
853+ movups (x, op1);
854+ subps (x, op2);
855+ }
841856 }
842857 }
843- void uni_vsubps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
858+ void uni_vsubps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
844859 const Xbyak::Operand &op2) {
845860 vsubps (x, op1, op2);
846861 }
@@ -874,30 +889,42 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
874889 }
875890 }
876891
877- void uni_vpmulld (const Xbyak::Xmm &x1 , const Xbyak::Xmm &x2 ,
878- const Xbyak::Operand &op ) {
892+ void uni_vpmulld (const Xbyak::Xmm &x , const Xbyak::Xmm &op1 ,
893+ const Xbyak::Operand &op2 ) {
879894 if (is_valid_isa (avx)) {
880- vpmulld (x1, x2, op );
895+ vpmulld (x, op1, op2 );
881896 } else {
882- if (x1.getIdx () != x2.getIdx ()) movdqa (x1, x2);
883- pmulld (x1, op);
897+ if (x.getIdx () == op1.getIdx ()) {
898+ pmulld (x, op2);
899+ } else if (x.isEqualIfNotInherited (op2)) {
900+ pmulld (x, op1);
901+ } else {
902+ movdqa (x, op1);
903+ pmulld (x, op2);
904+ }
884905 }
885906 }
886907 void uni_vpmulld (const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
887908 const Xbyak::Operand &op) {
888909 vpmulld (x1, x2, op);
889910 }
890911
891- void uni_vmulps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
912+ void uni_vmulps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
892913 const Xbyak::Operand &op2) {
893914 if (is_valid_isa (avx))
894915 vmulps (x, op1, op2);
895916 else {
896- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
897- mulps (x, op2);
917+ if (x.getIdx () == op1.getIdx ()) {
918+ mulps (x, op2);
919+ } else if (x.isEqualIfNotInherited (op2)) {
920+ mulps (x, op1);
921+ } else {
922+ movups (x, op1);
923+ mulps (x, op2);
924+ }
898925 }
899926 }
900- void uni_vmulps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
927+ void uni_vmulps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
901928 const Xbyak::Operand &op2) {
902929 vmulps (x, op1, op2);
903930 }
@@ -1299,16 +1326,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
12991326 vpsrld (x, op, imm);
13001327 }
13011328
1302- void uni_vmaxps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1329+ void uni_vmaxps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
13031330 const Xbyak::Operand &op2) {
13041331 if (is_valid_isa (avx))
13051332 vmaxps (x, op1, op2);
13061333 else {
1307- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
1308- maxps (x, op2);
1334+ if (x.getIdx () == op1.getIdx ()) {
1335+ maxps (x, op2);
1336+ } else if (x.isEqualIfNotInherited (op2)) {
1337+ maxps (x, op1);
1338+ } else {
1339+ movups (x, op1);
1340+ maxps (x, op2);
1341+ }
13091342 }
13101343 }
1311- void uni_vmaxps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1344+ void uni_vmaxps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
13121345 const Xbyak::Operand &op2) {
13131346 vmaxps (x, op1, op2);
13141347 }
@@ -1323,17 +1356,23 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
13231356 }
13241357 }
13251358
1326- void uni_vminps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1359+ void uni_vminps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
13271360 const Xbyak::Operand &op2) {
13281361 if (is_valid_isa (avx))
13291362 vminps (x, op1, op2);
13301363 else {
1331- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
1332- minps (x, op2);
1364+ if (x.getIdx () == op1.getIdx ()) {
1365+ minps (x, op2);
1366+ } else if (x.isEqualIfNotInherited (op2)) {
1367+ minps (x, op1);
1368+ } else {
1369+ movups (x, op1);
1370+ minps (x, op2);
1371+ }
13331372 }
13341373 }
13351374
1336- void uni_vminps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1375+ void uni_vminps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
13371376 const Xbyak::Operand &op2) {
13381377 vminps (x, op1, op2);
13391378 }
@@ -1370,13 +1409,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
13701409 vpmovzxbd (y, op);
13711410 }
13721411
1373- void uni_vcmpps (const Xbyak::Xmm &x1 , const Xbyak::Xmm &x2 ,
1374- const Xbyak::Operand &op , int cmp_predicate) {
1412+ void uni_vcmpps (const Xbyak::Xmm &x , const Xbyak::Xmm &op1 ,
1413+ const Xbyak::Operand &op2 , int cmp_predicate) {
13751414 if (is_valid_isa (avx))
1376- vcmpps (x1, x2, op , cmp_predicate);
1415+ vcmpps (x, op1, op2 , cmp_predicate);
13771416 else {
1378- if (x1.getIdx () != x2.getIdx ()) uni_vmovups (x1, x2);
1379- cmpps (x1, op, cmp_predicate);
1417+ if (x.getIdx () == op1.getIdx ()) {
1418+ cmpps (x, op2, cmp_predicate);
1419+ } else if (x.isEqualIfNotInherited (op2)) {
1420+ push (op1);
1421+ cmpps (op1, op2, cmp_predicate);
1422+ movups (x, op1);
1423+ pop (op1);
1424+ } else {
1425+ movups (x, op1);
1426+ cmpps (x, op2, cmp_predicate);
1427+ }
13801428 }
13811429 }
13821430 void uni_vcmpps (const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
0 commit comments