@@ -657,16 +657,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
657657 vdivps (x, op1, op2);
658658 }
659659
660- void uni_vaddps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
660+ void uni_vaddps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
661661 const Xbyak::Operand &op2) {
662662 if (is_valid_isa (avx))
663663 vaddps (x, op1, op2);
664664 else {
665- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
666- addps (x, op2);
665+ if (x.getIdx () == op1.getIdx ()) {
666+ addps (x, op2);
667+ } else if (x.isEqualIfNotInherited (op2)) {
668+ addps (x, op1);
669+ } else {
670+ movups (x, op1);
671+ addps (x, op2);
672+ }
667673 }
668674 }
669- void uni_vaddps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
675+ void uni_vaddps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
670676 const Xbyak::Operand &op2) {
671677 vaddps (x, op1, op2);
672678 }
@@ -775,16 +781,25 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
775781 vsubss (x, Xbyak::Xmm (op1.getIdx ()), Xbyak::Xmm (op2.getIdx ()));
776782 }
777783
778- void uni_vsubps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
784+ void uni_vsubps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
779785 const Xbyak::Operand &op2) {
780786 if (is_valid_isa (avx))
781787 vsubps (x, op1, op2);
782788 else {
783- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
784- subps (x, op2);
789+ if (x.getIdx () == op1.getIdx ()) {
790+ subps (x, op2);
791+ } else if (x.isEqualIfNotInherited (op2)) {
792+ push (op1);
793+ subps (op1, op2);
794+ movups (x, op1);
795+ pop (op1);
796+ } else {
797+ movups (x, op1);
798+ subps (x, op2);
799+ }
785800 }
786801 }
787- void uni_vsubps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
802+ void uni_vsubps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
788803 const Xbyak::Operand &op2) {
789804 vsubps (x, op1, op2);
790805 }
@@ -818,30 +833,42 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
818833 }
819834 }
820835
821- void uni_vpmulld (const Xbyak::Xmm &x1 , const Xbyak::Xmm &x2 ,
822- const Xbyak::Operand &op ) {
836+ void uni_vpmulld (const Xbyak::Xmm &x , const Xbyak::Xmm &op1 ,
837+ const Xbyak::Operand &op2 ) {
823838 if (is_valid_isa (avx)) {
824- vpmulld (x1, x2, op );
839+ vpmulld (x, op1, op2 );
825840 } else {
826- if (x1.getIdx () != x2.getIdx ()) movdqa (x1, x2);
827- pmulld (x1, op);
841+ if (x.getIdx () == op1.getIdx ()) {
842+ pmulld (x, op2);
843+ } else if (x.isEqualIfNotInherited (op2)) {
844+ pmulld (x, op1);
845+ } else {
846+ movdqa (x, op1);
847+ pmulld (x, op2);
848+ }
828849 }
829850 }
830851 void uni_vpmulld (const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
831852 const Xbyak::Operand &op) {
832853 vpmulld (x1, x2, op);
833854 }
834855
835- void uni_vmulps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
856+ void uni_vmulps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
836857 const Xbyak::Operand &op2) {
837858 if (is_valid_isa (avx))
838859 vmulps (x, op1, op2);
839860 else {
840- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
841- mulps (x, op2);
861+ if (x.getIdx () == op1.getIdx ()) {
862+ mulps (x, op2);
863+ } else if (x.isEqualIfNotInherited (op2)) {
864+ mulps (x, op1);
865+ } else {
866+ movups (x, op1);
867+ mulps (x, op2);
868+ }
842869 }
843870 }
844- void uni_vmulps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
871+ void uni_vmulps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
845872 const Xbyak::Operand &op2) {
846873 vmulps (x, op1, op2);
847874 }
@@ -1243,16 +1270,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
12431270 vpsrld (x, op, imm);
12441271 }
12451272
1246- void uni_vmaxps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1273+ void uni_vmaxps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
12471274 const Xbyak::Operand &op2) {
12481275 if (is_valid_isa (avx))
12491276 vmaxps (x, op1, op2);
12501277 else {
1251- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
1252- maxps (x, op2);
1278+ if (x.getIdx () == op1.getIdx ()) {
1279+ maxps (x, op2);
1280+ } else if (x.isEqualIfNotInherited (op2)) {
1281+ maxps (x, op1);
1282+ } else {
1283+ movups (x, op1);
1284+ maxps (x, op2);
1285+ }
12531286 }
12541287 }
1255- void uni_vmaxps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1288+ void uni_vmaxps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
12561289 const Xbyak::Operand &op2) {
12571290 vmaxps (x, op1, op2);
12581291 }
@@ -1267,17 +1300,23 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
12671300 }
12681301 }
12691302
1270- void uni_vminps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1303+ void uni_vminps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
12711304 const Xbyak::Operand &op2) {
12721305 if (is_valid_isa (avx))
12731306 vminps (x, op1, op2);
12741307 else {
1275- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
1276- minps (x, op2);
1308+ if (x.getIdx () == op1.getIdx ()) {
1309+ minps (x, op2);
1310+ } else if (x.isEqualIfNotInherited (op2)) {
1311+ minps (x, op1);
1312+ } else {
1313+ movups (x, op1);
1314+ minps (x, op2);
1315+ }
12771316 }
12781317 }
12791318
1280- void uni_vminps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1319+ void uni_vminps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
12811320 const Xbyak::Operand &op2) {
12821321 vminps (x, op1, op2);
12831322 }
@@ -1314,13 +1353,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
13141353 vpmovzxbd (y, op);
13151354 }
13161355
1317- void uni_vcmpps (const Xbyak::Xmm &x1 , const Xbyak::Xmm &x2 ,
1318- const Xbyak::Operand &op , int cmp_predicate) {
1356+ void uni_vcmpps (const Xbyak::Xmm &x , const Xbyak::Xmm &op1 ,
1357+ const Xbyak::Operand &op2 , int cmp_predicate) {
13191358 if (is_valid_isa (avx))
1320- vcmpps (x1, x2, op , cmp_predicate);
1359+ vcmpps (x, op1, op2 , cmp_predicate);
13211360 else {
1322- if (x1.getIdx () != x2.getIdx ()) uni_vmovups (x1, x2);
1323- cmpps (x1, op, cmp_predicate);
1361+ if (x.getIdx () == op1.getIdx ()) {
1362+ cmpps (x, op2, cmp_predicate);
1363+ } else if (x.isEqualIfNotInherited (op2)) {
1364+ push (op1);
1365+ cmpps (op1, op2, cmp_predicate);
1366+ movups (x, op1);
1367+ pop (op1);
1368+ } else {
1369+ movups (x, op1);
1370+ cmpps (x, op2, cmp_predicate);
1371+ }
13241372 }
13251373 }
13261374 void uni_vcmpps (const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
0 commit comments