x86_64: Optimize arithmetic instructions

bjorng · Nov 24, 2023 · ca94548 · ca94548
1 parent bfb96c5
commit ca94548
Showing 1 changed file with 52 additions and 43 deletions.
diff --git a/erts/emulator/beam/jit/x86/instr_arith.cpp b/erts/emulator/beam/jit/x86/instr_arith.cpp
@@ -174,31 +174,30 @@ void BeamModuleAssembler::emit_i_plus(const ArgSource &LHS,
                                       const ArgSource &RHS,
                                       const ArgLabel &Fail,
                                       const ArgRegister &Dst) {
+    bool is_rhs_literal = false;
+    Sint rhs_untagged;
+
     bool small_result = is_sum_small_if_args_are_small(LHS, RHS);
 
-    if (always_small(LHS) && always_small(RHS) && small_result) {
-        /* Since we don't need the order on this path (no exceptions), we'll
-         * simplify the code below by shuffling constants to the right-hand
-         * side. */
-        const ArgSource A = LHS.isSmall() ? RHS : LHS,
-                        B = LHS.isSmall() ? LHS : RHS;
+    if (RHS.isSmall()) {
+        /* Must be signed for the template magic in isInt32 to work for
+         * negative numbers. */
+        rhs_untagged = RHS.as<ArgSmall>().getSigned() << _TAG_IMMED1_SIZE;
+        is_rhs_literal = Support::isInt32(rhs_untagged);
+    }
 
+    if (always_small(LHS) && always_small(RHS) && small_result) {
         comment("add without overflow check");
-        mov_arg(RET, A);
 
-        if (B.isSmall()) {
-            /* Must be signed for the template magic in isInt32 to work for
-             * negative numbers. */
-            Sint untagged = B.as<ArgSmall>().getSigned() << _TAG_IMMED1_SIZE;
+        mov_arg(RET, LHS);
 
-            if (Support::isInt32(untagged)) {
-                a.add(RET, imm(untagged));
-            } else {
-                mov_imm(ARG2, B.as<ArgSmall>().get() & ~_TAG_IMMED1_MASK);
-                a.add(RET, ARG2);
-            }
+        if (is_rhs_literal) {
+            a.add(RET, imm(rhs_untagged));
+        } else if (RHS.isSmall()) {
+            mov_imm(ARG2, RHS.as<ArgSmall>().get() & ~_TAG_IMMED1_MASK);
+            a.add(RET, ARG2);
         } else {
-            mov_arg(ARG2, B);
+            mov_arg(ARG2, RHS);
             a.lea(RET, x86::qword_ptr(RET, ARG2, 0, -_TAG_IMMED1_SMALL));
         }
 
@@ -216,13 +215,13 @@ void BeamModuleAssembler::emit_i_plus(const ArgSource &LHS,
     } else {
         emit_are_both_small(mixed, LHS, ARG2, RHS, ARG3);
 
-        a.mov(RET, ARG2);
-        a.and_(RET, imm(~_TAG_IMMED1_MASK));
-        a.add(RET, ARG3);
         if (small_result) {
             comment("skipped overflow test because the result is always small");
+            a.lea(RET, x86::qword_ptr(ARG2, ARG3, 0, -_TAG_IMMED1_SMALL));
             a.short_().jmp(next);
         } else {
+            a.lea(RET, x86::qword_ptr(ARG2, -_TAG_IMMED1_SMALL));
+            a.add(RET, ARG3);
             a.short_().jno(next);
         }
     }
@@ -297,23 +296,26 @@ void BeamModuleAssembler::emit_i_minus(const ArgSource &LHS,
                                        const ArgSource &RHS,
                                        const ArgLabel &Fail,
                                        const ArgRegister &Dst) {
+    bool is_rhs_literal = false;
+    Sint rhs_untagged = 0;
     bool small_result = is_diff_small_if_args_are_small(LHS, RHS);
 
+    if (RHS.isSmall()) {
+        /* Must be signed for the template magic in isInt32 to work for
+         * negative numbers. */
+        rhs_untagged = RHS.as<ArgSmall>().getSigned() << _TAG_IMMED1_SIZE;
+        is_rhs_literal = Support::isInt32(rhs_untagged);
+    }
+
     if (always_small(LHS) && always_small(RHS) && small_result) {
         comment("subtract without overflow check");
         mov_arg(RET, LHS);
 
-        if (RHS.isSmall()) {
-            /* Must be signed for the template magic in isInt32 to work for
-             * negative numbers. */
-            Sint untagged = RHS.as<ArgSmall>().getSigned() << _TAG_IMMED1_SIZE;
-
-            if (Support::isInt32(untagged)) {
-                a.sub(RET, imm(untagged));
-            } else {
-                mov_imm(ARG2, RHS.as<ArgSmall>().get() & ~_TAG_IMMED1_MASK);
-                a.sub(RET, ARG2);
-            }
+        if (is_rhs_literal) {
+            a.sub(RET, imm(rhs_untagged));
+        } else if (RHS.isSmall()) {
+            mov_imm(ARG2, RHS.as<ArgSmall>().get() & ~_TAG_IMMED1_MASK);
+            a.sub(RET, ARG2);
         } else {
             mov_arg(ARG2, RHS);
             a.and_(ARG2, imm(~_TAG_IMMED1_MASK));
@@ -337,14 +339,21 @@ void BeamModuleAssembler::emit_i_minus(const ArgSource &LHS,
         if (small_result) {
             comment("skipped overflow test because the result is always small");
             a.mov(RET, ARG2);
-            a.and_(ARG3, imm(~_TAG_IMMED1_MASK));
-            a.sub(RET, ARG3);
+            if (is_rhs_literal) {
+                a.sub(RET, imm(rhs_untagged));
+            } else {
+                a.and_(ARG3, imm(~_TAG_IMMED1_MASK));
+                a.sub(RET, ARG3);
+            }
             a.short_().jmp(next);
         } else {
             a.mov(RET, ARG2);
-            a.mov(ARG4, ARG3);
-            a.and_(ARG4, imm(~_TAG_IMMED1_MASK));
-            a.sub(RET, ARG4);
+            if (is_rhs_literal) {
+                a.sub(RET, imm(rhs_untagged));
+            } else {
+                a.lea(ARG4, x86::qword_ptr(ARG3, -_TAG_IMMED1_SMALL));
+                a.sub(RET, ARG4);
+            }
             a.short_().jno(next);
         }
     }
@@ -546,7 +555,7 @@ void BeamGlobalAssembler::emit_int_div_rem_guard_shared() {
  * Quotient is returned in RAX, remainder in RDX. */
 void BeamGlobalAssembler::emit_int_div_rem_body_shared() {
     Label div_zero = a.newLabel(), generic_div = a.newLabel(),
-          generic_error = a.newLabel();
+        generic_error = a.newLabel();
 
     emit_enter_frame();
 
@@ -916,7 +925,7 @@ void BeamGlobalAssembler::emit_mul_add_guard_shared() {
  * Result is returned in RET. */
 void BeamGlobalAssembler::emit_mul_add_body_shared() {
     Label mul_only = a.newLabel(), error = a.newLabel(),
-          mul_error = a.newLabel(), do_error = a.newLabel();
+        mul_error = a.newLabel(), do_error = a.newLabel();
 
     emit_enter_frame();
     emit_enter_runtime();
@@ -1006,7 +1015,7 @@ void BeamModuleAssembler::emit_i_mul_add(const ArgLabel &Fail,
     bool is_sum_small = is_sum_small_if_args_are_small(Src3, Src4);
     bool sometimes_small = !(Src2.isLiteral() || Src4.isLiteral());
     bool is_increment_zero =
-            Src4.isSmall() && Src4.as<ArgSmall>().getSigned() == 0;
+        Src4.isSmall() && Src4.as<ArgSmall>().getSigned() == 0;
     Sint factor = 0;
     int left_shift = -1;
 
@@ -1115,11 +1124,11 @@ void BeamModuleAssembler::emit_i_mul_add(const ArgLabel &Fail,
             a.and_(RETd, ARG3.r32());
             a.and_(RETd, ARG4.r32());
             if (always_one_of<BeamTypeId::Integer, BeamTypeId::AlwaysBoxed>(
-                        Src1) &&
+                                                                            Src1) &&
                 always_one_of<BeamTypeId::Integer, BeamTypeId::AlwaysBoxed>(
-                        Src2) &&
+                                                                            Src2) &&
                 always_one_of<BeamTypeId::Integer, BeamTypeId::AlwaysBoxed>(
-                        Src4)) {
+                                                                            Src4)) {
                 emit_is_not_boxed(mixed, RET);
             } else {
                 a.and_(RETb, imm(_TAG_IMMED1_MASK));