[PATCH 08/18] target/i386/tcg: simplify effective address calculation

Paolo Bonzini posted 18 patches 6 days, 14 hours ago
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, Zhao Liu <zhao1.liu@intel.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>
[PATCH 08/18] target/i386/tcg: simplify effective address calculation
Posted by Paolo Bonzini 6 days, 14 hours ago
Split gen_lea_v_seg_dest into three simple phases (extend from the
address size if it is narrower than the effective address size, add
the segment base, final extend), with an optimization for known-zero
bases to avoid back-to-back extensions.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/translate.c | 64 ++++++++++++-------------------------
 1 file changed, 20 insertions(+), 44 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 0cb87d02012..2ab3c2ac663 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -627,54 +627,30 @@ static TCGv eip_cur_tl(DisasContext *s)
 static void gen_lea_v_seg_dest(DisasContext *s, MemOp aflag, TCGv dest, TCGv a0,
                                int def_seg, int ovr_seg)
 {
-    switch (aflag) {
-#ifdef TARGET_X86_64
-    case MO_64:
-        if (ovr_seg < 0) {
-            tcg_gen_mov_tl(dest, a0);
-            return;
+    int easize;
+    bool has_base;
+
+    if (ovr_seg < 0) {
+        ovr_seg = def_seg;
+    }
+
+    has_base = ovr_seg >= 0 && (ADDSEG(s) || ovr_seg >= R_FS);
+    easize = CODE64(s) ? MO_64 : MO_32;
+
+    if (has_base) {
+        if (aflag < easize) {
+            /* Truncate before summing base.  */
+            tcg_gen_ext_tl(dest, a0, aflag);
+            a0 = dest;
         }
-        break;
-#endif
-    case MO_32:
-        /* 32 bit address */
-        if (ovr_seg < 0 && ADDSEG(s)) {
-            ovr_seg = def_seg;
-        }
-        if (ovr_seg < 0) {
-            tcg_gen_ext32u_tl(dest, a0);
-            return;
-        }
-        break;
-    case MO_16:
-        /* 16 bit address */
-        tcg_gen_ext16u_tl(dest, a0);
+        tcg_gen_add_tl(dest, a0, cpu_seg_base[ovr_seg]);
         a0 = dest;
-        if (ovr_seg < 0) {
-            if (ADDSEG(s)) {
-                ovr_seg = def_seg;
-            } else {
-                return;
-            }
-        }
-        break;
-    default:
-        g_assert_not_reached();
+    } else {
+        /* Possibly one extension, but that's it.  */
+        easize = aflag;
     }
 
-    if (ovr_seg >= 0) {
-        TCGv seg = cpu_seg_base[ovr_seg];
-
-        if (aflag == MO_64) {
-            tcg_gen_add_tl(dest, a0, seg);
-        } else if (CODE64(s)) {
-            tcg_gen_ext32u_tl(dest, a0);
-            tcg_gen_add_tl(dest, dest, seg);
-        } else {
-            tcg_gen_add_tl(dest, a0, seg);
-            tcg_gen_ext32u_tl(dest, dest);
-        }
-    }
+    tcg_gen_ext_tl(dest, a0, easize);
 }
 
 static void gen_lea_v_seg(DisasContext *s, TCGv a0,
-- 
2.52.0
Re: [PATCH 08/18] target/i386/tcg: simplify effective address calculation
Posted by Richard Henderson 5 days, 11 hours ago
On 12/10/25 07:16, Paolo Bonzini wrote:
> Split gen_lea_v_seg_dest into three simple phases (extend from
> 16 bits, add, final extend), with optimization for known-zero bases
> to avoid back-to-back extensions.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   target/i386/tcg/translate.c | 64 ++++++++++++-------------------------
>   1 file changed, 20 insertions(+), 44 deletions(-)
> 
> diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
> index 0cb87d02012..2ab3c2ac663 100644
> --- a/target/i386/tcg/translate.c
> +++ b/target/i386/tcg/translate.c
> @@ -627,54 +627,30 @@ static TCGv eip_cur_tl(DisasContext *s)
>   static void gen_lea_v_seg_dest(DisasContext *s, MemOp aflag, TCGv dest, TCGv a0,
>                                  int def_seg, int ovr_seg)
>   {
> -    switch (aflag) {
> -#ifdef TARGET_X86_64
> -    case MO_64:
> -        if (ovr_seg < 0) {
> -            tcg_gen_mov_tl(dest, a0);
> -            return;
> +    int easize;
> +    bool has_base;
> +
> +    if (ovr_seg < 0) {
> +        ovr_seg = def_seg;
> +    }
> +
> +    has_base = ovr_seg >= 0 && (ADDSEG(s) || ovr_seg >= R_FS);

I guess def_seg is -1 for LEA, so ovr_seg can still be -1 after this fallback.
I wonder if it would be clearer to resolve the default-vs-override segment earlier, in decode, instead of duplicating that choice here?

Anyway, for this line, it might be clearer written as

     has_base = ovr_seg >= R_FS || (ovr_seg >= 0 && ADDSEG(s));

even though the end result is the same.

Nice cleanup.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~