For Arm, we need to distinguish 3 cases: (1) the alignment required
when accessing Normal memory, (2) the alignment required when
accessing Device memory, and (3) the atomicity of the access.
When we added TLB_CHECK_ALIGNED, we assumed that cases 2 and 3 were
identical, and thus used memop_atomicity_bits for TLB_CHECK_ALIGNED.
This is incorrect for multiple reasons, including that the atomicity
of the access is adjusted depending on whether or not we are executing
within a serial context.
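
For illustration, the helper removed below derived the check size
from the MO_ATOM_* field alone (asserts for exposition only, not
part of the patch):

    /* Pair atomicity halves the size: a 16-byte pair checks 8 bytes. */
    assert(memop_atomicity_bits(MO_128 | MO_ATOM_IFALIGN_PAIR) == MO_64);
    /* No atomicity requirement means no alignment check at all. */
    assert(memop_atomicity_bits(MO_128 | MO_ATOM_NONE) == MO_8);

so once atomicity is relaxed, the Device alignment check vanishes
with it.
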
For Arm, what is true is that there is an underlying alignment
requirement for the access, and that Normal memory will support
unaligned accesses.
Introduce MO_ALIGN_TLB_ONLY to indicate that the alignment
specified in MO_AMASK applies only when the TLB entry has
TLB_CHECK_ALIGNED set; otherwise no alignment is required.
Introduce memop_tlb_alignment_bits, with an additional bool argument
that specifies whether TLB_CHECK_ALIGNED is set; memop_alignment_bits
becomes a wrapper that assumes it is not.
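
For example (exposition only; the helper is defined in the memop.h
hunk below):

    MemOp op = MO_32 | MO_ALIGN | MO_ALIGN_TLB_ONLY;

    /* Normal memory: TLB_CHECK_ALIGNED clear, no alignment enforced. */
    assert(memop_tlb_alignment_bits(op, false) == 0);
    /* Device memory: TLB_CHECK_ALIGNED set, natural 4-byte alignment. */
    assert(memop_tlb_alignment_bits(op, true) == 2);
    /* Plain MO_ALIGN still enforces alignment unconditionally. */
    assert(memop_alignment_bits(MO_32 | MO_ALIGN) == 2);
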
Remove memop_atomicity_bits as unused; it didn't properly
support MO_ATOM_SUBWORD anyway.
Update target/arm finalize_memop_atom to set MO_ALIGN_TLB_ONLY
when strict alignment isn't otherwise required.
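
For example (exposition only; s is the DisasContext, and align_mem
reflects SCTLR.A):

    MemOp op = finalize_memop_atom(s, MO_32, MO_ATOM_IFALIGN);
    /*
     * s->align_mem false: op contains MO_ALIGN | MO_ALIGN_TLB_ONLY,
     * so only Device memory enforces the 4-byte alignment.
     * s->align_mem true: op contains MO_ALIGN alone, so any
     * misaligned access faults, as before.
     */
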
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3171
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
include/exec/memop.h | 43 +++++++++++++++------------------
target/arm/tcg/translate.h | 4 +--
accel/tcg/cputlb.c | 13 +---------
target/arm/ptw.c | 2 +-
target/arm/tcg/translate-a64.c | 10 +++-----
target/arm/tcg/translate-neon.c | 2 +-
tcg/tcg.c | 10 +++++---
7 files changed, 35 insertions(+), 49 deletions(-)
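
As a reading aid for the memop.h hunk below, the resulting MemOp bit
layout (exposition only; the MO_SIZE, MO_SIGN, MO_BSWAP and MO_AMASK
positions are unchanged):

    /*
     *  [2:0]  MO_SIZE
     *  [3]    MO_SIGN
     *  [4]    MO_BSWAP
     *  [7:5]  MO_AMASK (MO_ALIGN_*)
     *  [8]    MO_ALIGN_TLB_ONLY (new)
     *  [9+]   MO_ATOM_* (MO_ATOM_SHIFT moves from 8 to 9)
     */
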
diff --git a/include/exec/memop.h b/include/exec/memop.h
index cf7da3362e..799b5b4221 100644
--- a/include/exec/memop.h
+++ b/include/exec/memop.h
@@ -72,6 +72,16 @@ typedef enum MemOp {
MO_ALIGN_64 = 6 << MO_ASHIFT,
MO_ALIGN = MO_AMASK,
+ /*
+ * MO_ALIGN_TLB_ONLY:
+ * Apply MO_AMASK only along the TCG slow path if TLB_CHECK_ALIGNED
+ * is set; otherwise unaligned access is permitted.
+ * This is used by target/arm, where unaligned accesses are
+ * permitted for pages marked Normal but aligned accesses are
+ * required for pages marked Device.
+ */
+ MO_ALIGN_TLB_ONLY = 1 << 8,
+
/*
* MO_ATOM_* describes the atomicity requirements of the operation:
* MO_ATOM_IFALIGN: the operation must be single-copy atomic if it
@@ -104,7 +114,7 @@ typedef enum MemOp {
* size of the operation, if aligned. This retains the behaviour
* from before this field was introduced.
*/
- MO_ATOM_SHIFT = 8,
+ MO_ATOM_SHIFT = 9,
MO_ATOM_IFALIGN = 0 << MO_ATOM_SHIFT,
MO_ATOM_IFALIGN_PAIR = 1 << MO_ATOM_SHIFT,
MO_ATOM_WITHIN16 = 2 << MO_ATOM_SHIFT,
@@ -169,16 +179,17 @@ static inline MemOp size_memop(unsigned size)
}
/**
- * memop_alignment_bits:
+ * memop_tlb_alignment_bits:
* @memop: MemOp value
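+ * @tlb_check: true if TLB_CHECK_ALIGNED is set in the TLB entry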
*
- * Extract the alignment size from the memop.
+ * Extract the alignment size for use with TLB_CHECK_ALIGNED.
*/
-static inline unsigned memop_alignment_bits(MemOp memop)
+static inline unsigned memop_tlb_alignment_bits(MemOp memop, bool tlb_check)
{
unsigned a = memop & MO_AMASK;
- if (a == MO_UNALN) {
+ if (a == MO_UNALN || (!tlb_check && (memop & MO_ALIGN_TLB_ONLY))) {
/* No alignment required. */
a = 0;
} else if (a == MO_ALIGN) {
@@ -191,28 +202,15 @@ static inline unsigned memop_alignment_bits(MemOp memop)
return a;
}
-/*
- * memop_atomicity_bits:
+/**
+ * memop_alignment_bits:
* @memop: MemOp value
*
- * Extract the atomicity size from the memop.
+ * Extract the alignment size from the memop.
*/
-static inline unsigned memop_atomicity_bits(MemOp memop)
+static inline unsigned memop_alignment_bits(MemOp memop)
{
- unsigned size = memop & MO_SIZE;
-
- switch (memop & MO_ATOM_MASK) {
- case MO_ATOM_NONE:
- size = MO_8;
- break;
- case MO_ATOM_IFALIGN_PAIR:
- case MO_ATOM_WITHIN16_PAIR:
- size = size ? size - 1 : 0;
- break;
- default:
- break;
- }
- return size;
+ return memop_tlb_alignment_bits(memop, false);
}
#endif
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 9a85ea74db..b62104b4ae 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -744,8 +744,8 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
*/
static inline MemOp finalize_memop_atom(DisasContext *s, MemOp opc, MemOp atom)
{
- if (s->align_mem && !(opc & MO_AMASK)) {
- opc |= MO_ALIGN;
+ if (!(opc & MO_AMASK)) {
+ opc |= MO_ALIGN | (s->align_mem ? 0 : MO_ALIGN_TLB_ONLY);
}
return opc | atom | s->be_data;
}
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 631f1fe135..fd1606c856 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1668,18 +1668,7 @@ static bool mmu_lookup1(CPUState *cpu, MMULookupPageData *data, MemOp memop,
if (likely(!maybe_resized)) {
/* Alignment has not been checked by tlb_fill_align. */
- int a_bits = memop_alignment_bits(memop);
-
- /*
- * This alignment check differs from the one above, in that this is
- * based on the atomicity of the operation. The intended use case is
- * the ARM memory type field of each PTE, where access to pages with
- * Device memory type require alignment.
- */
- if (unlikely(flags & TLB_CHECK_ALIGNED)) {
- int at_bits = memop_atomicity_bits(memop);
- a_bits = MAX(a_bits, at_bits);
- }
+ int a_bits = memop_tlb_alignment_bits(memop, flags & TLB_CHECK_ALIGNED);
if (unlikely(addr & ((1 << a_bits) - 1))) {
cpu_unaligned_access(cpu, addr, access_type, mmu_idx, ra);
}
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 23f6616811..2e6b149b2d 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -2352,7 +2352,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* CPUs with ARM_FEATURE_LPAE but not ARM_FEATURE_V7VE anyway.)
*/
if (device) {
- unsigned a_bits = memop_atomicity_bits(memop);
+ unsigned a_bits = memop_tlb_alignment_bits(memop, true);
if (address & ((1 << a_bits) - 1)) {
fi->type = ARMFault_Alignment;
goto do_fault;
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 3292d7cbfd..08b21d7dbf 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -3691,9 +3691,8 @@ static bool trans_STP(DisasContext *s, arg_ldstpair *a)
* In all cases, issue one operation with the correct atomicity.
*/
mop = a->sz + 1;
- if (s->align_mem) {
- mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
- }
+ mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
+ mop |= (s->align_mem ? 0 : MO_ALIGN_TLB_ONLY);
mop = finalize_memop_pair(s, mop);
if (a->sz == 2) {
TCGv_i64 tmp = tcg_temp_new_i64();
@@ -3742,9 +3741,8 @@ static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
* since that reuses the most code below.
*/
mop = a->sz + 1;
- if (s->align_mem) {
- mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
- }
+ mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
+ mop |= (s->align_mem ? 0 : MO_ALIGN_TLB_ONLY);
mop = finalize_memop_pair(s, mop);
if (a->sz == 2) {
int o2 = s->be_data == MO_LE ? 32 : 0;
diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c
index 844d2e29e4..e3c7d9217b 100644
--- a/target/arm/tcg/translate-neon.c
+++ b/target/arm/tcg/translate-neon.c
@@ -520,7 +520,7 @@ static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
if (a->align) {
align = pow2_align(a->align + 2); /* 4 ** a->align */
} else {
- align = s->align_mem ? MO_ALIGN : 0;
+ align = MO_ALIGN | (s->align_mem ? 0 : MO_ALIGN_TLB_ONLY);
}
/*
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 294762c283..fbf09f5c82 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -3039,20 +3039,22 @@ void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
case INDEX_op_qemu_ld2:
case INDEX_op_qemu_st2:
{
- const char *s_al, *s_op, *s_at;
+ const char *s_al, *s_tlb, *s_op, *s_at;
MemOpIdx oi = op->args[k++];
MemOp mop = get_memop(oi);
unsigned ix = get_mmuidx(oi);
+ s_tlb = mop & MO_ALIGN_TLB_ONLY ? "tlb+" : "";
s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
- mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
+ mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE |
+ MO_ATOM_MASK | MO_ALIGN_TLB_ONLY);
/* If all fields are accounted for, print symbolically. */
if (!mop && s_al && s_op && s_at) {
- col += ne_fprintf(f, ",%s%s%s,%u",
- s_at, s_al, s_op, ix);
+ col += ne_fprintf(f, ",%s%s%s%s,%u",
+ s_at, s_al, s_tlb, s_op, ix);
} else {
mop = get_memop(oi);
col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
--
2.43.0