[PATCH 066/147] include/exec: Move TLB_MMIO, TLB_DISCARD_WRITE to slow flags

Richard Henderson posted 147 patches 5 months, 3 weeks ago
There is a newer version of this series
[PATCH 066/147] include/exec: Move TLB_MMIO, TLB_DISCARD_WRITE to slow flags
Posted by Richard Henderson 5 months, 3 weeks ago
Recover two bits from the inline flags.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/tlb-flags.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/include/exec/tlb-flags.h b/include/exec/tlb-flags.h
index a0e51a4b37..54a6bae768 100644
--- a/include/exec/tlb-flags.h
+++ b/include/exec/tlb-flags.h
@@ -53,20 +53,15 @@
  * contain the page physical address.
  */
 #define TLB_NOTDIRTY        (1 << (TARGET_PAGE_BITS_MIN - 2))
-/* Set if TLB entry is an IO callback.  */
-#define TLB_MMIO            (1 << (TARGET_PAGE_BITS_MIN - 3))
-/* Set if TLB entry writes ignored.  */
-#define TLB_DISCARD_WRITE   (1 << (TARGET_PAGE_BITS_MIN - 4))
 /* Set if the slow path must be used; more flags in CPUTLBEntryFull. */
-#define TLB_FORCE_SLOW      (1 << (TARGET_PAGE_BITS_MIN - 5))
+#define TLB_FORCE_SLOW      (1 << (TARGET_PAGE_BITS_MIN - 3))
 
 /*
  * Use this mask to check interception with an alignment mask
  * in a TCG backend.
  */
 #define TLB_FLAGS_MASK \
-    (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \
-    | TLB_FORCE_SLOW | TLB_DISCARD_WRITE)
+    (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_FORCE_SLOW)
 
 /*
  * Flags stored in CPUTLBEntryFull.slow_flags[x].
@@ -78,8 +73,14 @@
 #define TLB_WATCHPOINT       (1 << 1)
 /* Set if TLB entry requires aligned accesses.  */
 #define TLB_CHECK_ALIGNED    (1 << 2)
+/* Set if TLB entry writes ignored.  */
+#define TLB_DISCARD_WRITE    (1 << 3)
+/* Set if TLB entry is an IO callback.  */
+#define TLB_MMIO             (1 << 4)
 
-#define TLB_SLOW_FLAGS_MASK  (TLB_BSWAP | TLB_WATCHPOINT | TLB_CHECK_ALIGNED)
+#define TLB_SLOW_FLAGS_MASK \
+    (TLB_BSWAP | TLB_WATCHPOINT | TLB_CHECK_ALIGNED | \
+     TLB_DISCARD_WRITE | TLB_MMIO)
 
 /* The two sets of flags must not overlap. */
 QEMU_BUILD_BUG_ON(TLB_FLAGS_MASK & TLB_SLOW_FLAGS_MASK);
-- 
2.43.0
Re: [PATCH 066/147] include/exec: Move TLB_MMIO, TLB_DISCARD_WRITE to slow flags
Posted by Jonathan Cameron via qemu-devel 5 months, 3 weeks ago
On Tue, 22 Apr 2025 12:26:55 -0700
Richard Henderson <richard.henderson@linaro.org> wrote:

> Recover two bits from the inline flags.


Hi Richard,

Early days but something (I'm fairly sure in this patch) is tripping up my favourite
TCG corner case of running code out of MMIO memory (interleaved CXL memory).

Only seeing it on arm64 tests so far which isn't upstream yet..
(guess what I was getting ready to post today)

Back trace is:

#0  0x0000555555fd4296 in cpu_atomic_fetch_andq_le_mmu (env=0x555557ee19b0, addr=18442241572520067072, val=18446744073701163007, oi=8244, retaddr=<optimized out>) at ../../accel/tcg/atomic_template.h:140
#1  0x00007fffb6894125 in code_gen_buffer ()
#2  0x0000555555fc4c46 in cpu_tb_exec (cpu=cpu@entry=0x555557ededf0, itb=itb@entry=0x7fffb6894000 <code_gen_buffer+200511443>, tb_exit=tb_exit@entry=0x7ffff4bfb744) at ../../accel/tcg/cpu-exec.c:455
#3  0x0000555555fc51c2 in cpu_loop_exec_tb (tb_exit=0x7ffff4bfb744, last_tb=<synthetic pointer>, pc=<optimized out>, tb=0x7fffb6894000 <code_gen_buffer+200511443>, cpu=0x555557ededf0) at ../../accel/tcg/cpu-exec.c:904
#4  cpu_exec_loop (cpu=cpu@entry=0x555557ededf0, sc=sc@entry=0x7ffff4bfb7f0) at ../../accel/tcg/cpu-exec.c:1018
#5  0x0000555555fc58f1 in cpu_exec_setjmp (cpu=cpu@entry=0x555557ededf0, sc=sc@entry=0x7ffff4bfb7f0) at ../../accel/tcg/cpu-exec.c:1035
#6  0x0000555555fc5f6c in cpu_exec (cpu=cpu@entry=0x555557ededf0) at ../../accel/tcg/cpu-exec.c:1061 
#7  0x0000555556146ac3 in tcg_cpu_exec (cpu=cpu@entry=0x555557ededf0) at ../../accel/tcg/tcg-accel-ops.c:81
#8  0x0000555556146ee3 in mttcg_cpu_thread_fn (arg=arg@entry=0x555557ededf0) at ../../accel/tcg/tcg-accel-ops-mttcg.c:94
#9  0x00005555561f6450 in qemu_thread_start (args=0x555557f8f430) at ../../util/qemu-thread-posix.c:541
#10 0x00007ffff7750aa4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:447
#11 0x00007ffff77ddc3c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78  

I haven't pushed out the rebased tree yet making this a truly awful bug report.

The pull request you sent with this in wasn't bisectable so this was a bit of a guessing
game. I see the seg fault only after this patch.

> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  include/exec/tlb-flags.h | 17 +++++++++--------
>  1 file changed, 9 insertions(+), 8 deletions(-)
> 
> diff --git a/include/exec/tlb-flags.h b/include/exec/tlb-flags.h
> index a0e51a4b37..54a6bae768 100644
> --- a/include/exec/tlb-flags.h
> +++ b/include/exec/tlb-flags.h
> @@ -53,20 +53,15 @@
>   * contain the page physical address.
>   */
>  #define TLB_NOTDIRTY        (1 << (TARGET_PAGE_BITS_MIN - 2))
> -/* Set if TLB entry is an IO callback.  */
> -#define TLB_MMIO            (1 << (TARGET_PAGE_BITS_MIN - 3))
> -/* Set if TLB entry writes ignored.  */
> -#define TLB_DISCARD_WRITE   (1 << (TARGET_PAGE_BITS_MIN - 4))
>  /* Set if the slow path must be used; more flags in CPUTLBEntryFull. */
> -#define TLB_FORCE_SLOW      (1 << (TARGET_PAGE_BITS_MIN - 5))
> +#define TLB_FORCE_SLOW      (1 << (TARGET_PAGE_BITS_MIN - 3))
>  
>  /*
>   * Use this mask to check interception with an alignment mask
>   * in a TCG backend.
>   */
>  #define TLB_FLAGS_MASK \
> -    (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \
> -    | TLB_FORCE_SLOW | TLB_DISCARD_WRITE)
> +    (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_FORCE_SLOW)
>  
>  /*
>   * Flags stored in CPUTLBEntryFull.slow_flags[x].
> @@ -78,8 +73,14 @@
>  #define TLB_WATCHPOINT       (1 << 1)
>  /* Set if TLB entry requires aligned accesses.  */
>  #define TLB_CHECK_ALIGNED    (1 << 2)
> +/* Set if TLB entry writes ignored.  */
> +#define TLB_DISCARD_WRITE    (1 << 3)
> +/* Set if TLB entry is an IO callback.  */
> +#define TLB_MMIO             (1 << 4)
>  
> -#define TLB_SLOW_FLAGS_MASK  (TLB_BSWAP | TLB_WATCHPOINT | TLB_CHECK_ALIGNED)
> +#define TLB_SLOW_FLAGS_MASK \
> +    (TLB_BSWAP | TLB_WATCHPOINT | TLB_CHECK_ALIGNED | \
> +     TLB_DISCARD_WRITE | TLB_MMIO)
>  
>  /* The two sets of flags must not overlap. */
>  QEMU_BUILD_BUG_ON(TLB_FLAGS_MASK & TLB_SLOW_FLAGS_MASK);
Re: [PATCH 066/147] include/exec: Move TLB_MMIO, TLB_DISCARD_WRITE to slow flags
Posted by Alistair Francis 5 months, 2 weeks ago
On Sat, Apr 26, 2025 at 3:36 AM Jonathan Cameron via
<qemu-devel@nongnu.org> wrote:
>
> On Tue, 22 Apr 2025 12:26:55 -0700
> Richard Henderson <richard.henderson@linaro.org> wrote:
>
> > Recover two bits from the inline flags.
>
>
> Hi Richard,
>
> Early days but something (I'm fairly sure in this patch) is tripping up my favourite
> TCG corner case of running code out of MMIO memory (interleaved CXL memory).
>
> Only seeing it on arm64 tests so far which isn't upstream yet..
> (guess what I was getting ready to post today)
>
> Back trace is:
>
> #0  0x0000555555fd4296 in cpu_atomic_fetch_andq_le_mmu (env=0x555557ee19b0, addr=18442241572520067072, val=18446744073701163007, oi=8244, retaddr=<optimized out>) at ../../accel/tcg/atomic_template.h:140
> #1  0x00007fffb6894125 in code_gen_buffer ()
> #2  0x0000555555fc4c46 in cpu_tb_exec (cpu=cpu@entry=0x555557ededf0, itb=itb@entry=0x7fffb6894000 <code_gen_buffer+200511443>, tb_exit=tb_exit@entry=0x7ffff4bfb744) at ../../accel/tcg/cpu-exec.c:455
> #3  0x0000555555fc51c2 in cpu_loop_exec_tb (tb_exit=0x7ffff4bfb744, last_tb=<synthetic pointer>, pc=<optimized out>, tb=0x7fffb6894000 <code_gen_buffer+200511443>, cpu=0x555557ededf0) at ../../accel/tcg/cpu-exec.c:904
> #4  cpu_exec_loop (cpu=cpu@entry=0x555557ededf0, sc=sc@entry=0x7ffff4bfb7f0) at ../../accel/tcg/cpu-exec.c:1018
> #5  0x0000555555fc58f1 in cpu_exec_setjmp (cpu=cpu@entry=0x555557ededf0, sc=sc@entry=0x7ffff4bfb7f0) at ../../accel/tcg/cpu-exec.c:1035
> #6  0x0000555555fc5f6c in cpu_exec (cpu=cpu@entry=0x555557ededf0) at ../../accel/tcg/cpu-exec.c:1061
> #7  0x0000555556146ac3 in tcg_cpu_exec (cpu=cpu@entry=0x555557ededf0) at ../../accel/tcg/tcg-accel-ops.c:81
> #8  0x0000555556146ee3 in mttcg_cpu_thread_fn (arg=arg@entry=0x555557ededf0) at ../../accel/tcg/tcg-accel-ops-mttcg.c:94
> #9  0x00005555561f6450 in qemu_thread_start (args=0x555557f8f430) at ../../util/qemu-thread-posix.c:541
> #10 0x00007ffff7750aa4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:447
> #11 0x00007ffff77ddc3c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78
>
> I haven't pushed out the rebased tree yet making this a truly awful bug report.
>
> The pull request you sent with this in wasn't bisectable so this was a bit of a guessing
> game. I see the seg fault only after this patch.

I see the same thing with some RISC-V tests. I can provide the test
images if you want as well

build/qemu-system-riscv64 -machine virt -cpu rv64,h=false -m 1G \
    -serial mon:stdio -serial null -nographic \
    -append "root=/dev/vda ro" \
    -netdev user,id=net0 -device virtio-net-device,netdev=net0 \
    -smp 4 -d guest_errors \
    -bios none \
    -device loader,file=./images/qemuriscv64/buildroot/Image,addr=0x80200000 \
    -kernel ./images/qemuriscv64/buildroot/fw_jump.elf \
    -drive id=disk0,file=./images/qemuriscv64/buildroot/rootfs.ext2,if=none,format=raw \
    -device virtio-blk-device,drive=disk0


#0  0x000055555598b0f1 in cpu_atomic_xchgl_le_mmu (env=0x5555567ff290,
addr=33554444, val=0, oi=3619, retaddr=<optimized out>)
   at ../accel/tcg/atomic_template.h:111
#1  0x00007fffb2c5e537 in code_gen_buffer ()
#2  0x000055555597c661 in cpu_tb_exec
   (cpu=cpu@entry=0x5555567fc6d0, itb=itb@entry=0x7fffb2c5e400
<code_gen_buffer+113632211>, tb_exit=tb_exit@entry=0x7fff47ffe764)
   at ../accel/tcg/cpu-exec.c:453
#3  0x000055555597cb4a in cpu_loop_exec_tb
   (cpu=0x5555567fc6d0, tb=0x7fffb2c5e400 <code_gen_buffer+113632211>,
pc=<optimized out>, last_tb=<synthetic pointer>,
tb_exit=0x7fff47ffe764)
   at ../accel/tcg/cpu-exec.c:903
#4  cpu_exec_loop (cpu=cpu@entry=0x5555567fc6d0,
sc=sc@entry=0x7fff47ffe810) at ../accel/tcg/cpu-exec.c:1017
#5  0x000055555597d23d in cpu_exec_setjmp
(cpu=cpu@entry=0x5555567fc6d0, sc=sc@entry=0x7fff47ffe810) at
../accel/tcg/cpu-exec.c:1034
#6  0x000055555597d909 in cpu_exec (cpu=cpu@entry=0x5555567fc6d0) at
../accel/tcg/cpu-exec.c:1060
#7  0x0000555555af1c62 in tcg_cpu_exec (cpu=cpu@entry=0x5555567fc6d0)
at ../accel/tcg/tcg-accel-ops.c:81
#8  0x0000555555af2012 in mttcg_cpu_thread_fn (arg=0x5555567fc6d0) at
../accel/tcg/tcg-accel-ops-mttcg.c:94
#9  0x0000555555b956c7 in qemu_thread_start (args=0x5555569e8da0) at
../util/qemu-thread-posix.c:541
#10 0x00007ffff77f2f14 in start_thread () at /lib64/libc.so.6
#11 0x00007ffff7875aac in __clone3 () at /lib64/libc.so.6

Alistair
Re: [PATCH 066/147] include/exec: Move TLB_MMIO, TLB_DISCARD_WRITE to slow flags
Posted by Richard Henderson 5 months, 2 weeks ago
On 4/29/25 14:35, Alistair Francis wrote:
> On Sat, Apr 26, 2025 at 3:36 AM Jonathan Cameron via
> <qemu-devel@nongnu.org> wrote:
>>
>> On Tue, 22 Apr 2025 12:26:55 -0700
>> Richard Henderson <richard.henderson@linaro.org> wrote:
>>
>>> Recover two bits from the inline flags.
>>
>>
>> Hi Richard,
>>
>> Early days but something (I'm fairly sure in this patch) is tripping up my favourite
>> TCG corner case of running code out of MMIO memory (interleaved CXL memory).
>>
>> Only seeing it on arm64 tests so far which isn't upstream yet..
>> (guess what I was getting ready to post today)
>>
>> Back trace is:
>>
>> #0  0x0000555555fd4296 in cpu_atomic_fetch_andq_le_mmu (env=0x555557ee19b0, addr=18442241572520067072, val=18446744073701163007, oi=8244, retaddr=<optimized out>) at ../../accel/tcg/atomic_template.h:140
>> #1  0x00007fffb6894125 in code_gen_buffer ()
>> #2  0x0000555555fc4c46 in cpu_tb_exec (cpu=cpu@entry=0x555557ededf0, itb=itb@entry=0x7fffb6894000 <code_gen_buffer+200511443>, tb_exit=tb_exit@entry=0x7ffff4bfb744) at ../../accel/tcg/cpu-exec.c:455
>> #3  0x0000555555fc51c2 in cpu_loop_exec_tb (tb_exit=0x7ffff4bfb744, last_tb=<synthetic pointer>, pc=<optimized out>, tb=0x7fffb6894000 <code_gen_buffer+200511443>, cpu=0x555557ededf0) at ../../accel/tcg/cpu-exec.c:904
>> #4  cpu_exec_loop (cpu=cpu@entry=0x555557ededf0, sc=sc@entry=0x7ffff4bfb7f0) at ../../accel/tcg/cpu-exec.c:1018
>> #5  0x0000555555fc58f1 in cpu_exec_setjmp (cpu=cpu@entry=0x555557ededf0, sc=sc@entry=0x7ffff4bfb7f0) at ../../accel/tcg/cpu-exec.c:1035
>> #6  0x0000555555fc5f6c in cpu_exec (cpu=cpu@entry=0x555557ededf0) at ../../accel/tcg/cpu-exec.c:1061
>> #7  0x0000555556146ac3 in tcg_cpu_exec (cpu=cpu@entry=0x555557ededf0) at ../../accel/tcg/tcg-accel-ops.c:81
>> #8  0x0000555556146ee3 in mttcg_cpu_thread_fn (arg=arg@entry=0x555557ededf0) at ../../accel/tcg/tcg-accel-ops-mttcg.c:94
>> #9  0x00005555561f6450 in qemu_thread_start (args=0x555557f8f430) at ../../util/qemu-thread-posix.c:541
>> #10 0x00007ffff7750aa4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:447
>> #11 0x00007ffff77ddc3c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78
>>
>> I haven't pushed out the rebased tree yet making this a truly awful bug report.
>>
>> The pull request you sent with this in wasn't bisectable so this was a bit of a guessing
>> game. I see the seg fault only after this patch.
> 
> I see the same thing with some RISC-V tests. I can provide the test
> images if you want as well


Yes please.


r~

Re: [PATCH 066/147] include/exec: Move TLB_MMIO, TLB_DISCARD_WRITE to slow flags
Posted by Pierrick Bouvier 5 months, 3 weeks ago
On 4/22/25 12:26, Richard Henderson wrote:
> Recover two bits from the inline flags.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   include/exec/tlb-flags.h | 17 +++++++++--------
>   1 file changed, 9 insertions(+), 8 deletions(-)
>

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>