target-i386: defer VMEXIT to do_interrupt

[Qemu-devel] [PATCH] target-i386: defer VMEXIT to do_interrupt

Posted by Paolo Bonzini 8 years, 11 months ago

Paths through the softmmu code during code generation now need to be audited
to check for double locking of tb_lock.  In particular, VMEXIT can take tb_lock
through cpu_vmexit -> cpu_x86_update_cr4 -> tlb_flush.

To avoid this, split VMEXIT delivery in two parts, similar to what is done with
exceptions.  cpu_vmexit only records the VMEXIT exit code and information, and
cc->do_interrupt can then deliver it when it is safe to take the lock.

Reported-by: Alexander Boettcher <alexander.boettcher@genode-labs.com>
Suggested-by: Richard Henderson <rth@twiddle.net>
Tested-by: Alexander Boettcher <alexander.boettcher@genode-labs.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/cpu.h        |  2 ++
 target/i386/seg_helper.c | 20 +++++++++++---------
 target/i386/svm_helper.c | 22 +++++++++++++---------
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index ac2ad6d..40a6ff7 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -694,6 +694,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 
 #define EXCP_SYSCALL    0x100 /* only happens in user only emulation
                                  for syscall instruction */
+#define EXCP_VMEXIT     0x100
 
 /* i386-specific interrupt pending bits.  */
 #define CPU_INTERRUPT_POLL      CPU_INTERRUPT_TGT_EXT_1
@@ -1629,6 +1630,7 @@ void cpu_svm_check_intercept_param(CPUX86State *env1, uint32_t type,
                                    uint64_t param, uintptr_t retaddr);
 void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code, uint64_t exit_info_1,
                 uintptr_t retaddr);
+void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1);
 
 /* seg_helper.c */
 void do_interrupt_x86_hardirq(CPUX86State *env, int intno, int is_hw);
diff --git a/target/i386/seg_helper.c b/target/i386/seg_helper.c
index 5c845dc..0374031 100644
--- a/target/i386/seg_helper.c
+++ b/target/i386/seg_helper.c
@@ -1297,15 +1297,17 @@ void x86_cpu_do_interrupt(CPUState *cs)
     /* successfully delivered */
     env->old_exception = -1;
 #else
-    /* simulate a real cpu exception. On i386, it can
-       trigger new exceptions, but we do not handle
-       double or triple faults yet. */
-    do_interrupt_all(cpu, cs->exception_index,
-                     env->exception_is_int,
-                     env->error_code,
-                     env->exception_next_eip, 0);
-    /* successfully delivered */
-    env->old_exception = -1;
+    if (cs->exception_index >= EXCP_VMEXIT) {
+        assert(env->old_exception == -1);
+        do_vmexit(env, cs->exception_index - EXCP_VMEXIT, env->error_code);
+    } else {
+        do_interrupt_all(cpu, cs->exception_index,
+                         env->exception_is_int,
+                         env->error_code,
+                         env->exception_next_eip, 0);
+        /* successfully delivered */
+        env->old_exception = -1;
+    }
 #endif
 }
 
diff --git a/target/i386/svm_helper.c b/target/i386/svm_helper.c
index 78d8df4..59e8b50 100644
--- a/target/i386/svm_helper.c
+++ b/target/i386/svm_helper.c
@@ -580,12 +580,10 @@ void helper_svm_check_io(CPUX86State *env, uint32_t port, uint32_t param,
     }
 }
 
-/* Note: currently only 32 bits of exit_code are used */
 void cpu_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1,
                 uintptr_t retaddr)
 {
     CPUState *cs = CPU(x86_env_get_cpu(env));
-    uint32_t int_ctl;
 
     if (retaddr) {
         cpu_restore_state(cs, retaddr);
@@ -598,6 +596,19 @@ void cpu_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1,
                                                    control.exit_info_2)),
                   env->eip);
 
+    cs->exception_index = EXCP_VMEXIT + exit_code;
+    env->error_code = exit_info_1;
+
+    /* remove any pending exception */
+    env->old_exception = -1;
+    cpu_loop_exit(cs);
+}
+
+void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1)
+{
+    CPUState *cs = CPU(x86_env_get_cpu(env));
+    uint32_t int_ctl;
+
     if (env->hflags & HF_INHIBIT_IRQ_MASK) {
         x86_stl_phys(cs,
                  env->vm_vmcb + offsetof(struct vmcb, control.int_state),
@@ -759,13 +770,6 @@ void cpu_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1,
     /* If the host's rIP reloaded by #VMEXIT is outside the limit of the
        host's code segment or non-canonical (in the case of long mode), a
        #GP fault is delivered inside the host. */
-
-    /* remove any pending exception */
-    cs->exception_index = -1;
-    env->error_code = 0;
-    env->old_exception = -1;
-
-    cpu_loop_exit(cs);
 }
 
 #endif
-- 
2.9.3

Re: [Qemu-devel] [PATCH] target-i386: defer VMEXIT to do_interrupt

Posted by Alex Bennée 8 years, 11 months ago

Paolo Bonzini <pbonzini@redhat.com> writes:

> Paths through the softmmu code during code generation now need to be audited
> to check for double locking of tb_lock.  In particular, VMEXIT can take tb_lock
> through cpu_vmexit -> cpu_x86_update_cr4 -> tlb_flush.
>
> To avoid this, split VMEXIT delivery in two parts, similar to what is done with
> exceptions.  cpu_vmexit only records the VMEXIT exit code and information, and
> cc->do_interrupt can then deliver it when it is safe to take the lock.
>
> Reported-by: Alexander Boettcher <alexander.boettcher@genode-labs.com>
> Suggested-by: Richard Henderson <rth@twiddle.net>
> Tested-by: Alexander Boettcher <alexander.boettcher@genode-labs.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Looks good to me. When I ran it against Alexander's test case I got:

  [init -> log_terminal]
  [init -> log_terminal] [ 0] CORE:0:0:0 10:2:3:0 [0] AMD Phenom(tm) 9550 Quad-Core Processor
  [init -> log_terminal] [ 0] Killed EC:0xc002c160 SC:0xc002d100 V:0xd CS:0x1b EIP:0x14455e CR2:0xe0004004 ERR:0x0 (PT not found) Pd::root

But that could be because I'm running remotely in a terminal environment
with a null display. I did test against my known good x86 setup and gave
it some stress and it looks good on that. As long as Alexander is happy
with his testing I'll snarf this into my series and post today.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>


> ---
>  target/i386/cpu.h        |  2 ++
>  target/i386/seg_helper.c | 20 +++++++++++---------
>  target/i386/svm_helper.c | 22 +++++++++++++---------
>  3 files changed, 26 insertions(+), 18 deletions(-)
>
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index ac2ad6d..40a6ff7 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -694,6 +694,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
>
>  #define EXCP_SYSCALL    0x100 /* only happens in user only emulation
>                                   for syscall instruction */
> +#define EXCP_VMEXIT     0x100
>
>  /* i386-specific interrupt pending bits.  */
>  #define CPU_INTERRUPT_POLL      CPU_INTERRUPT_TGT_EXT_1
> @@ -1629,6 +1630,7 @@ void cpu_svm_check_intercept_param(CPUX86State *env1, uint32_t type,
>                                     uint64_t param, uintptr_t retaddr);
>  void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code, uint64_t exit_info_1,
>                  uintptr_t retaddr);
> +void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1);
>
>  /* seg_helper.c */
>  void do_interrupt_x86_hardirq(CPUX86State *env, int intno, int is_hw);
> diff --git a/target/i386/seg_helper.c b/target/i386/seg_helper.c
> index 5c845dc..0374031 100644
> --- a/target/i386/seg_helper.c
> +++ b/target/i386/seg_helper.c
> @@ -1297,15 +1297,17 @@ void x86_cpu_do_interrupt(CPUState *cs)
>      /* successfully delivered */
>      env->old_exception = -1;
>  #else
> -    /* simulate a real cpu exception. On i386, it can
> -       trigger new exceptions, but we do not handle
> -       double or triple faults yet. */
> -    do_interrupt_all(cpu, cs->exception_index,
> -                     env->exception_is_int,
> -                     env->error_code,
> -                     env->exception_next_eip, 0);
> -    /* successfully delivered */
> -    env->old_exception = -1;
> +    if (cs->exception_index >= EXCP_VMEXIT) {
> +        assert(env->old_exception == -1);
> +        do_vmexit(env, cs->exception_index - EXCP_VMEXIT, env->error_code);
> +    } else {
> +        do_interrupt_all(cpu, cs->exception_index,
> +                         env->exception_is_int,
> +                         env->error_code,
> +                         env->exception_next_eip, 0);
> +        /* successfully delivered */
> +        env->old_exception = -1;
> +    }
>  #endif
>  }
>
> diff --git a/target/i386/svm_helper.c b/target/i386/svm_helper.c
> index 78d8df4..59e8b50 100644
> --- a/target/i386/svm_helper.c
> +++ b/target/i386/svm_helper.c
> @@ -580,12 +580,10 @@ void helper_svm_check_io(CPUX86State *env, uint32_t port, uint32_t param,
>      }
>  }
>
> -/* Note: currently only 32 bits of exit_code are used */
>  void cpu_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1,
>                  uintptr_t retaddr)
>  {
>      CPUState *cs = CPU(x86_env_get_cpu(env));
> -    uint32_t int_ctl;
>
>      if (retaddr) {
>          cpu_restore_state(cs, retaddr);
> @@ -598,6 +596,19 @@ void cpu_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1,
>                                                     control.exit_info_2)),
>                    env->eip);
>
> +    cs->exception_index = EXCP_VMEXIT + exit_code;
> +    env->error_code = exit_info_1;
> +
> +    /* remove any pending exception */
> +    env->old_exception = -1;
> +    cpu_loop_exit(cs);
> +}
> +
> +void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1)
> +{
> +    CPUState *cs = CPU(x86_env_get_cpu(env));
> +    uint32_t int_ctl;
> +
>      if (env->hflags & HF_INHIBIT_IRQ_MASK) {
>          x86_stl_phys(cs,
>                   env->vm_vmcb + offsetof(struct vmcb, control.int_state),
> @@ -759,13 +770,6 @@ void cpu_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1,
>      /* If the host's rIP reloaded by #VMEXIT is outside the limit of the
>         host's code segment or non-canonical (in the case of long mode), a
>         #GP fault is delivered inside the host. */
> -
> -    /* remove any pending exception */
> -    cs->exception_index = -1;
> -    env->error_code = 0;
> -    env->old_exception = -1;
> -
> -    cpu_loop_exit(cs);
>  }
>
>  #endif


--
Alex Bennée

Re: [Qemu-devel] [PATCH] target-i386: defer VMEXIT to do_interrupt

Posted by Alexander Boettcher 8 years, 11 months ago

On 07.03.2017 15:35, Alex Bennée wrote:
> 
> Paolo Bonzini <pbonzini@redhat.com> writes:
> 
>> Paths through the softmmu code during code generation now need to be audited
>> to check for double locking of tb_lock.  In particular, VMEXIT can take tb_lock
>> through cpu_vmexit -> cpu_x86_update_cr4 -> tlb_flush.
>>
>> To avoid this, split VMEXIT delivery in two parts, similar to what is done with
>> exceptions.  cpu_vmexit only records the VMEXIT exit code and information, and
>> cc->do_interrupt can then deliver it when it is safe to take the lock.
>>
>> Reported-by: Alexander Boettcher <alexander.boettcher@genode-labs.com>
>> Suggested-by: Richard Henderson <rth@twiddle.net>
>> Tested-by: Alexander Boettcher <alexander.boettcher@genode-labs.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> Looks good to me. When I ran it against Alexander's test case I got:
> 
>   [init -> log_terminal]
>   [init -> log_terminal] [ 0] CORE:0:0:0 10:2:3:0 [0] AMD Phenom(tm) 9550 Quad-Core Processor
>   [init -> log_terminal] [ 0] Killed EC:0xc002c160 SC:0xc002d100 V:0xd CS:0x1b EIP:0x14455e CR2:0xe0004004 ERR:0x0 (PT not found) Pd::root
> 
> But that could be because I'm running remotely in a terminal environment
> with a null display. I did test against my known good x86 setup and gave
> it some stress and it looks good on that. As long as Alexander is happy
> with his testing I'll snarf this into my series and post today.

That is fine. The GP (0xd) exception will be fixed as soon as the
reorder SVM I/O patch get into qemu master ( I posted it some days ago
with subject "[PATCH± SVM I/O permission bitmap for user-level (ring-3)
code ignored" )

Thanks!

-- 
Alexander Boettcher
Genode Labs

http://www.genode-labs.com - http://www.genode.org

Genode Labs GmbH - Amtsgericht Dresden - HRB 28424 - Sitz Dresden
Geschäftsführer: Dr.-Ing. Norman Feske, Christian Helmuth