[PATCH] target/riscv: fix the issue of guest reboot then no response or crash in kvm-mode

liguang.zhang posted 1 patch 11 months, 1 week ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20230612110215.6955-1-18622748025@163.com
Maintainers: Palmer Dabbelt <palmer@dabbelt.com>, Alistair Francis <alistair.francis@wdc.com>, Bin Meng <bin.meng@windriver.com>, Weiwei Li <liweiwei@iscas.ac.cn>, Daniel Henrique Barboza <dbarboza@ventanamicro.com>, Liu Zhiwei <zhiwei_liu@linux.alibaba.com>, Paolo Bonzini <pbonzini@redhat.com>
There is a newer version of this series
target/riscv/kvm.c       | 43 ++++++++++++++++++++++++++++++++++++++++
target/riscv/kvm_riscv.h |  1 +
2 files changed, 44 insertions(+)
[PATCH] target/riscv: fix the issue of guest reboot then no response or crash in kvm-mode
Posted by liguang.zhang 11 months, 1 week ago
From: "liguang.zhang" <liguang.zhang@hexintek.com>

There are two guest reboot issues in KVM mode:
1. Running reboot in the guest shell fails to reboot the guest, and the host KVM stops scheduling the vcpu.
2. For an SMP guest, switching to the QEMU monitor (ctrl+a c) and resetting the guest with the system_reset command crashes the vcpus.

kernel log
```shell
$reboot

The system is going down NOW!
Sent SIGTERM to all processes
logout
Sent SIGKILL to all processes
Requesting system reboot

```
then no response

for qemu command:
$system_reset:

kernel log:
```shell
[   53.739556] kvm [150]: VCPU exit error -95
[   53.739563] kvm [148]: VCPU exit error -95
[   53.739557] kvm [149]: VCPU exit error -95
[   53.740957] kvm [149]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
[   53.740957] kvm [148]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
[   53.741054] kvm [148]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
[   53.741058] kvm [149]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
[   53.756187] kvm [150]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
[   53.757797] kvm [150]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
```

solution:

Reset the CSRs and register context of the RISC-V vcpu on reset.
Have QEMU issue the KVM_SET_MP_STATE ioctl to reset KVM's vcpu->arch.power_off state.

tests:

qemu-system-riscv64 -M virt -bios none -kernel Image \
   -smp 4 -enable-kvm \
   -append "rootwait root=/dev/vda ro" \
   -drive file=rootfs.ext2,format=raw,id=hd0 \
   -device virtio-blk-device,drive=hd0

in guest shell:
$reboot

qemu command:
$system_reset

---
v2:
- update submit description

Signed-off-by: liguang.zhang <liguang.zhang@hexintek.com>
---
 target/riscv/kvm.c       | 43 ++++++++++++++++++++++++++++++++++++++++
 target/riscv/kvm_riscv.h |  1 +
 2 files changed, 44 insertions(+)

diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 0f932a5b96..c6a7824c9e 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -42,6 +42,8 @@
 #include "migration/migration.h"
 #include "sysemu/runstate.h"
 
+static bool cap_has_mp_state;
+
 static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
                                  uint64_t idx)
 {
@@ -335,6 +337,25 @@ int kvm_arch_get_registers(CPUState *cs)
     return ret;
 }
 
+int kvm_riscv_set_mpstate_to_kvm(RISCVCPU *cpu, int state)
+{
+    if (cap_has_mp_state) {
+
+        struct kvm_mp_state mp_state = {
+            .mp_state = state
+        };
+
+        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
+        if (ret) {
+            fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n",
+                    __func__, ret, strerror(-ret));
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
 int kvm_arch_put_registers(CPUState *cs, int level)
 {
     int ret = 0;
@@ -354,6 +375,18 @@ int kvm_arch_put_registers(CPUState *cs, int level)
         return ret;
     }
 
+    if (KVM_PUT_RESET_STATE == level) {
+        RISCVCPU *cpu = RISCV_CPU(cs);
+        if (cs->cpu_index == 0) {
+            ret = kvm_riscv_set_mpstate_to_kvm(cpu, KVM_MP_STATE_RUNNABLE);
+        } else {
+            ret = kvm_riscv_set_mpstate_to_kvm(cpu, KVM_MP_STATE_STOPPED);
+        }
+        if (ret) {
+            return ret;
+        }
+    }
+
     return ret;
 }
 
@@ -428,6 +461,7 @@ int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
 
 int kvm_arch_init(MachineState *ms, KVMState *s)
 {
+    cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
     return 0;
 }
 
@@ -506,10 +540,19 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
     if (!kvm_enabled()) {
         return;
     }
+    for (int i=0; i<32; i++)
+        env->gpr[i] = 0;
     env->pc = cpu->env.kernel_addr;
     env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */
     env->gpr[11] = cpu->env.fdt_addr;          /* a1 */
     env->satp = 0;
+    env->mie = 0;
+    env->stvec = 0;
+    env->sscratch = 0;
+    env->sepc = 0;
+    env->scause = 0;
+    env->stval = 0;
+    env->mip = 0;
 }
 
 void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
index ed281bdce0..4a4c262820 100644
--- a/target/riscv/kvm_riscv.h
+++ b/target/riscv/kvm_riscv.h
@@ -21,5 +21,6 @@
 
 void kvm_riscv_reset_vcpu(RISCVCPU *cpu);
 void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level);
+int kvm_riscv_set_mpstate_to_kvm(RISCVCPU *cpu, int state);
 
 #endif
-- 
2.17.1
Re: [PATCH] target/riscv: fix the issue of guest reboot then no response or crash in kvm-mode
Posted by Alistair Francis 10 months, 4 weeks ago
On Mon, Jun 12, 2023 at 11:07 PM liguang.zhang <18622748025@163.com> wrote:
>
> From: "liguang.zhang" <liguang.zhang@hexintek.com>

Hello, thanks for the patch

>
> There have a issue of guest reboot bug in kvm-mode:
> 1. in guest shell just run the reboot, guest can't reboot success, and host kvm stop the vcpu schedual.
> 2. for smp guest, ctrl+a+c switch to qemu command, use system_reset command to reset the guest, then vcpu crash

There are two issues when rebooting a guest using KVM
 1. When the guest initiates a reboot the host is unable to stop the vcpu
 2. When running a SMP guest the qemu monitor system_reset causes a vcpu crash

This can be fixed by clearing the CSR values at reset and syncing the
MPSTATE with the host.

>
> kernel log
> ```shell
> $reboot
>
> The system is going down NOW!
> Sent SIGTERM to all processes
> logout
> Sent SIGKILL to all processes
> Requesting system reboot
>
> ```
> then no response
>
> for qemu command:
> $system_reset:
>
> kernel log:
> ```shell
> [   53.739556] kvm [150]: VCPU exit error -95
> [   53.739563] kvm [148]: VCPU exit error -95
> [   53.739557] kvm [149]: VCPU exit error -95
> [   53.740957] kvm [149]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
> [   53.740957] kvm [148]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
> [   53.741054] kvm [148]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
> [   53.741058] kvm [149]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
> [   53.756187] kvm [150]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
> [   53.757797] kvm [150]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
> ```
>
> solution:
>
> add reset csr and context for riscv vcpu
> qemu ioctl reset vcpu->arch.power_off state of kvm
>
> tests:
>
> qemu-system-riscv64 -M virt -bios none -kernel Image \
>    -smp 4 -enable-kvm \
>    -append "rootwait root=/dev/vda ro" \
>    -drive file=rootfs.ext2,format=raw,id=hd0 \
>    -device virtio-blk-device,drive=hd0
>
> in guest shell:
> $reboot
>
> qemu command:
> $system_reset
>
> ---
> v2:
> - update submit description
>
> Signed-off-by: liguang.zhang <liguang.zhang@hexintek.com>
> ---
>  target/riscv/kvm.c       | 43 ++++++++++++++++++++++++++++++++++++++++
>  target/riscv/kvm_riscv.h |  1 +
>  2 files changed, 44 insertions(+)
>
> diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
> index 0f932a5b96..c6a7824c9e 100644
> --- a/target/riscv/kvm.c
> +++ b/target/riscv/kvm.c
> @@ -42,6 +42,8 @@
>  #include "migration/migration.h"
>  #include "sysemu/runstate.h"
>
> +static bool cap_has_mp_state;
> +
>  static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
>                                   uint64_t idx)
>  {
> @@ -335,6 +337,25 @@ int kvm_arch_get_registers(CPUState *cs)
>      return ret;
>  }
>
> +int kvm_riscv_set_mpstate_to_kvm(RISCVCPU *cpu, int state)

This should probably be called:

kvm_riscv_sync_mpstate_to_kvm()

instead

> +{
> +    if (cap_has_mp_state) {
> +

No newline required

Otherwise the patch looks good

Alistair

> +        struct kvm_mp_state mp_state = {
> +            .mp_state = state
> +        };
> +
> +        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
> +        if (ret) {
> +            fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n",
> +                    __func__, ret, strerror(-ret));
> +            return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  int kvm_arch_put_registers(CPUState *cs, int level)
>  {
>      int ret = 0;
> @@ -354,6 +375,18 @@ int kvm_arch_put_registers(CPUState *cs, int level)
>          return ret;
>      }
>
> +    if (KVM_PUT_RESET_STATE == level) {
> +        RISCVCPU *cpu = RISCV_CPU(cs);
> +        if (cs->cpu_index == 0) {
> +            ret = kvm_riscv_set_mpstate_to_kvm(cpu, KVM_MP_STATE_RUNNABLE);
> +        } else {
> +            ret = kvm_riscv_set_mpstate_to_kvm(cpu, KVM_MP_STATE_STOPPED);
> +        }
> +        if (ret) {
> +            return ret;
> +        }
> +    }
> +
>      return ret;
>  }
>
> @@ -428,6 +461,7 @@ int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
>
>  int kvm_arch_init(MachineState *ms, KVMState *s)
>  {
> +    cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
>      return 0;
>  }
>
> @@ -506,10 +540,19 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
>      if (!kvm_enabled()) {
>          return;
>      }
> +    for (int i=0; i<32; i++)
> +        env->gpr[i] = 0;
>      env->pc = cpu->env.kernel_addr;
>      env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */
>      env->gpr[11] = cpu->env.fdt_addr;          /* a1 */
>      env->satp = 0;
> +    env->mie = 0;
> +    env->stvec = 0;
> +    env->sscratch = 0;
> +    env->sepc = 0;
> +    env->scause = 0;
> +    env->stval = 0;
> +    env->mip = 0;
>  }
>
>  void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
> diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
> index ed281bdce0..4a4c262820 100644
> --- a/target/riscv/kvm_riscv.h
> +++ b/target/riscv/kvm_riscv.h
> @@ -21,5 +21,6 @@
>
>  void kvm_riscv_reset_vcpu(RISCVCPU *cpu);
>  void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level);
> +int kvm_riscv_set_mpstate_to_kvm(RISCVCPU *cpu, int state);
>
>  #endif
> --
> 2.17.1
>
>
Re:Re:[PATCH v3] target/riscv: fix the issue of guest reboot then no response or crash in kvm-mode
Posted by liguang.zhang 10 months ago
From: "liguang.zhang" <liguang.zhang@hexintek.com>

On Sun, July 18, 2023 at 6:55 PM liguang.zhang <18622748025@163.com> wrote:

> On Sun, July 10, 2023 at 9:17 alistair <alistair23@gmail.com> wrote:
> 
> > From: "liguang.zhang" <liguang.zhang@hexintek.com>
> > 
> > There are two issues when rebooting a guest using KVM
> > 1. When the guest initiates a reboot the host is unable to stop the vcpu
> > 2. When running a SMP guest the qemu monitor system_reset causes a vcpu crash
> > 
> > This can be fixed by clearing the CSR values at reset and syncing the
> > MPSTATE with the host.
> 
> Thanks for the patch.
> 
> I think we should move everything
> 
> --- from here ---
> 
> 
> > kernel log
> > ```shell
> > $reboot
> > 
> > The system is going down NOW!
> > Sent SIGTERM to all processes
> > logout
> > Sent SIGKILL to all processes
> > Requesting system reboot
> > 
> > ```
> > then no response
> > 
> > for qemu command:
> > $system_reset:
> > 
> > kernel log:
> > ```shell
> > [   53.739556] kvm [150]: VCPU exit error -95
> > [   53.739563] kvm [148]: VCPU exit error -95
> > [   53.739557] kvm [149]: VCPU exit error -95
> > [   53.740957] kvm [149]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
> > [   53.740957] kvm [148]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
> > [   53.741054] kvm [148]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
> > [   53.741058] kvm [149]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
> > [   53.756187] kvm [150]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
> > [   53.757797] kvm [150]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
> > ```
> > 
> > solution:
> > 
> > add reset csr and context for riscv vcpu
> > qemu ioctl reset vcpu->arch.power_off state of kvm
> > 
> > tests:
> > 
> > qemu-system-riscv64 -M virt -bios none -kernel Image \
> > -smp 4 -enable-kvm \
> > -append "rootwait root=/dev/vda ro" \
> > -drive file=rootfs.ext2,format=raw,id=hd0 \
> > -device virtio-blk-device,drive=hd0
> > 
> > in guest shell:
> > $reboot
> > 
> > qemu command:
> > $system_reset
> > 
> > ---
> > v3:
> > - change kvm_riscv_set_mpstate_to_kvm to kvm_riscv_sync_mpstate_to_kvm
> > - remove newline after if(cap_has_mp_state)
> > - update submit description
> > 
> --- to here ---
> 
> 
> > Signed-off-by: liguang.zhang <liguang.zhang@hexintek.com>
> > ---
> 
> below this line. That way it will be included in the patch submission,
> but won't be included in the git tree.
> 
> We never include the patch changelog in the git tree and I don't think
> we need to include the steps as well (they will be preserved on the
> mailing list).
> 

Ok, It's no problem.

> 
> For the patch title (the git commit title) can you explain what the
> patch is doing?
> 
How about using this title:
"target/riscv: Clearing the CSR values at reset and syncing the MPSTATE with the host"

And the reason description is

Fix the guest reboot error when using KVM
There are two issues when rebooting a guest using KVM
1. When the guest initiates a reboot the host is unable to stop the vcpu
2. When running a SMP guest the qemu monitor system_reset causes a vcpu crash

This can be fixed by clearing the CSR values at reset and syncing the
MPSTATE with the host.

> 
> Also, do you mind rebasing on
> https://github.com/alistair23/qemu/tree/riscv-to-apply.next
> 
I don't mind rebasing on it.
Thanks~
-- liguang.zhang
> 
> Then the patch should be good to go!
> 
> Alistair
> 
> target/riscv/kvm.c       | 44 +++++++++++++++++++++++++++++++++++++++-
> target/riscv/kvm_riscv.h |  1 +
> 2 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
> index 0f932a5b96..c478c71905 100644
> --- a/target/riscv/kvm.c
> +++ b/target/riscv/kvm.c
> @@ -42,6 +42,8 @@
> #include "migration/migration.h"
> #include "sysemu/runstate.h"
> 
> +static bool cap_has_mp_state;
> +
> static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
> uint64_t idx)
> {
> @@ -99,7 +101,7 @@ static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
> 
> #define KVM_RISCV_SET_TIMER(cs, env, name, reg) \
> do { \
> -        int ret = kvm_set_one_reg(cs, RISCV_TIMER_REG(env, time), &reg); \
> +        int ret = kvm_set_one_reg(cs, RISCV_TIMER_REG(env, name), &reg); \
> if (ret) { \
> abort(); \
> } \
> @@ -335,6 +337,24 @@ int kvm_arch_get_registers(CPUState *cs)
> return ret;
> }
> 
> +int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state)
> +{
> +    if (cap_has_mp_state) {
> +        struct kvm_mp_state mp_state = {
> +            .mp_state = state
> +        };
> +
> +        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
> +        if (ret) {
> +            fprintf(stderr, "%s: failed to sync MP_STATE %d/%s\n",
> +                    __func__, ret, strerror(-ret));
> +            return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> int kvm_arch_put_registers(CPUState *cs, int level)
> {
> int ret = 0;
> @@ -354,6 +374,18 @@ int kvm_arch_put_registers(CPUState *cs, int level)
> return ret;
> }
> 
> +    if (KVM_PUT_RESET_STATE == level) {
> +        RISCVCPU *cpu = RISCV_CPU(cs);
> +        if (cs->cpu_index == 0) {
> +            ret = kvm_riscv_sync_mpstate_to_kvm(cpu, KVM_MP_STATE_RUNNABLE);
> +        } else {
> +            ret = kvm_riscv_sync_mpstate_to_kvm(cpu, KVM_MP_STATE_STOPPED);
> +        }
> +        if (ret) {
> +            return ret;
> +        }
> +    }
> +
> return ret;
> }
> 
> @@ -428,6 +460,7 @@ int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
> 
> int kvm_arch_init(MachineState *ms, KVMState *s)
> {
> +    cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
> return 0;
> }
> 
> @@ -506,10 +539,19 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
> if (!kvm_enabled()) {
> return;
> }
> +    for (int i=0; i<32; i++)
> +        env->gpr[i] = 0;
> env->pc = cpu->env.kernel_addr;
> env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */
> env->gpr[11] = cpu->env.fdt_addr;          /* a1 */
> env->satp = 0;
> +    env->mie = 0;
> +    env->stvec = 0;
> +    env->sscratch = 0;
> +    env->sepc = 0;
> +    env->scause = 0;
> +    env->stval = 0;
> +    env->mip = 0;
> }
> 
> void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
> diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
> index ed281bdce0..88aee902dd 100644
> --- a/target/riscv/kvm_riscv.h
> +++ b/target/riscv/kvm_riscv.h
> @@ -21,5 +21,6 @@
> 
> void kvm_riscv_reset_vcpu(RISCVCPU *cpu);
> void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level);
> +int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state);
> 
> #endif
> --
> 2.17.1


Re:Re:[PATCH v3] target/riscv: fix the issue of guest reboot then no response or crash in kvm-mode
Posted by liguang.zhang 10 months ago
From: "liguang.zhang" <liguang.zhang@hexintek.com>

On Sun, July 18, 2023 at 6:55 PM liguang.zhang <18622748025@163.com> wrote:

> On Sun, July 10, 2023 at 9:17 alistair <alistair23@gmail.com> wrote:
> 
> > From: "liguang.zhang" <liguang.zhang@hexintek.com>
> > 
> > There are two issues when rebooting a guest using KVM
> > 1. When the guest initiates a reboot the host is unable to stop the vcpu
> > 2. When running a SMP guest the qemu monitor system_reset causes a vcpu crash
> > 
> > This can be fixed by clearing the CSR values at reset and syncing the
> > MPSTATE with the host.
> 
> Thanks for the patch.
> 
> I think we should move everything
> 
> --- from here ---
> 
> 
> > kernel log
> > ```shell
> > $reboot
> > 
> > The system is going down NOW!
> > Sent SIGTERM to all processes
> > logout
> > Sent SIGKILL to all processes
> > Requesting system reboot
> > 
> > ```
> > then no response
> > 
> > for qemu command:
> > $system_reset:
> > 
> > kernel log:
> > ```shell
> > [   53.739556] kvm [150]: VCPU exit error -95
> > [   53.739563] kvm [148]: VCPU exit error -95
> > [   53.739557] kvm [149]: VCPU exit error -95
> > [   53.740957] kvm [149]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
> > [   53.740957] kvm [148]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
> > [   53.741054] kvm [148]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
> > [   53.741058] kvm [149]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
> > [   53.756187] kvm [150]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
> > [   53.757797] kvm [150]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
> > ```
> > 
> > solution:
> > 
> > add reset csr and context for riscv vcpu
> > qemu ioctl reset vcpu->arch.power_off state of kvm
> > 
> > tests:
> > 
> > qemu-system-riscv64 -M virt -bios none -kernel Image \
> > -smp 4 -enable-kvm \
> > -append "rootwait root=/dev/vda ro" \
> > -drive file=rootfs.ext2,format=raw,id=hd0 \
> > -device virtio-blk-device,drive=hd0
> > 
> > in guest shell:
> > $reboot
> > 
> > qemu command:
> > $system_reset
> > 
> > ---
> > v3:
> > - change kvm_riscv_set_mpstate_to_kvm to kvm_riscv_sync_mpstate_to_kvm
> > - remove newline after if(cap_has_mp_state)
> > - update submit description
> > 
> --- to here ---
> 
> 
> > Signed-off-by: liguang.zhang <liguang.zhang@hexintek.com>
> > ---
> 
> below this line. That way it will be included in the patch submission,
> but won't be included in the git tree.
> 
> We never include the patch changelog in the git tree and I don't think
> we need to include the steps as well (they will be preserved on the
> mailing list).
> 

Ok, It's no problem.

> 
> For the patch title (the git commit title) can you explain what the
> patch is doing?
> 
How about using this title:
"target/riscv: Clearing the CSR values at reset and syncing the MPSTATE with the host"

And the reason description is

Fix the guest reboot error when using KVM
There are two issues when rebooting a guest using KVM
1. When the guest initiates a reboot the host is unable to stop the vcpu
2. When running a SMP guest the qemu monitor system_reset causes a vcpu crash

This can be fixed by clearing the CSR values at reset and syncing the
MPSTATE with the host.

> 
> Also, do you mind rebasing on
> https://github.com/alistair23/qemu/tree/riscv-to-apply.next
> 
I don't mind rebasing on it.
Thanks~
-- liguang.zhang
> 
> Then the patch should be good to go!
> 
> Alistair
> 
> target/riscv/kvm.c       | 44 +++++++++++++++++++++++++++++++++++++++-
> target/riscv/kvm_riscv.h |  1 +
> 2 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
> index 0f932a5b96..c478c71905 100644
> --- a/target/riscv/kvm.c
> +++ b/target/riscv/kvm.c
> @@ -42,6 +42,8 @@
> #include "migration/migration.h"
> #include "sysemu/runstate.h"
> 
> +static bool cap_has_mp_state;
> +
> static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
> uint64_t idx)
> {
> @@ -99,7 +101,7 @@ static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
> 
> #define KVM_RISCV_SET_TIMER(cs, env, name, reg) \
> do { \
> -        int ret = kvm_set_one_reg(cs, RISCV_TIMER_REG(env, time), &reg); \
> +        int ret = kvm_set_one_reg(cs, RISCV_TIMER_REG(env, name), &reg); \
> if (ret) { \
> abort(); \
> } \
> @@ -335,6 +337,24 @@ int kvm_arch_get_registers(CPUState *cs)
> return ret;
> }
> 
> +int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state)
> +{
> +    if (cap_has_mp_state) {
> +        struct kvm_mp_state mp_state = {
> +            .mp_state = state
> +        };
> +
> +        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
> +        if (ret) {
> +            fprintf(stderr, "%s: failed to sync MP_STATE %d/%s\n",
> +                    __func__, ret, strerror(-ret));
> +            return -1;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> int kvm_arch_put_registers(CPUState *cs, int level)
> {
> int ret = 0;
> @@ -354,6 +374,18 @@ int kvm_arch_put_registers(CPUState *cs, int level)
> return ret;
> }
> 
> +    if (KVM_PUT_RESET_STATE == level) {
> +        RISCVCPU *cpu = RISCV_CPU(cs);
> +        if (cs->cpu_index == 0) {
> +            ret = kvm_riscv_sync_mpstate_to_kvm(cpu, KVM_MP_STATE_RUNNABLE);
> +        } else {
> +            ret = kvm_riscv_sync_mpstate_to_kvm(cpu, KVM_MP_STATE_STOPPED);
> +        }
> +        if (ret) {
> +            return ret;
> +        }
> +    }
> +
> return ret;
> }
> 
> @@ -428,6 +460,7 @@ int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
> 
> int kvm_arch_init(MachineState *ms, KVMState *s)
> {
> +    cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
> return 0;
> }
> 
> @@ -506,10 +539,19 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
> if (!kvm_enabled()) {
> return;
> }
> +    for (int i=0; i<32; i++)
> +        env->gpr[i] = 0;
> env->pc = cpu->env.kernel_addr;
> env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */
> env->gpr[11] = cpu->env.fdt_addr;          /* a1 */
> env->satp = 0;
> +    env->mie = 0;
> +    env->stvec = 0;
> +    env->sscratch = 0;
> +    env->sepc = 0;
> +    env->scause = 0;
> +    env->stval = 0;
> +    env->mip = 0;
> }
> 
> void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
> diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
> index ed281bdce0..88aee902dd 100644
> --- a/target/riscv/kvm_riscv.h
> +++ b/target/riscv/kvm_riscv.h
> @@ -21,5 +21,6 @@
> 
> void kvm_riscv_reset_vcpu(RISCVCPU *cpu);
> void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level);
> +int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state);
> 
> #endif
> --
> 2.17.1


Re: [PATCH] target/riscv: fix the issue of guest reboot then no response or crash in kvm-mode
Posted by 张立广 10 months, 3 weeks ago
Hi Alistair:

Thanks for your reply.
I'll update the patch following your suggestions and send a new version; please help review it again.

---- Replied Message ----
From    Alistair Francis<alistair23@gmail.com> <mailto:alistair23@gmail.com>
Date    6/23/2023 10:15
To      liguang.zhang<18622748025@163.com> <mailto:18622748025@163.com>
Cc      qemu-devel@nongnu.org<qemu-devel@nongnu.org> ,
<mailto:qemu-devel@nongnu.org>pbonzini@redhat.com<pbonzini@redhat.com> ,
<mailto:pbonzini@redhat.com>张立广<liguang.zhang@hexintek.com> <mailto:liguang.zhang@hexintek.com>
Subject         Re: [PATCH] target/riscv: fix the issue of guest reboot then no response or crash in kvm-mode
On Mon, Jun 12, 2023 at 11:07 PM liguang.zhang <18622748025@163.com> wrote:

From: "liguang.zhang" <liguang.zhang@hexintek.com>

Hello, thanks for the patch


There have a issue of guest reboot bug in kvm-mode:
1. in guest shell just run the reboot, guest can't reboot success, and host kvm stop the vcpu schedual.
2. for smp guest, ctrl+a+c switch to qemu command, use system_reset command to reset the guest, then vcpu crash

There are two issues when rebooting a guest using KVM
1. When the guest initiates a reboot the host is unable to stop the vcpu
2. When running a SMP guest the qemu monitor system_reset causes a vcpu crash

This can be fixed by clearing the CSR values at reset and syncing the
MPSTATE with the host.


kernel log
```shell
$reboot

The system is going down NOW!
Sent SIGTERM to all processes
logout
Sent SIGKILL to all processes
Requesting system reboot

```
then no response

for qemu command:
$system_reset:

kernel log:
```shell
[   53.739556] kvm [150]: VCPU exit error -95
[   53.739563] kvm [148]: VCPU exit error -95
[   53.739557] kvm [149]: VCPU exit error -95
[   53.740957] kvm [149]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
[   53.740957] kvm [148]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
[   53.741054] kvm [148]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
[   53.741058] kvm [149]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
[   53.756187] kvm [150]: SEPC=0x0 SSTATUS=0x200004120 HSTATUS=0x2002001c0
[   53.757797] kvm [150]: SCAUSE=0x14 STVAL=0x0 HTVAL=0x0 HTINST=0x0
```

solution:

add reset csr and context for riscv vcpu
qemu ioctl reset vcpu->arch.power_off state of kvm

tests:

qemu-system-riscv64 -M virt -bios none -kernel Image \
-smp 4 -enable-kvm \
-append "rootwait root=/dev/vda ro" \
-drive file=rootfs.ext2,format=raw,id=hd0 \
-device virtio-blk-device,drive=hd0

in guest shell:
$reboot

qemu command:
$system_reset

---
v2:
- update submit description

Signed-off-by: liguang.zhang <liguang.zhang@hexintek.com>
---
target/riscv/kvm.c       | 43 ++++++++++++++++++++++++++++++++++++++++
target/riscv/kvm_riscv.h |  1 +
2 files changed, 44 insertions(+)

diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 0f932a5b96..c6a7824c9e 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -42,6 +42,8 @@
#include "migration/migration.h"
#include "sysemu/runstate.h"

+static bool cap_has_mp_state;
+
static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
uint64_t idx)
{
@@ -335,6 +337,25 @@ int kvm_arch_get_registers(CPUState *cs)
return ret;
}

+int kvm_riscv_set_mpstate_to_kvm(RISCVCPU *cpu, int state)

This should probably be called:

kvm_riscv_sync_mpstate_to_kvm()

instead

+{
+    if (cap_has_mp_state) {
+

No newline required

Otherwise the patch looks good

Alistair

+        struct kvm_mp_state mp_state = {
+            .mp_state = state
+        };
+
+        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
+        if (ret) {
+            fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n",
+                    __func__, ret, strerror(-ret));
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
int kvm_arch_put_registers(CPUState *cs, int level)
{
int ret = 0;
@@ -354,6 +375,18 @@ int kvm_arch_put_registers(CPUState *cs, int level)
return ret;
}

+    if (KVM_PUT_RESET_STATE == level) {
+        RISCVCPU *cpu = RISCV_CPU(cs);
+        if (cs->cpu_index == 0) {
+            ret = kvm_riscv_set_mpstate_to_kvm(cpu, KVM_MP_STATE_RUNNABLE);
+        } else {
+            ret = kvm_riscv_set_mpstate_to_kvm(cpu, KVM_MP_STATE_STOPPED);
+        }
+        if (ret) {
+            return ret;
+        }
+    }
+
return ret;
}

@@ -428,6 +461,7 @@ int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,

int kvm_arch_init(MachineState *ms, KVMState *s)
{
+    cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
return 0;
}

@@ -506,10 +540,19 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
if (!kvm_enabled()) {
return;
}
+    for (int i=0; i<32; i++)
+        env->gpr[i] = 0;
env->pc = cpu->env.kernel_addr;
env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */
env->gpr[11] = cpu->env.fdt_addr;          /* a1 */
env->satp = 0;
+    env->mie = 0;
+    env->stvec = 0;
+    env->sscratch = 0;
+    env->sepc = 0;
+    env->scause = 0;
+    env->stval = 0;
+    env->mip = 0;
}

void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
index ed281bdce0..4a4c262820 100644
--- a/target/riscv/kvm_riscv.h
+++ b/target/riscv/kvm_riscv.h
@@ -21,5 +21,6 @@

void kvm_riscv_reset_vcpu(RISCVCPU *cpu);
void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level);
+int kvm_riscv_set_mpstate_to_kvm(RISCVCPU *cpu, int state);

#endif
--
2.17.1

This message contains confidential information and is intended only for the individual(s) addressed in the message. If you aren't the named addressee, you should not disseminate, distribute, or copy this e-mail.
[PATCH] target/riscv: Clearing the CSR values at reset and syncing the MPSTATE with the host
Posted by liguang.zhang 10 months ago
From: "liguang.zhang" <liguang.zhang@hexintek.com>

Fix the guest reboot error when using KVM
There are two issues when rebooting a guest using KVM
1. When the guest initiates a reboot the host is unable to stop the vcpu
2. When running a SMP guest the qemu monitor system_reset causes a vcpu crash

This can be fixed by clearing the CSR values at reset and syncing the
MPSTATE with the host.

Signed-off-by: liguang.zhang <liguang.zhang@hexintek.com>
---
 target/riscv/kvm.c       | 42 ++++++++++++++++++++++++++++++++++++++++
 target/riscv/kvm_riscv.h |  1 +
 2 files changed, 43 insertions(+)

diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 9d8a8982f9..ecc8ab8238 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -44,6 +44,8 @@
 #include "migration/migration.h"
 #include "sysemu/runstate.h"
 
+static bool cap_has_mp_state;
+
 static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
                                  uint64_t idx)
 {
@@ -790,6 +792,24 @@ int kvm_arch_get_registers(CPUState *cs)
     return ret;
 }
 
+int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state)
+{
+    if (cap_has_mp_state) {
+        struct kvm_mp_state mp_state = {
+            .mp_state = state
+        };
+
+        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
+        if (ret) {
+            fprintf(stderr, "%s: failed to sync MP_STATE %d/%s\n",
+                    __func__, ret, strerror(-ret));
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
 int kvm_arch_put_registers(CPUState *cs, int level)
 {
     int ret = 0;
@@ -809,6 +829,18 @@ int kvm_arch_put_registers(CPUState *cs, int level)
         return ret;
     }
 
+    if (KVM_PUT_RESET_STATE == level) {
+        RISCVCPU *cpu = RISCV_CPU(cs);
+        if (cs->cpu_index == 0) {
+            ret = kvm_riscv_sync_mpstate_to_kvm(cpu, KVM_MP_STATE_RUNNABLE);
+        } else {
+            ret = kvm_riscv_sync_mpstate_to_kvm(cpu, KVM_MP_STATE_STOPPED);
+        }
+        if (ret) {
+            return ret;
+        }
+    }
+
     return ret;
 }
 
@@ -909,6 +941,7 @@ int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
 
 int kvm_arch_init(MachineState *ms, KVMState *s)
 {
+    cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
     return 0;
 }
 
@@ -987,10 +1020,19 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
     if (!kvm_enabled()) {
         return;
     }
+    for (int i=0; i<32; i++)
+        env->gpr[i] = 0;
     env->pc = cpu->env.kernel_addr;
     env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */
     env->gpr[11] = cpu->env.fdt_addr;          /* a1 */
     env->satp = 0;
+    env->mie = 0;
+    env->stvec = 0;
+    env->sscratch = 0;
+    env->sepc = 0;
+    env->scause = 0;
+    env->stval = 0;
+    env->mip = 0;
 }
 
 void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
index e3ba935808..3ea68c38e3 100644
--- a/target/riscv/kvm_riscv.h
+++ b/target/riscv/kvm_riscv.h
@@ -22,5 +22,6 @@
 void kvm_riscv_init_user_properties(Object *cpu_obj);
 void kvm_riscv_reset_vcpu(RISCVCPU *cpu);
 void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level);
+int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state);
 
 #endif
-- 
2.17.1
[PATCH] target/riscv: Clear the CSR values at reset and sync the MPSTATE with the host
Posted by liguang.zhang 10 months ago
From: "liguang.zhang" <liguang.zhang@hexintek.com>

Fix the guest reboot error when using KVM.

There are two issues when rebooting a guest using KVM:
1. When the guest initiates a reboot, the host is unable to stop the vcpu.
2. When running an SMP guest, the QEMU monitor system_reset command causes
   a vcpu crash.

This can be fixed by clearing the general-purpose registers and the CSR
values at reset and syncing the MPSTATE with the host (on reset, vcpu 0 is
set to KVM_MP_STATE_RUNNABLE and all other vcpus to KVM_MP_STATE_STOPPED).

Signed-off-by: liguang.zhang <liguang.zhang@hexintek.com>
---
 target/riscv/kvm.c       | 42 ++++++++++++++++++++++++++++++++++++++++
 target/riscv/kvm_riscv.h |  1 +
 2 files changed, 43 insertions(+)

diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 9d8a8982f9..ecc8ab8238 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -44,6 +44,8 @@
 #include "migration/migration.h"
 #include "sysemu/runstate.h"
 
+static bool cap_has_mp_state;
+
 static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type,
                                  uint64_t idx)
 {
@@ -790,6 +792,24 @@ int kvm_arch_get_registers(CPUState *cs)
     return ret;
 }
 
+int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state)
+{
+    if (cap_has_mp_state) {
+        struct kvm_mp_state mp_state = {
+            .mp_state = state
+        };
+
+        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
+        if (ret) {
+            fprintf(stderr, "%s: failed to sync MP_STATE %d/%s\n",
+                    __func__, ret, strerror(-ret));
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
 int kvm_arch_put_registers(CPUState *cs, int level)
 {
     int ret = 0;
@@ -809,6 +829,18 @@ int kvm_arch_put_registers(CPUState *cs, int level)
         return ret;
     }
 
+    if (KVM_PUT_RESET_STATE == level) {
+        RISCVCPU *cpu = RISCV_CPU(cs);
+        if (cs->cpu_index == 0) {
+            ret = kvm_riscv_sync_mpstate_to_kvm(cpu, KVM_MP_STATE_RUNNABLE);
+        } else {
+            ret = kvm_riscv_sync_mpstate_to_kvm(cpu, KVM_MP_STATE_STOPPED);
+        }
+        if (ret) {
+            return ret;
+        }
+    }
+
     return ret;
 }
 
@@ -909,6 +941,7 @@ int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
 
 int kvm_arch_init(MachineState *ms, KVMState *s)
 {
+    cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
     return 0;
 }
 
@@ -987,10 +1020,19 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
     if (!kvm_enabled()) {
         return;
     }
+    for (int i=0; i<32; i++)
+        env->gpr[i] = 0;
     env->pc = cpu->env.kernel_addr;
     env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */
     env->gpr[11] = cpu->env.fdt_addr;          /* a1 */
     env->satp = 0;
+    env->mie = 0;
+    env->stvec = 0;
+    env->sscratch = 0;
+    env->sepc = 0;
+    env->scause = 0;
+    env->stval = 0;
+    env->mip = 0;
 }
 
 void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
index e3ba935808..3ea68c38e3 100644
--- a/target/riscv/kvm_riscv.h
+++ b/target/riscv/kvm_riscv.h
@@ -22,5 +22,6 @@
 void kvm_riscv_init_user_properties(Object *cpu_obj);
 void kvm_riscv_reset_vcpu(RISCVCPU *cpu);
 void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level);
+int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state);
 
 #endif
-- 
2.17.1