[PATCH] hw/block/nvme: add smart_critical_warning property

zhenwei pi posted 1 patch 3 years, 4 months ago
Test checkpatch passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20210111075003.151764-1-pizhenwei@bytedance.com
Maintainers: Klaus Jensen <its@irrelevant.dk>, Max Reitz <mreitz@redhat.com>, Kevin Wolf <kwolf@redhat.com>, Keith Busch <kbusch@kernel.org>
hw/block/nvme.c | 4 ++++
hw/block/nvme.h | 1 +
2 files changed, 5 insertions(+)
[PATCH] hw/block/nvme: add smart_critical_warning property
Posted by zhenwei pi 3 years, 4 months ago
A physical NVMe disk very rarely hits a hardware critical warning
condition, which makes it hard to write and test a monitoring
agent service.

For debugging purposes, add a new 'smart_critical_warning' property
to emulate this situation.

Test with this patch:
1. Append 'smart_critical_warning=16' to the nvme device parameters.
2. Run smartctl in the guest:
 #smartctl -H -l error /dev/nvme0n1

  === START OF SMART DATA SECTION ===
  SMART overall-health self-assessment test result: FAILED!
  - volatile memory backup device has failed

Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
---
 hw/block/nvme.c | 4 ++++
 hw/block/nvme.h | 1 +
 2 files changed, 5 insertions(+)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 27d2c72716..2f0bcac91c 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1215,6 +1215,8 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
 
     trans_len = MIN(sizeof(smart) - off, buf_len);
 
+    smart.critical_warning = n->params.smart_critical_warning;
+
     smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read,
                                                         1000));
     smart.data_units_written[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_written,
@@ -2824,6 +2826,8 @@ static Property nvme_props[] = {
     DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
     DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
     DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
+    DEFINE_PROP_UINT8("smart_critical_warning", NvmeCtrl,
+                      params.smart_critical_warning, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index e080a2318a..76684f5ac0 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -16,6 +16,7 @@ typedef struct NvmeParams {
     uint32_t aer_max_queued;
     uint8_t  mdts;
     bool     use_intel_id;
+    uint8_t  smart_critical_warning;
 } NvmeParams;
 
 typedef struct NvmeAsyncEvent {
-- 
2.25.1


Re: [PATCH] hw/block/nvme: add smart_critical_warning property
Posted by Philippe Mathieu-Daudé 3 years, 4 months ago
On 1/11/21 8:50 AM, zhenwei pi wrote:
> There is a very low probability that hitting physical NVMe disk
> hardware critical warning case, it's hard to write & test a monitor
> agent service.
> 
> For debugging purposes, add a new 'smart_critical_warning' property
> to emulate this situation.
> 
> Test with this patch:
> 1, append 'smart_critical_warning=16' for nvme parameters.
> 2, run smartctl in guest
>  #smartctl -H -l error /dev/nvme0n1
> 
>   === START OF SMART DATA SECTION ===
>   SMART overall-health self-assessment test result: FAILED!
>   - volatile memory backup device has failed
> 
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
> ---
>  hw/block/nvme.c | 4 ++++
>  hw/block/nvme.h | 1 +
>  2 files changed, 5 insertions(+)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 27d2c72716..2f0bcac91c 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1215,6 +1215,8 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
>  
>      trans_len = MIN(sizeof(smart) - off, buf_len);
>  
> +    smart.critical_warning = n->params.smart_critical_warning;
> +
>      smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read,
>                                                          1000));
>      smart.data_units_written[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_written,
> @@ -2824,6 +2826,8 @@ static Property nvme_props[] = {
>      DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
>      DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
>      DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
> +    DEFINE_PROP_UINT8("smart_critical_warning", NvmeCtrl,
> +                      params.smart_critical_warning, 0),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index e080a2318a..76684f5ac0 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -16,6 +16,7 @@ typedef struct NvmeParams {
>      uint32_t aer_max_queued;
>      uint8_t  mdts;
>      bool     use_intel_id;
> +    uint8_t  smart_critical_warning;
>  } NvmeParams;
>  
>  typedef struct NvmeAsyncEvent {
> 

This is an easy way to achieve your goal.

However, a better way is to add a QMP command to
change NvmeCtrl->temperature.

See for example tmp105_initfn() in hw/misc/tmp105.c
and qmp_tmp105_set_temperature() in tests/qtest/tmp105-test.c.

Regards,

Phil.